[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/llm 9541d34656 08/71: Enable streaming in Open AI with plz modifications
From: ELPA Syncer
Subject: [elpa] externals/llm 9541d34656 08/71: Enable streaming in Open AI with plz modifications
Date: Fri, 17 May 2024 00:58:43 -0400 (EDT)
branch: externals/llm
commit 9541d3465678d6f6013b855b62c672c6873573c3
Author: Andrew Hyatt <ahyatt@gmail.com>
Commit: Andrew Hyatt <ahyatt@gmail.com>
Enable streaming in Open AI with plz modifications
---
llm-openai.el | 158 +++++++++++++++++++----------------------------------
llm-request-plz.el | 18 ++++--
2 files changed, 70 insertions(+), 106 deletions(-)
diff --git a/llm-openai.el b/llm-openai.el
index 5927bf60b4..7e4186fe4c 100644
--- a/llm-openai.el
+++ b/llm-openai.el
@@ -219,8 +219,9 @@ PROMPT is the prompt that needs to be updated with the
response."
(llm-provider-utils-process-result
provider prompt
(llm-openai--normalize-function-calls
- (if (consp response) (llm-openai--extract-chat-response response)
- (llm-openai--get-partial-chat-response response))))))
+ (if response
+ (llm-openai--extract-chat-response response)
+ (llm-openai--get-partial-chat-response nil))))))
(cl-defmethod llm-chat-async ((provider llm-openai) prompt response-callback
error-callback)
(llm-openai--check-key provider)
@@ -253,115 +254,70 @@ PROMPT is the prompt that needs to be updated with the
response."
(defvar-local llm-openai-current-response ""
"The response so far from the server.")
-(defvar-local llm-openai-last-response 0
- "The number of the last streaming response we read.
-The responses from OpenAI are not numbered, but we just number
-them from 1 to however many are sent.")
-
(defun llm-openai--get-partial-chat-response (response)
- "Return the text in the partial chat response from RESPONSE."
- ;; To begin with, we should still be in the buffer with the actual response.
- (let ((current-response llm-openai-current-response)
- (last-response llm-openai-last-response))
- (with-temp-buffer
- (insert response)
- (let* ((complete-rx (rx (seq "finish_reason\":" (1+ (or ?\[ ?\] alpha))
"}]}" line-end)))
- (end-pos (save-excursion (goto-char (point-max))
- (when (search-backward-regexp
- complete-rx
- nil t)
- (line-end-position)))))
- (when end-pos
- (let* ((all-lines (seq-filter
- (lambda (line) (string-match-p complete-rx line))
- (split-string (buffer-substring-no-properties 1
end-pos) "\n")))
- (processed-lines
- (mapcar (lambda (line)
- (let ((delta (assoc-default
- 'delta
- (aref (assoc-default
- 'choices
- (json-read-from-string
- (replace-regexp-in-string
"data: " "" line)))
- 0))))
- (or (assoc-default 'content delta)
- (assoc-default 'tool_calls delta))))
- (seq-subseq all-lines last-response))))
- (if (stringp (car processed-lines))
- ;; The data is a string - a normal response, which we just
- ;; append to current-response (assuming it's also a string,
- ;; which it should be).
- (setq current-response
- (concat current-response (string-join processed-lines
"")))
- ;; If this is a streaming function call, current-response will be
- ;; a vector of function plists, containing the function name and
the arguments
- ;; as JSON.
- (when (equal "" current-response)
- (setq current-response (make-vector (length (car
processed-lines))
- nil)))
- (cl-loop for calls in processed-lines do
- (cl-loop for call in (append calls nil) do
- (let* ((index (assoc-default 'index call))
- (plist (aref current-response index))
- (function (assoc-default 'function
call))
- (name (assoc-default 'name function))
- (id (assoc-default 'id call))
- (arguments (assoc-default 'arguments
function)))
- (when name (setq plist (plist-put plist
:name name)))
- (when id (setq plist (plist-put plist :id
id)))
- (setq plist (plist-put plist :arguments
- (concat (plist-get
plist :arguments)
- arguments)))
- (aset current-response index plist)))))
-
- (setq last-response (length all-lines))))))
- ;; Has to be >= because when we store plists the length doesn't change, but
- ;; we still want to store the new response. For text, it should indeed be
- ;; ever-growing (but sometimes it shrinks and we don't want to store that).
- (when (>= (length current-response) (length llm-openai-current-response))
- (setq llm-openai-current-response current-response)
- (setq llm-openai-last-response last-response))
- ;; If we are dealing with function calling, massage it to look like the
- ;; normal function calling output.
- (if (vectorp current-response)
- (apply #'vector
- (mapcar (lambda (plist)
- `((id . ,(plist-get plist :id))
- (type . function)
- (function
- .
- ((name . ,(plist-get plist :name))
- (arguments . ,(plist-get plist :arguments))))))
- current-response))
- current-response)))
+ "Return the text in the partial chat response from RESPONSE.
+RESPONSE can be nil if the response is complete."
+ (when response
+ (let* ((delta (assoc-default 'delta (aref (assoc-default 'choices
response) 0)))
+ (content-or-call (or (assoc-default 'content delta)
+ (assoc-default 'tool_calls delta))))
+ (when content-or-call
+ (if (stringp content-or-call)
+ (setq llm-openai-current-response
+ (concat llm-openai-current-response content-or-call))
+ (let* ((index (assoc-default 'index content-or-call))
+ (plist (aref llm-openai-current-response index))
+ (function (assoc-default 'function content-or-call))
+ (name (assoc-default 'name function))
+ (id (assoc-default 'id content-or-call))
+ (arguments (assoc-default 'arguments function)))
+ (when name (setq plist (plist-put plist :name name)))
+ (when id (setq plist (plist-put plist :id id)))
+ (setq plist (plist-put plist :arguments
+ (concat (plist-get plist :arguments)
+ arguments)))
+ (aset llm-openai-current-response index plist))))))
+ (if (vectorp llm-openai-current-response)
+ (apply #'vector
+ (mapcar (lambda (plist)
+ `((id . ,(plist-get plist :id))
+ (type . function)
+ (function
+ .
+ ((name . ,(plist-get plist :name))
+ (arguments . ,(plist-get plist :arguments))))))
+ llm-openai-current-response))
+ llm-openai-current-response))
(cl-defmethod llm-chat-streaming ((provider llm-openai) prompt partial-callback
response-callback error-callback)
(llm-openai--check-key provider)
(let ((buf (current-buffer)))
- (llm-request-async (llm-openai--url provider "chat/completions")
- :headers (llm-openai--headers provider)
- :data (llm-openai--chat-request (llm-openai-chat-model
provider) prompt t)
- :on-error (lambda (_ data)
- (let ((errdata (cdr (assoc 'error data))))
- (llm-request-callback-in-buffer
- buf error-callback 'error
- (format "Problem calling Open AI: %s
message: %s"
- (cdr (assoc 'type errdata))
- (cdr (assoc 'message
errdata))))))
- :on-partial (lambda (data)
- (when-let ((response
(llm-openai--get-partial-chat-response data)))
- ;; We only send partial text updates,
not
- ;; updates related to function calls.
- (when (stringp response)
- (llm-request-callback-in-buffer buf
partial-callback response))))
- :on-success-raw (lambda (data)
+ (llm-request-plz-async (llm-openai--url provider "chat/completions")
+ :headers (llm-openai--headers provider)
+ :data (llm-openai--chat-request
(llm-openai-chat-model provider) prompt t)
+ :on-error (lambda (_ data)
+ (let ((errdata (cdr
+ (assoc 'error
+
(json-read-from-string data)))))
+ (llm-request-callback-in-buffer
+ buf error-callback 'error
+ (format "Problem calling Open AI: %s
message: %s"
+ (cdr (assoc 'type errdata))
+ (cdr (assoc 'message
errdata))))))
+ :on-partial (lambda (data)
+ (when (not (equal data "[DONE]"))
+ (when-let ((response
(llm-openai--get-partial-chat-response
+
(json-read-from-string data))))
+ (when (stringp response)
+ (llm-request-callback-in-buffer
buf partial-callback response)))))
+ :on-success (lambda (_)
(llm-request-callback-in-buffer
buf
response-callback
(llm-openai--process-and-return
- provider prompt
- data error-callback))))))
+ provider prompt nil
+ error-callback))))))
(cl-defmethod llm-name ((_ llm-openai))
"Open AI")
diff --git a/llm-request-plz.el b/llm-request-plz.el
index 1467cf23cf..d7bd3813b2 100644
--- a/llm-request-plz.el
+++ b/llm-request-plz.el
@@ -93,7 +93,8 @@ TIMEOUT is the number of seconds to wait for a response."
:data data
:timeout timeout))
-(cl-defun llm-request-plz-async (url &key headers data on-success
on-success-raw on-error _on-partial event-stream-handlers timeout)
+(cl-defun llm-request-plz-async (url &key headers data on-success
on-success-raw on-error
+ on-partial timeout)
"Make a request to URL.
Nothing will be returned.
@@ -119,10 +120,17 @@ optional argument, and mostly useful for streaming. If
not set,
the buffer is turned into JSON and passed to ON-SUCCESS."
(plz-media-type-request
'post url
- :as `(media-types ,(cons (cons "text/event-stream"
- (plz-media-type:text/event-stream
- :events event-stream-handlers))
- plz-media-types))
+ :as `(media-types
+ ,(cons
+ (cons "text/event-stream"
+ (plz-media-type:text/event-stream
+ :events `(("message" . ,(lambda (_ event)
+ (funcall on-partial
+
(plz-event-source-event-data event))))
+ ("error" . ,(lambda (_ event)
+ (funcall on-error
+ 'error
(plz-event-source-event-data event)))))))
+ plz-media-types))
:body (when data
(encode-coding-string (json-encode data) 'utf-8))
:headers (append headers
- [elpa] externals/llm updated (efe218ac13 -> 478afbcb41), ELPA Syncer, 2024/05/17
- [elpa] externals/llm 1f3b018dcb 03/71: Merge pull request #26 from r0man/plz, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 829bedabe6 04/71: Support for vertex embeddings, adding callback compatibility, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 2ac956a060 05/71: Add support for the application/x-ndjson media type, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 63f2b8ffbc 10/71: Merge branch 'main' into plz, ELPA Syncer, 2024/05/17
- [elpa] externals/llm a9cd296cd8 02/71: Add llm-request-plz.el, ELPA Syncer, 2024/05/17
- [elpa] externals/llm f33475eeae 01/71: Add plz and media type and event source extensions, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 41d0889bcb 06/71: Make sync use of plz return data instead of object, converted Gemini, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 84678edfae 07/71: Merge pull request #28 from r0man/plz, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 9541d34656 08/71: Enable streaming in Open AI with plz modifications,
ELPA Syncer <=
- [elpa] externals/llm c9ab8664ce 09/71: Support function streaming with Open AI & plz, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 3988fecb53 11/71: Make separate function for event streaming, w/ client-side handlers, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 9ce3d9e003 13/71: Port claude to llm-request-plz, ELPA Syncer, 2024/05/17
- [elpa] externals/llm f9213b981c 14/71: Use the plz request module for everything in Claude, ELPA Syncer, 2024/05/17
- [elpa] externals/llm cdbb41528c 18/71: Fix issue advancing the process buffer, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 3441784ae2 19/71: Fix error handling., ELPA Syncer, 2024/05/17
- [elpa] externals/llm 6f9c604e58 20/71: Decode body and chunks using a coding system, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 62495de57f 21/71: Don't decode error response twice, ELPA Syncer, 2024/05/17
- [elpa] externals/llm 03afb1d795 23/71: Use symbols for event types & improve timeout handling, ELPA Syncer, 2024/05/17
- [elpa] externals/llm e07c84b81b 24/71: Improve Curl/HTTP error handling, ELPA Syncer, 2024/05/17