From 7a43ab468a0dd0d7da8b9f8b4f242f41eb73fb4e Mon Sep 17 00:00:00 2001
From: Alvaro Mateos
Date: Mon, 30 Oct 2023 17:53:12 +0100
Subject: [PATCH 1/3] Streaming preview

---
 autogen/oai/client.py | 58 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index 35705f2b0fc..bf87abe97c0 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -8,11 +8,14 @@
 from flaml.automl.logger import logger_formatter

 from autogen.oai.openai_utils import get_key
+from autogen.token_count_utils import count_token

 try:
     from openai import OpenAI, APIError
     from openai.types.chat import ChatCompletion
+    from openai.types.chat.chat_completion import ChatCompletionMessage, Choice
     from openai.types.completion import Completion
+    from openai.types.completion_usage import CompletionUsage
     import diskcache

     ERROR = None
@@ -233,9 +236,8 @@ def yes_or_no_filter(context, response):
                         response.pass_filter = pass_filter
                         # TODO: add response.cost
                         return response
-            completions = client.chat.completions if "messages" in params else client.completions
             try:
-                response = completions.create(**params)
+                response = self._completions_create(client, params)
             except APIError:
                 logger.debug(f"config {i} failed", exc_info=1)
                 if i == last:
@@ -245,6 +247,58 @@ def yes_or_no_filter(context, response):
                     # Cache the response
                     cache.set(key, response)
                 return response
+
+    def _completions_create(self, client, params):
+        completions = client.chat.completions if "messages" in params else client.completions
+        # If streaming is enabled, iterate over the chunks of the response
+        if params.get("stream", False) and "messages" in params:
+            response_content = ""
+            completion_tokens = 0
+
+            # Set the terminal text color to green
+            print("\033[32m", end='')
+
+            # Send the chat completion request to OpenAI's API and process the response in chunks
+            for chunk in completions.create(**params):
+                if chunk.choices:
+                    content = chunk.choices[0].delta.content
+                    # If content is present, print it to the terminal and update response variables
+                    if content is not None:
+                        print(content, end='', flush=True)
+                        response_content += content
+                        completion_tokens += 1
+
+            # Reset the terminal text color
+            print("\033[0m\n")
+
+            # Prepare the final ChatCompletion object based on the accumulated data
+            prompt_tokens = count_token(params["messages"], chunk.model)
+            response = ChatCompletion(
+                id=chunk.id,
+                created=chunk.created,
+                model=chunk.model,
+                object=chunk.object,
+                choices=[
+                    Choice(
+                        finish_reason=chunk.choices[0].finish_reason,
+                        index=chunk.choices[0].index,
+                        message=ChatCompletionMessage(
+                            role='assistant',
+                            content=response_content,
+                            function_call=chunk.choices[0].delta.function_call
+                        )
+                    )
+                ],
+                usage=CompletionUsage(
+                    prompt_tokens = prompt_tokens,
+                    completion_tokens = completion_tokens,
+                    total_tokens = prompt_tokens + completion_tokens
+                )
+            )
+        else:
+            # If streaming is not enabled, send a regular chat completion request
+            response = completions.create(**params)
+        return response

     @classmethod
     def extract_text_or_function_call(cls, response: ChatCompletion | Completion) -> List[str]:

From 75278f16a79a0ef542eb6bede033ba8d5251b5e8 Mon Sep 17 00:00:00 2001
From: Alvaro Mateos
Date: Mon, 30 Oct 2023 23:44:45 +0100
Subject: [PATCH 2/3] Enable streaming support for openai v1.0.0b3

---
 autogen/oai/client.py | 61 +++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index bf87abe97c0..77a59a7f5b8 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -250,9 +250,11 @@ def yes_or_no_filter(context, response):

     def _completions_create(self, client, params):
         completions = client.chat.completions if "messages" in params else client.completions
-        # If streaming is enabled, iterate over the chunks of the response
-        if params.get("stream", False) and "messages" in params:
-            response_content = ""
+        # If streaming is enabled, has messages, and does not have functions, then
+        # iterate over the chunks of the response
+        if (params.get("stream", False) and "messages" in params and 'functions' not in params):
+            response_contents = [""] * params.get('n', 1)
+            finish_reasons = [""] * params.get('n', 1)
             completion_tokens = 0

             # Set the terminal text color to green
@@ -261,42 +263,51 @@ def _completions_create(self, client, params):
             # Send the chat completion request to OpenAI's API and process the response in chunks
             for chunk in completions.create(**params):
                 if chunk.choices:
-                    content = chunk.choices[0].delta.content
-                    # If content is present, print it to the terminal and update response variables
-                    if content is not None:
-                        print(content, end='', flush=True)
-                        response_content += content
-                        completion_tokens += 1
-
+                    for choice in chunk.choices:
+                        content = choice.delta.content
+                        finish_reasons[choice.index] = choice.finish_reason
+                        # If content is present, print it to the terminal and update response variables
+                        if content is not None:
+                            print(content, end='', flush=True)
+                            response_contents[choice.index] += content
+                            completion_tokens += 1
+                        else:
+                            print()
+
             # Reset the terminal text color
             print("\033[0m\n")

             # Prepare the final ChatCompletion object based on the accumulated data
-            prompt_tokens = count_token(params["messages"], chunk.model)
+            model = chunk.model.replace("gpt-35", "gpt-3.5") # hack for Azure API
+            prompt_tokens = count_token(params["messages"], model)
             response = ChatCompletion(
                 id=chunk.id,
-                created=chunk.created,
                 model=chunk.model,
-                object=chunk.object,
-                choices=[
-                    Choice(
-                        finish_reason=chunk.choices[0].finish_reason,
-                        index=chunk.choices[0].index,
-                        message=ChatCompletionMessage(
-                            role='assistant',
-                            content=response_content,
-                            function_call=chunk.choices[0].delta.function_call
-                        )
-                    )
-                ],
+                created=chunk.created,
+                object='chat.completion',
+                choices=[],
                 usage=CompletionUsage(
                     prompt_tokens = prompt_tokens,
                     completion_tokens = completion_tokens,
                     total_tokens = prompt_tokens + completion_tokens
                 )
             )
+            for i in range(len(response_contents)):
+                response.choices.append(
+                    Choice(
+                        index=i,
+                        finish_reason=finish_reasons[i],
+                        message=ChatCompletionMessage(
+                            role='assistant',
+                            content=response_contents[i],
+                            function_call=None
+                        )
+                    )
+                )
         else:
-            # If streaming is not enabled, send a regular chat completion request
+            # If streaming is not enabled, send a regular chat completion request
+            # Ensure streaming is disabled
+            params['stream'] = False
             response = completions.create(**params)
         return response

From 51251129b166273635052a3ae7836cf2350fd972 Mon Sep 17 00:00:00 2001
From: Alvaro Mateos
Date: Tue, 31 Oct 2023 10:19:42 +0100
Subject: [PATCH 3/3] Fixed code formatting issues with pre-commit

---
 autogen/oai/client.py | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index 77a59a7f5b8..86abf3de2e4 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -247,19 +247,19 @@ def yes_or_no_filter(context, response):
                     # Cache the response
                     cache.set(key, response)
                 return response
-
+
     def _completions_create(self, client, params):
         completions = client.chat.completions if "messages" in params else client.completions
         # If streaming is enabled, has messages, and does not have functions, then
         # iterate over the chunks of the response
-        if (params.get("stream", False) and "messages" in params and 'functions' not in params):
-            response_contents = [""] * params.get('n', 1)
-            finish_reasons = [""] * params.get('n', 1)
+        if params.get("stream", False) and "messages" in params and "functions" not in params:
+            response_contents = [""] * params.get("n", 1)
+            finish_reasons = [""] * params.get("n", 1)
             completion_tokens = 0
-
+
             # Set the terminal text color to green
-            print("\033[32m", end='')
-
+            print("\033[32m", end="")
+
             # Send the chat completion request to OpenAI's API and process the response in chunks
             for chunk in completions.create(**params):
                 if chunk.choices:
@@ -268,29 +268,29 @@ def _completions_create(self, client, params):
                         finish_reasons[choice.index] = choice.finish_reason
                         # If content is present, print it to the terminal and update response variables
                         if content is not None:
-                            print(content, end='', flush=True)
+                            print(content, end="", flush=True)
                             response_contents[choice.index] += content
                             completion_tokens += 1
                         else:
                             print()
-
+
             # Reset the terminal text color
             print("\033[0m\n")
-
+
             # Prepare the final ChatCompletion object based on the accumulated data
-            model = chunk.model.replace("gpt-35", "gpt-3.5") # hack for Azure API
+            model = chunk.model.replace("gpt-35", "gpt-3.5")  # hack for Azure API
             prompt_tokens = count_token(params["messages"], model)
             response = ChatCompletion(
                 id=chunk.id,
                 model=chunk.model,
                 created=chunk.created,
-                object='chat.completion',
+                object="chat.completion",
                 choices=[],
                 usage=CompletionUsage(
-                    prompt_tokens = prompt_tokens,
-                    completion_tokens = completion_tokens,
-                    total_tokens = prompt_tokens + completion_tokens
-                )
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=prompt_tokens + completion_tokens,
+                ),
             )
             for i in range(len(response_contents)):
                 response.choices.append(
@@ -298,16 +298,14 @@ def _completions_create(self, client, params):
                         index=i,
                         finish_reason=finish_reasons[i],
                         message=ChatCompletionMessage(
-                            role='assistant',
-                            content=response_contents[i],
-                            function_call=None
-                        )
+                            role="assistant", content=response_contents[i], function_call=None
+                        ),
                     )
                 )
         else:
             # If streaming is not enabled, send a regular chat completion request
             # Ensure streaming is disabled
-            params['stream'] = False
+            params["stream"] = False
             response = completions.create(**params)
         return response
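
For readers who want to try the pattern outside of autogen, below is a minimal standalone sketch of the chunk-accumulation approach that _completions_create implements, written directly against the openai>=1.0 Python client. The model name, the prompt, and the assumption that OPENAI_API_KEY is set in the environment are illustrative placeholders, not part of the patch.

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

params = {
    "model": "gpt-3.5-turbo",  # placeholder model name
    "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
    "stream": True,
    "n": 1,
}

# One accumulator per requested choice, mirroring response_contents/finish_reasons in the patch.
response_contents = [""] * params.get("n", 1)
finish_reasons = [""] * params.get("n", 1)

for chunk in client.chat.completions.create(**params):
    for choice in chunk.choices:
        # finish_reason is None on intermediate chunks and set on the final chunk of each choice.
        if choice.finish_reason is not None:
            finish_reasons[choice.index] = choice.finish_reason
        delta = choice.delta.content
        if delta is not None:
            print(delta, end="", flush=True)  # echo partial output as it arrives
            response_contents[choice.index] += delta
print()
print(finish_reasons)

Repackaging the accumulated strings into a regular ChatCompletion, as the patch does with choices and usage, keeps streaming transparent to callers that expect the usual non-streaming response object instead of a raw stream.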