From 7a43ab468a0dd0d7da8b9f8b4f242f41eb73fb4e Mon Sep 17 00:00:00 2001
From: Alvaro Mateos
Date: Mon, 30 Oct 2023 17:53:12 +0100
Subject: [PATCH 1/3] Streaming preview

---
 autogen/oai/client.py | 58 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index 35705f2b0fc..bf87abe97c0 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -8,11 +8,14 @@
 from flaml.automl.logger import logger_formatter

 from autogen.oai.openai_utils import get_key
+from autogen.token_count_utils import count_token

 try:
     from openai import OpenAI, APIError
     from openai.types.chat import ChatCompletion
+    from openai.types.chat.chat_completion import ChatCompletionMessage, Choice
     from openai.types.completion import Completion
+    from openai.types.completion_usage import CompletionUsage
     import diskcache

     ERROR = None
@@ -233,9 +236,8 @@ def yes_or_no_filter(context, response):
                         response.pass_filter = pass_filter
                         # TODO: add response.cost
                         return response
-            completions = client.chat.completions if "messages" in params else client.completions
             try:
-                response = completions.create(**params)
+                response = self._completions_create(client, params)
             except APIError:
                 logger.debug(f"config {i} failed", exc_info=1)
                 if i == last:
@@ -245,6 +247,58 @@ def yes_or_no_filter(context, response):
                     # Cache the response
                     cache.set(key, response)
                 return response
+
+    def _completions_create(self, client, params):
+        completions = client.chat.completions if "messages" in params else client.completions
+        # If streaming is enabled, iterate over the chunks of the response
+        if params.get("stream", False) and "messages" in params:
+            response_content = ""
+            completion_tokens = 0
+
+            # Set the terminal text color to green
+            print("\033[32m", end='')
+
+            # Send the chat completion request to OpenAI's API and process the response in chunks
+            for chunk in completions.create(**params):
+                if chunk.choices:
+                    content = chunk.choices[0].delta.content
+                    # If content is present, print it to the terminal and update response variables
+                    if content is not None:
+                        print(content, end='', flush=True)
+                        response_content += content
+                        completion_tokens += 1
+
+            # Reset the terminal text color
+            print("\033[0m\n")
+
+            # Prepare the final ChatCompletion object based on the accumulated data
+            prompt_tokens = count_token(params["messages"], chunk.model)
+            response = ChatCompletion(
+                id=chunk.id,
+                created=chunk.created,
+                model=chunk.model,
+                object=chunk.object,
+                choices=[
+                    Choice(
+                        finish_reason=chunk.choices[0].finish_reason,
+                        index=chunk.choices[0].index,
+                        message=ChatCompletionMessage(
+                            role='assistant',
+                            content=response_content,
+                            function_call=chunk.choices[0].delta.function_call
+                        )
+                    )
+                ],
+                usage=CompletionUsage(
+                    prompt_tokens = prompt_tokens,
+                    completion_tokens = completion_tokens,
+                    total_tokens = prompt_tokens + completion_tokens
+                )
+            )
+        else:
+            # If streaming is not enabled, send a regular chat completion request
+            response = completions.create(**params)
+        return response

     @classmethod
     def extract_text_or_function_call(cls, response: ChatCompletion | Completion) -> List[str]:

From 75278f16a79a0ef542eb6bede033ba8d5251b5e8 Mon Sep 17 00:00:00 2001
From: Alvaro Mateos
Date: Mon, 30 Oct 2023 23:44:45 +0100
Subject: [PATCH 2/3] Enable streaming support for openai v1.0.0b3

---
 autogen/oai/client.py | 61 +++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index bf87abe97c0..77a59a7f5b8 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -250,9 +250,11 @@ def yes_or_no_filter(context, response):

     def _completions_create(self, client, params):
         completions = client.chat.completions if "messages" in params else client.completions
-        # If streaming is enabled, iterate over the chunks of the response
-        if params.get("stream", False) and "messages" in params:
-            response_content = ""
+        # If streaming is enabled, has messages, and does not have functions, then
+        # iterate over the chunks of the response
+        if (params.get("stream", False) and "messages" in params and 'functions' not in params):
+            response_contents = [""] * params.get('n', 1)
+            finish_reasons = [""] * params.get('n', 1)
             completion_tokens = 0

             # Set the terminal text color to green
@@ -261,42 +263,51 @@ def _completions_create(self, client, params):
             # Send the chat completion request to OpenAI's API and process the response in chunks
             for chunk in completions.create(**params):
                 if chunk.choices:
-                    content = chunk.choices[0].delta.content
-                    # If content is present, print it to the terminal and update response variables
-                    if content is not None:
-                        print(content, end='', flush=True)
-                        response_content += content
-                        completion_tokens += 1
-
+                    for choice in chunk.choices:
+                        content = choice.delta.content
+                        finish_reasons[choice.index] = choice.finish_reason
+                        # If content is present, print it to the terminal and update response variables
+                        if content is not None:
+                            print(content, end='', flush=True)
+                            response_contents[choice.index] += content
+                            completion_tokens += 1
+                        else:
+                            print()
+
             # Reset the terminal text color
             print("\033[0m\n")

             # Prepare the final ChatCompletion object based on the accumulated data
-            prompt_tokens = count_token(params["messages"], chunk.model)
+            model = chunk.model.replace("gpt-35", "gpt-3.5") # hack for Azure API
+            prompt_tokens = count_token(params["messages"], model)
             response = ChatCompletion(
                 id=chunk.id,
-                created=chunk.created,
                 model=chunk.model,
-                object=chunk.object,
-                choices=[
-                    Choice(
-                        finish_reason=chunk.choices[0].finish_reason,
-                        index=chunk.choices[0].index,
-                        message=ChatCompletionMessage(
-                            role='assistant',
-                            content=response_content,
-                            function_call=chunk.choices[0].delta.function_call
-                        )
-                    )
-                ],
+                created=chunk.created,
+                object='chat.completion',
+                choices=[],
                 usage=CompletionUsage(
                     prompt_tokens = prompt_tokens,
                     completion_tokens = completion_tokens,
                     total_tokens = prompt_tokens + completion_tokens
                 )
             )
+            for i in range(len(response_contents)):
+                response.choices.append(
+                    Choice(
+                        index=i,
+                        finish_reason=finish_reasons[i],
+                        message=ChatCompletionMessage(
+                            role='assistant',
+                            content=response_contents[i],
+                            function_call=None
+                        )
+                    )
+                )
         else:
-            # If streaming is not enabled, send a regular chat completion request
+            # If streaming is not enabled, send a regular chat completion request
+            # Ensure streaming is disabled
+            params['stream'] = False
             response = completions.create(**params)
         return response

From 51251129b166273635052a3ae7836cf2350fd972 Mon Sep 17 00:00:00 2001
From: Alvaro Mateos
Date: Tue, 31 Oct 2023 10:19:42 +0100
Subject: [PATCH 3/3] Fixed code formatting issues with pre-commit

---
 autogen/oai/client.py | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index 77a59a7f5b8..86abf3de2e4 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -247,19 +247,19 @@ def yes_or_no_filter(context, response):
                     # Cache the response
                     cache.set(key, response)
                 return response
-
+
     def _completions_create(self, client, params):
         completions = client.chat.completions if "messages" in params else client.completions
         # If streaming is enabled, has messages, and does not have functions, then
         # iterate over the chunks of the response
-        if (params.get("stream", False) and "messages" in params and 'functions' not in params):
-            response_contents = [""] * params.get('n', 1)
-            finish_reasons = [""] * params.get('n', 1)
+        if params.get("stream", False) and "messages" in params and "functions" not in params:
+            response_contents = [""] * params.get("n", 1)
+            finish_reasons = [""] * params.get("n", 1)
             completion_tokens = 0
-
+
             # Set the terminal text color to green
-            print("\033[32m", end='')
-
+            print("\033[32m", end="")
+
             # Send the chat completion request to OpenAI's API and process the response in chunks
             for chunk in completions.create(**params):
                 if chunk.choices:
@@ -268,29 +268,29 @@ def _completions_create(self, client, params):
                         finish_reasons[choice.index] = choice.finish_reason
                         # If content is present, print it to the terminal and update response variables
                         if content is not None:
-                            print(content, end='', flush=True)
+                            print(content, end="", flush=True)
                             response_contents[choice.index] += content
                             completion_tokens += 1
                         else:
                             print()
-
+
             # Reset the terminal text color
             print("\033[0m\n")
-
+
             # Prepare the final ChatCompletion object based on the accumulated data
-            model = chunk.model.replace("gpt-35", "gpt-3.5") # hack for Azure API
+            model = chunk.model.replace("gpt-35", "gpt-3.5")  # hack for Azure API
             prompt_tokens = count_token(params["messages"], model)
             response = ChatCompletion(
                 id=chunk.id,
                 model=chunk.model,
                 created=chunk.created,
-                object='chat.completion',
+                object="chat.completion",
                 choices=[],
                 usage=CompletionUsage(
-                    prompt_tokens = prompt_tokens,
-                    completion_tokens = completion_tokens,
-                    total_tokens = prompt_tokens + completion_tokens
-                )
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=prompt_tokens + completion_tokens,
+                ),
             )
             for i in range(len(response_contents)):
                 response.choices.append(
@@ -298,16 +298,14 @@ def _completions_create(self, client, params):
                         index=i,
                         finish_reason=finish_reasons[i],
                         message=ChatCompletionMessage(
-                            role='assistant',
-                            content=response_contents[i],
-                            function_call=None
-                        )
+                            role="assistant", content=response_contents[i], function_call=None
+                        ),
                     )
                 )
         else:
             # If streaming is not enabled, send a regular chat completion request
             # Ensure streaming is disabled
-            params['stream'] = False
+            params["stream"] = False
             response = completions.create(**params)
         return response
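
For readers who want to try the pattern outside of autogen, below is a minimal standalone sketch of the chunk-accumulation approach that _completions_create implements, written directly against the openai>=1.0 Python client. The model name, the prompt, and the assumption that OPENAI_API_KEY is set in the environment are illustrative placeholders, not part of the patch.

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

params = {
    "model": "gpt-3.5-turbo",  # placeholder model name
    "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
    "stream": True,
    "n": 1,
}

# One accumulator per requested choice, mirroring response_contents/finish_reasons in the patch.
response_contents = [""] * params.get("n", 1)
finish_reasons = [""] * params.get("n", 1)

for chunk in client.chat.completions.create(**params):
    for choice in chunk.choices:
        # finish_reason is None on intermediate chunks and set on the final chunk of each choice.
        if choice.finish_reason is not None:
            finish_reasons[choice.index] = choice.finish_reason
        delta = choice.delta.content
        if delta is not None:
            print(delta, end="", flush=True)  # echo partial output as it arrives
            response_contents[choice.index] += delta
print()
print(finish_reasons)

Repackaging the accumulated strings into a regular ChatCompletion, as the patch does with choices and usage, keeps streaming transparent to callers that expect the usual non-streaming response object instead of a raw stream.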