Enable streaming support for openai ChatCompletion microsoft#217
Alvaromah committed Oct 28, 2023
1 parent 5694486 commit 749fb52
Showing 3 changed files with 60 additions and 1 deletion.
57 changes: 57 additions & 0 deletions autogen/oai/chat_completion_proxy.py
@@ -0,0 +1,57 @@
import openai
import tiktoken

class ChatCompletionProxy():
    @classmethod
    def _prompt_tokens(cls, messages):
        # Get the encoding for OpenAI's "cl100k_base" model
        encoding = tiktoken.get_encoding("cl100k_base")

        # Calculate the total number of tokens in the prompt
        # by iterating over each message in the 'messages' list,
        # encoding its content, and summing up the token counts.
        return sum([len(encoding.encode(msg['content'])) for msg in messages])

    @classmethod
    def create(cls, *args, **kwargs):
        # Check if streaming is enabled in the function arguments
        if kwargs.get("stream", False):
            response_content = ""
            completion_tokens = 0

            # Set the terminal text color to green for better visibility
            print("\033[32m", end='')

            # Send the chat completion request to OpenAI's API and process the response in chunks
            for chunk in openai.ChatCompletion.create(*args, **kwargs):
                if chunk["choices"]:
                    content = chunk["choices"][0].get("delta", {}).get("content")
                    # If content is present, print it to the terminal and update response variables
                    if content is not None:
                        print(content, end='', flush=True)
                        response_content += content
                        completion_tokens += 1

            # Reset the terminal text color
            print("\033[0m\n")

            # Prepare the final response object based on the accumulated data
            response = chunk
            response["choices"][0]["message"] = {
                'role': 'assistant',
                'content': response_content
            }

            prompt_tokens = cls._prompt_tokens(kwargs["messages"])
            # Add usage information to the response
            response["usage"] = {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens
            }
        else:
            # If streaming is not enabled, send a regular chat completion request
            response = openai.ChatCompletion.create(*args, **kwargs)

        # Return the final response object
        return response
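
The proxy mirrors openai.ChatCompletion.create: with stream=True it prints tokens to the terminal as they arrive, then returns a conventional response object whose usage section is estimated with tiktoken. A minimal usage sketch (the API key, model name, and messages are illustrative; it assumes the pre-1.0 openai SDK that this commit targets):

import openai
from autogen.oai.chat_completion_proxy import ChatCompletionProxy

openai.api_key = "sk-..."  # placeholder key

# Streamed request: deltas are printed as they arrive, then assembled into a
# regular ChatCompletion-style response object.
response = ChatCompletionProxy.create(
    model="gpt-3.5-turbo",  # illustrative model name
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    stream=True,
)

print(response["choices"][0]["message"]["content"])  # full assembled reply
print(response["usage"])  # estimated prompt/completion/total token counts
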
3 changes: 2 additions & 1 deletion autogen/oai/completion.py
@@ -9,6 +9,7 @@
from flaml.tune.space import is_constant
from flaml.automl.logger import logger_formatter
from .openai_utils import get_key
+from .chat_completion_proxy import ChatCompletionProxy
from collections import defaultdict

try:
@@ -207,7 +208,7 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca
                cls._book_keeping(config, response)
                return response
        openai_completion = (
-            openai.ChatCompletion
+            ChatCompletionProxy  # Support streaming for chat models
            if config["model"].replace("gpt-35-turbo", "gpt-3.5-turbo") in cls.chat_models
            or issubclass(cls, ChatCompletion)
            else openai.Completion
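
With this routing in place, a chat-model request whose config includes stream=True is dispatched to ChatCompletionProxy.create instead of openai.ChatCompletion, while non-chat models still go through openai.Completion. A rough sketch of how the flag might be passed from the caller's side (hypothetical call; config values are illustrative, not part of this commit):

from autogen import oai

# Extra config keys such as "stream" are forwarded by _get_response to the
# underlying completion call, so streaming can be requested per call.
response = oai.ChatCompletion.create(
    model="gpt-3.5-turbo",  # illustrative model name
    messages=[{"role": "user", "content": "Summarize streaming in one line."}],
    stream=True,
)
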
1 change: 1 addition & 0 deletions setup.py
@@ -15,6 +15,7 @@

install_requires = [
"openai<1",
"tiktoken",
"diskcache",
"termcolor",
"flaml",
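
The new tiktoken requirement backs the _prompt_tokens estimate above. For reference, a standalone sketch of the same counting approach (the example message is illustrative):

import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")
messages = [{"role": "user", "content": "Hello, world!"}]

# Mirrors ChatCompletionProxy._prompt_tokens: sum the encoded length of each
# message's content. Note this counts content tokens only, not the extra
# per-message tokens that the chat format adds.
prompt_tokens = sum(len(encoding.encode(msg["content"])) for msg in messages)
print(prompt_tokens)
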
