Support streaming output for OpenAI o1-preview and o1-mini #10890

Merged
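For context, a minimal sketch (not part of this PR) of what the change enables end to end: an o1 request can now be streamed straight from the OpenAI Chat Completions API, with `max_tokens` mapped to `max_completion_tokens` as the runtime still does in the diff below. The model name, prompt, and token limit here are illustrative.

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# o1 models take max_completion_tokens instead of max_tokens
stream = client.chat.completions.create(
    model="o1-mini",
    messages=[{"role": "user", "content": "Summarize streaming in one sentence."}],
    max_completion_tokens=256,
    stream=True,
)

for chunk in stream:
    # print each streamed delta as it arrives
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
```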
50 changes: 1 addition & 49 deletions api/core/model_runtime/model_providers/openai/llm/llm.py
@@ -615,19 +615,11 @@ def _chat_generate(
         prompt_messages = self._clear_illegal_prompt_messages(model, prompt_messages)

         # o1 compatibility
-        block_as_stream = False
         if model.startswith("o1"):
             if "max_tokens" in model_parameters:
                 model_parameters["max_completion_tokens"] = model_parameters["max_tokens"]
                 del model_parameters["max_tokens"]

-            if stream:
-                block_as_stream = True
-                stream = False
-
-                if "stream_options" in extra_model_kwargs:
-                    del extra_model_kwargs["stream_options"]
-
             if "stop" in extra_model_kwargs:
                 del extra_model_kwargs["stop"]

@@ -644,47 +636,7 @@ def _chat_generate(
         if stream:
             return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools)

-        block_result = self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)
-
-        if block_as_stream:
-            return self._handle_chat_block_as_stream_response(block_result, prompt_messages, stop)
-
-        return block_result
-
-    def _handle_chat_block_as_stream_response(
-        self,
-        block_result: LLMResult,
-        prompt_messages: list[PromptMessage],
-        stop: Optional[list[str]] = None,
-    ) -> Generator[LLMResultChunk, None, None]:
-        """
-        Handle llm chat response
-
-        :param model: model name
-        :param credentials: credentials
-        :param response: response
-        :param prompt_messages: prompt messages
-        :param tools: tools for tool calling
-        :param stop: stop words
-        :return: llm response chunk generator
-        """
-        text = block_result.message.content
-        text = cast(str, text)
-
-        if stop:
-            text = self.enforce_stop_tokens(text, stop)
-
-        yield LLMResultChunk(
-            model=block_result.model,
-            prompt_messages=prompt_messages,
-            system_fingerprint=block_result.system_fingerprint,
-            delta=LLMResultChunkDelta(
-                index=0,
-                message=AssistantPromptMessage(content=text),
-                finish_reason="stop",
-                usage=block_result.usage,
-            ),
-        )
+        return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)

     def _handle_chat_generate_response(
         self,
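The only o1-specific handling left in the OpenAI provider is the parameter cleanup kept in the first hunk above. As a standalone sketch of that remaining logic (the helper name and signature are illustrative, not part of the codebase):

```python
def _adapt_o1_parameters(model: str, model_parameters: dict, extra_model_kwargs: dict) -> None:
    # o1 models expect max_completion_tokens rather than max_tokens,
    # and unsupported stop sequences are dropped for them.
    if model.startswith("o1"):
        if "max_tokens" in model_parameters:
            model_parameters["max_completion_tokens"] = model_parameters.pop("max_tokens")
        extra_model_kwargs.pop("stop", None)
```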
18 changes: 2 additions & 16 deletions api/core/model_runtime/model_providers/openrouter/llm/llm.py
@@ -45,19 +45,7 @@ def _generate(
         user: Optional[str] = None,
     ) -> Union[LLMResult, Generator]:
         self._update_credential(model, credentials)
-
-        block_as_stream = False
-        if model.startswith("openai/o1"):
-            block_as_stream = True
-            stop = None
-
-        # invoke block as stream
-        if stream and block_as_stream:
-            return self._generate_block_as_stream(
-                model, credentials, prompt_messages, model_parameters, tools, stop, user
-            )
-        else:
-            return super()._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+        return super()._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)

     def _generate_block_as_stream(
         self,
@@ -69,9 +57,7 @@ def _generate_block_as_stream(
         stop: Optional[list[str]] = None,
         user: Optional[str] = None,
     ) -> Generator:
-        resp: LLMResult = super()._generate(
-            model, credentials, prompt_messages, model_parameters, tools, stop, False, user
-        )
+        resp = super()._generate(model, credentials, prompt_messages, model_parameters, tools, stop, False, user)

         yield LLMResultChunk(
             model=model,
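The OpenRouter provider follows the same idea: o1 requests are now passed to the parent OpenAI-compatible `_generate` with `stream` left untouched. A rough, hedged equivalent of the request it now issues against OpenRouter's OpenAI-compatible endpoint (the API key, model slug, and prompt are placeholders):

```python
from openai import OpenAI

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key="YOUR_OPENROUTER_API_KEY",  # placeholder
)

stream = client.chat.completions.create(
    model="openai/o1-mini",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,  # no longer downgraded to a blocking call by the provider
)

for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
```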