From af4ba81ee1c7e299bd84a29477e6ceaceb68976f Mon Sep 17 00:00:00 2001
From: John Wang
Date: Tue, 1 Aug 2023 15:29:39 +0800
Subject: [PATCH] feat: fix azure completion choices return empty

---
 api/core/llm/streamable_azure_open_ai.py | 59 +++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/api/core/llm/streamable_azure_open_ai.py b/api/core/llm/streamable_azure_open_ai.py
index 0151d0210f724a..ab67f5abcb75e1 100644
--- a/api/core/llm/streamable_azure_open_ai.py
+++ b/api/core/llm/streamable_azure_open_ai.py
@@ -1,5 +1,7 @@
-from langchain.callbacks.manager import Callbacks
+from langchain.callbacks.manager import Callbacks, CallbackManagerForLLMRun
 from langchain.llms import AzureOpenAI
+from langchain.llms.openai import _streaming_response_template, completion_with_retry, _update_response, \
+    update_token_usage
 from langchain.schema import LLMResult
 from typing import Optional, List, Dict, Mapping, Any, Union, Tuple
 
@@ -67,3 +69,58 @@ def generate(
     @classmethod
     def get_kwargs_from_model_params(cls, params: dict):
         return params
+
+    def _generate(
+        self,
+        prompts: List[str],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> LLMResult:
+        """Call out to OpenAI's endpoint with k unique prompts.
+
+        Args:
+            prompts: The prompts to pass into the model.
+            stop: Optional list of stop words to use when generating.
+
+        Returns:
+            The full LLM output.
+
+        Example:
+            .. code-block:: python
+
+                response = openai.generate(["Tell me a joke."])
+        """
+        params = self._invocation_params
+        params = {**params, **kwargs}
+        sub_prompts = self.get_sub_prompts(params, prompts, stop)
+        choices = []
+        token_usage: Dict[str, int] = {}
+        # Get the token usage from the response.
+        # Includes prompt, completion, and total tokens used.
+        _keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
+        for _prompts in sub_prompts:
+            if self.streaming:
+                if len(_prompts) > 1:
+                    raise ValueError("Cannot stream results with multiple prompts.")
+                params["stream"] = True
+                response = _streaming_response_template()
+                for stream_resp in completion_with_retry(
+                    self, prompt=_prompts, **params
+                ):
+                    if len(stream_resp["choices"]) > 0:
+                        if run_manager:
+                            run_manager.on_llm_new_token(
+                                stream_resp["choices"][0]["text"],
+                                verbose=self.verbose,
+                                logprobs=stream_resp["choices"][0]["logprobs"],
+                            )
+                        _update_response(response, stream_resp)
+                choices.extend(response["choices"])
+            else:
+                response = completion_with_retry(self, prompt=_prompts, **params)
+                choices.extend(response["choices"])
+            if not self.streaming:
+                # Can't update token usage if streaming
+                update_token_usage(_keys, response, token_usage)
+        return self.create_llm_result(choices, prompts, token_usage)
\ No newline at end of file
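
The guarded streaming loop added by this patch can be exercised in isolation with a minimal sketch, assuming a hypothetical chunk sequence in which the Azure endpoint returns a delta with an empty "choices" list (the situation named in the commit subject). The names simulated_stream and response below are illustrative only; the accumulation mirrors, in simplified form, what langchain's _update_response does with each chunk.

# Illustration only, not part of the patch.
# Without the len(stream_resp["choices"]) > 0 guard, the first chunk below
# would raise IndexError at stream_resp["choices"][0].
simulated_stream = [
    {"choices": []},  # hypothetical Azure chunk carrying no completion text
    {"choices": [{"text": "Hello", "finish_reason": None, "logprobs": None}]},
    {"choices": [{"text": " world", "finish_reason": "stop", "logprobs": None}]},
]

# Accumulator shaped like the template returned by _streaming_response_template()
response = {"choices": [{"text": "", "finish_reason": None, "logprobs": None}], "usage": {}}

for stream_resp in simulated_stream:
    if len(stream_resp["choices"]) > 0:  # the guard added by this patch
        choice = stream_resp["choices"][0]
        response["choices"][0]["text"] += choice["text"]
        response["choices"][0]["finish_reason"] = choice["finish_reason"]
        response["choices"][0]["logprobs"] = choice["logprobs"]

assert response["choices"][0]["text"] == "Hello world"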