diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py
index 70e074f..73cf684 100644
--- a/src/groq/_streaming.py
+++ b/src/groq/_streaming.py
@@ -55,6 +55,8 @@ def __stream__(self) -> Iterator[_T]:
         iterator = self._iter_events()
 
         for sse in iterator:
+            if sse.data.startswith("[DONE]"):
+                break
             yield process_data(data=sse.json(), cast_to=cast_to, response=response)
 
         # Ensure the entire stream is consumed
@@ -119,6 +121,8 @@ async def __stream__(self) -> AsyncIterator[_T]:
         iterator = self._iter_events()
 
         async for sse in iterator:
+            if sse.data.startswith("[DONE]"):
+                break
             yield process_data(data=sse.json(), cast_to=cast_to, response=response)
 
         # Ensure the entire stream is consumed
diff --git a/src/groq/lib/chat_completion_chunk.py b/src/groq/lib/chat_completion_chunk.py
deleted file mode 100644
index 318646f..0000000
--- a/src/groq/lib/chat_completion_chunk.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# File Manually added to support streaming
-# File is in libs instead of models to avoid conflicts with stainless bot
-
-from typing import List, Optional
-
-from .._models import BaseModel
-from ..types.chat.chat_completion import Usage
-
-__all__ = [
-    "ChatCompletionChunk",
-    "Choice",
-    "ChoiceLogprobs",
-    "ChoiceLogprobsContent",
-    "ChoiceLogprobsContentTopLogprob",
-    "ChoiceDelta",
-    "ChoiceDeltaFunctionCall",
-    "ChoiceDeltaToolCall",
-    "ChoiceDeltaToolCallFunction",
-]
-
-
-class ChoiceDeltaFunctionCall(BaseModel):
-    arguments: Optional[str] = None
-    """
-    The arguments to call the function with, as generated by the model in JSON
-    format. Note that the model does not always generate valid JSON, and may
-    hallucinate parameters not defined by your function schema. Validate the
-    arguments in your code before calling your function.
-    """
-
-    name: Optional[str] = None
-    """The name of the function to call."""
-
-
-class ChoiceLogprobsContentTopLogprob(BaseModel):
-    token: Optional[str] = None
-
-    bytes: Optional[List[int]] = None
-
-    logprob: Optional[float] = None
-
-
-class ChoiceLogprobsContent(BaseModel):
-    token: Optional[str] = None
-
-    bytes: Optional[List[int]] = None
-
-    logprob: Optional[float] = None
-
-    top_logprobs: Optional[List[ChoiceLogprobsContentTopLogprob]] = None
-
-
-class ChoiceLogprobs(BaseModel):
-    content: Optional[List[ChoiceLogprobsContent]] = None
-
-
-class ChoiceDeltaToolCallFunction(BaseModel):
-    arguments: Optional[str] = None
-
-    name: Optional[str] = None
-
-
-class ChoiceDeltaToolCall(BaseModel):
-    index: int
-
-    id: Optional[str] = None
-
-    function: Optional[ChoiceDeltaToolCallFunction] = None
-
-    type: Optional[str] = None
-
-
-class ChoiceDelta(BaseModel):
-    content: str
-
-    role: str
-
-    function_call: Optional[ChoiceDeltaFunctionCall] = None
-
-    tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
-
-
-class Choice(BaseModel):
-    delta: ChoiceDelta
-
-    finish_reason: str
-
-    index: int
-
-    logprobs: ChoiceLogprobs
-
-
-class XGroq(BaseModel):
-    usage: Usage
-
-
-class ChatCompletionChunk(BaseModel):
-    id: str
-
-    choices: List[Choice]
-
-    created: int
-
-    model: str
-
-    object: str
-
-    system_fingerprint: str
-
-    x_groq: Optional[XGroq]
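
The `[DONE]` check above follows the SSE convention used by OpenAI-compatible endpoints: the server terminates a stream with a literal `[DONE]` data payload, which is not JSON and would make `sse.json()` raise if it reached `process_data`. A minimal sketch of the termination behavior, using a hypothetical `ServerSentEvent` stand-in rather than the SDK's real internals:

```python
# Sketch only: `ServerSentEvent` here is an illustrative stand-in, not the
# SDK's actual event type.
from dataclasses import dataclass
from typing import Iterator, List


@dataclass
class ServerSentEvent:
    data: str


def iter_payloads(events: Iterator[ServerSentEvent]) -> Iterator[str]:
    for sse in events:
        # The server signals end-of-stream with a literal "[DONE]" payload.
        # It is not JSON, so stop before attempting to parse it.
        if sse.data.startswith("[DONE]"):
            break
        yield sse.data


events: List[ServerSentEvent] = [
    ServerSentEvent('{"delta": "Hello"}'),
    ServerSentEvent('{"delta": " world"}'),
    ServerSentEvent("[DONE]"),
]
print(list(iter_payloads(iter(events))))  # the sentinel is never yielded
```
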
diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py
index 94eaafd..c9bd886 100644
--- a/src/groq/resources/chat/completions.py
+++ b/src/groq/resources/chat/completions.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Dict, List, Union, Iterable, Optional
+from typing import Dict, List, Union, Iterable, Optional, overload
 from typing_extensions import Literal
 
 import httpx
@@ -20,11 +20,13 @@
     async_to_raw_response_wrapper,
     async_to_streamed_response_wrapper,
 )
+from ..._streaming import Stream, AsyncStream
 from ...types.chat import completion_create_params
 from ..._base_client import (
     make_request_options,
 )
 from ...types.chat.chat_completion import ChatCompletion
+from ...types.chat.chat_completion_chunk import ChatCompletionChunk
 from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam
 from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam
 from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
@@ -41,11 +43,12 @@ def with_raw_response(self) -> CompletionsWithRawResponse:
     def with_streaming_response(self) -> CompletionsWithStreamingResponse:
         return CompletionsWithStreamingResponse(self)
 
+    @overload
     def create(
         self,
         *,
         messages: Iterable[ChatCompletionMessageParam],
-        model: Union[str, Literal["gemma-7b-it", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768"]],
+        model: str,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
         functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
@@ -57,7 +60,7 @@ def create(
         response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
         tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
@@ -71,6 +74,104 @@ def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Literal[True],
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[ChatCompletionChunk]:
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: bool,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
+        ...
+
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, Literal["gemma-7b-it", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768"]],
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
         """
         Creates a model response for the given chat conversation.
 
@@ -210,6 +311,8 @@ def create(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=ChatCompletion,
+            stream=stream or False,
+            stream_cls=Stream[ChatCompletionChunk],
         )
 
 
@@ -222,11 +325,12 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
     def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
         return AsyncCompletionsWithStreamingResponse(self)
 
+    @overload
     async def create(
         self,
         *,
         messages: Iterable[ChatCompletionMessageParam],
-        model: Union[str, Literal["gemma-7b-it", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768"]],
+        model: str,
         frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
         function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
         functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
@@ -238,7 +342,7 @@ async def create(
         response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
         seed: Optional[int] | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
         temperature: Optional[float] | NotGiven = NOT_GIVEN,
         tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
         tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
@@ -252,6 +356,104 @@ async def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Literal[True],
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[ChatCompletionChunk]:
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: bool,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
+        ...
+
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[str, Literal["gemma-7b-it", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768"]],
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
         """
         Creates a model response for the given chat conversation.
 
@@ -391,6 +593,8 @@ async def create(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=ChatCompletion,
+            stream=stream or False,
+            stream_cls=AsyncStream[ChatCompletionChunk],
         )
diff --git a/src/groq/types/chat/__init__.py b/src/groq/types/chat/__init__.py
index 756dbea..5d122d2 100644
--- a/src/groq/types/chat/__init__.py
+++ b/src/groq/types/chat/__init__.py
@@ -4,6 +4,7 @@
 
 from .chat_completion import ChatCompletion as ChatCompletion
 from .chat_completion_role import ChatCompletionRole as ChatCompletionRole
+from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
 from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage
 from .completion_create_params import CompletionCreateParams as CompletionCreateParams
 from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam
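
Taken together, the three `@overload` stubs per class let type checkers narrow the return type from the `stream` argument: omitted or `False` yields `ChatCompletion`, a literal `True` yields `Stream[ChatCompletionChunk]` (`AsyncStream` on the async client), and a plain `bool` yields the union. At runtime the single implementation forwards `stream=stream or False` (mapping `NOT_GIVEN`/`None` to `False`) and `stream_cls` to the transport. A sketch of what this looks like for callers, assuming `GROQ_API_KEY` is set in the environment; the model name is one of the literals from the signature above:

```python
from groq import Groq

client = Groq()  # reads GROQ_API_KEY from the environment

# stream omitted (or False): checkers infer `ChatCompletion`
completion = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=[{"role": "user", "content": "Say hello."}],
)
print(completion.choices[0].message.content)

# stream=True: checkers infer `Stream[ChatCompletionChunk]`
stream = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=[{"role": "user", "content": "Say hello."}],
    stream=True,
)
for chunk in stream:
    # guard: a chunk's choices can be empty, and the final delta has no content
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
print()
```
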
+ """ + + role: Optional[Literal["system", "user", "assistant", "tool"]] = None + """The role of the author of this message.""" + + tool_calls: Optional[List[ChoiceDeltaToolCall]] = None + + +class ChoiceLogprobs(BaseModel): + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + +class Choice(BaseModel): + delta: ChoiceDelta + """A chat completion delta generated by streamed model responses.""" + + finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter", "function_call"]] = None + """The reason the model stopped generating tokens. + + This will be `stop` if the model hit a natural stop point or a provided stop + sequence, `length` if the maximum number of tokens specified in the request was + reached, `content_filter` if content was omitted due to a flag from our content + filters, `tool_calls` if the model called a tool, or `function_call` + (deprecated) if the model called a function. + """ + + index: int + """The index of the choice in the list of choices.""" + + logprobs: Optional[ChoiceLogprobs] = None + """Log probability information for the choice.""" + + +class XGroq(BaseModel): + id: Optional[str] + """ + A groq request ID which can be used by to refer to a specific request to groq support + Only sent with the first chunk + """ + + usage: Optional[CompletionUsage] + """Usage information for the stream. Only sent in the final chunk""" + + error: Optional[str] + """ An error string indicating why a stream was stopped early""" + + +class ChatCompletionChunk(BaseModel): + id: str + """A unique identifier for the chat completion. Each chunk has the same ID.""" + + choices: List[Choice] + """A list of chat completion choices. + + Can contain more than one elements if `n` is greater than 1. Can also be empty + for the last chunk if you set `stream_options: {"include_usage": true}`. + """ + + created: int + """The Unix timestamp (in seconds) of when the chat completion was created. + + Each chunk has the same timestamp. + """ + + model: str + """The model to generate the completion.""" + + object: Literal["chat.completion.chunk"] + """The object type, which is always `chat.completion.chunk`.""" + + system_fingerprint: Optional[str] = None + """ + This fingerprint represents the backend configuration that the model runs with. + Can be used in conjunction with the `seed` request parameter to understand when + backend changes have been made that might impact determinism. + """ + + usage: Optional[CompletionUsage] = None + """ + An optional field that will only be present when you set + `stream_options: {"include_usage": true}` in your request. When present, it + contains a null value except for the last chunk which contains the token usage + statistics for the entire request. + """ + + x_groq: Optional[XGroq] + """ + Additional metadata provided by groq. 
+ """ diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 5842d13..3995abb 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -58,7 +58,7 @@ def test_method_create_with_all_params(self, client: Groq) -> None: response_format={"type": "json_object"}, seed=0, stop="\n", - stream=True, + stream=False, temperature=1, tool_choice="none", tools=[ @@ -174,7 +174,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N response_format={"type": "json_object"}, seed=0, stop="\n", - stream=True, + stream=False, temperature=1, tool_choice="none", tools=[