diff --git a/.stats.yml b/.stats.yml index 3cc042fe0a..e3a0040a5a 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ -configured_endpoints: 68 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-2e0e0678be19d1118fd796af291822075e40538dba326611e177e9f3dc245a53.yml +configured_endpoints: 69 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-779ea2754025daf5e18eb8ceb203ec321692636bc3a999338556a479178efa6c.yml diff --git a/api.md b/api.md index 3e88faab91..f51089745d 100644 --- a/api.md +++ b/api.md @@ -47,6 +47,7 @@ from openai.types.chat import ( ChatCompletionContentPartInputAudio, ChatCompletionContentPartRefusal, ChatCompletionContentPartText, + ChatCompletionDeveloperMessageParam, ChatCompletionFunctionCallOption, ChatCompletionFunctionMessageParam, ChatCompletionMessage, @@ -55,6 +56,7 @@ from openai.types.chat import ( ChatCompletionModality, ChatCompletionNamedToolChoice, ChatCompletionPredictionContent, + ChatCompletionReasoningEffort, ChatCompletionRole, ChatCompletionStreamOptions, ChatCompletionSystemMessageParam, @@ -234,6 +236,20 @@ Methods: # Beta +## Realtime + +### Sessions + +Types: + +```python +from openai.types.beta.realtime import Session, SessionCreateResponse +``` + +Methods: + +- client.beta.realtime.sessions.create(\*\*params) -> SessionCreateResponse + ## VectorStores Types: diff --git a/src/openai/resources/beta/beta.py b/src/openai/resources/beta/beta.py index 642ba664ba..42ea9b88e5 100644 --- a/src/openai/resources/beta/beta.py +++ b/src/openai/resources/beta/beta.py @@ -20,6 +20,14 @@ ThreadsWithStreamingResponse, AsyncThreadsWithStreamingResponse, ) +from .realtime.realtime import ( + Realtime, + AsyncRealtime, + RealtimeWithRawResponse, + AsyncRealtimeWithRawResponse, + RealtimeWithStreamingResponse, + AsyncRealtimeWithStreamingResponse, +) from .vector_stores.vector_stores import ( VectorStores, AsyncVectorStores, @@ -33,6 +41,10 @@ class Beta(SyncAPIResource): + @cached_property + def realtime(self) -> Realtime: + return Realtime(self._client) + @cached_property def vector_stores(self) -> VectorStores: return VectorStores(self._client) @@ -66,6 +78,10 @@ def with_streaming_response(self) -> BetaWithStreamingResponse: class AsyncBeta(AsyncAPIResource): + @cached_property + def realtime(self) -> AsyncRealtime: + return AsyncRealtime(self._client) + @cached_property def vector_stores(self) -> AsyncVectorStores: return AsyncVectorStores(self._client) @@ -102,6 +118,10 @@ class BetaWithRawResponse: def __init__(self, beta: Beta) -> None: self._beta = beta + @cached_property + def realtime(self) -> RealtimeWithRawResponse: + return RealtimeWithRawResponse(self._beta.realtime) + @cached_property def vector_stores(self) -> VectorStoresWithRawResponse: return VectorStoresWithRawResponse(self._beta.vector_stores) @@ -119,6 +139,10 @@ class AsyncBetaWithRawResponse: def __init__(self, beta: AsyncBeta) -> None: self._beta = beta + @cached_property + def realtime(self) -> AsyncRealtimeWithRawResponse: + return AsyncRealtimeWithRawResponse(self._beta.realtime) + @cached_property def vector_stores(self) -> AsyncVectorStoresWithRawResponse: return AsyncVectorStoresWithRawResponse(self._beta.vector_stores) @@ -136,6 +160,10 @@ class BetaWithStreamingResponse: def __init__(self, beta: Beta) -> None: self._beta = beta + @cached_property + def realtime(self) -> RealtimeWithStreamingResponse: + return RealtimeWithStreamingResponse(self._beta.realtime) + @cached_property def vector_stores(self) -> 
VectorStoresWithStreamingResponse: return VectorStoresWithStreamingResponse(self._beta.vector_stores) @@ -153,6 +181,10 @@ class AsyncBetaWithStreamingResponse: def __init__(self, beta: AsyncBeta) -> None: self._beta = beta + @cached_property + def realtime(self) -> AsyncRealtimeWithStreamingResponse: + return AsyncRealtimeWithStreamingResponse(self._beta.realtime) + @cached_property def vector_stores(self) -> AsyncVectorStoresWithStreamingResponse: return AsyncVectorStoresWithStreamingResponse(self._beta.vector_stores) diff --git a/src/openai/resources/beta/realtime/__init__.py b/src/openai/resources/beta/realtime/__init__.py new file mode 100644 index 0000000000..474434e6e1 --- /dev/null +++ b/src/openai/resources/beta/realtime/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .realtime import ( + Realtime, + AsyncRealtime, + RealtimeWithRawResponse, + AsyncRealtimeWithRawResponse, + RealtimeWithStreamingResponse, + AsyncRealtimeWithStreamingResponse, +) +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) + +__all__ = [ + "Sessions", + "AsyncSessions", + "SessionsWithRawResponse", + "AsyncSessionsWithRawResponse", + "SessionsWithStreamingResponse", + "AsyncSessionsWithStreamingResponse", + "Realtime", + "AsyncRealtime", + "RealtimeWithRawResponse", + "AsyncRealtimeWithRawResponse", + "RealtimeWithStreamingResponse", + "AsyncRealtimeWithStreamingResponse", +] diff --git a/src/openai/resources/beta/realtime/realtime.py b/src/openai/resources/beta/realtime/realtime.py new file mode 100644 index 0000000000..e57e0be503 --- /dev/null +++ b/src/openai/resources/beta/realtime/realtime.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["Realtime", "AsyncRealtime"] + + +class Realtime(SyncAPIResource): + @cached_property + def sessions(self) -> Sessions: + return Sessions(self._client) + + @cached_property + def with_raw_response(self) -> RealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return RealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return RealtimeWithStreamingResponse(self) + + +class AsyncRealtime(AsyncAPIResource): + @cached_property + def sessions(self) -> AsyncSessions: + return AsyncSessions(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncRealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncRealtimeWithStreamingResponse(self) + + +class RealtimeWithRawResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> SessionsWithRawResponse: + return SessionsWithRawResponse(self._realtime.sessions) + + +class AsyncRealtimeWithRawResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> AsyncSessionsWithRawResponse: + return AsyncSessionsWithRawResponse(self._realtime.sessions) + + +class RealtimeWithStreamingResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> SessionsWithStreamingResponse: + return SessionsWithStreamingResponse(self._realtime.sessions) + + +class AsyncRealtimeWithStreamingResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> AsyncSessionsWithStreamingResponse: + return AsyncSessionsWithStreamingResponse(self._realtime.sessions) diff --git a/src/openai/resources/beta/realtime/sessions.py b/src/openai/resources/beta/realtime/sessions.py new file mode 100644 index 0000000000..1d1ee701e5 --- /dev/null +++ b/src/openai/resources/beta/realtime/sessions.py @@ -0,0 +1,337 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._base_client import make_request_options +from ....types.beta.realtime import session_create_params +from ....types.beta.realtime.session_create_response import SessionCreateResponse + +__all__ = ["Sessions", "AsyncSessions"] + + +class Sessions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> SessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return SessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> SessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return SessionsWithStreamingResponse(self) + + def create( + self, + *, + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ], + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + instructions: str | NotGiven = NOT_GIVEN, + max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: str | NotGiven = NOT_GIVEN, + tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN, + turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionCreateResponse: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. + + Args: + model: The Realtime model used for this session. + + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + + input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + + instructions: The default system instructions (i.e. system message) prepended to model calls. + This field allows the client to guide the model on desired responses. The model + can be instructed on response content and format (e.g. "be extremely succinct", + "act friendly", "here are examples of good responses") and on audio behavior + (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + + max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. 
Defaults to `inf`. + + modalities: The set of modalities the model can respond with. To disable audio, set this to + ["text"]. + + output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + + temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. + + tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify + a function. + + tools: Tools (functions) available to the model. + + turn_detection: Configuration for turn detection. Can be set to `null` to turn off. Server VAD + means that the model will detect the start and end of speech based on audio + volume and respond at the end of user speech. + + voice: The voice the model uses to respond. Voice cannot be changed during the session + once the model has responded with audio at least once. Current voice options are + `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/realtime/sessions", + body=maybe_transform( + { + "model": model, + "input_audio_format": input_audio_format, + "input_audio_transcription": input_audio_transcription, + "instructions": instructions, + "max_response_output_tokens": max_response_output_tokens, + "modalities": modalities, + "output_audio_format": output_audio_format, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "turn_detection": turn_detection, + "voice": voice, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionCreateResponse, + ) + + +class AsyncSessions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncSessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncSessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncSessionsWithStreamingResponse(self) + + async def create( + self, + *, + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ], + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + instructions: str | NotGiven = NOT_GIVEN, + max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: str | NotGiven = NOT_GIVEN, + tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN, + turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionCreateResponse: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. + + Args: + model: The Realtime model used for this session. + + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + + input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + + instructions: The default system instructions (i.e. system message) prepended to model calls. + This field allows the client to guide the model on desired responses. The model + can be instructed on response content and format (e.g. "be extremely succinct", + "act friendly", "here are examples of good responses") and on audio behavior + (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + + max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. 
Defaults to `inf`. + + modalities: The set of modalities the model can respond with. To disable audio, set this to + ["text"]. + + output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + + temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. + + tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify + a function. + + tools: Tools (functions) available to the model. + + turn_detection: Configuration for turn detection. Can be set to `null` to turn off. Server VAD + means that the model will detect the start and end of speech based on audio + volume and respond at the end of user speech. + + voice: The voice the model uses to respond. Voice cannot be changed during the session + once the model has responded with audio at least once. Current voice options are + `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/realtime/sessions", + body=await async_maybe_transform( + { + "model": model, + "input_audio_format": input_audio_format, + "input_audio_transcription": input_audio_transcription, + "instructions": instructions, + "max_response_output_tokens": max_response_output_tokens, + "modalities": modalities, + "output_audio_format": output_audio_format, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "turn_detection": turn_detection, + "voice": voice, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionCreateResponse, + ) + + +class SessionsWithRawResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.to_raw_response_wrapper( + sessions.create, + ) + + +class AsyncSessionsWithRawResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.async_to_raw_response_wrapper( + sessions.create, + ) + + +class SessionsWithStreamingResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = to_streamed_response_wrapper( + sessions.create, + ) + + +class AsyncSessionsWithStreamingResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = async_to_streamed_response_wrapper( + sessions.create, + ) diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py index 84e6cf9b72..f13b8c0b45 100644 --- a/src/openai/resources/chat/completions.py +++ b/src/openai/resources/chat/completions.py @@ -20,6 +20,7 @@ from ..._streaming import Stream, AsyncStream from ...types.chat import ( ChatCompletionAudioParam, + ChatCompletionReasoningEffort, completion_create_params, ) from ..._base_client import make_request_options @@ -30,6 +31,7 @@ from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam from ...types.chat.chat_completion_audio_param import ChatCompletionAudioParam from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam +from 
...types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam from ...types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam @@ -77,6 +79,7 @@ def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -104,6 +107,12 @@ def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -124,16 +133,18 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -195,13 +206,14 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. - response_format: An object specifying the format that the model must output. 
Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more @@ -257,9 +269,8 @@ def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can @@ -320,6 +331,7 @@ def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -346,6 +358,12 @@ def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -373,16 +391,18 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -444,13 +464,14 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. 
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more @@ -499,9 +520,8 @@ def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can @@ -562,6 +582,7 @@ def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -588,6 +609,12 @@ def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -615,16 +642,18 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. 
+ + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -686,13 +715,14 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more @@ -741,9 +771,8 @@ def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can @@ -803,6 +832,7 @@ def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -843,6 +873,7 @@ def create( "parallel_tool_calls": parallel_tool_calls, "prediction": prediction, "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, "response_format": response_format, "seed": seed, "service_tier": service_tier, @@ -908,6 +939,7 @@ async def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -935,6 +967,12 @@ async def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. 
+ Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -955,16 +993,18 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -1026,13 +1066,14 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more @@ -1088,9 +1129,8 @@ async def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. 
`auto` means the model can @@ -1151,6 +1191,7 @@ async def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -1177,6 +1218,12 @@ async def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -1204,16 +1251,18 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -1275,13 +1324,14 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. 
Learn more @@ -1330,9 +1380,8 @@ async def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can @@ -1393,6 +1442,7 @@ async def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -1419,6 +1469,12 @@ async def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -1446,16 +1502,18 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -1517,13 +1575,14 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. 
+ Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more @@ -1572,9 +1631,8 @@ async def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can @@ -1634,6 +1692,7 @@ async def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -1674,6 +1733,7 @@ async def create( "parallel_tool_calls": parallel_tool_calls, "prediction": prediction, "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, "response_format": response_format, "seed": seed, "service_tier": service_tier, diff --git a/src/openai/resources/fine_tuning/jobs/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py index 4024bf79f3..21773fbf96 100644 --- a/src/openai/resources/fine_tuning/jobs/jobs.py +++ b/src/openai/resources/fine_tuning/jobs/jobs.py @@ -64,6 +64,7 @@ def create( training_file: str, hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + method: job_create_params.Method | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, @@ -96,17 +97,22 @@ def create( your file with the purpose `fine-tune`. The contents of the file should differ depending on if the model uses the - [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format. See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. - hyperparameters: The hyperparameters used for the fine-tuning job. + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. integrations: A list of integrations to enable for your fine-tuning job. + method: The method used for fine-tuning. 
+ seed: The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. @@ -146,6 +152,7 @@ def create( "training_file": training_file, "hyperparameters": hyperparameters, "integrations": integrations, + "method": method, "seed": seed, "suffix": suffix, "validation_file": validation_file, @@ -355,6 +362,7 @@ async def create( training_file: str, hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + method: job_create_params.Method | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, @@ -387,17 +395,22 @@ async def create( your file with the purpose `fine-tune`. The contents of the file should differ depending on if the model uses the - [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format. See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. - hyperparameters: The hyperparameters used for the fine-tuning job. + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. integrations: A list of integrations to enable for your fine-tuning job. + method: The method used for fine-tuning. + seed: The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. @@ -437,6 +450,7 @@ async def create( "training_file": training_file, "hyperparameters": hyperparameters, "integrations": integrations, + "method": method, "seed": seed, "suffix": suffix, "validation_file": validation_file, diff --git a/src/openai/types/beta/realtime/__init__.py b/src/openai/types/beta/realtime/__init__.py new file mode 100644 index 0000000000..1c5246db7a --- /dev/null +++ b/src/openai/types/beta/realtime/__init__.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .session_create_params import SessionCreateParams as SessionCreateParams +from .session_create_response import SessionCreateResponse as SessionCreateResponse diff --git a/src/openai/types/beta/realtime/session_create_params.py b/src/openai/types/beta/realtime/session_create_params.py new file mode 100644 index 0000000000..f56f2c5c22 --- /dev/null +++ b/src/openai/types/beta/realtime/session_create_params.py @@ -0,0 +1,149 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["SessionCreateParams", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class SessionCreateParams(TypedDict, total=False): + model: Required[ + Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + ] + """The Realtime model used for this session.""" + + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + input_audio_transcription: InputAudioTranscription + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: float + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: str + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Iterable[Tool] + """Tools (functions) available to the model.""" + + turn_detection: TurnDetection + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `sage`, `shimmer` and `verse`. 
+ """ + + +class InputAudioTranscription(TypedDict, total=False): + model: str + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + +class Tool(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(TypedDict, total=False): + create_response: bool + """Whether or not to automatically generate a response when VAD is enabled. + + `true` by default. + """ + + prefix_padding_ms: int + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: int + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: float + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: str + """Type of turn detection, only `server_vad` is currently supported.""" diff --git a/src/openai/types/beta/realtime/session_create_response.py b/src/openai/types/beta/realtime/session_create_response.py new file mode 100644 index 0000000000..31f591b261 --- /dev/null +++ b/src/openai/types/beta/realtime/session_create_response.py @@ -0,0 +1,150 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["SessionCreateResponse", "ClientSecret", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class ClientSecret(BaseModel): + expires_at: Optional[int] = None + """Timestamp for when the token expires. + + Currently, all tokens expire after one minute. + """ + + value: Optional[str] = None + """ + Ephemeral key usable in client environments to authenticate connections to the + Realtime API. Use this in client-side environments rather than a standard API + token, which should only be used server-side. + """ + + +class InputAudioTranscription(BaseModel): + model: Optional[str] = None + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + +class Tool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(BaseModel): + prefix_padding_ms: Optional[int] = None + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. 
With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: Optional[float] = None + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: Optional[str] = None + """Type of turn detection, only `server_vad` is currently supported.""" + + +class SessionCreateResponse(BaseModel): + client_secret: Optional[ClientSecret] = None + """Ephemeral key returned by the API.""" + + input_audio_format: Optional[str] = None + """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + input_audio_transcription: Optional[InputAudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Optional[str] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Optional[List[Tool]] = None + """Tools (functions) available to the model.""" + + turn_detection: Optional[TurnDetection] = None + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. 
+ """ diff --git a/src/openai/types/chat/__init__.py b/src/openai/types/chat/__init__.py index ef562a4b94..962dc51da0 100644 --- a/src/openai/types/chat/__init__.py +++ b/src/openai/types/chat/__init__.py @@ -13,6 +13,7 @@ from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob +from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort as ChatCompletionReasoningEffort from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam @@ -28,6 +29,9 @@ from .chat_completion_content_part_text_param import ( ChatCompletionContentPartTextParam as ChatCompletionContentPartTextParam, ) +from .chat_completion_developer_message_param import ( + ChatCompletionDeveloperMessageParam as ChatCompletionDeveloperMessageParam, +) from .chat_completion_message_tool_call_param import ( ChatCompletionMessageToolCallParam as ChatCompletionMessageToolCallParam, ) diff --git a/src/openai/types/chat/chat_completion_developer_message_param.py b/src/openai/types/chat/chat_completion_developer_message_param.py new file mode 100644 index 0000000000..01e4fdb654 --- /dev/null +++ b/src/openai/types/chat/chat_completion_developer_message_param.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + +__all__ = ["ChatCompletionDeveloperMessageParam"] + + +class ChatCompletionDeveloperMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] + """The contents of the developer message.""" + + role: Required[Literal["developer"]] + """The role of the messages author, in this case `developer`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. 
+ """ diff --git a/src/openai/types/chat/chat_completion_message_param.py b/src/openai/types/chat/chat_completion_message_param.py index ec65d94cae..942da24304 100644 --- a/src/openai/types/chat/chat_completion_message_param.py +++ b/src/openai/types/chat/chat_completion_message_param.py @@ -10,10 +10,12 @@ from .chat_completion_system_message_param import ChatCompletionSystemMessageParam from .chat_completion_function_message_param import ChatCompletionFunctionMessageParam from .chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam +from .chat_completion_developer_message_param import ChatCompletionDeveloperMessageParam __all__ = ["ChatCompletionMessageParam"] ChatCompletionMessageParam: TypeAlias = Union[ + ChatCompletionDeveloperMessageParam, ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam, ChatCompletionAssistantMessageParam, diff --git a/src/openai/types/chat/chat_completion_reasoning_effort.py b/src/openai/types/chat/chat_completion_reasoning_effort.py new file mode 100644 index 0000000000..9e7946974a --- /dev/null +++ b/src/openai/types/chat/chat_completion_reasoning_effort.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatCompletionReasoningEffort"] + +ChatCompletionReasoningEffort: TypeAlias = Literal["low", "medium", "high"] diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index e838858314..f168ddea6e 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -10,6 +10,7 @@ from .chat_completion_tool_param import ChatCompletionToolParam from .chat_completion_audio_param import ChatCompletionAudioParam from .chat_completion_message_param import ChatCompletionMessageParam +from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort from ..shared_params.function_parameters import FunctionParameters from ..shared_params.response_format_text import ResponseFormatText from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam @@ -60,19 +61,21 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) """ function_call: FunctionCall """Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. 
""" @@ -164,18 +167,20 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + """ + + reasoning_effort: ChatCompletionReasoningEffort + """**o1 models only** - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. """ response_format: ResponseFormat """An object specifying the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more in the @@ -237,9 +242,8 @@ class CompletionCreateParamsBase(TypedDict, total=False): """What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like - 0.2 will make it more focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. """ tool_choice: ChatCompletionToolChoiceOptionParam diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py index 3567a3ba65..e1ac464320 100644 --- a/src/openai/types/chat_model.py +++ b/src/openai/types/chat_model.py @@ -5,6 +5,8 @@ __all__ = ["ChatModel"] ChatModel: TypeAlias = Literal[ + "o1", + "o1-2024-12-17", "o1-preview", "o1-preview-2024-09-12", "o1-mini", @@ -13,10 +15,11 @@ "gpt-4o-2024-11-20", "gpt-4o-2024-08-06", "gpt-4o-2024-05-13", - "gpt-4o-realtime-preview", - "gpt-4o-realtime-preview-2024-10-01", "gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", "chatgpt-4o-latest", "gpt-4o-mini", "gpt-4o-mini-2024-07-18", diff --git a/src/openai/types/fine_tuning/fine_tuning_job.py b/src/openai/types/fine_tuning/fine_tuning_job.py index 7ac8792787..f5a11c2107 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job.py +++ b/src/openai/types/fine_tuning/fine_tuning_job.py @@ -6,7 +6,16 @@ from ..._models import BaseModel from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject -__all__ = ["FineTuningJob", "Error", "Hyperparameters"] +__all__ = [ + "FineTuningJob", + "Error", + "Hyperparameters", + "Method", + "MethodDpo", + "MethodDpoHyperparameters", + "MethodSupervised", + "MethodSupervisedHyperparameters", +] class Error(BaseModel): @@ -24,15 +33,96 @@ class Error(BaseModel): class Hyperparameters(BaseModel): - n_epochs: Union[Literal["auto"], int] + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. 
+ """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodDpoHyperparameters(BaseModel): + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + beta: Union[Literal["auto"], float, None] = None + """The beta value for the DPO method. + + A higher beta value will increase the weight of the penalty between the policy + and reference model. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None """The number of epochs to train the model for. - An epoch refers to one full cycle through the training dataset. "auto" decides - the optimal number of epochs based on the size of the dataset. If setting the - number manually, we support any number between 1 and 50 epochs. + An epoch refers to one full cycle through the training dataset. """ +class MethodDpo(BaseModel): + hyperparameters: Optional[MethodDpoHyperparameters] = None + """The hyperparameters used for the fine-tuning job.""" + + +class MethodSupervisedHyperparameters(BaseModel): + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodSupervised(BaseModel): + hyperparameters: Optional[MethodSupervisedHyperparameters] = None + """The hyperparameters used for the fine-tuning job.""" + + +class Method(BaseModel): + dpo: Optional[MethodDpo] = None + """Configuration for the DPO fine-tuning method.""" + + supervised: Optional[MethodSupervised] = None + """Configuration for the supervised fine-tuning method.""" + + type: Optional[Literal["supervised", "dpo"]] = None + """The type of method. Is either `supervised` or `dpo`.""" + + class FineTuningJob(BaseModel): id: str """The object identifier, which can be referenced in the API endpoints.""" @@ -61,8 +151,7 @@ class FineTuningJob(BaseModel): hyperparameters: Hyperparameters """The hyperparameters used for the fine-tuning job. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) - for more details. + This value will only be returned when running `supervised` jobs. 
""" model: str @@ -118,3 +207,6 @@ class FineTuningJob(BaseModel): integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None """A list of integrations to enable for this fine-tuning job.""" + + method: Optional[Method] = None + """The method used for fine-tuning.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job_event.py b/src/openai/types/fine_tuning/fine_tuning_job_event.py index 2d204bb980..1d728bd765 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job_event.py +++ b/src/openai/types/fine_tuning/fine_tuning_job_event.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +import builtins +from typing import Optional from typing_extensions import Literal from ..._models import BaseModel @@ -9,11 +11,22 @@ class FineTuningJobEvent(BaseModel): id: str + """The object identifier.""" created_at: int + """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" level: Literal["info", "warn", "error"] + """The log level of the event.""" message: str + """The message of the event.""" object: Literal["fine_tuning.job.event"] + """The object type, which is always "fine_tuning.job.event".""" + + data: Optional[builtins.object] = None + """The data associated with the event.""" + + type: Optional[Literal["message", "metrics"]] = None + """The type of event.""" diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py index 8814229b2e..09c3f8571c 100644 --- a/src/openai/types/fine_tuning/job_create_params.py +++ b/src/openai/types/fine_tuning/job_create_params.py @@ -5,7 +5,17 @@ from typing import List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"] +__all__ = [ + "JobCreateParams", + "Hyperparameters", + "Integration", + "IntegrationWandb", + "Method", + "MethodDpo", + "MethodDpoHyperparameters", + "MethodSupervised", + "MethodSupervisedHyperparameters", +] class JobCreateParams(TypedDict, total=False): @@ -26,8 +36,10 @@ class JobCreateParams(TypedDict, total=False): your file with the purpose `fine-tune`. The contents of the file should differ depending on if the model uses the - [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format. See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) @@ -35,11 +47,17 @@ class JobCreateParams(TypedDict, total=False): """ hyperparameters: Hyperparameters - """The hyperparameters used for the fine-tuning job.""" + """ + The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. + """ integrations: Optional[Iterable[Integration]] """A list of integrations to enable for your fine-tuning job.""" + method: Method + """The method used for fine-tuning.""" + seed: Optional[int] """The seed controls the reproducibility of the job. 
@@ -134,3 +152,73 @@ class Integration(TypedDict, total=False): can set an explicit display name for your run, add tags to your run, and set a default entity (team, username, etc) to be associated with your run. """ + + +class MethodDpoHyperparameters(TypedDict, total=False): + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + beta: Union[Literal["auto"], float] + """The beta value for the DPO method. + + A higher beta value will increase the weight of the penalty between the policy + and reference model. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodDpo(TypedDict, total=False): + hyperparameters: MethodDpoHyperparameters + """The hyperparameters used for the fine-tuning job.""" + + +class MethodSupervisedHyperparameters(TypedDict, total=False): + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodSupervised(TypedDict, total=False): + hyperparameters: MethodSupervisedHyperparameters + """The hyperparameters used for the fine-tuning job.""" + + +class Method(TypedDict, total=False): + dpo: MethodDpo + """Configuration for the DPO fine-tuning method.""" + + supervised: MethodSupervised + """Configuration for the supervised fine-tuning method.""" + + type: Literal["supervised", "dpo"] + """The type of method. Is either `supervised` or `dpo`.""" diff --git a/tests/api_resources/beta/realtime/__init__.py b/tests/api_resources/beta/realtime/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/beta/realtime/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/realtime/test_sessions.py b/tests/api_resources/beta/realtime/test_sessions.py new file mode 100644 index 0000000000..65bfa27572 --- /dev/null +++ b/tests/api_resources/beta/realtime/test_sessions.py @@ -0,0 +1,146 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
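The new session tests below exercise the plain, raw, and streaming access patterns for the realtime sessions resource. As a compact reference, here is a sketch of the two wrapper patterns (assuming a configured client; the model name mirrors the tests):

```python
from openai import OpenAI

client = OpenAI()

# Raw access: returns the HTTP response object; parse() yields the typed
# SessionCreateResponse.
raw = client.beta.realtime.sessions.with_raw_response.create(
    model="gpt-4o-realtime-preview",
)
session = raw.parse()

# Streaming access: the response body is not read eagerly; parse() inside
# the context manager produces the same typed object.
with client.beta.realtime.sessions.with_streaming_response.create(
    model="gpt-4o-realtime-preview",
) as response:
    session = response.parse()
```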
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.beta.realtime import SessionCreateResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestSessions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + session = client.beta.realtime.sessions.create( + model="gpt-4o-realtime-preview", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + session = client.beta.realtime.sessions.create( + model="gpt-4o-realtime-preview", + input_audio_format="pcm16", + input_audio_transcription={"model": "model"}, + instructions="instructions", + max_response_output_tokens=0, + modalities=["text"], + output_audio_format="pcm16", + temperature=0, + tool_choice="tool_choice", + tools=[ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + turn_detection={ + "create_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + "type": "type", + }, + voice="alloy", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.beta.realtime.sessions.with_raw_response.create( + model="gpt-4o-realtime-preview", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.realtime.sessions.with_streaming_response.create( + model="gpt-4o-realtime-preview", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncSessions: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.realtime.sessions.create( + model="gpt-4o-realtime-preview", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.realtime.sessions.create( + model="gpt-4o-realtime-preview", + input_audio_format="pcm16", + input_audio_transcription={"model": "model"}, + instructions="instructions", + max_response_output_tokens=0, + modalities=["text"], + output_audio_format="pcm16", + temperature=0, + tool_choice="tool_choice", + tools=[ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + turn_detection={ + "create_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + "type": "type", + }, + voice="alloy", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def 
test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.realtime.sessions.with_raw_response.create( + model="gpt-4o-realtime-preview", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.realtime.sessions.with_streaming_response.create( + model="gpt-4o-realtime-preview", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = await response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index d2e786cfe0..523fcc6ed9 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -25,7 +25,7 @@ def test_method_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -38,7 +38,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", "name": "name", } ], @@ -69,6 +69,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: "type": "content", }, presence_penalty=-2, + reasoning_effort="low", response_format={"type": "text"}, seed=-9007199254740991, service_tier="auto", @@ -101,7 +102,7 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -118,7 +119,7 @@ def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -137,7 +138,7 @@ def test_method_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -151,7 +152,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", "name": "name", } ], @@ -183,6 +184,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: "type": "content", }, presence_penalty=-2, + reasoning_effort="low", response_format={"type": "text"}, seed=-9007199254740991, service_tier="auto", @@ -214,7 +216,7 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -231,7 +233,7 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -255,7 +257,7 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -268,7 +270,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn messages=[ { "content": "string", - "role": "system", + "role": "developer", "name": 
"name", } ], @@ -299,6 +301,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn "type": "content", }, presence_penalty=-2, + reasoning_effort="low", response_format={"type": "text"}, seed=-9007199254740991, service_tier="auto", @@ -331,7 +334,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) - messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -348,7 +351,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncOpe messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -367,7 +370,7 @@ async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -381,7 +384,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn messages=[ { "content": "string", - "role": "system", + "role": "developer", "name": "name", } ], @@ -413,6 +416,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn "type": "content", }, presence_penalty=-2, + reasoning_effort="low", response_format={"type": "text"}, seed=-9007199254740991, service_tier="auto", @@ -444,7 +448,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) - messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -461,7 +465,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", diff --git a/tests/api_resources/fine_tuning/test_jobs.py b/tests/api_resources/fine_tuning/test_jobs.py index ad218bcb36..050edba367 100644 --- a/tests/api_resources/fine_tuning/test_jobs.py +++ b/tests/api_resources/fine_tuning/test_jobs.py @@ -50,6 +50,24 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: }, } ], + method={ + "dpo": { + "hyperparameters": { + "batch_size": "auto", + "beta": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + "type": "supervised", + }, seed=42, suffix="x", validation_file="file-abc123", @@ -271,6 +289,24 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> }, } ], + method={ + "dpo": { + "hyperparameters": { + "batch_size": "auto", + "beta": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + "type": "supervised", + }, seed=42, suffix="x", validation_file="file-abc123", diff --git a/tests/test_client.py b/tests/test_client.py index 7751e7d463..e0d23403b1 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -795,7 +795,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -827,7 +827,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -859,7 +859,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": 
"developer", } ], model="gpt-4o", @@ -891,7 +891,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -1663,7 +1663,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -1696,7 +1696,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -1729,7 +1729,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -1762,7 +1762,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o",