diff --git a/.stats.yml b/.stats.yml index 3cc042fe0a..e3a0040a5a 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ -configured_endpoints: 68 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-2e0e0678be19d1118fd796af291822075e40538dba326611e177e9f3dc245a53.yml +configured_endpoints: 69 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-779ea2754025daf5e18eb8ceb203ec321692636bc3a999338556a479178efa6c.yml diff --git a/api.md b/api.md index 3e88faab91..f51089745d 100644 --- a/api.md +++ b/api.md @@ -47,6 +47,7 @@ from openai.types.chat import ( ChatCompletionContentPartInputAudio, ChatCompletionContentPartRefusal, ChatCompletionContentPartText, + ChatCompletionDeveloperMessageParam, ChatCompletionFunctionCallOption, ChatCompletionFunctionMessageParam, ChatCompletionMessage, @@ -55,6 +56,7 @@ from openai.types.chat import ( ChatCompletionModality, ChatCompletionNamedToolChoice, ChatCompletionPredictionContent, + ChatCompletionReasoningEffort, ChatCompletionRole, ChatCompletionStreamOptions, ChatCompletionSystemMessageParam, @@ -234,6 +236,20 @@ Methods: # Beta +## Realtime + +### Sessions + +Types: + +```python +from openai.types.beta.realtime import Session, SessionCreateResponse +``` + +Methods: + +- client.beta.realtime.sessions.create(\*\*params) -> SessionCreateResponse + ## VectorStores Types: diff --git a/src/openai/resources/beta/beta.py b/src/openai/resources/beta/beta.py index 642ba664ba..42ea9b88e5 100644 --- a/src/openai/resources/beta/beta.py +++ b/src/openai/resources/beta/beta.py @@ -20,6 +20,14 @@ ThreadsWithStreamingResponse, AsyncThreadsWithStreamingResponse, ) +from .realtime.realtime import ( + Realtime, + AsyncRealtime, + RealtimeWithRawResponse, + AsyncRealtimeWithRawResponse, + RealtimeWithStreamingResponse, + AsyncRealtimeWithStreamingResponse, +) from .vector_stores.vector_stores import ( VectorStores, AsyncVectorStores, @@ -33,6 +41,10 @@ class Beta(SyncAPIResource): + @cached_property + def realtime(self) -> Realtime: + return Realtime(self._client) + @cached_property def vector_stores(self) -> VectorStores: return VectorStores(self._client) @@ -66,6 +78,10 @@ def with_streaming_response(self) -> BetaWithStreamingResponse: class AsyncBeta(AsyncAPIResource): + @cached_property + def realtime(self) -> AsyncRealtime: + return AsyncRealtime(self._client) + @cached_property def vector_stores(self) -> AsyncVectorStores: return AsyncVectorStores(self._client) @@ -102,6 +118,10 @@ class BetaWithRawResponse: def __init__(self, beta: Beta) -> None: self._beta = beta + @cached_property + def realtime(self) -> RealtimeWithRawResponse: + return RealtimeWithRawResponse(self._beta.realtime) + @cached_property def vector_stores(self) -> VectorStoresWithRawResponse: return VectorStoresWithRawResponse(self._beta.vector_stores) @@ -119,6 +139,10 @@ class AsyncBetaWithRawResponse: def __init__(self, beta: AsyncBeta) -> None: self._beta = beta + @cached_property + def realtime(self) -> AsyncRealtimeWithRawResponse: + return AsyncRealtimeWithRawResponse(self._beta.realtime) + @cached_property def vector_stores(self) -> AsyncVectorStoresWithRawResponse: return AsyncVectorStoresWithRawResponse(self._beta.vector_stores) @@ -136,6 +160,10 @@ class BetaWithStreamingResponse: def __init__(self, beta: Beta) -> None: self._beta = beta + @cached_property + def realtime(self) -> RealtimeWithStreamingResponse: + return RealtimeWithStreamingResponse(self._beta.realtime) + @cached_property def vector_stores(self) -> 
VectorStoresWithStreamingResponse: return VectorStoresWithStreamingResponse(self._beta.vector_stores) @@ -153,6 +181,10 @@ class AsyncBetaWithStreamingResponse: def __init__(self, beta: AsyncBeta) -> None: self._beta = beta + @cached_property + def realtime(self) -> AsyncRealtimeWithStreamingResponse: + return AsyncRealtimeWithStreamingResponse(self._beta.realtime) + @cached_property def vector_stores(self) -> AsyncVectorStoresWithStreamingResponse: return AsyncVectorStoresWithStreamingResponse(self._beta.vector_stores) diff --git a/src/openai/resources/beta/realtime/__init__.py b/src/openai/resources/beta/realtime/__init__.py new file mode 100644 index 0000000000..474434e6e1 --- /dev/null +++ b/src/openai/resources/beta/realtime/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .realtime import ( + Realtime, + AsyncRealtime, + RealtimeWithRawResponse, + AsyncRealtimeWithRawResponse, + RealtimeWithStreamingResponse, + AsyncRealtimeWithStreamingResponse, +) +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) + +__all__ = [ + "Sessions", + "AsyncSessions", + "SessionsWithRawResponse", + "AsyncSessionsWithRawResponse", + "SessionsWithStreamingResponse", + "AsyncSessionsWithStreamingResponse", + "Realtime", + "AsyncRealtime", + "RealtimeWithRawResponse", + "AsyncRealtimeWithRawResponse", + "RealtimeWithStreamingResponse", + "AsyncRealtimeWithStreamingResponse", +] diff --git a/src/openai/resources/beta/realtime/realtime.py b/src/openai/resources/beta/realtime/realtime.py new file mode 100644 index 0000000000..e57e0be503 --- /dev/null +++ b/src/openai/resources/beta/realtime/realtime.py @@ -0,0 +1,102 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["Realtime", "AsyncRealtime"] + + +class Realtime(SyncAPIResource): + @cached_property + def sessions(self) -> Sessions: + return Sessions(self._client) + + @cached_property + def with_raw_response(self) -> RealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return RealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return RealtimeWithStreamingResponse(self) + + +class AsyncRealtime(AsyncAPIResource): + @cached_property + def sessions(self) -> AsyncSessions: + return AsyncSessions(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncRealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncRealtimeWithStreamingResponse(self) + + +class RealtimeWithRawResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> SessionsWithRawResponse: + return SessionsWithRawResponse(self._realtime.sessions) + + +class AsyncRealtimeWithRawResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> AsyncSessionsWithRawResponse: + return AsyncSessionsWithRawResponse(self._realtime.sessions) + + +class RealtimeWithStreamingResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> SessionsWithStreamingResponse: + return SessionsWithStreamingResponse(self._realtime.sessions) + + +class AsyncRealtimeWithStreamingResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> AsyncSessionsWithStreamingResponse: + return AsyncSessionsWithStreamingResponse(self._realtime.sessions) diff --git a/src/openai/resources/beta/realtime/sessions.py b/src/openai/resources/beta/realtime/sessions.py new file mode 100644 index 0000000000..1d1ee701e5 --- /dev/null +++ b/src/openai/resources/beta/realtime/sessions.py @@ -0,0 +1,337 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._base_client import make_request_options +from ....types.beta.realtime import session_create_params +from ....types.beta.realtime.session_create_response import SessionCreateResponse + +__all__ = ["Sessions", "AsyncSessions"] + + +class Sessions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> SessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return SessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> SessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return SessionsWithStreamingResponse(self) + + def create( + self, + *, + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ], + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + instructions: str | NotGiven = NOT_GIVEN, + max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: str | NotGiven = NOT_GIVEN, + tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN, + turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionCreateResponse: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. + + Args: + model: The Realtime model used for this session. + + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + + input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + + instructions: The default system instructions (i.e. system message) prepended to model calls. + This field allows the client to guide the model on desired responses. The model + can be instructed on response content and format (e.g. "be extremely succinct", + "act friendly", "here are examples of good responses") and on audio behavior + (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + + max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. 
Defaults to `inf`. + + modalities: The set of modalities the model can respond with. To disable audio, set this to + ["text"]. + + output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + + temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. + + tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify + a function. + + tools: Tools (functions) available to the model. + + turn_detection: Configuration for turn detection. Can be set to `null` to turn off. Server VAD + means that the model will detect the start and end of speech based on audio + volume and respond at the end of user speech. + + voice: The voice the model uses to respond. Voice cannot be changed during the session + once the model has responded with audio at least once. Current voice options are + `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/realtime/sessions", + body=maybe_transform( + { + "model": model, + "input_audio_format": input_audio_format, + "input_audio_transcription": input_audio_transcription, + "instructions": instructions, + "max_response_output_tokens": max_response_output_tokens, + "modalities": modalities, + "output_audio_format": output_audio_format, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "turn_detection": turn_detection, + "voice": voice, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionCreateResponse, + ) + + +class AsyncSessions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncSessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return the + raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncSessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncSessionsWithStreamingResponse(self) + + async def create( + self, + *, + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ], + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + instructions: str | NotGiven = NOT_GIVEN, + max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: str | NotGiven = NOT_GIVEN, + tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN, + turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionCreateResponse: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. + + Args: + model: The Realtime model used for this session. + + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + + input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + + instructions: The default system instructions (i.e. system message) prepended to model calls. + This field allows the client to guide the model on desired responses. The model + can be instructed on response content and format (e.g. "be extremely succinct", + "act friendly", "here are examples of good responses") and on audio behavior + (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + + max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. 
Defaults to `inf`. + + modalities: The set of modalities the model can respond with. To disable audio, set this to + ["text"]. + + output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + + temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. + + tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify + a function. + + tools: Tools (functions) available to the model. + + turn_detection: Configuration for turn detection. Can be set to `null` to turn off. Server VAD + means that the model will detect the start and end of speech based on audio + volume and respond at the end of user speech. + + voice: The voice the model uses to respond. Voice cannot be changed during the session + once the model has responded with audio at least once. Current voice options are + `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/realtime/sessions", + body=await async_maybe_transform( + { + "model": model, + "input_audio_format": input_audio_format, + "input_audio_transcription": input_audio_transcription, + "instructions": instructions, + "max_response_output_tokens": max_response_output_tokens, + "modalities": modalities, + "output_audio_format": output_audio_format, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "turn_detection": turn_detection, + "voice": voice, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionCreateResponse, + ) + + +class SessionsWithRawResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.to_raw_response_wrapper( + sessions.create, + ) + + +class AsyncSessionsWithRawResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.async_to_raw_response_wrapper( + sessions.create, + ) + + +class SessionsWithStreamingResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = to_streamed_response_wrapper( + sessions.create, + ) + + +class AsyncSessionsWithStreamingResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = async_to_streamed_response_wrapper( + sessions.create, + ) diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py index 84e6cf9b72..f13b8c0b45 100644 --- a/src/openai/resources/chat/completions.py +++ b/src/openai/resources/chat/completions.py @@ -20,6 +20,7 @@ from ..._streaming import Stream, AsyncStream from ...types.chat import ( ChatCompletionAudioParam, + ChatCompletionReasoningEffort, completion_create_params, ) from ..._base_client import make_request_options @@ -30,6 +31,7 @@ from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam from ...types.chat.chat_completion_audio_param import ChatCompletionAudioParam from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam +from 
...types.chat.chat_completion_reasoning_effort import ChatCompletionReasoningEffort from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam from ...types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam @@ -77,6 +79,7 @@ def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -104,6 +107,12 @@ def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -124,16 +133,18 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -195,13 +206,14 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. - response_format: An object specifying the format that the model must output. 
Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more @@ -257,9 +269,8 @@ def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can @@ -320,6 +331,7 @@ def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -346,6 +358,12 @@ def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -373,16 +391,18 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -444,13 +464,14 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. 
- [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more @@ -499,9 +520,8 @@ def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can @@ -562,6 +582,7 @@ def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -588,6 +609,12 @@ def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -615,16 +642,18 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. 
+ + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -686,13 +715,14 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more @@ -741,9 +771,8 @@ def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can @@ -803,6 +832,7 @@ def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -843,6 +873,7 @@ def create( "parallel_tool_calls": parallel_tool_calls, "prediction": prediction, "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, "response_format": response_format, "seed": seed, "service_tier": service_tier, @@ -908,6 +939,7 @@ async def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -935,6 +967,12 @@ async def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. 
+ Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -955,16 +993,18 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -1026,13 +1066,14 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more @@ -1088,9 +1129,8 @@ async def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. 
`auto` means the model can @@ -1151,6 +1191,7 @@ async def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -1177,6 +1218,12 @@ async def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -1204,16 +1251,18 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -1275,13 +1324,14 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. 
Learn more @@ -1330,9 +1380,8 @@ async def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can @@ -1393,6 +1442,7 @@ async def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -1419,6 +1469,12 @@ async def create( [vision](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio) guides. + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + Args: messages: A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message @@ -1446,16 +1502,18 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) - function_call: Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. @@ -1517,13 +1575,14 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + reasoning_effort: **o1 models only** - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. 
+ Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more @@ -1572,9 +1631,8 @@ async def create( temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + focused and deterministic. We generally recommend altering this or `top_p` but + not both. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tool and instead generates a message. `auto` means the model can @@ -1634,6 +1692,7 @@ async def create( parallel_tool_calls: bool | NotGiven = NOT_GIVEN, prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: ChatCompletionReasoningEffort | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, @@ -1674,6 +1733,7 @@ async def create( "parallel_tool_calls": parallel_tool_calls, "prediction": prediction, "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, "response_format": response_format, "seed": seed, "service_tier": service_tier, diff --git a/src/openai/resources/fine_tuning/jobs/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py index 4024bf79f3..21773fbf96 100644 --- a/src/openai/resources/fine_tuning/jobs/jobs.py +++ b/src/openai/resources/fine_tuning/jobs/jobs.py @@ -64,6 +64,7 @@ def create( training_file: str, hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + method: job_create_params.Method | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, @@ -96,17 +97,22 @@ def create( your file with the purpose `fine-tune`. The contents of the file should differ depending on if the model uses the - [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format. See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. - hyperparameters: The hyperparameters used for the fine-tuning job. + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. integrations: A list of integrations to enable for your fine-tuning job. + method: The method used for fine-tuning. 
+ seed: The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. @@ -146,6 +152,7 @@ def create( "training_file": training_file, "hyperparameters": hyperparameters, "integrations": integrations, + "method": method, "seed": seed, "suffix": suffix, "validation_file": validation_file, @@ -355,6 +362,7 @@ async def create( training_file: str, hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + method: job_create_params.Method | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, @@ -387,17 +395,22 @@ async def create( your file with the purpose `fine-tune`. The contents of the file should differ depending on if the model uses the - [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format. See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. - hyperparameters: The hyperparameters used for the fine-tuning job. + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. integrations: A list of integrations to enable for your fine-tuning job. + method: The method used for fine-tuning. + seed: The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. @@ -437,6 +450,7 @@ async def create( "training_file": training_file, "hyperparameters": hyperparameters, "integrations": integrations, + "method": method, "seed": seed, "suffix": suffix, "validation_file": validation_file, diff --git a/src/openai/types/beta/realtime/__init__.py b/src/openai/types/beta/realtime/__init__.py new file mode 100644 index 0000000000..1c5246db7a --- /dev/null +++ b/src/openai/types/beta/realtime/__init__.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .session_create_params import SessionCreateParams as SessionCreateParams +from .session_create_response import SessionCreateResponse as SessionCreateResponse diff --git a/src/openai/types/beta/realtime/session_create_params.py b/src/openai/types/beta/realtime/session_create_params.py new file mode 100644 index 0000000000..f56f2c5c22 --- /dev/null +++ b/src/openai/types/beta/realtime/session_create_params.py @@ -0,0 +1,149 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["SessionCreateParams", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class SessionCreateParams(TypedDict, total=False): + model: Required[ + Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + ] + """The Realtime model used for this session.""" + + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + input_audio_transcription: InputAudioTranscription + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: float + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: str + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Iterable[Tool] + """Tools (functions) available to the model.""" + + turn_detection: TurnDetection + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo`, `sage`, `shimmer` and `verse`. 
+ """ + + +class InputAudioTranscription(TypedDict, total=False): + model: str + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + +class Tool(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(TypedDict, total=False): + create_response: bool + """Whether or not to automatically generate a response when VAD is enabled. + + `true` by default. + """ + + prefix_padding_ms: int + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: int + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: float + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: str + """Type of turn detection, only `server_vad` is currently supported.""" diff --git a/src/openai/types/beta/realtime/session_create_response.py b/src/openai/types/beta/realtime/session_create_response.py new file mode 100644 index 0000000000..31f591b261 --- /dev/null +++ b/src/openai/types/beta/realtime/session_create_response.py @@ -0,0 +1,150 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["SessionCreateResponse", "ClientSecret", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class ClientSecret(BaseModel): + expires_at: Optional[int] = None + """Timestamp for when the token expires. + + Currently, all tokens expire after one minute. + """ + + value: Optional[str] = None + """ + Ephemeral key usable in client environments to authenticate connections to the + Realtime API. Use this in client-side environments rather than a standard API + token, which should only be used server-side. + """ + + +class InputAudioTranscription(BaseModel): + model: Optional[str] = None + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + +class Tool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(BaseModel): + prefix_padding_ms: Optional[int] = None + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. 
With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: Optional[float] = None + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: Optional[str] = None + """Type of turn detection, only `server_vad` is currently supported.""" + + +class SessionCreateResponse(BaseModel): + client_secret: Optional[ClientSecret] = None + """Ephemeral key returned by the API.""" + + input_audio_format: Optional[str] = None + """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + input_audio_transcription: Optional[InputAudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Optional[str] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Optional[List[Tool]] = None + """Tools (functions) available to the model.""" + + turn_detection: Optional[TurnDetection] = None + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. 
+ """ diff --git a/src/openai/types/chat/__init__.py b/src/openai/types/chat/__init__.py index ef562a4b94..962dc51da0 100644 --- a/src/openai/types/chat/__init__.py +++ b/src/openai/types/chat/__init__.py @@ -13,6 +13,7 @@ from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob +from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort as ChatCompletionReasoningEffort from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam @@ -28,6 +29,9 @@ from .chat_completion_content_part_text_param import ( ChatCompletionContentPartTextParam as ChatCompletionContentPartTextParam, ) +from .chat_completion_developer_message_param import ( + ChatCompletionDeveloperMessageParam as ChatCompletionDeveloperMessageParam, +) from .chat_completion_message_tool_call_param import ( ChatCompletionMessageToolCallParam as ChatCompletionMessageToolCallParam, ) diff --git a/src/openai/types/chat/chat_completion_developer_message_param.py b/src/openai/types/chat/chat_completion_developer_message_param.py new file mode 100644 index 0000000000..01e4fdb654 --- /dev/null +++ b/src/openai/types/chat/chat_completion_developer_message_param.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + +__all__ = ["ChatCompletionDeveloperMessageParam"] + + +class ChatCompletionDeveloperMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] + """The contents of the developer message.""" + + role: Required[Literal["developer"]] + """The role of the messages author, in this case `developer`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. 
+ """ diff --git a/src/openai/types/chat/chat_completion_message_param.py b/src/openai/types/chat/chat_completion_message_param.py index ec65d94cae..942da24304 100644 --- a/src/openai/types/chat/chat_completion_message_param.py +++ b/src/openai/types/chat/chat_completion_message_param.py @@ -10,10 +10,12 @@ from .chat_completion_system_message_param import ChatCompletionSystemMessageParam from .chat_completion_function_message_param import ChatCompletionFunctionMessageParam from .chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam +from .chat_completion_developer_message_param import ChatCompletionDeveloperMessageParam __all__ = ["ChatCompletionMessageParam"] ChatCompletionMessageParam: TypeAlias = Union[ + ChatCompletionDeveloperMessageParam, ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam, ChatCompletionAssistantMessageParam, diff --git a/src/openai/types/chat/chat_completion_reasoning_effort.py b/src/openai/types/chat/chat_completion_reasoning_effort.py new file mode 100644 index 0000000000..9e7946974a --- /dev/null +++ b/src/openai/types/chat/chat_completion_reasoning_effort.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatCompletionReasoningEffort"] + +ChatCompletionReasoningEffort: TypeAlias = Literal["low", "medium", "high"] diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index e838858314..f168ddea6e 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -10,6 +10,7 @@ from .chat_completion_tool_param import ChatCompletionToolParam from .chat_completion_audio_param import ChatCompletionAudioParam from .chat_completion_message_param import ChatCompletionMessageParam +from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort from ..shared_params.function_parameters import FunctionParameters from ..shared_params.response_format_text import ResponseFormatText from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam @@ -60,19 +61,21 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) """ function_call: FunctionCall """Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. 
""" @@ -164,18 +167,20 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + """ + + reasoning_effort: ChatCompletionReasoningEffort + """**o1 models only** - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. """ response_format: ResponseFormat """An object specifying the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), - [GPT-4o mini](https://platform.openai.com/docs/models#gpt-4o-mini), - [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more in the @@ -237,9 +242,8 @@ class CompletionCreateParamsBase(TypedDict, total=False): """What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like - 0.2 will make it more focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. """ tool_choice: ChatCompletionToolChoiceOptionParam diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py index 3567a3ba65..e1ac464320 100644 --- a/src/openai/types/chat_model.py +++ b/src/openai/types/chat_model.py @@ -5,6 +5,8 @@ __all__ = ["ChatModel"] ChatModel: TypeAlias = Literal[ + "o1", + "o1-2024-12-17", "o1-preview", "o1-preview-2024-09-12", "o1-mini", @@ -13,10 +15,11 @@ "gpt-4o-2024-11-20", "gpt-4o-2024-08-06", "gpt-4o-2024-05-13", - "gpt-4o-realtime-preview", - "gpt-4o-realtime-preview-2024-10-01", "gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", "chatgpt-4o-latest", "gpt-4o-mini", "gpt-4o-mini-2024-07-18", diff --git a/src/openai/types/fine_tuning/fine_tuning_job.py b/src/openai/types/fine_tuning/fine_tuning_job.py index 7ac8792787..f5a11c2107 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job.py +++ b/src/openai/types/fine_tuning/fine_tuning_job.py @@ -6,7 +6,16 @@ from ..._models import BaseModel from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject -__all__ = ["FineTuningJob", "Error", "Hyperparameters"] +__all__ = [ + "FineTuningJob", + "Error", + "Hyperparameters", + "Method", + "MethodDpo", + "MethodDpoHyperparameters", + "MethodSupervised", + "MethodSupervisedHyperparameters", +] class Error(BaseModel): @@ -24,15 +33,96 @@ class Error(BaseModel): class Hyperparameters(BaseModel): - n_epochs: Union[Literal["auto"], int] + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. 
+ """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodDpoHyperparameters(BaseModel): + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + beta: Union[Literal["auto"], float, None] = None + """The beta value for the DPO method. + + A higher beta value will increase the weight of the penalty between the policy + and reference model. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None """The number of epochs to train the model for. - An epoch refers to one full cycle through the training dataset. "auto" decides - the optimal number of epochs based on the size of the dataset. If setting the - number manually, we support any number between 1 and 50 epochs. + An epoch refers to one full cycle through the training dataset. """ +class MethodDpo(BaseModel): + hyperparameters: Optional[MethodDpoHyperparameters] = None + """The hyperparameters used for the fine-tuning job.""" + + +class MethodSupervisedHyperparameters(BaseModel): + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodSupervised(BaseModel): + hyperparameters: Optional[MethodSupervisedHyperparameters] = None + """The hyperparameters used for the fine-tuning job.""" + + +class Method(BaseModel): + dpo: Optional[MethodDpo] = None + """Configuration for the DPO fine-tuning method.""" + + supervised: Optional[MethodSupervised] = None + """Configuration for the supervised fine-tuning method.""" + + type: Optional[Literal["supervised", "dpo"]] = None + """The type of method. Is either `supervised` or `dpo`.""" + + class FineTuningJob(BaseModel): id: str """The object identifier, which can be referenced in the API endpoints.""" @@ -61,8 +151,7 @@ class FineTuningJob(BaseModel): hyperparameters: Hyperparameters """The hyperparameters used for the fine-tuning job. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) - for more details. + This value will only be returned when running `supervised` jobs. 
""" model: str @@ -118,3 +207,6 @@ class FineTuningJob(BaseModel): integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None """A list of integrations to enable for this fine-tuning job.""" + + method: Optional[Method] = None + """The method used for fine-tuning.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job_event.py b/src/openai/types/fine_tuning/fine_tuning_job_event.py index 2d204bb980..1d728bd765 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job_event.py +++ b/src/openai/types/fine_tuning/fine_tuning_job_event.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +import builtins +from typing import Optional from typing_extensions import Literal from ..._models import BaseModel @@ -9,11 +11,22 @@ class FineTuningJobEvent(BaseModel): id: str + """The object identifier.""" created_at: int + """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" level: Literal["info", "warn", "error"] + """The log level of the event.""" message: str + """The message of the event.""" object: Literal["fine_tuning.job.event"] + """The object type, which is always "fine_tuning.job.event".""" + + data: Optional[builtins.object] = None + """The data associated with the event.""" + + type: Optional[Literal["message", "metrics"]] = None + """The type of event.""" diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py index 8814229b2e..09c3f8571c 100644 --- a/src/openai/types/fine_tuning/job_create_params.py +++ b/src/openai/types/fine_tuning/job_create_params.py @@ -5,7 +5,17 @@ from typing import List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"] +__all__ = [ + "JobCreateParams", + "Hyperparameters", + "Integration", + "IntegrationWandb", + "Method", + "MethodDpo", + "MethodDpoHyperparameters", + "MethodSupervised", + "MethodSupervisedHyperparameters", +] class JobCreateParams(TypedDict, total=False): @@ -26,8 +36,10 @@ class JobCreateParams(TypedDict, total=False): your file with the purpose `fine-tune`. The contents of the file should differ depending on if the model uses the - [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format. See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) @@ -35,11 +47,17 @@ class JobCreateParams(TypedDict, total=False): """ hyperparameters: Hyperparameters - """The hyperparameters used for the fine-tuning job.""" + """ + The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. + """ integrations: Optional[Iterable[Integration]] """A list of integrations to enable for your fine-tuning job.""" + method: Method + """The method used for fine-tuning.""" + seed: Optional[int] """The seed controls the reproducibility of the job. 
@@ -134,3 +152,73 @@ class Integration(TypedDict, total=False): can set an explicit display name for your run, add tags to your run, and set a default entity (team, username, etc) to be associated with your run. """ + + +class MethodDpoHyperparameters(TypedDict, total=False): + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + beta: Union[Literal["auto"], float] + """The beta value for the DPO method. + + A higher beta value will increase the weight of the penalty between the policy + and reference model. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodDpo(TypedDict, total=False): + hyperparameters: MethodDpoHyperparameters + """The hyperparameters used for the fine-tuning job.""" + + +class MethodSupervisedHyperparameters(TypedDict, total=False): + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodSupervised(TypedDict, total=False): + hyperparameters: MethodSupervisedHyperparameters + """The hyperparameters used for the fine-tuning job.""" + + +class Method(TypedDict, total=False): + dpo: MethodDpo + """Configuration for the DPO fine-tuning method.""" + + supervised: MethodSupervised + """Configuration for the supervised fine-tuning method.""" + + type: Literal["supervised", "dpo"] + """The type of method. Is either `supervised` or `dpo`.""" diff --git a/tests/api_resources/beta/realtime/__init__.py b/tests/api_resources/beta/realtime/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/beta/realtime/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/realtime/test_sessions.py b/tests/api_resources/beta/realtime/test_sessions.py new file mode 100644 index 0000000000..65bfa27572 --- /dev/null +++ b/tests/api_resources/beta/realtime/test_sessions.py @@ -0,0 +1,146 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
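The new session tests below exercise the plain, raw, and streaming access patterns for the realtime sessions resource. As a compact reference, here is a sketch of the two wrapper patterns (assuming a configured client; the model name mirrors the tests):

```python
from openai import OpenAI

client = OpenAI()

# Raw access: returns the HTTP response object; parse() yields the typed
# SessionCreateResponse.
raw = client.beta.realtime.sessions.with_raw_response.create(
    model="gpt-4o-realtime-preview",
)
session = raw.parse()

# Streaming access: the response body is not read eagerly; parse() inside
# the context manager produces the same typed object.
with client.beta.realtime.sessions.with_streaming_response.create(
    model="gpt-4o-realtime-preview",
) as response:
    session = response.parse()
```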
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.beta.realtime import SessionCreateResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestSessions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + session = client.beta.realtime.sessions.create( + model="gpt-4o-realtime-preview", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + session = client.beta.realtime.sessions.create( + model="gpt-4o-realtime-preview", + input_audio_format="pcm16", + input_audio_transcription={"model": "model"}, + instructions="instructions", + max_response_output_tokens=0, + modalities=["text"], + output_audio_format="pcm16", + temperature=0, + tool_choice="tool_choice", + tools=[ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + turn_detection={ + "create_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + "type": "type", + }, + voice="alloy", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.beta.realtime.sessions.with_raw_response.create( + model="gpt-4o-realtime-preview", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.realtime.sessions.with_streaming_response.create( + model="gpt-4o-realtime-preview", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncSessions: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.realtime.sessions.create( + model="gpt-4o-realtime-preview", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.realtime.sessions.create( + model="gpt-4o-realtime-preview", + input_audio_format="pcm16", + input_audio_transcription={"model": "model"}, + instructions="instructions", + max_response_output_tokens=0, + modalities=["text"], + output_audio_format="pcm16", + temperature=0, + tool_choice="tool_choice", + tools=[ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + turn_detection={ + "create_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + "type": "type", + }, + voice="alloy", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def 
test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.realtime.sessions.with_raw_response.create( + model="gpt-4o-realtime-preview", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.realtime.sessions.with_streaming_response.create( + model="gpt-4o-realtime-preview", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = await response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index d2e786cfe0..523fcc6ed9 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -25,7 +25,7 @@ def test_method_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -38,7 +38,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", "name": "name", } ], @@ -69,6 +69,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: "type": "content", }, presence_penalty=-2, + reasoning_effort="low", response_format={"type": "text"}, seed=-9007199254740991, service_tier="auto", @@ -101,7 +102,7 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -118,7 +119,7 @@ def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -137,7 +138,7 @@ def test_method_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -151,7 +152,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", "name": "name", } ], @@ -183,6 +184,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: "type": "content", }, presence_penalty=-2, + reasoning_effort="low", response_format={"type": "text"}, seed=-9007199254740991, service_tier="auto", @@ -214,7 +216,7 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -231,7 +233,7 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -255,7 +257,7 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -268,7 +270,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn messages=[ { "content": "string", - "role": "system", + "role": "developer", "name": 
"name", } ], @@ -299,6 +301,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn "type": "content", }, presence_penalty=-2, + reasoning_effort="low", response_format={"type": "text"}, seed=-9007199254740991, service_tier="auto", @@ -331,7 +334,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) - messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -348,7 +351,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncOpe messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -367,7 +370,7 @@ async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -381,7 +384,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn messages=[ { "content": "string", - "role": "system", + "role": "developer", "name": "name", } ], @@ -413,6 +416,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn "type": "content", }, presence_penalty=-2, + reasoning_effort="low", response_format={"type": "text"}, seed=-9007199254740991, service_tier="auto", @@ -444,7 +448,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) - messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -461,7 +465,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", diff --git a/tests/api_resources/fine_tuning/test_jobs.py b/tests/api_resources/fine_tuning/test_jobs.py index ad218bcb36..050edba367 100644 --- a/tests/api_resources/fine_tuning/test_jobs.py +++ b/tests/api_resources/fine_tuning/test_jobs.py @@ -50,6 +50,24 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: }, } ], + method={ + "dpo": { + "hyperparameters": { + "batch_size": "auto", + "beta": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + "type": "supervised", + }, seed=42, suffix="x", validation_file="file-abc123", @@ -271,6 +289,24 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> }, } ], + method={ + "dpo": { + "hyperparameters": { + "batch_size": "auto", + "beta": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + }, + "type": "supervised", + }, seed=42, suffix="x", validation_file="file-abc123", diff --git a/tests/test_client.py b/tests/test_client.py index 7751e7d463..e0d23403b1 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -795,7 +795,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -827,7 +827,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -859,7 +859,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": 
"developer", } ], model="gpt-4o", @@ -891,7 +891,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -1663,7 +1663,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -1696,7 +1696,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -1729,7 +1729,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o", @@ -1762,7 +1762,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], model="gpt-4o",