From 20487ea0080969511e7c41f199387b87a84f6ab4 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 19:46:31 +0000 Subject: [PATCH 1/4] chore(examples): minor formatting changes (#633) --- tests/api_resources/test_messages.py | 88 ++++++++++++++-------------- tests/test_client.py | 8 +-- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/tests/api_resources/test_messages.py b/tests/api_resources/test_messages.py index 94dc8cc6..e3685408 100644 --- a/tests/api_resources/test_messages.py +++ b/tests/api_resources/test_messages.py @@ -23,8 +23,8 @@ def test_method_create_overload_1(self, client: Anthropic) -> None: max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -37,8 +37,8 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -47,16 +47,14 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No stream=False, system=[ { - "type": "text", "text": "Today's date is 2024-06-01.", + "type": "text", } ], temperature=1, tool_choice={"type": "auto"}, tools=[ { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -70,10 +68,10 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -87,10 +85,10 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -104,6 +102,8 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, ], top_k=5, @@ -117,8 +117,8 @@ def test_raw_response_create_overload_1(self, client: Anthropic) -> None: max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -135,8 +135,8 @@ def test_streaming_response_create_overload_1(self, client: Anthropic) -> None: max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -155,8 +155,8 @@ def test_method_create_overload_2(self, client: Anthropic) -> None: max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -170,8 +170,8 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -180,16 +180,14 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No stop_sequences=["string", "string", "string"], system=[ { - "type": "text", "text": "Today's date is 2024-06-01.", + "type": "text", } ], temperature=1, tool_choice={"type": "auto"}, tools=[ { - "description": 
"Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -203,10 +201,10 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -220,10 +218,10 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -237,6 +235,8 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, ], top_k=5, @@ -250,8 +250,8 @@ def test_raw_response_create_overload_2(self, client: Anthropic) -> None: max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -268,8 +268,8 @@ def test_streaming_response_create_overload_2(self, client: Anthropic) -> None: max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -293,8 +293,8 @@ async def test_method_create_overload_1(self, async_client: AsyncAnthropic) -> N max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -307,8 +307,8 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -317,16 +317,14 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn stream=False, system=[ { - "type": "text", "text": "Today's date is 2024-06-01.", + "type": "text", } ], temperature=1, tool_choice={"type": "auto"}, tools=[ { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -340,10 +338,10 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -357,10 +355,10 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -374,6 +372,8 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, ], top_k=5, @@ -387,8 +387,8 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncAnthropic max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -405,8 +405,8 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncAnt max_tokens=1024, messages=[ { - "role": "user", "content": 
"Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -425,8 +425,8 @@ async def test_method_create_overload_2(self, async_client: AsyncAnthropic) -> N max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -440,8 +440,8 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -450,16 +450,14 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn stop_sequences=["string", "string", "string"], system=[ { - "type": "text", "text": "Today's date is 2024-06-01.", + "type": "text", } ], temperature=1, tool_choice={"type": "auto"}, tools=[ { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -473,10 +471,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -490,10 +488,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, { - "description": "Get the current weather in a given location", - "name": "x", "input_schema": { "type": "object", "properties": { @@ -507,6 +505,8 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn }, }, }, + "name": "x", + "description": "Get the current weather in a given location", }, ], top_k=5, @@ -520,8 +520,8 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncAnthropic max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -538,8 +538,8 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncAnt max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", diff --git a/tests/test_client.py b/tests/test_client.py index 23364599..e20da702 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -890,8 +890,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -922,8 +922,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -1790,8 +1790,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", @@ -1823,8 +1823,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: max_tokens=1024, messages=[ { - "role": "user", "content": "Hello, world", + "role": "user", } ], model="claude-3-5-sonnet-20240620", From 397841125164a2420d5abf8f45d47f2467e36cd9 Mon Sep 17 00:00:00 2001 From: Stainless Bot Date: Mon, 12 Aug 2024 19:52:48 +0000 Subject: [PATCH 2/4] feat(api): add prompt caching beta --- .stats.yml | 4 +- api.md | 28 + 
src/anthropic/_client.py | 8 + src/anthropic/resources/__init__.py | 14 + src/anthropic/resources/beta/__init__.py | 33 + src/anthropic/resources/beta/beta.py | 81 + .../resources/beta/prompt_caching/__init__.py | 33 + .../resources/beta/prompt_caching/messages.py | 1774 +++++++++++++++++ .../beta/prompt_caching/prompt_caching.py | 80 + src/anthropic/resources/messages.py | 24 +- src/anthropic/types/beta/__init__.py | 3 + .../types/beta/prompt_caching/__init__.py | 26 + .../prompt_caching/message_create_params.py | 311 +++ ...hing_beta_cache_control_ephemeral_param.py | 11 + .../prompt_caching_beta_image_block_param.py | 32 + .../prompt_caching_beta_message.py | 109 + .../prompt_caching_beta_message_param.py | 33 + .../prompt_caching_beta_text_block_param.py | 18 + .../prompt_caching_beta_tool_param.py | 41 + ...pt_caching_beta_tool_result_block_param.py | 26 + ...rompt_caching_beta_tool_use_block_param.py | 22 + .../prompt_caching_beta_usage.py | 21 + ...prompt_caching_beta_message_start_event.py | 14 + ...rompt_caching_beta_message_stream_event.py | 26 + src/anthropic/types/message_create_params.py | 4 +- tests/api_resources/beta/__init__.py | 1 + .../beta/prompt_caching/__init__.py | 1 + .../beta/prompt_caching/test_messages.py | 570 ++++++ 28 files changed, 3332 insertions(+), 16 deletions(-) create mode 100644 src/anthropic/resources/beta/__init__.py create mode 100644 src/anthropic/resources/beta/beta.py create mode 100644 src/anthropic/resources/beta/prompt_caching/__init__.py create mode 100644 src/anthropic/resources/beta/prompt_caching/messages.py create mode 100644 src/anthropic/resources/beta/prompt_caching/prompt_caching.py create mode 100644 src/anthropic/types/beta/__init__.py create mode 100644 src/anthropic/types/beta/prompt_caching/__init__.py create mode 100644 src/anthropic/types/beta/prompt_caching/message_create_params.py create mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py create mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py create mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py create mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py create mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py create mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py create mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py create mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py create mode 100644 src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py create mode 100644 src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py create mode 100644 src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py create mode 100644 tests/api_resources/beta/__init__.py create mode 100644 tests/api_resources/beta/prompt_caching/__init__.py create mode 100644 tests/api_resources/beta/prompt_caching/test_messages.py diff --git a/.stats.yml b/.stats.yml index 4d2e1943..49961a7f 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ -configured_endpoints: 2 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-4769b27b6e13acc458cc71fbadd8676ea8074d76f91e37b96eaa97464c4e97af.yml +configured_endpoints: 3 +openapi_spec_url: 
https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-fb94a03f85580f7eacef034518becfb463502e6d74b0f7932f6153239de23a5b.yml diff --git a/api.md b/api.md index 63896541..aeb4e3d3 100644 --- a/api.md +++ b/api.md @@ -40,3 +40,31 @@ Methods: - client.messages.create(\*\*params) -> Message - client.messages.stream(\*args) -> MessageStreamManager[MessageStream] | MessageStreamManager[MessageStreamT] + +# Beta + +## PromptCaching + +### Messages + +Types: + +```python +from anthropic.types.beta.prompt_caching import ( + PromptCachingBetaCacheControlEphemeral, + PromptCachingBetaImageBlockParam, + PromptCachingBetaMessage, + PromptCachingBetaMessageParam, + PromptCachingBetaTextBlockParam, + PromptCachingBetaTool, + PromptCachingBetaToolResultBlockParam, + PromptCachingBetaToolUseBlockParam, + PromptCachingBetaUsage, + RawPromptCachingBetaMessageStartEvent, + RawPromptCachingBetaMessageStreamEvent, +) +``` + +Methods: + +- client.beta.prompt_caching.messages.create(\*\*params) -> PromptCachingBetaMessage diff --git a/src/anthropic/_client.py b/src/anthropic/_client.py index ac148940..693e3a96 100644 --- a/src/anthropic/_client.py +++ b/src/anthropic/_client.py @@ -58,6 +58,7 @@ class Anthropic(SyncAPIClient): completions: resources.Completions messages: resources.Messages + beta: resources.Beta with_raw_response: AnthropicWithRawResponse with_streaming_response: AnthropicWithStreamedResponse @@ -136,6 +137,7 @@ def __init__( self.completions = resources.Completions(self) self.messages = resources.Messages(self) + self.beta = resources.Beta(self) self.with_raw_response = AnthropicWithRawResponse(self) self.with_streaming_response = AnthropicWithStreamedResponse(self) @@ -320,6 +322,7 @@ def _make_status_error( class AsyncAnthropic(AsyncAPIClient): completions: resources.AsyncCompletions messages: resources.AsyncMessages + beta: resources.AsyncBeta with_raw_response: AsyncAnthropicWithRawResponse with_streaming_response: AsyncAnthropicWithStreamedResponse @@ -398,6 +401,7 @@ def __init__( self.completions = resources.AsyncCompletions(self) self.messages = resources.AsyncMessages(self) + self.beta = resources.AsyncBeta(self) self.with_raw_response = AsyncAnthropicWithRawResponse(self) self.with_streaming_response = AsyncAnthropicWithStreamedResponse(self) @@ -583,24 +587,28 @@ class AnthropicWithRawResponse: def __init__(self, client: Anthropic) -> None: self.completions = resources.CompletionsWithRawResponse(client.completions) self.messages = resources.MessagesWithRawResponse(client.messages) + self.beta = resources.BetaWithRawResponse(client.beta) class AsyncAnthropicWithRawResponse: def __init__(self, client: AsyncAnthropic) -> None: self.completions = resources.AsyncCompletionsWithRawResponse(client.completions) self.messages = resources.AsyncMessagesWithRawResponse(client.messages) + self.beta = resources.AsyncBetaWithRawResponse(client.beta) class AnthropicWithStreamedResponse: def __init__(self, client: Anthropic) -> None: self.completions = resources.CompletionsWithStreamingResponse(client.completions) self.messages = resources.MessagesWithStreamingResponse(client.messages) + self.beta = resources.BetaWithStreamingResponse(client.beta) class AsyncAnthropicWithStreamedResponse: def __init__(self, client: AsyncAnthropic) -> None: self.completions = resources.AsyncCompletionsWithStreamingResponse(client.completions) self.messages = resources.AsyncMessagesWithStreamingResponse(client.messages) + self.beta = resources.AsyncBetaWithStreamingResponse(client.beta) Client = Anthropic 
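For orientation, here is a minimal usage sketch of the beta surface wired up above (`client.beta.prompt_caching.messages.create`, as listed in the `api.md` additions). The `cache_control` field on the system text block and the cache-related contents of `usage` are assumptions drawn from the new `PromptCachingBetaCacheControlEphemeral` and `PromptCachingBetaUsage` types, not from code shown in this diff:

```python
# Sketch only -- block shapes follow the tests in this patch; cache_control
# is assumed from the new PromptCachingBetaCacheControlEphemeral type.
from anthropic import Anthropic

client = Anthropic()

message = client.beta.prompt_caching.messages.create(
    max_tokens=1024,
    model="claude-3-5-sonnet-20240620",
    system=[
        {
            "type": "text",
            "text": "Long, reusable instructions go here...",
            "cache_control": {"type": "ephemeral"},  # assumed field
        }
    ],
    messages=[{"role": "user", "content": "Hello, world"}],
)
print(message.usage)  # PromptCachingBetaUsage (cache token counts assumed)
```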
diff --git a/src/anthropic/resources/__init__.py b/src/anthropic/resources/__init__.py index cc6cc5be..318d5cdd 100644 --- a/src/anthropic/resources/__init__.py +++ b/src/anthropic/resources/__init__.py @@ -1,5 +1,13 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from .beta import ( + Beta, + AsyncBeta, + BetaWithRawResponse, + AsyncBetaWithRawResponse, + BetaWithStreamingResponse, + AsyncBetaWithStreamingResponse, +) from .messages import ( Messages, AsyncMessages, @@ -30,4 +38,10 @@ "AsyncMessagesWithRawResponse", "MessagesWithStreamingResponse", "AsyncMessagesWithStreamingResponse", + "Beta", + "AsyncBeta", + "BetaWithRawResponse", + "AsyncBetaWithRawResponse", + "BetaWithStreamingResponse", + "AsyncBetaWithStreamingResponse", ] diff --git a/src/anthropic/resources/beta/__init__.py b/src/anthropic/resources/beta/__init__.py new file mode 100644 index 00000000..9dacb008 --- /dev/null +++ b/src/anthropic/resources/beta/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .beta import ( + Beta, + AsyncBeta, + BetaWithRawResponse, + AsyncBetaWithRawResponse, + BetaWithStreamingResponse, + AsyncBetaWithStreamingResponse, +) +from .prompt_caching import ( + PromptCaching, + AsyncPromptCaching, + PromptCachingWithRawResponse, + AsyncPromptCachingWithRawResponse, + PromptCachingWithStreamingResponse, + AsyncPromptCachingWithStreamingResponse, +) + +__all__ = [ + "PromptCaching", + "AsyncPromptCaching", + "PromptCachingWithRawResponse", + "AsyncPromptCachingWithRawResponse", + "PromptCachingWithStreamingResponse", + "AsyncPromptCachingWithStreamingResponse", + "Beta", + "AsyncBeta", + "BetaWithRawResponse", + "AsyncBetaWithRawResponse", + "BetaWithStreamingResponse", + "AsyncBetaWithStreamingResponse", +] diff --git a/src/anthropic/resources/beta/beta.py b/src/anthropic/resources/beta/beta.py new file mode 100644 index 00000000..7654921c --- /dev/null +++ b/src/anthropic/resources/beta/beta.py @@ -0,0 +1,81 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from .prompt_caching import ( + PromptCaching, + AsyncPromptCaching, + PromptCachingWithRawResponse, + AsyncPromptCachingWithRawResponse, + PromptCachingWithStreamingResponse, + AsyncPromptCachingWithStreamingResponse, +) +from .prompt_caching.prompt_caching import PromptCaching, AsyncPromptCaching + +__all__ = ["Beta", "AsyncBeta"] + + +class Beta(SyncAPIResource): + @cached_property + def prompt_caching(self) -> PromptCaching: + return PromptCaching(self._client) + + @cached_property + def with_raw_response(self) -> BetaWithRawResponse: + return BetaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> BetaWithStreamingResponse: + return BetaWithStreamingResponse(self) + + +class AsyncBeta(AsyncAPIResource): + @cached_property + def prompt_caching(self) -> AsyncPromptCaching: + return AsyncPromptCaching(self._client) + + @cached_property + def with_raw_response(self) -> AsyncBetaWithRawResponse: + return AsyncBetaWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBetaWithStreamingResponse: + return AsyncBetaWithStreamingResponse(self) + + +class BetaWithRawResponse: + def __init__(self, beta: Beta) -> None: + self._beta = beta + + @cached_property + def prompt_caching(self) -> PromptCachingWithRawResponse: + return PromptCachingWithRawResponse(self._beta.prompt_caching) + + +class AsyncBetaWithRawResponse: + def __init__(self, beta: AsyncBeta) -> None: + self._beta = beta + + @cached_property + def prompt_caching(self) -> AsyncPromptCachingWithRawResponse: + return AsyncPromptCachingWithRawResponse(self._beta.prompt_caching) + + +class BetaWithStreamingResponse: + def __init__(self, beta: Beta) -> None: + self._beta = beta + + @cached_property + def prompt_caching(self) -> PromptCachingWithStreamingResponse: + return PromptCachingWithStreamingResponse(self._beta.prompt_caching) + + +class AsyncBetaWithStreamingResponse: + def __init__(self, beta: AsyncBeta) -> None: + self._beta = beta + + @cached_property + def prompt_caching(self) -> AsyncPromptCachingWithStreamingResponse: + return AsyncPromptCachingWithStreamingResponse(self._beta.prompt_caching) diff --git a/src/anthropic/resources/beta/prompt_caching/__init__.py b/src/anthropic/resources/beta/prompt_caching/__init__.py new file mode 100644 index 00000000..ccf0b0a8 --- /dev/null +++ b/src/anthropic/resources/beta/prompt_caching/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from .prompt_caching import ( + PromptCaching, + AsyncPromptCaching, + PromptCachingWithRawResponse, + AsyncPromptCachingWithRawResponse, + PromptCachingWithStreamingResponse, + AsyncPromptCachingWithStreamingResponse, +) + +__all__ = [ + "Messages", + "AsyncMessages", + "MessagesWithRawResponse", + "AsyncMessagesWithRawResponse", + "MessagesWithStreamingResponse", + "AsyncMessagesWithStreamingResponse", + "PromptCaching", + "AsyncPromptCaching", + "PromptCachingWithRawResponse", + "AsyncPromptCachingWithRawResponse", + "PromptCachingWithStreamingResponse", + "AsyncPromptCachingWithStreamingResponse", +] diff --git a/src/anthropic/resources/beta/prompt_caching/messages.py b/src/anthropic/resources/beta/prompt_caching/messages.py new file mode 100644 index 00000000..c2023c18 --- /dev/null +++ b/src/anthropic/resources/beta/prompt_caching/messages.py @@ -0,0 +1,1774 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, overload +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + is_given, + required_args, + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._constants import DEFAULT_TIMEOUT +from ...._streaming import Stream, AsyncStream +from ...._base_client import make_request_options +from ....types.model_param import ModelParam +from ....types.beta.prompt_caching import message_create_params +from ....types.beta.prompt_caching.prompt_caching_beta_message import PromptCachingBetaMessage +from ....types.beta.prompt_caching.prompt_caching_beta_tool_param import PromptCachingBetaToolParam +from ....types.beta.prompt_caching.prompt_caching_beta_message_param import PromptCachingBetaMessageParam +from ....types.beta.prompt_caching.prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam +from ....types.beta.prompt_caching.raw_prompt_caching_beta_message_stream_event import ( + RawPromptCachingBetaMessageStreamEvent, +) + +__all__ = ["Messages", "AsyncMessages"] + + +class Messages(SyncAPIResource): + @cached_property + def with_raw_response(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self) + + @overload + def create( + self, + *, + max_tokens: int, + messages: Iterable[PromptCachingBetaMessageParam], + model: ModelParam, + metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, + stop_sequences: List[str] | NotGiven = NOT_GIVEN, + stream: Literal[False] | NotGiven = NOT_GIVEN, + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, + top_k: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass 
additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PromptCachingBetaMessage: + """ + Create a Message. + + Send a structured list of input messages with text and/or image content, and the + model will generate the next message in the conversation. + + The Messages API can be used for either single queries or stateless multi-turn + conversations. + + Args: + max_tokens: The maximum number of tokens to generate before stopping. + + Note that our models may stop _before_ reaching this maximum. This parameter + only specifies the absolute maximum number of tokens to generate. + + Different models have different maximum values for this parameter. See + [models](https://docs.anthropic.com/en/docs/models-overview) for details. + + messages: Input messages. + + Our models are trained to operate on alternating `user` and `assistant` + conversational turns. When creating a new `Message`, you specify the prior + conversational turns with the `messages` parameter, and the model then generates + the next `Message` in the conversation. + + Each input message must be an object with a `role` and `content`. You can + specify a single `user`-role message, or you can include multiple `user` and + `assistant` messages. The first message must always use the `user` role. + + If the final message uses the `assistant` role, the response content will + continue immediately from the content in that message. This can be used to + constrain part of the model's response. + + Example with a single `user` message: + + ```json + [{ "role": "user", "content": "Hello, Claude" }] + ``` + + Example with multiple conversational turns: + + ```json + [ + { "role": "user", "content": "Hello there." }, + { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, + { "role": "user", "content": "Can you explain LLMs in plain English?" } + ] + ``` + + Example with a partially-filled response from Claude: + + ```json + [ + { + "role": "user", + "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" + }, + { "role": "assistant", "content": "The best answer is (" } + ] + ``` + + Each input message `content` may be either a single `string` or an array of + content blocks, where each block has a specific `type`. Using a `string` for + `content` is shorthand for an array of one content block of type `"text"`. The + following input messages are equivalent: + + ```json + { "role": "user", "content": "Hello, Claude" } + ``` + + ```json + { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } + ``` + + Starting with Claude 3 models, you can also send image content blocks: + + ```json + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg..." + } + }, + { "type": "text", "text": "What is in this image?" } + ] + } + ``` + + We currently support the `base64` source type for images, and the `image/jpeg`, + `image/png`, `image/gif`, and `image/webp` media types. + + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. 
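As a Python counterpart to the JSON snippets above, a minimal sketch of the same image request against this beta resource might look as follows; the file name is hypothetical and the block shapes simply mirror the examples in this docstring:

```python
# Sketch assuming a local JPEG file named photo.jpg (hypothetical).
import base64

from anthropic import Anthropic

client = Anthropic()

with open("photo.jpg", "rb") as f:
    image_data = base64.standard_b64encode(f.read()).decode("utf-8")

message = client.beta.prompt_caching.messages.create(
    max_tokens=1024,
    model="claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/jpeg",
                        "data": image_data,
                    },
                },
                {"type": "text", "text": "What is in this image?"},
            ],
        }
    ],
)
```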
+ + Note that if you want to include a + [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use + the top-level `system` parameter — there is no `"system"` role for input + messages in the Messages API. + + model: The model that will complete your prompt.\n\nSee + [models](https://docs.anthropic.com/en/docs/models-overview) for additional + details and options. + + metadata: An object describing metadata about the request. + + stop_sequences: Custom text sequences that will cause the model to stop generating. + + Our models will normally stop when they have naturally completed their turn, + which will result in a response `stop_reason` of `"end_turn"`. + + If you want the model to stop generating when it encounters custom strings of + text, you can use the `stop_sequences` parameter. If the model encounters one of + the custom sequences, the response `stop_reason` value will be `"stop_sequence"` + and the response `stop_sequence` value will contain the matched stop sequence. + + stream: Whether to incrementally stream the response using server-sent events. + + See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for + details. + + system: System prompt. + + A system prompt is a way of providing context and instructions to Claude, such + as specifying a particular goal or role. See our + [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). + + temperature: Amount of randomness injected into the response. + + Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` + for analytical / multiple choice, and closer to `1.0` for creative and + generative tasks. + + Note that even with `temperature` of `0.0`, the results will not be fully + deterministic. + + tool_choice: How the model should use the provided tools. The model can use a specific tool, + any available tool, or decide by itself. + + tools: Definitions of tools that the model may use. + + If you include `tools` in your API request, the model may return `tool_use` + content blocks that represent the model's use of those tools. You can then run + those tools using the tool input generated by the model and then optionally + return results back to the model using `tool_result` content blocks. + + Each tool definition includes: + + - `name`: Name of the tool. + - `description`: Optional, but strongly-recommended description of the tool. + - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` + shape that the model will produce in `tool_use` output content blocks. + + For example, if you defined `tools` as: + + ```json + [ + { + "name": "get_stock_price", + "description": "Get the current stock price for a given ticker symbol.", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." 
+ } + }, + "required": ["ticker"] + } + } + ] + ``` + + And then asked the model "What's the S&P 500 at today?", the model might produce + `tool_use` content blocks in the response like this: + + ```json + [ + { + "type": "tool_use", + "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "name": "get_stock_price", + "input": { "ticker": "^GSPC" } + } + ] + ``` + + You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an + input, and return the following back to the model in a subsequent `user` + message: + + ```json + [ + { + "type": "tool_result", + "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "content": "259.75 USD" + } + ] + ``` + + Tools can be used for workflows that include running client-side tools and + functions, or more generally whenever you want the model to produce a particular + JSON structure of output. + + See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. + + top_k: Only sample from the top K options for each subsequent token. + + Used to remove "long tail" low probability responses. + [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + top_p: Use nucleus sampling. + + In nucleus sampling, we compute the cumulative distribution over all the options + for each subsequent token in decreasing probability order and cut it off once it + reaches a particular probability specified by `top_p`. You should either alter + `temperature` or `top_p`, but not both. + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + max_tokens: int, + messages: Iterable[PromptCachingBetaMessageParam], + model: ModelParam, + stream: Literal[True], + metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, + stop_sequences: List[str] | NotGiven = NOT_GIVEN, + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, + top_k: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[RawPromptCachingBetaMessageStreamEvent]: + """ + Create a Message. + + Send a structured list of input messages with text and/or image content, and the + model will generate the next message in the conversation. + + The Messages API can be used for either single queries or stateless multi-turn + conversations. + + Args: + max_tokens: The maximum number of tokens to generate before stopping. + + Note that our models may stop _before_ reaching this maximum. This parameter + only specifies the absolute maximum number of tokens to generate. 
+ + Different models have different maximum values for this parameter. See + [models](https://docs.anthropic.com/en/docs/models-overview) for details. + + messages: Input messages. + + Our models are trained to operate on alternating `user` and `assistant` + conversational turns. When creating a new `Message`, you specify the prior + conversational turns with the `messages` parameter, and the model then generates + the next `Message` in the conversation. + + Each input message must be an object with a `role` and `content`. You can + specify a single `user`-role message, or you can include multiple `user` and + `assistant` messages. The first message must always use the `user` role. + + If the final message uses the `assistant` role, the response content will + continue immediately from the content in that message. This can be used to + constrain part of the model's response. + + Example with a single `user` message: + + ```json + [{ "role": "user", "content": "Hello, Claude" }] + ``` + + Example with multiple conversational turns: + + ```json + [ + { "role": "user", "content": "Hello there." }, + { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, + { "role": "user", "content": "Can you explain LLMs in plain English?" } + ] + ``` + + Example with a partially-filled response from Claude: + + ```json + [ + { + "role": "user", + "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" + }, + { "role": "assistant", "content": "The best answer is (" } + ] + ``` + + Each input message `content` may be either a single `string` or an array of + content blocks, where each block has a specific `type`. Using a `string` for + `content` is shorthand for an array of one content block of type `"text"`. The + following input messages are equivalent: + + ```json + { "role": "user", "content": "Hello, Claude" } + ``` + + ```json + { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } + ``` + + Starting with Claude 3 models, you can also send image content blocks: + + ```json + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg..." + } + }, + { "type": "text", "text": "What is in this image?" } + ] + } + ``` + + We currently support the `base64` source type for images, and the `image/jpeg`, + `image/png`, `image/gif`, and `image/webp` media types. + + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. + + Note that if you want to include a + [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use + the top-level `system` parameter — there is no `"system"` role for input + messages in the Messages API. + + model: The model that will complete your prompt.\n\nSee + [models](https://docs.anthropic.com/en/docs/models-overview) for additional + details and options. + + stream: Whether to incrementally stream the response using server-sent events. + + See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for + details. + + metadata: An object describing metadata about the request. + + stop_sequences: Custom text sequences that will cause the model to stop generating. + + Our models will normally stop when they have naturally completed their turn, + which will result in a response `stop_reason` of `"end_turn"`. + + If you want the model to stop generating when it encounters custom strings of + text, you can use the `stop_sequences` parameter. 
If the model encounters one of + the custom sequences, the response `stop_reason` value will be `"stop_sequence"` + and the response `stop_sequence` value will contain the matched stop sequence. + + system: System prompt. + + A system prompt is a way of providing context and instructions to Claude, such + as specifying a particular goal or role. See our + [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). + + temperature: Amount of randomness injected into the response. + + Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` + for analytical / multiple choice, and closer to `1.0` for creative and + generative tasks. + + Note that even with `temperature` of `0.0`, the results will not be fully + deterministic. + + tool_choice: How the model should use the provided tools. The model can use a specific tool, + any available tool, or decide by itself. + + tools: Definitions of tools that the model may use. + + If you include `tools` in your API request, the model may return `tool_use` + content blocks that represent the model's use of those tools. You can then run + those tools using the tool input generated by the model and then optionally + return results back to the model using `tool_result` content blocks. + + Each tool definition includes: + + - `name`: Name of the tool. + - `description`: Optional, but strongly-recommended description of the tool. + - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` + shape that the model will produce in `tool_use` output content blocks. + + For example, if you defined `tools` as: + + ```json + [ + { + "name": "get_stock_price", + "description": "Get the current stock price for a given ticker symbol.", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." + } + }, + "required": ["ticker"] + } + } + ] + ``` + + And then asked the model "What's the S&P 500 at today?", the model might produce + `tool_use` content blocks in the response like this: + + ```json + [ + { + "type": "tool_use", + "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "name": "get_stock_price", + "input": { "ticker": "^GSPC" } + } + ] + ``` + + You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an + input, and return the following back to the model in a subsequent `user` + message: + + ```json + [ + { + "type": "tool_result", + "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "content": "259.75 USD" + } + ] + ``` + + Tools can be used for workflows that include running client-side tools and + functions, or more generally whenever you want the model to produce a particular + JSON structure of output. + + See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. + + top_k: Only sample from the top K options for each subsequent token. + + Used to remove "long tail" low probability responses. + [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + top_p: Use nucleus sampling. + + In nucleus sampling, we compute the cumulative distribution over all the options + for each subsequent token in decreasing probability order and cut it off once it + reaches a particular probability specified by `top_p`. You should either alter + `temperature` or `top_p`, but not both. + + Recommended for advanced use cases only. 
You usually only need to use + `temperature`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + max_tokens: int, + messages: Iterable[PromptCachingBetaMessageParam], + model: ModelParam, + stream: bool, + metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, + stop_sequences: List[str] | NotGiven = NOT_GIVEN, + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, + top_k: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PromptCachingBetaMessage | Stream[RawPromptCachingBetaMessageStreamEvent]: + """ + Create a Message. + + Send a structured list of input messages with text and/or image content, and the + model will generate the next message in the conversation. + + The Messages API can be used for either single queries or stateless multi-turn + conversations. + + Args: + max_tokens: The maximum number of tokens to generate before stopping. + + Note that our models may stop _before_ reaching this maximum. This parameter + only specifies the absolute maximum number of tokens to generate. + + Different models have different maximum values for this parameter. See + [models](https://docs.anthropic.com/en/docs/models-overview) for details. + + messages: Input messages. + + Our models are trained to operate on alternating `user` and `assistant` + conversational turns. When creating a new `Message`, you specify the prior + conversational turns with the `messages` parameter, and the model then generates + the next `Message` in the conversation. + + Each input message must be an object with a `role` and `content`. You can + specify a single `user`-role message, or you can include multiple `user` and + `assistant` messages. The first message must always use the `user` role. + + If the final message uses the `assistant` role, the response content will + continue immediately from the content in that message. This can be used to + constrain part of the model's response. + + Example with a single `user` message: + + ```json + [{ "role": "user", "content": "Hello, Claude" }] + ``` + + Example with multiple conversational turns: + + ```json + [ + { "role": "user", "content": "Hello there." }, + { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, + { "role": "user", "content": "Can you explain LLMs in plain English?" } + ] + ``` + + Example with a partially-filled response from Claude: + + ```json + [ + { + "role": "user", + "content": "What's the Greek name for Sun? 
(A) Sol (B) Helios (C) Sun" + }, + { "role": "assistant", "content": "The best answer is (" } + ] + ``` + + Each input message `content` may be either a single `string` or an array of + content blocks, where each block has a specific `type`. Using a `string` for + `content` is shorthand for an array of one content block of type `"text"`. The + following input messages are equivalent: + + ```json + { "role": "user", "content": "Hello, Claude" } + ``` + + ```json + { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } + ``` + + Starting with Claude 3 models, you can also send image content blocks: + + ```json + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg..." + } + }, + { "type": "text", "text": "What is in this image?" } + ] + } + ``` + + We currently support the `base64` source type for images, and the `image/jpeg`, + `image/png`, `image/gif`, and `image/webp` media types. + + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. + + Note that if you want to include a + [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use + the top-level `system` parameter — there is no `"system"` role for input + messages in the Messages API. + + model: The model that will complete your prompt.\n\nSee + [models](https://docs.anthropic.com/en/docs/models-overview) for additional + details and options. + + stream: Whether to incrementally stream the response using server-sent events. + + See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for + details. + + metadata: An object describing metadata about the request. + + stop_sequences: Custom text sequences that will cause the model to stop generating. + + Our models will normally stop when they have naturally completed their turn, + which will result in a response `stop_reason` of `"end_turn"`. + + If you want the model to stop generating when it encounters custom strings of + text, you can use the `stop_sequences` parameter. If the model encounters one of + the custom sequences, the response `stop_reason` value will be `"stop_sequence"` + and the response `stop_sequence` value will contain the matched stop sequence. + + system: System prompt. + + A system prompt is a way of providing context and instructions to Claude, such + as specifying a particular goal or role. See our + [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). + + temperature: Amount of randomness injected into the response. + + Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` + for analytical / multiple choice, and closer to `1.0` for creative and + generative tasks. + + Note that even with `temperature` of `0.0`, the results will not be fully + deterministic. + + tool_choice: How the model should use the provided tools. The model can use a specific tool, + any available tool, or decide by itself. + + tools: Definitions of tools that the model may use. + + If you include `tools` in your API request, the model may return `tool_use` + content blocks that represent the model's use of those tools. You can then run + those tools using the tool input generated by the model and then optionally + return results back to the model using `tool_result` content blocks. + + Each tool definition includes: + + - `name`: Name of the tool. + - `description`: Optional, but strongly-recommended description of the tool. 
+ - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` + shape that the model will produce in `tool_use` output content blocks. + + For example, if you defined `tools` as: + + ```json + [ + { + "name": "get_stock_price", + "description": "Get the current stock price for a given ticker symbol.", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." + } + }, + "required": ["ticker"] + } + } + ] + ``` + + And then asked the model "What's the S&P 500 at today?", the model might produce + `tool_use` content blocks in the response like this: + + ```json + [ + { + "type": "tool_use", + "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "name": "get_stock_price", + "input": { "ticker": "^GSPC" } + } + ] + ``` + + You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an + input, and return the following back to the model in a subsequent `user` + message: + + ```json + [ + { + "type": "tool_result", + "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "content": "259.75 USD" + } + ] + ``` + + Tools can be used for workflows that include running client-side tools and + functions, or more generally whenever you want the model to produce a particular + JSON structure of output. + + See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. + + top_k: Only sample from the top K options for each subsequent token. + + Used to remove "long tail" low probability responses. + [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + top_p: Use nucleus sampling. + + In nucleus sampling, we compute the cumulative distribution over all the options + for each subsequent token in decreasing probability order and cut it off once it + reaches a particular probability specified by `top_p`. You should either alter + `temperature` or `top_p`, but not both. + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"]) + def create( + self, + *, + max_tokens: int, + messages: Iterable[PromptCachingBetaMessageParam], + model: ModelParam, + metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, + stop_sequences: List[str] | NotGiven = NOT_GIVEN, + stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, + top_k: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PromptCachingBetaMessage | Stream[RawPromptCachingBetaMessageStreamEvent]: + if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT: + timeout = 600 + extra_headers = {"anthropic-beta": "prompt-caching-2024-07-31", **(extra_headers or {})} + return self._post( + "/v1/messages?beta=prompt_caching", + body=maybe_transform( + { + "max_tokens": max_tokens, + "messages": messages, + "model": model, + "metadata": metadata, + "stop_sequences": stop_sequences, + "stream": stream, + "system": system, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_k": top_k, + "top_p": top_p, + }, + message_create_params.MessageCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=PromptCachingBetaMessage, + stream=stream or False, + stream_cls=Stream[RawPromptCachingBetaMessageStreamEvent], + ) + + +class AsyncMessages(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncMessagesWithRawResponse: + return AsyncMessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self) + + @overload + async def create( + self, + *, + max_tokens: int, + messages: Iterable[PromptCachingBetaMessageParam], + model: ModelParam, + metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, + stop_sequences: List[str] | NotGiven = NOT_GIVEN, + stream: Literal[False] | NotGiven = NOT_GIVEN, + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, + top_k: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PromptCachingBetaMessage: + """ + Create a Message. + + Send a structured list of input messages with text and/or image content, and the + model will generate the next message in the conversation. + + The Messages API can be used for either single queries or stateless multi-turn + conversations. + + Args: + max_tokens: The maximum number of tokens to generate before stopping. + + Note that our models may stop _before_ reaching this maximum. This parameter + only specifies the absolute maximum number of tokens to generate. + + Different models have different maximum values for this parameter. See + [models](https://docs.anthropic.com/en/docs/models-overview) for details. + + messages: Input messages. + + Our models are trained to operate on alternating `user` and `assistant` + conversational turns. When creating a new `Message`, you specify the prior + conversational turns with the `messages` parameter, and the model then generates + the next `Message` in the conversation. 
+ + Each input message must be an object with a `role` and `content`. You can + specify a single `user`-role message, or you can include multiple `user` and + `assistant` messages. The first message must always use the `user` role. + + If the final message uses the `assistant` role, the response content will + continue immediately from the content in that message. This can be used to + constrain part of the model's response. + + Example with a single `user` message: + + ```json + [{ "role": "user", "content": "Hello, Claude" }] + ``` + + Example with multiple conversational turns: + + ```json + [ + { "role": "user", "content": "Hello there." }, + { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, + { "role": "user", "content": "Can you explain LLMs in plain English?" } + ] + ``` + + Example with a partially-filled response from Claude: + + ```json + [ + { + "role": "user", + "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" + }, + { "role": "assistant", "content": "The best answer is (" } + ] + ``` + + Each input message `content` may be either a single `string` or an array of + content blocks, where each block has a specific `type`. Using a `string` for + `content` is shorthand for an array of one content block of type `"text"`. The + following input messages are equivalent: + + ```json + { "role": "user", "content": "Hello, Claude" } + ``` + + ```json + { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } + ``` + + Starting with Claude 3 models, you can also send image content blocks: + + ```json + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg..." + } + }, + { "type": "text", "text": "What is in this image?" } + ] + } + ``` + + We currently support the `base64` source type for images, and the `image/jpeg`, + `image/png`, `image/gif`, and `image/webp` media types. + + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. + + Note that if you want to include a + [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use + the top-level `system` parameter — there is no `"system"` role for input + messages in the Messages API. + + model: The model that will complete your prompt.\n\nSee + [models](https://docs.anthropic.com/en/docs/models-overview) for additional + details and options. + + metadata: An object describing metadata about the request. + + stop_sequences: Custom text sequences that will cause the model to stop generating. + + Our models will normally stop when they have naturally completed their turn, + which will result in a response `stop_reason` of `"end_turn"`. + + If you want the model to stop generating when it encounters custom strings of + text, you can use the `stop_sequences` parameter. If the model encounters one of + the custom sequences, the response `stop_reason` value will be `"stop_sequence"` + and the response `stop_sequence` value will contain the matched stop sequence. + + stream: Whether to incrementally stream the response using server-sent events. + + See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for + details. + + system: System prompt. + + A system prompt is a way of providing context and instructions to Claude, such + as specifying a particular goal or role. See our + [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). 
+ + temperature: Amount of randomness injected into the response. + + Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` + for analytical / multiple choice, and closer to `1.0` for creative and + generative tasks. + + Note that even with `temperature` of `0.0`, the results will not be fully + deterministic. + + tool_choice: How the model should use the provided tools. The model can use a specific tool, + any available tool, or decide by itself. + + tools: Definitions of tools that the model may use. + + If you include `tools` in your API request, the model may return `tool_use` + content blocks that represent the model's use of those tools. You can then run + those tools using the tool input generated by the model and then optionally + return results back to the model using `tool_result` content blocks. + + Each tool definition includes: + + - `name`: Name of the tool. + - `description`: Optional, but strongly-recommended description of the tool. + - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` + shape that the model will produce in `tool_use` output content blocks. + + For example, if you defined `tools` as: + + ```json + [ + { + "name": "get_stock_price", + "description": "Get the current stock price for a given ticker symbol.", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." + } + }, + "required": ["ticker"] + } + } + ] + ``` + + And then asked the model "What's the S&P 500 at today?", the model might produce + `tool_use` content blocks in the response like this: + + ```json + [ + { + "type": "tool_use", + "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "name": "get_stock_price", + "input": { "ticker": "^GSPC" } + } + ] + ``` + + You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an + input, and return the following back to the model in a subsequent `user` + message: + + ```json + [ + { + "type": "tool_result", + "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "content": "259.75 USD" + } + ] + ``` + + Tools can be used for workflows that include running client-side tools and + functions, or more generally whenever you want the model to produce a particular + JSON structure of output. + + See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. + + top_k: Only sample from the top K options for each subsequent token. + + Used to remove "long tail" low probability responses. + [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + top_p: Use nucleus sampling. + + In nucleus sampling, we compute the cumulative distribution over all the options + for each subsequent token in decreasing probability order and cut it off once it + reaches a particular probability specified by `top_p`. You should either alter + `temperature` or `top_p`, but not both. + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
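As a quick aside between these overloads, here is a minimal usage sketch of the non-streaming async variant above. It is not part of this patch: the client construction, API key handling, and model name are assumptions borrowed from the tests added later in this diff.

```python
# Hedged sketch: async, non-streaming call through the prompt caching beta
# resource. Assumes ANTHROPIC_API_KEY is set in the environment and that this
# patch is installed; the model name is copied from the tests in this patch.
import asyncio

from anthropic import AsyncAnthropic


async def main() -> None:
    client = AsyncAnthropic()
    message = await client.beta.prompt_caching.messages.create(
        max_tokens=1024,
        # A `cache_control` block on the system prompt marks it for reuse
        # under the "prompt-caching-2024-07-31" beta.
        system=[
            {
                "type": "text",
                "text": "You are a terse assistant.",
                "cache_control": {"type": "ephemeral"},
            }
        ],
        messages=[{"role": "user", "content": "Hello, world"}],
        model="claude-3-5-sonnet-20240620",
    )
    print(message.content)


asyncio.run(main())
```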
+ + @overload + async def create( + self, + *, + max_tokens: int, + messages: Iterable[PromptCachingBetaMessageParam], + model: ModelParam, + stream: Literal[True], + metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, + stop_sequences: List[str] | NotGiven = NOT_GIVEN, + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, + top_k: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[RawPromptCachingBetaMessageStreamEvent]: + """ + Create a Message. + + Send a structured list of input messages with text and/or image content, and the + model will generate the next message in the conversation. + + The Messages API can be used for either single queries or stateless multi-turn + conversations. + + Args: + max_tokens: The maximum number of tokens to generate before stopping. + + Note that our models may stop _before_ reaching this maximum. This parameter + only specifies the absolute maximum number of tokens to generate. + + Different models have different maximum values for this parameter. See + [models](https://docs.anthropic.com/en/docs/models-overview) for details. + + messages: Input messages. + + Our models are trained to operate on alternating `user` and `assistant` + conversational turns. When creating a new `Message`, you specify the prior + conversational turns with the `messages` parameter, and the model then generates + the next `Message` in the conversation. + + Each input message must be an object with a `role` and `content`. You can + specify a single `user`-role message, or you can include multiple `user` and + `assistant` messages. The first message must always use the `user` role. + + If the final message uses the `assistant` role, the response content will + continue immediately from the content in that message. This can be used to + constrain part of the model's response. + + Example with a single `user` message: + + ```json + [{ "role": "user", "content": "Hello, Claude" }] + ``` + + Example with multiple conversational turns: + + ```json + [ + { "role": "user", "content": "Hello there." }, + { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, + { "role": "user", "content": "Can you explain LLMs in plain English?" } + ] + ``` + + Example with a partially-filled response from Claude: + + ```json + [ + { + "role": "user", + "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" + }, + { "role": "assistant", "content": "The best answer is (" } + ] + ``` + + Each input message `content` may be either a single `string` or an array of + content blocks, where each block has a specific `type`. Using a `string` for + `content` is shorthand for an array of one content block of type `"text"`. 
The + following input messages are equivalent: + + ```json + { "role": "user", "content": "Hello, Claude" } + ``` + + ```json + { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } + ``` + + Starting with Claude 3 models, you can also send image content blocks: + + ```json + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg..." + } + }, + { "type": "text", "text": "What is in this image?" } + ] + } + ``` + + We currently support the `base64` source type for images, and the `image/jpeg`, + `image/png`, `image/gif`, and `image/webp` media types. + + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. + + Note that if you want to include a + [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use + the top-level `system` parameter — there is no `"system"` role for input + messages in the Messages API. + + model: The model that will complete your prompt.\n\nSee + [models](https://docs.anthropic.com/en/docs/models-overview) for additional + details and options. + + stream: Whether to incrementally stream the response using server-sent events. + + See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for + details. + + metadata: An object describing metadata about the request. + + stop_sequences: Custom text sequences that will cause the model to stop generating. + + Our models will normally stop when they have naturally completed their turn, + which will result in a response `stop_reason` of `"end_turn"`. + + If you want the model to stop generating when it encounters custom strings of + text, you can use the `stop_sequences` parameter. If the model encounters one of + the custom sequences, the response `stop_reason` value will be `"stop_sequence"` + and the response `stop_sequence` value will contain the matched stop sequence. + + system: System prompt. + + A system prompt is a way of providing context and instructions to Claude, such + as specifying a particular goal or role. See our + [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). + + temperature: Amount of randomness injected into the response. + + Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` + for analytical / multiple choice, and closer to `1.0` for creative and + generative tasks. + + Note that even with `temperature` of `0.0`, the results will not be fully + deterministic. + + tool_choice: How the model should use the provided tools. The model can use a specific tool, + any available tool, or decide by itself. + + tools: Definitions of tools that the model may use. + + If you include `tools` in your API request, the model may return `tool_use` + content blocks that represent the model's use of those tools. You can then run + those tools using the tool input generated by the model and then optionally + return results back to the model using `tool_result` content blocks. + + Each tool definition includes: + + - `name`: Name of the tool. + - `description`: Optional, but strongly-recommended description of the tool. + - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` + shape that the model will produce in `tool_use` output content blocks. 
+ + For example, if you defined `tools` as: + + ```json + [ + { + "name": "get_stock_price", + "description": "Get the current stock price for a given ticker symbol.", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." + } + }, + "required": ["ticker"] + } + } + ] + ``` + + And then asked the model "What's the S&P 500 at today?", the model might produce + `tool_use` content blocks in the response like this: + + ```json + [ + { + "type": "tool_use", + "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "name": "get_stock_price", + "input": { "ticker": "^GSPC" } + } + ] + ``` + + You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an + input, and return the following back to the model in a subsequent `user` + message: + + ```json + [ + { + "type": "tool_result", + "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "content": "259.75 USD" + } + ] + ``` + + Tools can be used for workflows that include running client-side tools and + functions, or more generally whenever you want the model to produce a particular + JSON structure of output. + + See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. + + top_k: Only sample from the top K options for each subsequent token. + + Used to remove "long tail" low probability responses. + [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + top_p: Use nucleus sampling. + + In nucleus sampling, we compute the cumulative distribution over all the options + for each subsequent token in decreasing probability order and cut it off once it + reaches a particular probability specified by `top_p`. You should either alter + `temperature` or `top_p`, but not both. + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + max_tokens: int, + messages: Iterable[PromptCachingBetaMessageParam], + model: ModelParam, + stream: bool, + metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, + stop_sequences: List[str] | NotGiven = NOT_GIVEN, + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, + top_k: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PromptCachingBetaMessage | AsyncStream[RawPromptCachingBetaMessageStreamEvent]: + """ + Create a Message. 
+ + Send a structured list of input messages with text and/or image content, and the + model will generate the next message in the conversation. + + The Messages API can be used for either single queries or stateless multi-turn + conversations. + + Args: + max_tokens: The maximum number of tokens to generate before stopping. + + Note that our models may stop _before_ reaching this maximum. This parameter + only specifies the absolute maximum number of tokens to generate. + + Different models have different maximum values for this parameter. See + [models](https://docs.anthropic.com/en/docs/models-overview) for details. + + messages: Input messages. + + Our models are trained to operate on alternating `user` and `assistant` + conversational turns. When creating a new `Message`, you specify the prior + conversational turns with the `messages` parameter, and the model then generates + the next `Message` in the conversation. + + Each input message must be an object with a `role` and `content`. You can + specify a single `user`-role message, or you can include multiple `user` and + `assistant` messages. The first message must always use the `user` role. + + If the final message uses the `assistant` role, the response content will + continue immediately from the content in that message. This can be used to + constrain part of the model's response. + + Example with a single `user` message: + + ```json + [{ "role": "user", "content": "Hello, Claude" }] + ``` + + Example with multiple conversational turns: + + ```json + [ + { "role": "user", "content": "Hello there." }, + { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, + { "role": "user", "content": "Can you explain LLMs in plain English?" } + ] + ``` + + Example with a partially-filled response from Claude: + + ```json + [ + { + "role": "user", + "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" + }, + { "role": "assistant", "content": "The best answer is (" } + ] + ``` + + Each input message `content` may be either a single `string` or an array of + content blocks, where each block has a specific `type`. Using a `string` for + `content` is shorthand for an array of one content block of type `"text"`. The + following input messages are equivalent: + + ```json + { "role": "user", "content": "Hello, Claude" } + ``` + + ```json + { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } + ``` + + Starting with Claude 3 models, you can also send image content blocks: + + ```json + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg..." + } + }, + { "type": "text", "text": "What is in this image?" } + ] + } + ``` + + We currently support the `base64` source type for images, and the `image/jpeg`, + `image/png`, `image/gif`, and `image/webp` media types. + + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. + + Note that if you want to include a + [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use + the top-level `system` parameter — there is no `"system"` role for input + messages in the Messages API. + + model: The model that will complete your prompt.\n\nSee + [models](https://docs.anthropic.com/en/docs/models-overview) for additional + details and options. + + stream: Whether to incrementally stream the response using server-sent events. 
+ + See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for + details. + + metadata: An object describing metadata about the request. + + stop_sequences: Custom text sequences that will cause the model to stop generating. + + Our models will normally stop when they have naturally completed their turn, + which will result in a response `stop_reason` of `"end_turn"`. + + If you want the model to stop generating when it encounters custom strings of + text, you can use the `stop_sequences` parameter. If the model encounters one of + the custom sequences, the response `stop_reason` value will be `"stop_sequence"` + and the response `stop_sequence` value will contain the matched stop sequence. + + system: System prompt. + + A system prompt is a way of providing context and instructions to Claude, such + as specifying a particular goal or role. See our + [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). + + temperature: Amount of randomness injected into the response. + + Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` + for analytical / multiple choice, and closer to `1.0` for creative and + generative tasks. + + Note that even with `temperature` of `0.0`, the results will not be fully + deterministic. + + tool_choice: How the model should use the provided tools. The model can use a specific tool, + any available tool, or decide by itself. + + tools: Definitions of tools that the model may use. + + If you include `tools` in your API request, the model may return `tool_use` + content blocks that represent the model's use of those tools. You can then run + those tools using the tool input generated by the model and then optionally + return results back to the model using `tool_result` content blocks. + + Each tool definition includes: + + - `name`: Name of the tool. + - `description`: Optional, but strongly-recommended description of the tool. + - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` + shape that the model will produce in `tool_use` output content blocks. + + For example, if you defined `tools` as: + + ```json + [ + { + "name": "get_stock_price", + "description": "Get the current stock price for a given ticker symbol.", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." + } + }, + "required": ["ticker"] + } + } + ] + ``` + + And then asked the model "What's the S&P 500 at today?", the model might produce + `tool_use` content blocks in the response like this: + + ```json + [ + { + "type": "tool_use", + "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "name": "get_stock_price", + "input": { "ticker": "^GSPC" } + } + ] + ``` + + You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an + input, and return the following back to the model in a subsequent `user` + message: + + ```json + [ + { + "type": "tool_result", + "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "content": "259.75 USD" + } + ] + ``` + + Tools can be used for workflows that include running client-side tools and + functions, or more generally whenever you want the model to produce a particular + JSON structure of output. + + See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. + + top_k: Only sample from the top K options for each subsequent token. + + Used to remove "long tail" low probability responses. 
+ [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + top_p: Use nucleus sampling. + + In nucleus sampling, we compute the cumulative distribution over all the options + for each subsequent token in decreasing probability order and cut it off once it + reaches a particular probability specified by `top_p`. You should either alter + `temperature` or `top_p`, but not both. + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"]) + async def create( + self, + *, + max_tokens: int, + messages: Iterable[PromptCachingBetaMessageParam], + model: ModelParam, + metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, + stop_sequences: List[str] | NotGiven = NOT_GIVEN, + stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, + top_k: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PromptCachingBetaMessage | AsyncStream[RawPromptCachingBetaMessageStreamEvent]: + if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT: + timeout = 600 + extra_headers = {"anthropic-beta": "prompt-caching-2024-07-31", **(extra_headers or {})} + return await self._post( + "/v1/messages?beta=prompt_caching", + body=await async_maybe_transform( + { + "max_tokens": max_tokens, + "messages": messages, + "model": model, + "metadata": metadata, + "stop_sequences": stop_sequences, + "stream": stream, + "system": system, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_k": top_k, + "top_p": top_p, + }, + message_create_params.MessageCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=PromptCachingBetaMessage, + stream=stream or False, + stream_cls=AsyncStream[RawPromptCachingBetaMessageStreamEvent], + ) + + +class MessagesWithRawResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.create = _legacy_response.to_raw_response_wrapper( + messages.create, + ) + + +class AsyncMessagesWithRawResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.create = _legacy_response.async_to_raw_response_wrapper( + messages.create, + ) + + +class MessagesWithStreamingResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.create = to_streamed_response_wrapper( + messages.create, + ) + + +class AsyncMessagesWithStreamingResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.create = async_to_streamed_response_wrapper( + messages.create, + ) diff --git a/src/anthropic/resources/beta/prompt_caching/prompt_caching.py b/src/anthropic/resources/beta/prompt_caching/prompt_caching.py new file mode 100644 index 00000000..f15180e1 --- /dev/null +++ b/src/anthropic/resources/beta/prompt_caching/prompt_caching.py @@ -0,0 +1,80 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
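Before the `prompt_caching.py` resource module that follows, a brief sketch of how the `with_raw_response` and `with_streaming_response` wrappers defined above are used. This mirrors the generated tests later in this patch; the client setup is an assumption, not part of the diff.

```python
# Hedged sketch of the response-wrapper entry points on the beta resource.
from anthropic import Anthropic

client = Anthropic()  # assumes ANTHROPIC_API_KEY in the environment

# Raw response wrapper: returns the HTTP response; parse() yields the typed message.
response = client.beta.prompt_caching.messages.with_raw_response.create(
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello, world"}],
    model="claude-3-5-sonnet-20240620",
)
message = response.parse()
print(message.usage)

# Streaming response wrapper: a context manager over the same call.
with client.beta.prompt_caching.messages.with_streaming_response.create(
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello, world"}],
    model="claude-3-5-sonnet-20240620",
) as streamed:
    message = streamed.parse()
    print(message.content)
```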
+ +from __future__ import annotations + +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource + +__all__ = ["PromptCaching", "AsyncPromptCaching"] + + +class PromptCaching(SyncAPIResource): + @cached_property + def messages(self) -> Messages: + return Messages(self._client) + + @cached_property + def with_raw_response(self) -> PromptCachingWithRawResponse: + return PromptCachingWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> PromptCachingWithStreamingResponse: + return PromptCachingWithStreamingResponse(self) + + +class AsyncPromptCaching(AsyncAPIResource): + @cached_property + def messages(self) -> AsyncMessages: + return AsyncMessages(self._client) + + @cached_property + def with_raw_response(self) -> AsyncPromptCachingWithRawResponse: + return AsyncPromptCachingWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncPromptCachingWithStreamingResponse: + return AsyncPromptCachingWithStreamingResponse(self) + + +class PromptCachingWithRawResponse: + def __init__(self, prompt_caching: PromptCaching) -> None: + self._prompt_caching = prompt_caching + + @cached_property + def messages(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self._prompt_caching.messages) + + +class AsyncPromptCachingWithRawResponse: + def __init__(self, prompt_caching: AsyncPromptCaching) -> None: + self._prompt_caching = prompt_caching + + @cached_property + def messages(self) -> AsyncMessagesWithRawResponse: + return AsyncMessagesWithRawResponse(self._prompt_caching.messages) + + +class PromptCachingWithStreamingResponse: + def __init__(self, prompt_caching: PromptCaching) -> None: + self._prompt_caching = prompt_caching + + @cached_property + def messages(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self._prompt_caching.messages) + + +class AsyncPromptCachingWithStreamingResponse: + def __init__(self, prompt_caching: AsyncPromptCaching) -> None: + self._prompt_caching = prompt_caching + + @cached_property + def messages(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self._prompt_caching.messages) diff --git a/src/anthropic/resources/messages.py b/src/anthropic/resources/messages.py index 8b6ab106..8f303984 100644 --- a/src/anthropic/resources/messages.py +++ b/src/anthropic/resources/messages.py @@ -162,8 +162,8 @@ def create( We currently support the `base64` source type for images, and the `image/jpeg`, `image/png`, `image/gif`, and `image/webp` media types. - See [examples](https://docs.anthropic.com/en/api/messages-examples) for more - input examples. + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. Note that if you want to include a [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use @@ -425,8 +425,8 @@ def create( We currently support the `base64` source type for images, and the `image/jpeg`, `image/png`, `image/gif`, and `image/webp` media types. - See [examples](https://docs.anthropic.com/en/api/messages-examples) for more - input examples. + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. 
Note that if you want to include a [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use @@ -688,8 +688,8 @@ def create( We currently support the `base64` source type for images, and the `image/jpeg`, `image/png`, `image/gif`, and `image/webp` media types. - See [examples](https://docs.anthropic.com/en/api/messages-examples) for more - input examples. + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. Note that if you want to include a [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use @@ -1068,8 +1068,8 @@ async def create( We currently support the `base64` source type for images, and the `image/jpeg`, `image/png`, `image/gif`, and `image/webp` media types. - See [examples](https://docs.anthropic.com/en/api/messages-examples) for more - input examples. + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. Note that if you want to include a [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use @@ -1331,8 +1331,8 @@ async def create( We currently support the `base64` source type for images, and the `image/jpeg`, `image/png`, `image/gif`, and `image/webp` media types. - See [examples](https://docs.anthropic.com/en/api/messages-examples) for more - input examples. + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. Note that if you want to include a [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use @@ -1594,8 +1594,8 @@ async def create( We currently support the `base64` source type for images, and the `image/jpeg`, `image/png`, `image/gif`, and `image/webp` media types. - See [examples](https://docs.anthropic.com/en/api/messages-examples) for more - input examples. + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. Note that if you want to include a [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use diff --git a/src/anthropic/types/beta/__init__.py b/src/anthropic/types/beta/__init__.py new file mode 100644 index 00000000..f8ee8b14 --- /dev/null +++ b/src/anthropic/types/beta/__init__.py @@ -0,0 +1,3 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations diff --git a/src/anthropic/types/beta/prompt_caching/__init__.py b/src/anthropic/types/beta/prompt_caching/__init__.py new file mode 100644 index 00000000..3b4004fc --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/__init__.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .message_create_params import MessageCreateParams as MessageCreateParams +from .prompt_caching_beta_usage import PromptCachingBetaUsage as PromptCachingBetaUsage +from .prompt_caching_beta_message import PromptCachingBetaMessage as PromptCachingBetaMessage +from .prompt_caching_beta_tool_param import PromptCachingBetaToolParam as PromptCachingBetaToolParam +from .prompt_caching_beta_message_param import PromptCachingBetaMessageParam as PromptCachingBetaMessageParam +from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam as PromptCachingBetaTextBlockParam +from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam as PromptCachingBetaImageBlockParam +from .prompt_caching_beta_tool_use_block_param import ( + PromptCachingBetaToolUseBlockParam as PromptCachingBetaToolUseBlockParam, +) +from .prompt_caching_beta_tool_result_block_param import ( + PromptCachingBetaToolResultBlockParam as PromptCachingBetaToolResultBlockParam, +) +from .raw_prompt_caching_beta_message_start_event import ( + RawPromptCachingBetaMessageStartEvent as RawPromptCachingBetaMessageStartEvent, +) +from .raw_prompt_caching_beta_message_stream_event import ( + RawPromptCachingBetaMessageStreamEvent as RawPromptCachingBetaMessageStreamEvent, +) +from .prompt_caching_beta_cache_control_ephemeral_param import ( + PromptCachingBetaCacheControlEphemeralParam as PromptCachingBetaCacheControlEphemeralParam, +) diff --git a/src/anthropic/types/beta/prompt_caching/message_create_params.py b/src/anthropic/types/beta/prompt_caching/message_create_params.py new file mode 100644 index 00000000..9baf68e3 --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/message_create_params.py @@ -0,0 +1,311 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ...model_param import ModelParam +from .prompt_caching_beta_tool_param import PromptCachingBetaToolParam +from .prompt_caching_beta_message_param import PromptCachingBetaMessageParam +from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam + +__all__ = [ + "MessageCreateParamsBase", + "Metadata", + "ToolChoice", + "ToolChoiceToolChoiceAuto", + "ToolChoiceToolChoiceAny", + "ToolChoiceToolChoiceTool", + "MessageCreateParamsNonStreaming", + "MessageCreateParamsStreaming", +] + + +class MessageCreateParamsBase(TypedDict, total=False): + max_tokens: Required[int] + """The maximum number of tokens to generate before stopping. + + Note that our models may stop _before_ reaching this maximum. This parameter + only specifies the absolute maximum number of tokens to generate. + + Different models have different maximum values for this parameter. See + [models](https://docs.anthropic.com/en/docs/models-overview) for details. + """ + + messages: Required[Iterable[PromptCachingBetaMessageParam]] + """Input messages. + + Our models are trained to operate on alternating `user` and `assistant` + conversational turns. When creating a new `Message`, you specify the prior + conversational turns with the `messages` parameter, and the model then generates + the next `Message` in the conversation. + + Each input message must be an object with a `role` and `content`. You can + specify a single `user`-role message, or you can include multiple `user` and + `assistant` messages. 
The first message must always use the `user` role. + + If the final message uses the `assistant` role, the response content will + continue immediately from the content in that message. This can be used to + constrain part of the model's response. + + Example with a single `user` message: + + ```json + [{ "role": "user", "content": "Hello, Claude" }] + ``` + + Example with multiple conversational turns: + + ```json + [ + { "role": "user", "content": "Hello there." }, + { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" }, + { "role": "user", "content": "Can you explain LLMs in plain English?" } + ] + ``` + + Example with a partially-filled response from Claude: + + ```json + [ + { + "role": "user", + "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" + }, + { "role": "assistant", "content": "The best answer is (" } + ] + ``` + + Each input message `content` may be either a single `string` or an array of + content blocks, where each block has a specific `type`. Using a `string` for + `content` is shorthand for an array of one content block of type `"text"`. The + following input messages are equivalent: + + ```json + { "role": "user", "content": "Hello, Claude" } + ``` + + ```json + { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] } + ``` + + Starting with Claude 3 models, you can also send image content blocks: + + ```json + { + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": "/9j/4AAQSkZJRg..." + } + }, + { "type": "text", "text": "What is in this image?" } + ] + } + ``` + + We currently support the `base64` source type for images, and the `image/jpeg`, + `image/png`, `image/gif`, and `image/webp` media types. + + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. + + Note that if you want to include a + [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use + the top-level `system` parameter — there is no `"system"` role for input + messages in the Messages API. + """ + + model: Required[ModelParam] + """ + The model that will complete your prompt.\n\nSee + [models](https://docs.anthropic.com/en/docs/models-overview) for additional + details and options. + """ + + metadata: Metadata + """An object describing metadata about the request.""" + + stop_sequences: List[str] + """Custom text sequences that will cause the model to stop generating. + + Our models will normally stop when they have naturally completed their turn, + which will result in a response `stop_reason` of `"end_turn"`. + + If you want the model to stop generating when it encounters custom strings of + text, you can use the `stop_sequences` parameter. If the model encounters one of + the custom sequences, the response `stop_reason` value will be `"stop_sequence"` + and the response `stop_sequence` value will contain the matched stop sequence. + """ + + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] + """System prompt. + + A system prompt is a way of providing context and instructions to Claude, such + as specifying a particular goal or role. See our + [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts). + """ + + temperature: float + """Amount of randomness injected into the response. + + Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0` + for analytical / multiple choice, and closer to `1.0` for creative and + generative tasks. 
+ + Note that even with `temperature` of `0.0`, the results will not be fully + deterministic. + """ + + tool_choice: ToolChoice + """How the model should use the provided tools. + + The model can use a specific tool, any available tool, or decide by itself. + """ + + tools: Iterable[PromptCachingBetaToolParam] + """Definitions of tools that the model may use. + + If you include `tools` in your API request, the model may return `tool_use` + content blocks that represent the model's use of those tools. You can then run + those tools using the tool input generated by the model and then optionally + return results back to the model using `tool_result` content blocks. + + Each tool definition includes: + + - `name`: Name of the tool. + - `description`: Optional, but strongly-recommended description of the tool. + - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input` + shape that the model will produce in `tool_use` output content blocks. + + For example, if you defined `tools` as: + + ```json + [ + { + "name": "get_stock_price", + "description": "Get the current stock price for a given ticker symbol.", + "input_schema": { + "type": "object", + "properties": { + "ticker": { + "type": "string", + "description": "The stock ticker symbol, e.g. AAPL for Apple Inc." + } + }, + "required": ["ticker"] + } + } + ] + ``` + + And then asked the model "What's the S&P 500 at today?", the model might produce + `tool_use` content blocks in the response like this: + + ```json + [ + { + "type": "tool_use", + "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "name": "get_stock_price", + "input": { "ticker": "^GSPC" } + } + ] + ``` + + You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an + input, and return the following back to the model in a subsequent `user` + message: + + ```json + [ + { + "type": "tool_result", + "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV", + "content": "259.75 USD" + } + ] + ``` + + Tools can be used for workflows that include running client-side tools and + functions, or more generally whenever you want the model to produce a particular + JSON structure of output. + + See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details. + """ + + top_k: int + """Only sample from the top K options for each subsequent token. + + Used to remove "long tail" low probability responses. + [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277). + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + """ + + top_p: float + """Use nucleus sampling. + + In nucleus sampling, we compute the cumulative distribution over all the options + for each subsequent token in decreasing probability order and cut it off once it + reaches a particular probability specified by `top_p`. You should either alter + `temperature` or `top_p`, but not both. + + Recommended for advanced use cases only. You usually only need to use + `temperature`. + """ + + +class Metadata(TypedDict, total=False): + user_id: Optional[str] + """An external identifier for the user who is associated with the request. + + This should be a uuid, hash value, or other opaque identifier. Anthropic may use + this id to help detect abuse. Do not include any identifying information such as + name, email address, or phone number. 
+ """ + + +class ToolChoiceToolChoiceAuto(TypedDict, total=False): + type: Required[Literal["auto"]] + + +class ToolChoiceToolChoiceAny(TypedDict, total=False): + type: Required[Literal["any"]] + + +class ToolChoiceToolChoiceTool(TypedDict, total=False): + name: Required[str] + """The name of the tool to use.""" + + type: Required[Literal["tool"]] + + +ToolChoice: TypeAlias = Union[ToolChoiceToolChoiceAuto, ToolChoiceToolChoiceAny, ToolChoiceToolChoiceTool] + + +class MessageCreateParamsNonStreaming(MessageCreateParamsBase): + stream: Literal[False] + """Whether to incrementally stream the response using server-sent events. + + See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for + details. + """ + + +class MessageCreateParamsStreaming(MessageCreateParamsBase): + stream: Required[Literal[True]] + """Whether to incrementally stream the response using server-sent events. + + See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for + details. + """ + + +MessageCreateParams = Union[MessageCreateParamsNonStreaming, MessageCreateParamsStreaming] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py new file mode 100644 index 00000000..8370b938 --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["PromptCachingBetaCacheControlEphemeralParam"] + + +class PromptCachingBetaCacheControlEphemeralParam(TypedDict, total=False): + type: Required[Literal["ephemeral"]] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py new file mode 100644 index 00000000..02dfb0bc --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
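A short sketch of the parameter shapes defined in `message_create_params.py` above, before the next module. These TypedDicts are plain dicts at runtime; the annotation only aids static checking. The values shown are illustrative assumptions echoing the docstring examples.

```python
# Hedged sketch: building request params with the TypedDicts defined above.
from anthropic.types.beta.prompt_caching.message_create_params import (
    MessageCreateParamsNonStreaming,
)

params: MessageCreateParamsNonStreaming = {
    "max_tokens": 1024,
    "model": "claude-3-5-sonnet-20240620",
    "messages": [{"role": "user", "content": "Hello, Claude"}],
    # ToolChoice is a union of three shapes:
    #   {"type": "auto"}, {"type": "any"}, {"type": "tool", "name": "..."}
    "tool_choice": {"type": "auto"},
    "stream": False,
}
print(sorted(params))
```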
+ +from __future__ import annotations + +from typing import Union, Optional +from typing_extensions import Literal, Required, Annotated, TypedDict + +from ...._types import Base64FileInput +from ...._utils import PropertyInfo +from ...._models import set_pydantic_config +from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam + +__all__ = ["PromptCachingBetaImageBlockParam", "Source"] + + +class Source(TypedDict, total=False): + data: Required[Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]] + + media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]] + + type: Required[Literal["base64"]] + + +set_pydantic_config(Source, {"arbitrary_types_allowed": True}) + + +class PromptCachingBetaImageBlockParam(TypedDict, total=False): + source: Required[Source] + + type: Required[Literal["image"]] + + cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py new file mode 100644 index 00000000..2cc49a2c --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py @@ -0,0 +1,109 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...model import Model +from ...._models import BaseModel +from ...content_block import ContentBlock +from .prompt_caching_beta_usage import PromptCachingBetaUsage + +__all__ = ["PromptCachingBetaMessage"] + + +class PromptCachingBetaMessage(BaseModel): + id: str + """Unique object identifier. + + The format and length of IDs may change over time. + """ + + content: List[ContentBlock] + """Content generated by the model. + + This is an array of content blocks, each of which has a `type` that determines + its shape. + + Example: + + ```json + [{ "type": "text", "text": "Hi, I'm Claude." }] + ``` + + If the request input `messages` ended with an `assistant` turn, then the + response `content` will continue directly from that last turn. You can use this + to constrain the model's output. + + For example, if the input `messages` were: + + ```json + [ + { + "role": "user", + "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun" + }, + { "role": "assistant", "content": "The best answer is (" } + ] + ``` + + Then the response `content` might be: + + ```json + [{ "type": "text", "text": "B)" }] + ``` + """ + + model: Model + """ + The model that will complete your prompt.\n\nSee + [models](https://docs.anthropic.com/en/docs/models-overview) for additional + details and options. + """ + + role: Literal["assistant"] + """Conversational role of the generated message. + + This will always be `"assistant"`. + """ + + stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]] = None + """The reason that we stopped. + + This may be one the following values: + + - `"end_turn"`: the model reached a natural stopping point + - `"max_tokens"`: we exceeded the requested `max_tokens` or the model's maximum + - `"stop_sequence"`: one of your provided custom `stop_sequences` was generated + - `"tool_use"`: the model invoked one or more tools + + In non-streaming mode this value is always non-null. In streaming mode, it is + null in the `message_start` event and non-null otherwise. 
+ """ + + stop_sequence: Optional[str] = None + """Which custom stop sequence was generated, if any. + + This value will be a non-null string if one of your custom stop sequences was + generated. + """ + + type: Literal["message"] + """Object type. + + For Messages, this is always `"message"`. + """ + + usage: PromptCachingBetaUsage + """Billing and rate-limit usage. + + Anthropic's API bills and rate-limits by token counts, as tokens represent the + underlying cost to our systems. + + Under the hood, the API transforms requests into a format suitable for the + model. The model's output then goes through a parsing stage before becoming an + API response. As a result, the token counts in `usage` will not match one-to-one + with the exact visible content of an API request or response. + + For example, `output_tokens` will be non-zero, even for an empty string response + from Claude. + """ diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py new file mode 100644 index 00000000..f88093e2 --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from ...content_block import ContentBlock +from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam +from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam +from .prompt_caching_beta_tool_use_block_param import PromptCachingBetaToolUseBlockParam +from .prompt_caching_beta_tool_result_block_param import PromptCachingBetaToolResultBlockParam + +__all__ = ["PromptCachingBetaMessageParam"] + + +class PromptCachingBetaMessageParam(TypedDict, total=False): + content: Required[ + Union[ + str, + Iterable[ + Union[ + PromptCachingBetaTextBlockParam, + PromptCachingBetaImageBlockParam, + PromptCachingBetaToolUseBlockParam, + PromptCachingBetaToolResultBlockParam, + ContentBlock, + ] + ], + ] + ] + + role: Required[Literal["user", "assistant"]] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py new file mode 100644 index 00000000..cbb463d2 --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam + +__all__ = ["PromptCachingBetaTextBlockParam"] + + +class PromptCachingBetaTextBlockParam(TypedDict, total=False): + text: Required[str] + + type: Required[Literal["text"]] + + cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py new file mode 100644 index 00000000..f9feb37f --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam + +__all__ = ["PromptCachingBetaToolParam", "InputSchema"] + + +class InputSchemaTyped(TypedDict, total=False): + type: Required[Literal["object"]] + + properties: Optional[object] + + +InputSchema: TypeAlias = Union[InputSchemaTyped, Dict[str, object]] + + +class PromptCachingBetaToolParam(TypedDict, total=False): + input_schema: Required[InputSchema] + """[JSON schema](https://json-schema.org/) for this tool's input. + + This defines the shape of the `input` that your tool accepts and that the model + will produce. + """ + + name: Required[str] + + cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam] + + description: str + """Description of what this tool does. + + Tool descriptions should be as detailed as possible. The more information that + the model has about what the tool is and how to use it, the better it will + perform. You can use natural language descriptions to reinforce important + aspects of the tool input JSON schema. + """ diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py new file mode 100644 index 00000000..6c1ca718 --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam +from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam +from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam + +__all__ = ["PromptCachingBetaToolResultBlockParam", "Content"] + +Content: TypeAlias = Union[PromptCachingBetaTextBlockParam, PromptCachingBetaImageBlockParam] + + +class PromptCachingBetaToolResultBlockParam(TypedDict, total=False): + tool_use_id: Required[str] + + type: Required[Literal["tool_result"]] + + cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam] + + content: Union[str, Iterable[Content]] + + is_error: bool diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py new file mode 100644 index 00000000..35ccf446 --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
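A brief sketch of the tool definition and tool result shapes above, reusing the `get_stock_price` example from the docstrings earlier in this patch. The values are illustrative assumptions.

```python
# Hedged sketch: a cacheable tool definition and a matching tool_result block.
from anthropic.types.beta.prompt_caching import (
    PromptCachingBetaToolParam,
    PromptCachingBetaToolResultBlockParam,
)

tool: PromptCachingBetaToolParam = {
    "name": "get_stock_price",
    "description": "Get the current stock price for a given ticker symbol.",
    "input_schema": {
        "type": "object",
        "properties": {"ticker": {"type": "string"}},
    },
    # Marking a large, stable tool definition as cacheable is a typical use of
    # the prompt caching beta.
    "cache_control": {"type": "ephemeral"},
}

tool_result: PromptCachingBetaToolResultBlockParam = {
    "type": "tool_result",
    "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
    "content": "259.75 USD",
}
print(tool["name"], tool_result["tool_use_id"])
```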
+ +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam + +__all__ = ["PromptCachingBetaToolUseBlockParam"] + + +class PromptCachingBetaToolUseBlockParam(TypedDict, total=False): + id: Required[str] + + input: Required[object] + + name: Required[str] + + type: Required[Literal["tool_use"]] + + cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam] diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py new file mode 100644 index 00000000..20d23004 --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ...._models import BaseModel + +__all__ = ["PromptCachingBetaUsage"] + + +class PromptCachingBetaUsage(BaseModel): + cache_creation_input_tokens: Optional[int] = None + """The number of input tokens used to create the cache entry.""" + + cache_read_input_tokens: Optional[int] = None + """The number of input tokens read from the cache.""" + + input_tokens: int + """The number of input tokens which were used.""" + + output_tokens: int + """The number of output tokens which were used.""" diff --git a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py new file mode 100644 index 00000000..9d055e22 --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .prompt_caching_beta_message import PromptCachingBetaMessage + +__all__ = ["RawPromptCachingBetaMessageStartEvent"] + + +class RawPromptCachingBetaMessageStartEvent(BaseModel): + message: PromptCachingBetaMessage + + type: Literal["message_start"] diff --git a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py new file mode 100644 index 00000000..58099baf --- /dev/null +++ b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
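A minimal sketch of reading the cache-specific counters defined on `PromptCachingBetaUsage` above after a non-streaming call. Client setup and model name are assumptions.

```python
# Hedged sketch: inspecting cache usage counters on a beta response.
from anthropic import Anthropic

client = Anthropic()
message = client.beta.prompt_caching.messages.create(
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello, world"}],
    model="claude-3-5-sonnet-20240620",
)

usage = message.usage
# cache_creation_input_tokens / cache_read_input_tokens are Optional and may
# be None when no cache_control blocks were involved in the request.
print("input:", usage.input_tokens, "output:", usage.output_tokens)
print("cache write:", usage.cache_creation_input_tokens)
print("cache read:", usage.cache_read_input_tokens)
```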
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ...._utils import PropertyInfo +from ...raw_message_stop_event import RawMessageStopEvent +from ...raw_message_delta_event import RawMessageDeltaEvent +from ...raw_content_block_stop_event import RawContentBlockStopEvent +from ...raw_content_block_delta_event import RawContentBlockDeltaEvent +from ...raw_content_block_start_event import RawContentBlockStartEvent +from .raw_prompt_caching_beta_message_start_event import RawPromptCachingBetaMessageStartEvent + +__all__ = ["RawPromptCachingBetaMessageStreamEvent"] + +RawPromptCachingBetaMessageStreamEvent: TypeAlias = Annotated[ + Union[ + RawPromptCachingBetaMessageStartEvent, + RawMessageDeltaEvent, + RawMessageStopEvent, + RawContentBlockStartEvent, + RawContentBlockDeltaEvent, + RawContentBlockStopEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/anthropic/types/message_create_params.py b/src/anthropic/types/message_create_params.py index 4b593038..76efa7f1 100644 --- a/src/anthropic/types/message_create_params.py +++ b/src/anthropic/types/message_create_params.py @@ -112,8 +112,8 @@ class MessageCreateParamsBase(TypedDict, total=False): We currently support the `base64` source type for images, and the `image/jpeg`, `image/png`, `image/gif`, and `image/webp` media types. - See [examples](https://docs.anthropic.com/en/api/messages-examples) for more - input examples. + See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for + more input examples. Note that if you want to include a [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use diff --git a/tests/api_resources/beta/__init__.py b/tests/api_resources/beta/__init__.py new file mode 100644 index 00000000..fd8019a9 --- /dev/null +++ b/tests/api_resources/beta/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/prompt_caching/__init__.py b/tests/api_resources/beta/prompt_caching/__init__.py new file mode 100644 index 00000000..fd8019a9 --- /dev/null +++ b/tests/api_resources/beta/prompt_caching/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/prompt_caching/test_messages.py b/tests/api_resources/beta/prompt_caching/test_messages.py new file mode 100644 index 00000000..44e298df --- /dev/null +++ b/tests/api_resources/beta/prompt_caching/test_messages.py @@ -0,0 +1,570 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
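Before the generated tests below, a short sketch of consuming the streaming union defined in `raw_prompt_caching_beta_message_stream_event.py` above. Each event's `type` field discriminates the union member; the non-`message_start` members are the pre-existing raw stream events.

```python
# Hedged sketch: iterating a prompt caching beta stream and dispatching on
# the discriminated `type` field.
from anthropic import Anthropic

client = Anthropic()
stream = client.beta.prompt_caching.messages.create(
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello, world"}],
    model="claude-3-5-sonnet-20240620",
    stream=True,
)
for event in stream:
    if event.type == "message_start":
        # RawPromptCachingBetaMessageStartEvent carries the initial
        # PromptCachingBetaMessage, including its usage counters.
        print(event.message.usage)
    elif event.type == "content_block_delta":
        pass  # incremental content deltas
    elif event.type == "message_stop":
        break
```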
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from anthropic import Anthropic, AsyncAnthropic +from tests.utils import assert_matches_type +from anthropic.types.beta.prompt_caching import PromptCachingBetaMessage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestMessages: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create_overload_1(self, client: Anthropic) -> None: + message = client.beta.prompt_caching.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + ) + assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) + + @parametrize + def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> None: + message = client.beta.prompt_caching.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"}, + stop_sequences=["string", "string", "string"], + stream=False, + system=[ + { + "text": "Today's date is 2024-06-01.", + "type": "text", + "cache_control": {"type": "ephemeral"}, + } + ], + temperature=1, + tool_choice={"type": "auto"}, + tools=[ + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. 
San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + ], + top_k=5, + top_p=0.7, + ) + assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) + + @parametrize + def test_raw_response_create_overload_1(self, client: Anthropic) -> None: + response = client.beta.prompt_caching.messages.with_raw_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) + + @parametrize + def test_streaming_response_create_overload_1(self, client: Anthropic) -> None: + with client.beta.prompt_caching.messages.with_streaming_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_create_overload_2(self, client: Anthropic) -> None: + message_stream = client.beta.prompt_caching.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + stream=True, + ) + message_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> None: + message_stream = client.beta.prompt_caching.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + stream=True, + metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"}, + stop_sequences=["string", "string", "string"], + system=[ + { + "text": "Today's date is 2024-06-01.", + "type": "text", + "cache_control": {"type": "ephemeral"}, + } + ], + temperature=1, + tool_choice={"type": "auto"}, + tools=[ + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. 
San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + ], + top_k=5, + top_p=0.7, + ) + message_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: Anthropic) -> None: + response = client.beta.prompt_caching.messages.with_raw_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: Anthropic) -> None: + with client.beta.prompt_caching.messages.with_streaming_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + +class TestAsyncMessages: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create_overload_1(self, async_client: AsyncAnthropic) -> None: + message = await async_client.beta.prompt_caching.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + ) + assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) + + @parametrize + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncAnthropic) -> None: + message = await async_client.beta.prompt_caching.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"}, + stop_sequences=["string", "string", "string"], + stream=False, + system=[ + { + "text": "Today's date is 2024-06-01.", + "type": "text", + "cache_control": {"type": "ephemeral"}, + } + ], + temperature=1, + tool_choice={"type": "auto"}, + tools=[ + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. 
San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + ], + top_k=5, + top_p=0.7, + ) + assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) + + @parametrize + async def test_raw_response_create_overload_1(self, async_client: AsyncAnthropic) -> None: + response = await async_client.beta.prompt_caching.messages.with_raw_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) + + @parametrize + async def test_streaming_response_create_overload_1(self, async_client: AsyncAnthropic) -> None: + async with async_client.beta.prompt_caching.messages.with_streaming_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(PromptCachingBetaMessage, message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncAnthropic) -> None: + message_stream = await async_client.beta.prompt_caching.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + stream=True, + ) + await message_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncAnthropic) -> None: + message_stream = await async_client.beta.prompt_caching.messages.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + stream=True, + metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"}, + stop_sequences=["string", "string", "string"], + system=[ + { + "text": "Today's date is 2024-06-01.", + "type": "text", + "cache_control": {"type": "ephemeral"}, + } + ], + temperature=1, + tool_choice={"type": "auto"}, + tools=[ + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + { + "input_schema": { + "type": "object", + "properties": { + "location": { + "description": "The city and state, e.g. 
San Francisco, CA", + "type": "string", + }, + "unit": { + "description": "Unit for the output - one of (celsius, fahrenheit)", + "type": "string", + }, + }, + }, + "name": "x", + "cache_control": {"type": "ephemeral"}, + "description": "Get the current weather in a given location", + }, + ], + top_k=5, + top_p=0.7, + ) + await message_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncAnthropic) -> None: + response = await async_client.beta.prompt_caching.messages.with_raw_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncAnthropic) -> None: + async with async_client.beta.prompt_caching.messages.with_streaming_response.create( + max_tokens=1024, + messages=[ + { + "content": "Hello, world", + "role": "user", + } + ], + model="claude-3-5-sonnet-20240620", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True From 98a0a7b9c679539c98d212b12c0a9a950fd6371d Mon Sep 17 00:00:00 2001 From: Robert Craigie Date: Tue, 13 Aug 2024 15:38:32 -0400 Subject: [PATCH 3/4] feat(client): add streaming helpers for prompt caching --- src/anthropic/lib/streaming/__init__.py | 6 + .../_prompt_caching_beta_messages.py | 423 ++++++++++++++++++ .../streaming/_prompt_caching_beta_types.py | 32 ++ .../resources/beta/prompt_caching/messages.py | 119 +++++ 4 files changed, 580 insertions(+) create mode 100644 src/anthropic/lib/streaming/_prompt_caching_beta_messages.py create mode 100644 src/anthropic/lib/streaming/_prompt_caching_beta_types.py diff --git a/src/anthropic/lib/streaming/__init__.py b/src/anthropic/lib/streaming/__init__.py index 0ab41209..fbd25b02 100644 --- a/src/anthropic/lib/streaming/__init__.py +++ b/src/anthropic/lib/streaming/__init__.py @@ -11,3 +11,9 @@ MessageStreamManager as MessageStreamManager, AsyncMessageStreamManager as AsyncMessageStreamManager, ) +from ._prompt_caching_beta_messages import ( + PromptCachingBetaMessageStream as PromptCachingBetaMessageStream, + AsyncPromptCachingBetaMessageStream as AsyncPromptCachingBetaMessageStream, + PromptCachingBetaMessageStreamManager as PromptCachingBetaMessageStreamManager, + AsyncPromptCachingBetaMessageStreamManager as AsyncPromptCachingBetaMessageStreamManager, +) diff --git a/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py b/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py new file mode 100644 index 00000000..df727ea8 --- /dev/null +++ b/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py @@ -0,0 +1,423 @@ +from __future__ import annotations + +from types import TracebackType +from typing import TYPE_CHECKING, Any, Callable, cast +from typing_extensions import Self, Iterator, Awaitable, AsyncIterator, assert_never + +import httpx + +from ...types import ContentBlock +from ..._utils import consume_sync_iterator, consume_async_iterator +from ..._models import build, construct_type +from ..._streaming import Stream, AsyncStream +from ._prompt_caching_beta_types import ( + TextEvent, + InputJsonEvent, + MessageStopEvent, + 
ContentBlockStopEvent, + PromptCachingBetaMessageStreamEvent, +) +from ...types.beta.prompt_caching import PromptCachingBetaMessage, RawPromptCachingBetaMessageStreamEvent + +if TYPE_CHECKING: + from ..._client import Anthropic, AsyncAnthropic + + +class PromptCachingBetaMessageStream: + text_stream: Iterator[str] + """Iterator over just the text deltas in the stream. + + ```py + for text in stream.text_stream: + print(text, end="", flush=True) + print() + ``` + """ + + response: httpx.Response + + def __init__( + self, + *, + cast_to: type[RawPromptCachingBetaMessageStreamEvent], + response: httpx.Response, + client: Anthropic, + ) -> None: + self.response = response + self._cast_to = cast_to + self._client = client + + self.text_stream = self.__stream_text__() + self.__final_message_snapshot: PromptCachingBetaMessage | None = None + + self._iterator = self.__stream__() + self._raw_stream: Stream[RawPromptCachingBetaMessageStreamEvent] = Stream( + cast_to=cast_to, response=response, client=client + ) + + def __next__(self) -> PromptCachingBetaMessageStreamEvent: + return self._iterator.__next__() + + def __iter__(self) -> Iterator[PromptCachingBetaMessageStreamEvent]: + for item in self._iterator: + yield item + + def __enter__(self) -> Self: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + self.close() + + def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + self.response.close() + + def get_final_message(self) -> PromptCachingBetaMessage: + """Waits until the stream has been read to completion and returns + the accumulated `PromptCachingBetaMessage` object. + """ + self.until_done() + assert self.__final_message_snapshot is not None + return self.__final_message_snapshot + + def get_final_text(self) -> str: + """Returns all `text` content blocks concatenated together. + + > [!NOTE] + > Currently the API will only respond with a single content block. + + Will raise an error if no `text` content blocks were returned. + """ + message = self.get_final_message() + text_blocks: list[str] = [] + for block in message.content: + if block.type == "text": + text_blocks.append(block.text) + + if not text_blocks: + raise RuntimeError("Expected to have received at least 1 text block") + + return "".join(text_blocks) + + def until_done(self) -> None: + """Blocks until the stream has been consumed""" + consume_sync_iterator(self) + + # properties + @property + def current_message_snapshot(self) -> PromptCachingBetaMessage: + assert self.__final_message_snapshot is not None + return self.__final_message_snapshot + + def __stream__(self) -> Iterator[PromptCachingBetaMessageStreamEvent]: + for sse_event in self._raw_stream: + self.__final_message_snapshot = accumulate_event( + event=sse_event, + current_snapshot=self.__final_message_snapshot, + ) + + events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot) + for event in events_to_fire: + yield event + + def __stream_text__(self) -> Iterator[str]: + for chunk in self: + if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta": + yield chunk.delta.text + + +class PromptCachingBetaMessageStreamManager: + """Wrapper over PromptCachingBetaMessageStream that is returned by `.stream()`. + + ```py + with client.beta.prompt_caching.messages.stream(...) as stream: + for chunk in stream: + ... 
+ ``` + """ + + def __init__( + self, + api_request: Callable[[], Stream[RawPromptCachingBetaMessageStreamEvent]], + ) -> None: + self.__stream: PromptCachingBetaMessageStream | None = None + self.__api_request = api_request + + def __enter__(self) -> PromptCachingBetaMessageStream: + raw_stream = self.__api_request() + + self.__stream = PromptCachingBetaMessageStream( + cast_to=raw_stream._cast_to, + response=raw_stream.response, + client=raw_stream._client, + ) + + return self.__stream + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__stream is not None: + self.__stream.close() + + +class AsyncPromptCachingBetaMessageStream: + text_stream: AsyncIterator[str] + """Async iterator over just the text deltas in the stream. + + ```py + async for text in stream.text_stream: + print(text, end="", flush=True) + print() + ``` + """ + + response: httpx.Response + + def __init__( + self, + *, + cast_to: type[RawPromptCachingBetaMessageStreamEvent], + response: httpx.Response, + client: AsyncAnthropic, + ) -> None: + self.response = response + self._cast_to = cast_to + self._client = client + + self.text_stream = self.__stream_text__() + self.__final_message_snapshot: PromptCachingBetaMessage | None = None + + self._iterator = self.__stream__() + self._raw_stream: AsyncStream[RawPromptCachingBetaMessageStreamEvent] = AsyncStream( + cast_to=cast_to, response=response, client=client + ) + + async def __anext__(self) -> PromptCachingBetaMessageStreamEvent: + return await self._iterator.__anext__() + + async def __aiter__(self) -> AsyncIterator[PromptCachingBetaMessageStreamEvent]: + async for item in self._iterator: + yield item + + async def __aenter__(self) -> Self: + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + await self.close() + + async def close(self) -> None: + """ + Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + await self.response.aclose() + + async def get_final_message(self) -> PromptCachingBetaMessage: + """Waits until the stream has been read to completion and returns + the accumulated `PromptCachingBetaMessage` object. + """ + await self.until_done() + assert self.__final_message_snapshot is not None + return self.__final_message_snapshot + + async def get_final_text(self) -> str: + """Returns all `text` content blocks concatenated together. + + > [!NOTE] + > Currently the API will only respond with a single content block. + + Will raise an error if no `text` content blocks were returned. 
+ """ + message = await self.get_final_message() + text_blocks: list[str] = [] + for block in message.content: + if block.type == "text": + text_blocks.append(block.text) + + if not text_blocks: + raise RuntimeError("Expected to have received at least 1 text block") + + return "".join(text_blocks) + + async def until_done(self) -> None: + """Waits until the stream has been consumed""" + await consume_async_iterator(self) + + # properties + @property + def current_message_snapshot(self) -> PromptCachingBetaMessage: + assert self.__final_message_snapshot is not None + return self.__final_message_snapshot + + async def __stream__(self) -> AsyncIterator[PromptCachingBetaMessageStreamEvent]: + async for sse_event in self._raw_stream: + self.__final_message_snapshot = accumulate_event( + event=sse_event, + current_snapshot=self.__final_message_snapshot, + ) + + events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot) + for event in events_to_fire: + yield event + + async def __stream_text__(self) -> AsyncIterator[str]: + async for chunk in self: + if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta": + yield chunk.delta.text + + +class AsyncPromptCachingBetaMessageStreamManager: + """Wrapper over AsyncMessageStream that is returned by `.stream()` + so that an async context manager can be used without `await`ing the + original client call. + + ```py + async with client.messages.stream(...) as stream: + async for chunk in stream: + ... + ``` + """ + + def __init__( + self, + api_request: Awaitable[AsyncStream[RawPromptCachingBetaMessageStreamEvent]], + ) -> None: + self.__stream: AsyncPromptCachingBetaMessageStream | None = None + self.__api_request = api_request + + async def __aenter__(self) -> AsyncPromptCachingBetaMessageStream: + raw_stream = await self.__api_request + + self.__stream = AsyncPromptCachingBetaMessageStream( + cast_to=raw_stream._cast_to, + response=raw_stream.response, + client=raw_stream._client, + ) + + return self.__stream + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: + if self.__stream is not None: + await self.__stream.close() + + +def build_events( + *, + event: RawPromptCachingBetaMessageStreamEvent, + message_snapshot: PromptCachingBetaMessage, +) -> list[PromptCachingBetaMessageStreamEvent]: + events_to_fire: list[PromptCachingBetaMessageStreamEvent] = [] + + if event.type == "message_start": + events_to_fire.append(event) + elif event.type == "message_delta": + events_to_fire.append(event) + elif event.type == "message_stop": + events_to_fire.append(build(MessageStopEvent, type="message_stop", message=message_snapshot)) + elif event.type == "content_block_start": + events_to_fire.append(event) + elif event.type == "content_block_delta": + events_to_fire.append(event) + + content_block = message_snapshot.content[event.index] + if event.delta.type == "text_delta" and content_block.type == "text": + events_to_fire.append( + build( + TextEvent, + type="text", + text=event.delta.text, + snapshot=content_block.text, + ) + ) + elif event.delta.type == "input_json_delta" and content_block.type == "tool_use": + events_to_fire.append( + build( + InputJsonEvent, + type="input_json", + partial_json=event.delta.partial_json, + snapshot=content_block.input, + ) + ) + elif event.type == "content_block_stop": + content_block = message_snapshot.content[event.index] + + events_to_fire.append( + build(ContentBlockStopEvent, 
type="content_block_stop", index=event.index, content_block=content_block), + ) + else: + # we only want exhaustive checking for linters, not at runtime + if TYPE_CHECKING: # type: ignore[unreachable] + assert_never(event) + + return events_to_fire + + +JSON_BUF_PROPERTY = "__json_buf" + + +def accumulate_event( + *, + event: RawPromptCachingBetaMessageStreamEvent, + current_snapshot: PromptCachingBetaMessage | None, +) -> PromptCachingBetaMessage: + if current_snapshot is None: + if event.type == "message_start": + return PromptCachingBetaMessage.construct(**cast(Any, event.message.to_dict())) + + raise RuntimeError(f'Unexpected event order, got {event.type} before "message_start"') + + if event.type == "content_block_start": + # TODO: check index + current_snapshot.content.append( + cast( + ContentBlock, + construct_type(type_=ContentBlock, value=event.content_block.model_dump()), + ), + ) + elif event.type == "content_block_delta": + content = current_snapshot.content[event.index] + if content.type == "text" and event.delta.type == "text_delta": + content.text += event.delta.text + elif content.type == "tool_use" and event.delta.type == "input_json_delta": + from jiter import from_json + + # we need to keep track of the raw JSON string as well so that we can + # re-parse it for each delta, for now we just store it as an untyped + # property on the snapshot + json_buf = cast(bytes, getattr(content, JSON_BUF_PROPERTY, b"")) + json_buf += bytes(event.delta.partial_json, "utf-8") + + if json_buf: + content.input = from_json(json_buf, partial_mode=True) + + setattr(content, JSON_BUF_PROPERTY, json_buf) + elif event.type == "message_delta": + current_snapshot.stop_reason = event.delta.stop_reason + current_snapshot.stop_sequence = event.delta.stop_sequence + current_snapshot.usage.output_tokens = event.usage.output_tokens + + return current_snapshot diff --git a/src/anthropic/lib/streaming/_prompt_caching_beta_types.py b/src/anthropic/lib/streaming/_prompt_caching_beta_types.py new file mode 100644 index 00000000..d8fdce52 --- /dev/null +++ b/src/anthropic/lib/streaming/_prompt_caching_beta_types.py @@ -0,0 +1,32 @@ +from typing import Union +from typing_extensions import Literal + +from ._types import ( + TextEvent, + InputJsonEvent, + RawMessageDeltaEvent, + ContentBlockStopEvent, + RawContentBlockDeltaEvent, + RawContentBlockStartEvent, +) +from ...types import RawMessageStopEvent +from ...types.beta.prompt_caching import PromptCachingBetaMessage, RawPromptCachingBetaMessageStartEvent + + +class MessageStopEvent(RawMessageStopEvent): + type: Literal["message_stop"] + + message: PromptCachingBetaMessage + + +PromptCachingBetaMessageStreamEvent = Union[ + RawPromptCachingBetaMessageStartEvent, + MessageStopEvent, + # same as non-beta + TextEvent, + InputJsonEvent, + RawMessageDeltaEvent, + RawContentBlockStartEvent, + RawContentBlockDeltaEvent, + ContentBlockStopEvent, +] diff --git a/src/anthropic/resources/beta/prompt_caching/messages.py b/src/anthropic/resources/beta/prompt_caching/messages.py index c2023c18..53e94ecd 100644 --- a/src/anthropic/resources/beta/prompt_caching/messages.py +++ b/src/anthropic/resources/beta/prompt_caching/messages.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import List, Union, Iterable, overload +from functools import partial from typing_extensions import Literal import httpx @@ -21,6 +22,7 @@ from ...._constants import DEFAULT_TIMEOUT from ...._streaming import Stream, AsyncStream from ...._base_client import make_request_options +from 
....lib.streaming import PromptCachingBetaMessageStreamManager, AsyncPromptCachingBetaMessageStreamManager from ....types.model_param import ModelParam from ....types.beta.prompt_caching import message_create_params from ....types.beta.prompt_caching.prompt_caching_beta_message import PromptCachingBetaMessage @@ -885,6 +887,65 @@ def create( stream_cls=Stream[RawPromptCachingBetaMessageStreamEvent], ) + def stream( + self, + *, + max_tokens: int, + messages: Iterable[PromptCachingBetaMessageParam], + model: ModelParam, + metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, + stop_sequences: List[str] | NotGiven = NOT_GIVEN, + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, + top_k: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PromptCachingBetaMessageStreamManager: + """Create a Message stream""" + if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT: + timeout = 600 + + extra_headers = { + "anthropic-beta": "prompt-caching-2024-07-31", + "X-Stainless-Stream-Helper": "beta.prompt_caching.messages", + **(extra_headers or {}), + } + request = partial( + self._post, + "/v1/messages?beta=prompt_caching", + body=maybe_transform( + { + "max_tokens": max_tokens, + "messages": messages, + "model": model, + "metadata": metadata, + "stop_sequences": stop_sequences, + "stream": True, + "system": system, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_k": top_k, + "top_p": top_p, + }, + message_create_params.MessageCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=PromptCachingBetaMessage, + stream=True, + stream_cls=Stream[RawPromptCachingBetaMessageStreamEvent], + ) + return PromptCachingBetaMessageStreamManager(request) + class AsyncMessages(AsyncAPIResource): @cached_property @@ -1737,6 +1798,64 @@ async def create( stream_cls=AsyncStream[RawPromptCachingBetaMessageStreamEvent], ) + def stream( + self, + *, + max_tokens: int, + messages: Iterable[PromptCachingBetaMessageParam], + model: ModelParam, + metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN, + stop_sequences: List[str] | NotGiven = NOT_GIVEN, + system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN, + top_k: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPromptCachingBetaMessageStreamManager: + """Create a Message stream""" + if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT: + timeout = 600 + + extra_headers = { + "anthropic-beta": "prompt-caching-2024-07-31", + "X-Stainless-Stream-Helper": "beta.prompt_caching.messages", + **(extra_headers or {}), + } + request = self._post( + "/v1/messages?beta=prompt_caching", + body=maybe_transform( + { + "max_tokens": max_tokens, + "messages": messages, + "model": model, + "metadata": metadata, + "stop_sequences": stop_sequences, + "stream": True, + "system": system, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_k": top_k, + "top_p": top_p, + }, + message_create_params.MessageCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=PromptCachingBetaMessage, + stream=True, + stream_cls=AsyncStream[RawPromptCachingBetaMessageStreamEvent], + ) + return AsyncPromptCachingBetaMessageStreamManager(request) + class MessagesWithRawResponse: def __init__(self, messages: Messages) -> None: From b69ff7be11ffe00fb469ddc6421dce5235ef1f53 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:46:22 +0000 Subject: [PATCH 4/4] release: 0.34.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 14 ++++++++++++++ pyproject.toml | 2 +- src/anthropic/_version.py | 2 +- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index bc3d0ace..e4e1c3ce 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.33.1" + ".": "0.34.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f4311f8a..c62a16e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## 0.34.0 (2024-08-14) + +Full Changelog: [v0.33.1...v0.34.0](https://github.com/anthropics/anthropic-sdk-python/compare/v0.33.1...v0.34.0) + +### Features + +* **api:** add prompt caching beta ([3978411](https://github.com/anthropics/anthropic-sdk-python/commit/397841125164a2420d5abf8f45d47f2467e36cd9)) +* **client:** add streaming helpers for prompt caching ([98a0a7b](https://github.com/anthropics/anthropic-sdk-python/commit/98a0a7b9c679539c98d212b12c0a9a950fd6371d)) + + +### Chores + +* **examples:** minor formatting changes ([#633](https://github.com/anthropics/anthropic-sdk-python/issues/633)) ([20487ea](https://github.com/anthropics/anthropic-sdk-python/commit/20487ea0080969511e7c41f199387b87a84f6ab4)) + ## 0.33.1 (2024-08-12) Full Changelog: [v0.33.0...v0.33.1](https://github.com/anthropics/anthropic-sdk-python/compare/v0.33.0...v0.33.1) diff --git a/pyproject.toml b/pyproject.toml index e563ae76..5733c094 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "anthropic" -version = "0.33.1" +version = "0.34.0" description = "The official Python library for the anthropic API" dynamic = ["readme"] license = "MIT" diff --git a/src/anthropic/_version.py b/src/anthropic/_version.py index 4d8c83b1..1d20411a 100644 --- a/src/anthropic/_version.py +++ b/src/anthropic/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. __title__ = "anthropic" -__version__ = "0.33.1" # x-release-please-version +__version__ = "0.34.0" # x-release-please-version
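
Not part of the diff above — a minimal usage sketch of the prompt-caching streaming helpers this release adds, based on the `stream()` signature in `src/anthropic/resources/beta/prompt_caching/messages.py`; the system text and model name mirror the test fixtures and are illustrative only:

```py
from anthropic import Anthropic

client = Anthropic()  # assumes ANTHROPIC_API_KEY is set in the environment

# The system text is a placeholder; caching is aimed at large prompt
# prefixes that are reused across requests.
with client.beta.prompt_caching.messages.stream(
    max_tokens=1024,
    system=[
        {
            "type": "text",
            "text": "Today's date is 2024-06-01.",
            "cache_control": {"type": "ephemeral"},
        }
    ],
    messages=[{"role": "user", "content": "Hello, world"}],
    model="claude-3-5-sonnet-20240620",
) as stream:
    # text_stream yields only the text deltas
    for text in stream.text_stream:
        print(text, end="", flush=True)
    print()

    # accumulated message, including the new cache usage counters
    message = stream.get_final_message()
    print(message.usage.cache_creation_input_tokens, message.usage.cache_read_input_tokens)
```

The async client mirrors this via `async with async_client.beta.prompt_caching.messages.stream(...) as stream:` using the `AsyncPromptCachingBetaMessageStreamManager` added in this patch; the `anthropic-beta: prompt-caching-2024-07-31` header is attached by the helper itself.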