diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index bc3d0ace..e4e1c3ce 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.33.1"
+ ".": "0.34.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index 4d2e1943..49961a7f 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
-configured_endpoints: 2
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-4769b27b6e13acc458cc71fbadd8676ea8074d76f91e37b96eaa97464c4e97af.yml
+configured_endpoints: 3
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/anthropic-fb94a03f85580f7eacef034518becfb463502e6d74b0f7932f6153239de23a5b.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f4311f8a..c62a16e4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
# Changelog
+## 0.34.0 (2024-08-14)
+
+Full Changelog: [v0.33.1...v0.34.0](https://github.com/anthropics/anthropic-sdk-python/compare/v0.33.1...v0.34.0)
+
+### Features
+
+* **api:** add prompt caching beta ([3978411](https://github.com/anthropics/anthropic-sdk-python/commit/397841125164a2420d5abf8f45d47f2467e36cd9))
+* **client:** add streaming helpers for prompt caching ([98a0a7b](https://github.com/anthropics/anthropic-sdk-python/commit/98a0a7b9c679539c98d212b12c0a9a950fd6371d))
+
+
+### Chores
+
+* **examples:** minor formatting changes ([#633](https://github.com/anthropics/anthropic-sdk-python/issues/633)) ([20487ea](https://github.com/anthropics/anthropic-sdk-python/commit/20487ea0080969511e7c41f199387b87a84f6ab4))
+
## 0.33.1 (2024-08-12)
Full Changelog: [v0.33.0...v0.33.1](https://github.com/anthropics/anthropic-sdk-python/compare/v0.33.0...v0.33.1)
diff --git a/api.md b/api.md
index 63896541..aeb4e3d3 100644
--- a/api.md
+++ b/api.md
@@ -40,3 +40,31 @@ Methods:
- client.messages.create(\*\*params) -> Message
- client.messages.stream(\*args) -> MessageStreamManager[MessageStream] | MessageStreamManager[MessageStreamT]
+
+# Beta
+
+## PromptCaching
+
+### Messages
+
+Types:
+
+```python
+from anthropic.types.beta.prompt_caching import (
+ PromptCachingBetaCacheControlEphemeral,
+ PromptCachingBetaImageBlockParam,
+ PromptCachingBetaMessage,
+ PromptCachingBetaMessageParam,
+ PromptCachingBetaTextBlockParam,
+ PromptCachingBetaTool,
+ PromptCachingBetaToolResultBlockParam,
+ PromptCachingBetaToolUseBlockParam,
+ PromptCachingBetaUsage,
+ RawPromptCachingBetaMessageStartEvent,
+ RawPromptCachingBetaMessageStreamEvent,
+)
+```
+
+Methods:
+
+- client.beta.prompt_caching.messages.create(\*\*params) -> PromptCachingBetaMessage
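
For context, a minimal sketch of calling the new beta endpoint with a cached system prompt; the model id and prompt text are illustrative, and the `cache_control` block follows the beta types listed above:

```python
from anthropic import Anthropic

client = Anthropic()

message = client.beta.prompt_caching.messages.create(
    model="claude-3-5-sonnet-20240620",  # illustrative model id
    max_tokens=1024,
    system=[
        {
            "type": "text",
            "text": "You are an assistant answering questions about a large reference document.",
            "cache_control": {"type": "ephemeral"},
        }
    ],
    messages=[{"role": "user", "content": "Summarize the document."}],
)
# PromptCachingBetaUsage also reports cache creation/read token counts.
print(message.usage)
```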
diff --git a/pyproject.toml b/pyproject.toml
index e563ae76..5733c094 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "anthropic"
-version = "0.33.1"
+version = "0.34.0"
description = "The official Python library for the anthropic API"
dynamic = ["readme"]
license = "MIT"
diff --git a/src/anthropic/_client.py b/src/anthropic/_client.py
index ac148940..693e3a96 100644
--- a/src/anthropic/_client.py
+++ b/src/anthropic/_client.py
@@ -58,6 +58,7 @@
class Anthropic(SyncAPIClient):
completions: resources.Completions
messages: resources.Messages
+ beta: resources.Beta
with_raw_response: AnthropicWithRawResponse
with_streaming_response: AnthropicWithStreamedResponse
@@ -136,6 +137,7 @@ def __init__(
self.completions = resources.Completions(self)
self.messages = resources.Messages(self)
+ self.beta = resources.Beta(self)
self.with_raw_response = AnthropicWithRawResponse(self)
self.with_streaming_response = AnthropicWithStreamedResponse(self)
@@ -320,6 +322,7 @@ def _make_status_error(
class AsyncAnthropic(AsyncAPIClient):
completions: resources.AsyncCompletions
messages: resources.AsyncMessages
+ beta: resources.AsyncBeta
with_raw_response: AsyncAnthropicWithRawResponse
with_streaming_response: AsyncAnthropicWithStreamedResponse
@@ -398,6 +401,7 @@ def __init__(
self.completions = resources.AsyncCompletions(self)
self.messages = resources.AsyncMessages(self)
+ self.beta = resources.AsyncBeta(self)
self.with_raw_response = AsyncAnthropicWithRawResponse(self)
self.with_streaming_response = AsyncAnthropicWithStreamedResponse(self)
@@ -583,24 +587,28 @@ class AnthropicWithRawResponse:
def __init__(self, client: Anthropic) -> None:
self.completions = resources.CompletionsWithRawResponse(client.completions)
self.messages = resources.MessagesWithRawResponse(client.messages)
+ self.beta = resources.BetaWithRawResponse(client.beta)
class AsyncAnthropicWithRawResponse:
def __init__(self, client: AsyncAnthropic) -> None:
self.completions = resources.AsyncCompletionsWithRawResponse(client.completions)
self.messages = resources.AsyncMessagesWithRawResponse(client.messages)
+ self.beta = resources.AsyncBetaWithRawResponse(client.beta)
class AnthropicWithStreamedResponse:
def __init__(self, client: Anthropic) -> None:
self.completions = resources.CompletionsWithStreamingResponse(client.completions)
self.messages = resources.MessagesWithStreamingResponse(client.messages)
+ self.beta = resources.BetaWithStreamingResponse(client.beta)
class AsyncAnthropicWithStreamedResponse:
def __init__(self, client: AsyncAnthropic) -> None:
self.completions = resources.AsyncCompletionsWithStreamingResponse(client.completions)
self.messages = resources.AsyncMessagesWithStreamingResponse(client.messages)
+ self.beta = resources.AsyncBetaWithStreamingResponse(client.beta)
Client = Anthropic
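
The async client gains the same `beta` attribute. A hedged sketch of exercising it through the async streaming helper added later in this diff (model id and prompt are placeholders):

```python
import asyncio

from anthropic import AsyncAnthropic


async def main() -> None:
    client = AsyncAnthropic()
    # The stream manager is entered without awaiting the original call.
    async with client.beta.prompt_caching.messages.stream(
        model="claude-3-5-sonnet-20240620",  # illustrative model id
        max_tokens=1024,
        messages=[{"role": "user", "content": "Hello, Claude"}],
    ) as stream:
        async for text in stream.text_stream:
            print(text, end="", flush=True)
        print()


asyncio.run(main())
```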
diff --git a/src/anthropic/_version.py b/src/anthropic/_version.py
index 4d8c83b1..1d20411a 100644
--- a/src/anthropic/_version.py
+++ b/src/anthropic/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "anthropic"
-__version__ = "0.33.1" # x-release-please-version
+__version__ = "0.34.0" # x-release-please-version
diff --git a/src/anthropic/lib/streaming/__init__.py b/src/anthropic/lib/streaming/__init__.py
index 0ab41209..fbd25b02 100644
--- a/src/anthropic/lib/streaming/__init__.py
+++ b/src/anthropic/lib/streaming/__init__.py
@@ -11,3 +11,9 @@
MessageStreamManager as MessageStreamManager,
AsyncMessageStreamManager as AsyncMessageStreamManager,
)
+from ._prompt_caching_beta_messages import (
+ PromptCachingBetaMessageStream as PromptCachingBetaMessageStream,
+ AsyncPromptCachingBetaMessageStream as AsyncPromptCachingBetaMessageStream,
+ PromptCachingBetaMessageStreamManager as PromptCachingBetaMessageStreamManager,
+ AsyncPromptCachingBetaMessageStreamManager as AsyncPromptCachingBetaMessageStreamManager,
+)
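
These exports mirror the existing Messages stream helpers. A minimal sketch of the sync flow, assuming an `ANTHROPIC_API_KEY` in the environment and an illustrative model id:

```python
from anthropic import Anthropic

client = Anthropic()

with client.beta.prompt_caching.messages.stream(
    model="claude-3-5-sonnet-20240620",  # illustrative model id
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello, Claude"}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
    print()

    # Once the stream is consumed, the accumulated message is available.
    message = stream.get_final_message()
    print(message.stop_reason, message.usage)
```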
diff --git a/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py b/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py
new file mode 100644
index 00000000..df727ea8
--- /dev/null
+++ b/src/anthropic/lib/streaming/_prompt_caching_beta_messages.py
@@ -0,0 +1,423 @@
+from __future__ import annotations
+
+from types import TracebackType
+from typing import TYPE_CHECKING, Any, Callable, cast
+from typing_extensions import Self, Iterator, Awaitable, AsyncIterator, assert_never
+
+import httpx
+
+from ...types import ContentBlock
+from ..._utils import consume_sync_iterator, consume_async_iterator
+from ..._models import build, construct_type
+from ..._streaming import Stream, AsyncStream
+from ._prompt_caching_beta_types import (
+ TextEvent,
+ InputJsonEvent,
+ MessageStopEvent,
+ ContentBlockStopEvent,
+ PromptCachingBetaMessageStreamEvent,
+)
+from ...types.beta.prompt_caching import PromptCachingBetaMessage, RawPromptCachingBetaMessageStreamEvent
+
+if TYPE_CHECKING:
+ from ..._client import Anthropic, AsyncAnthropic
+
+
+class PromptCachingBetaMessageStream:
+ text_stream: Iterator[str]
+ """Iterator over just the text deltas in the stream.
+
+ ```py
+ for text in stream.text_stream:
+ print(text, end="", flush=True)
+ print()
+ ```
+ """
+
+ response: httpx.Response
+
+ def __init__(
+ self,
+ *,
+ cast_to: type[RawPromptCachingBetaMessageStreamEvent],
+ response: httpx.Response,
+ client: Anthropic,
+ ) -> None:
+ self.response = response
+ self._cast_to = cast_to
+ self._client = client
+
+ self.text_stream = self.__stream_text__()
+ self.__final_message_snapshot: PromptCachingBetaMessage | None = None
+
+ self._iterator = self.__stream__()
+ self._raw_stream: Stream[RawPromptCachingBetaMessageStreamEvent] = Stream(
+ cast_to=cast_to, response=response, client=client
+ )
+
+ def __next__(self) -> PromptCachingBetaMessageStreamEvent:
+ return self._iterator.__next__()
+
+ def __iter__(self) -> Iterator[PromptCachingBetaMessageStreamEvent]:
+ for item in self._iterator:
+ yield item
+
+ def __enter__(self) -> Self:
+ return self
+
+ def __exit__(
+ self,
+ exc_type: type[BaseException] | None,
+ exc: BaseException | None,
+ exc_tb: TracebackType | None,
+ ) -> None:
+ self.close()
+
+ def close(self) -> None:
+ """
+ Close the response and release the connection.
+
+ Automatically called if the response body is read to completion.
+ """
+ self.response.close()
+
+ def get_final_message(self) -> PromptCachingBetaMessage:
+ """Waits until the stream has been read to completion and returns
+ the accumulated `PromptCachingBetaMessage` object.
+ """
+ self.until_done()
+ assert self.__final_message_snapshot is not None
+ return self.__final_message_snapshot
+
+ def get_final_text(self) -> str:
+ """Returns all `text` content blocks concatenated together.
+
+ > [!NOTE]
+ > Currently the API will only respond with a single content block.
+
+ Will raise an error if no `text` content blocks were returned.
+ """
+ message = self.get_final_message()
+ text_blocks: list[str] = []
+ for block in message.content:
+ if block.type == "text":
+ text_blocks.append(block.text)
+
+ if not text_blocks:
+ raise RuntimeError("Expected to have received at least 1 text block")
+
+ return "".join(text_blocks)
+
+ def until_done(self) -> None:
+ """Blocks until the stream has been consumed"""
+ consume_sync_iterator(self)
+
+ # properties
+ @property
+ def current_message_snapshot(self) -> PromptCachingBetaMessage:
+ assert self.__final_message_snapshot is not None
+ return self.__final_message_snapshot
+
+ def __stream__(self) -> Iterator[PromptCachingBetaMessageStreamEvent]:
+ for sse_event in self._raw_stream:
+ self.__final_message_snapshot = accumulate_event(
+ event=sse_event,
+ current_snapshot=self.__final_message_snapshot,
+ )
+
+ events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot)
+ for event in events_to_fire:
+ yield event
+
+ def __stream_text__(self) -> Iterator[str]:
+ for chunk in self:
+ if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
+ yield chunk.delta.text
+
+
+class PromptCachingBetaMessageStreamManager:
+ """Wrapper over PromptCachingBetaMessageStream that is returned by `.stream()`.
+
+ ```py
+ with client.beta.prompt_caching.messages.stream(...) as stream:
+ for chunk in stream:
+ ...
+ ```
+ """
+
+ def __init__(
+ self,
+ api_request: Callable[[], Stream[RawPromptCachingBetaMessageStreamEvent]],
+ ) -> None:
+ self.__stream: PromptCachingBetaMessageStream | None = None
+ self.__api_request = api_request
+
+ def __enter__(self) -> PromptCachingBetaMessageStream:
+ raw_stream = self.__api_request()
+
+ self.__stream = PromptCachingBetaMessageStream(
+ cast_to=raw_stream._cast_to,
+ response=raw_stream.response,
+ client=raw_stream._client,
+ )
+
+ return self.__stream
+
+ def __exit__(
+ self,
+ exc_type: type[BaseException] | None,
+ exc: BaseException | None,
+ exc_tb: TracebackType | None,
+ ) -> None:
+ if self.__stream is not None:
+ self.__stream.close()
+
+
+class AsyncPromptCachingBetaMessageStream:
+ text_stream: AsyncIterator[str]
+ """Async iterator over just the text deltas in the stream.
+
+ ```py
+ async for text in stream.text_stream:
+ print(text, end="", flush=True)
+ print()
+ ```
+ """
+
+ response: httpx.Response
+
+ def __init__(
+ self,
+ *,
+ cast_to: type[RawPromptCachingBetaMessageStreamEvent],
+ response: httpx.Response,
+ client: AsyncAnthropic,
+ ) -> None:
+ self.response = response
+ self._cast_to = cast_to
+ self._client = client
+
+ self.text_stream = self.__stream_text__()
+ self.__final_message_snapshot: PromptCachingBetaMessage | None = None
+
+ self._iterator = self.__stream__()
+ self._raw_stream: AsyncStream[RawPromptCachingBetaMessageStreamEvent] = AsyncStream(
+ cast_to=cast_to, response=response, client=client
+ )
+
+ async def __anext__(self) -> PromptCachingBetaMessageStreamEvent:
+ return await self._iterator.__anext__()
+
+ async def __aiter__(self) -> AsyncIterator[PromptCachingBetaMessageStreamEvent]:
+ async for item in self._iterator:
+ yield item
+
+ async def __aenter__(self) -> Self:
+ return self
+
+ async def __aexit__(
+ self,
+ exc_type: type[BaseException] | None,
+ exc: BaseException | None,
+ exc_tb: TracebackType | None,
+ ) -> None:
+ await self.close()
+
+ async def close(self) -> None:
+ """
+ Close the response and release the connection.
+
+ Automatically called if the response body is read to completion.
+ """
+ await self.response.aclose()
+
+ async def get_final_message(self) -> PromptCachingBetaMessage:
+ """Waits until the stream has been read to completion and returns
+ the accumulated `PromptCachingBetaMessage` object.
+ """
+ await self.until_done()
+ assert self.__final_message_snapshot is not None
+ return self.__final_message_snapshot
+
+ async def get_final_text(self) -> str:
+ """Returns all `text` content blocks concatenated together.
+
+ > [!NOTE]
+ > Currently the API will only respond with a single content block.
+
+ Will raise an error if no `text` content blocks were returned.
+ """
+ message = await self.get_final_message()
+ text_blocks: list[str] = []
+ for block in message.content:
+ if block.type == "text":
+ text_blocks.append(block.text)
+
+ if not text_blocks:
+ raise RuntimeError("Expected to have received at least 1 text block")
+
+ return "".join(text_blocks)
+
+ async def until_done(self) -> None:
+ """Waits until the stream has been consumed"""
+ await consume_async_iterator(self)
+
+ # properties
+ @property
+ def current_message_snapshot(self) -> PromptCachingBetaMessage:
+ assert self.__final_message_snapshot is not None
+ return self.__final_message_snapshot
+
+ async def __stream__(self) -> AsyncIterator[PromptCachingBetaMessageStreamEvent]:
+ async for sse_event in self._raw_stream:
+ self.__final_message_snapshot = accumulate_event(
+ event=sse_event,
+ current_snapshot=self.__final_message_snapshot,
+ )
+
+ events_to_fire = build_events(event=sse_event, message_snapshot=self.current_message_snapshot)
+ for event in events_to_fire:
+ yield event
+
+ async def __stream_text__(self) -> AsyncIterator[str]:
+ async for chunk in self:
+ if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
+ yield chunk.delta.text
+
+
+class AsyncPromptCachingBetaMessageStreamManager:
+    """Wrapper over AsyncPromptCachingBetaMessageStream that is returned by `.stream()`
+ so that an async context manager can be used without `await`ing the
+ original client call.
+
+ ```py
+    async with client.beta.prompt_caching.messages.stream(...) as stream:
+ async for chunk in stream:
+ ...
+ ```
+ """
+
+ def __init__(
+ self,
+ api_request: Awaitable[AsyncStream[RawPromptCachingBetaMessageStreamEvent]],
+ ) -> None:
+ self.__stream: AsyncPromptCachingBetaMessageStream | None = None
+ self.__api_request = api_request
+
+ async def __aenter__(self) -> AsyncPromptCachingBetaMessageStream:
+ raw_stream = await self.__api_request
+
+ self.__stream = AsyncPromptCachingBetaMessageStream(
+ cast_to=raw_stream._cast_to,
+ response=raw_stream.response,
+ client=raw_stream._client,
+ )
+
+ return self.__stream
+
+ async def __aexit__(
+ self,
+ exc_type: type[BaseException] | None,
+ exc: BaseException | None,
+ exc_tb: TracebackType | None,
+ ) -> None:
+ if self.__stream is not None:
+ await self.__stream.close()
+
+
+def build_events(
+ *,
+ event: RawPromptCachingBetaMessageStreamEvent,
+ message_snapshot: PromptCachingBetaMessage,
+) -> list[PromptCachingBetaMessageStreamEvent]:
+ events_to_fire: list[PromptCachingBetaMessageStreamEvent] = []
+
+ if event.type == "message_start":
+ events_to_fire.append(event)
+ elif event.type == "message_delta":
+ events_to_fire.append(event)
+ elif event.type == "message_stop":
+ events_to_fire.append(build(MessageStopEvent, type="message_stop", message=message_snapshot))
+ elif event.type == "content_block_start":
+ events_to_fire.append(event)
+ elif event.type == "content_block_delta":
+ events_to_fire.append(event)
+
+ content_block = message_snapshot.content[event.index]
+ if event.delta.type == "text_delta" and content_block.type == "text":
+ events_to_fire.append(
+ build(
+ TextEvent,
+ type="text",
+ text=event.delta.text,
+ snapshot=content_block.text,
+ )
+ )
+ elif event.delta.type == "input_json_delta" and content_block.type == "tool_use":
+ events_to_fire.append(
+ build(
+ InputJsonEvent,
+ type="input_json",
+ partial_json=event.delta.partial_json,
+ snapshot=content_block.input,
+ )
+ )
+ elif event.type == "content_block_stop":
+ content_block = message_snapshot.content[event.index]
+
+ events_to_fire.append(
+ build(ContentBlockStopEvent, type="content_block_stop", index=event.index, content_block=content_block),
+ )
+ else:
+ # we only want exhaustive checking for linters, not at runtime
+ if TYPE_CHECKING: # type: ignore[unreachable]
+ assert_never(event)
+
+ return events_to_fire
+
+
+JSON_BUF_PROPERTY = "__json_buf"
+
+
+def accumulate_event(
+ *,
+ event: RawPromptCachingBetaMessageStreamEvent,
+ current_snapshot: PromptCachingBetaMessage | None,
+) -> PromptCachingBetaMessage:
+ if current_snapshot is None:
+ if event.type == "message_start":
+ return PromptCachingBetaMessage.construct(**cast(Any, event.message.to_dict()))
+
+ raise RuntimeError(f'Unexpected event order, got {event.type} before "message_start"')
+
+ if event.type == "content_block_start":
+ # TODO: check index
+ current_snapshot.content.append(
+ cast(
+ ContentBlock,
+ construct_type(type_=ContentBlock, value=event.content_block.model_dump()),
+ ),
+ )
+ elif event.type == "content_block_delta":
+ content = current_snapshot.content[event.index]
+ if content.type == "text" and event.delta.type == "text_delta":
+ content.text += event.delta.text
+ elif content.type == "tool_use" and event.delta.type == "input_json_delta":
+ from jiter import from_json
+
+ # we need to keep track of the raw JSON string as well so that we can
+ # re-parse it for each delta, for now we just store it as an untyped
+ # property on the snapshot
+ json_buf = cast(bytes, getattr(content, JSON_BUF_PROPERTY, b""))
+ json_buf += bytes(event.delta.partial_json, "utf-8")
+
+ if json_buf:
+ content.input = from_json(json_buf, partial_mode=True)
+
+ setattr(content, JSON_BUF_PROPERTY, json_buf)
+ elif event.type == "message_delta":
+ current_snapshot.stop_reason = event.delta.stop_reason
+ current_snapshot.stop_sequence = event.delta.stop_sequence
+ current_snapshot.usage.output_tokens = event.usage.output_tokens
+
+ return current_snapshot
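
The tool-input handling above buffers the raw JSON fragments and re-parses the buffer on every delta. A standalone sketch of that buffering behaviour, using made-up fragments:

```python
from jiter import from_json

# Simulated `input_json_delta` fragments for a single tool_use block.
fragments = ['{"ticker"', ': "^GS', 'PC"}']

json_buf = b""
for partial_json in fragments:
    json_buf += partial_json.encode("utf-8")
    # partial_mode=True lets jiter parse an incomplete document,
    # returning whatever prefix is valid so far.
    print(from_json(json_buf, partial_mode=True))

# The final iteration prints the fully parsed input: {'ticker': '^GSPC'}
```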
diff --git a/src/anthropic/lib/streaming/_prompt_caching_beta_types.py b/src/anthropic/lib/streaming/_prompt_caching_beta_types.py
new file mode 100644
index 00000000..d8fdce52
--- /dev/null
+++ b/src/anthropic/lib/streaming/_prompt_caching_beta_types.py
@@ -0,0 +1,32 @@
+from typing import Union
+from typing_extensions import Literal
+
+from ._types import (
+ TextEvent,
+ InputJsonEvent,
+ RawMessageDeltaEvent,
+ ContentBlockStopEvent,
+ RawContentBlockDeltaEvent,
+ RawContentBlockStartEvent,
+)
+from ...types import RawMessageStopEvent
+from ...types.beta.prompt_caching import PromptCachingBetaMessage, RawPromptCachingBetaMessageStartEvent
+
+
+class MessageStopEvent(RawMessageStopEvent):
+ type: Literal["message_stop"]
+
+ message: PromptCachingBetaMessage
+
+
+PromptCachingBetaMessageStreamEvent = Union[
+ RawPromptCachingBetaMessageStartEvent,
+ MessageStopEvent,
+ # same as non-beta
+ TextEvent,
+ InputJsonEvent,
+ RawMessageDeltaEvent,
+ RawContentBlockStartEvent,
+ RawContentBlockDeltaEvent,
+ ContentBlockStopEvent,
+]
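
Consumers narrow this union on its `type` discriminator. A small sketch of a handler over an already-opened stream (the `handle` function is illustrative):

```python
from anthropic.lib.streaming import PromptCachingBetaMessageStream


def handle(stream: PromptCachingBetaMessageStream) -> None:
    for event in stream:
        if event.type == "text":
            # TextEvent: the new delta plus an accumulated text snapshot.
            print(event.text, end="", flush=True)
        elif event.type == "input_json":
            # InputJsonEvent: partial JSON for a tool_use block.
            print(event.partial_json, end="", flush=True)
        elif event.type == "content_block_stop":
            print("\nblock finished:", event.content_block.type)
        elif event.type == "message_stop":
            # Carries the accumulated PromptCachingBetaMessage.
            print("usage:", event.message.usage)
```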
diff --git a/src/anthropic/resources/__init__.py b/src/anthropic/resources/__init__.py
index cc6cc5be..318d5cdd 100644
--- a/src/anthropic/resources/__init__.py
+++ b/src/anthropic/resources/__init__.py
@@ -1,5 +1,13 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from .beta import (
+ Beta,
+ AsyncBeta,
+ BetaWithRawResponse,
+ AsyncBetaWithRawResponse,
+ BetaWithStreamingResponse,
+ AsyncBetaWithStreamingResponse,
+)
from .messages import (
Messages,
AsyncMessages,
@@ -30,4 +38,10 @@
"AsyncMessagesWithRawResponse",
"MessagesWithStreamingResponse",
"AsyncMessagesWithStreamingResponse",
+ "Beta",
+ "AsyncBeta",
+ "BetaWithRawResponse",
+ "AsyncBetaWithRawResponse",
+ "BetaWithStreamingResponse",
+ "AsyncBetaWithStreamingResponse",
]
diff --git a/src/anthropic/resources/beta/__init__.py b/src/anthropic/resources/beta/__init__.py
new file mode 100644
index 00000000..9dacb008
--- /dev/null
+++ b/src/anthropic/resources/beta/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .beta import (
+ Beta,
+ AsyncBeta,
+ BetaWithRawResponse,
+ AsyncBetaWithRawResponse,
+ BetaWithStreamingResponse,
+ AsyncBetaWithStreamingResponse,
+)
+from .prompt_caching import (
+ PromptCaching,
+ AsyncPromptCaching,
+ PromptCachingWithRawResponse,
+ AsyncPromptCachingWithRawResponse,
+ PromptCachingWithStreamingResponse,
+ AsyncPromptCachingWithStreamingResponse,
+)
+
+__all__ = [
+ "PromptCaching",
+ "AsyncPromptCaching",
+ "PromptCachingWithRawResponse",
+ "AsyncPromptCachingWithRawResponse",
+ "PromptCachingWithStreamingResponse",
+ "AsyncPromptCachingWithStreamingResponse",
+ "Beta",
+ "AsyncBeta",
+ "BetaWithRawResponse",
+ "AsyncBetaWithRawResponse",
+ "BetaWithStreamingResponse",
+ "AsyncBetaWithStreamingResponse",
+]
diff --git a/src/anthropic/resources/beta/beta.py b/src/anthropic/resources/beta/beta.py
new file mode 100644
index 00000000..7654921c
--- /dev/null
+++ b/src/anthropic/resources/beta/beta.py
@@ -0,0 +1,81 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .prompt_caching import (
+ PromptCaching,
+ AsyncPromptCaching,
+ PromptCachingWithRawResponse,
+ AsyncPromptCachingWithRawResponse,
+ PromptCachingWithStreamingResponse,
+ AsyncPromptCachingWithStreamingResponse,
+)
+from .prompt_caching.prompt_caching import PromptCaching, AsyncPromptCaching
+
+__all__ = ["Beta", "AsyncBeta"]
+
+
+class Beta(SyncAPIResource):
+ @cached_property
+ def prompt_caching(self) -> PromptCaching:
+ return PromptCaching(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> BetaWithRawResponse:
+ return BetaWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> BetaWithStreamingResponse:
+ return BetaWithStreamingResponse(self)
+
+
+class AsyncBeta(AsyncAPIResource):
+ @cached_property
+ def prompt_caching(self) -> AsyncPromptCaching:
+ return AsyncPromptCaching(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncBetaWithRawResponse:
+ return AsyncBetaWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncBetaWithStreamingResponse:
+ return AsyncBetaWithStreamingResponse(self)
+
+
+class BetaWithRawResponse:
+ def __init__(self, beta: Beta) -> None:
+ self._beta = beta
+
+ @cached_property
+ def prompt_caching(self) -> PromptCachingWithRawResponse:
+ return PromptCachingWithRawResponse(self._beta.prompt_caching)
+
+
+class AsyncBetaWithRawResponse:
+ def __init__(self, beta: AsyncBeta) -> None:
+ self._beta = beta
+
+ @cached_property
+ def prompt_caching(self) -> AsyncPromptCachingWithRawResponse:
+ return AsyncPromptCachingWithRawResponse(self._beta.prompt_caching)
+
+
+class BetaWithStreamingResponse:
+ def __init__(self, beta: Beta) -> None:
+ self._beta = beta
+
+ @cached_property
+ def prompt_caching(self) -> PromptCachingWithStreamingResponse:
+ return PromptCachingWithStreamingResponse(self._beta.prompt_caching)
+
+
+class AsyncBetaWithStreamingResponse:
+ def __init__(self, beta: AsyncBeta) -> None:
+ self._beta = beta
+
+ @cached_property
+ def prompt_caching(self) -> AsyncPromptCachingWithStreamingResponse:
+ return AsyncPromptCachingWithStreamingResponse(self._beta.prompt_caching)
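
These wrappers chain down to the prompt-caching messages resource, following the same raw-response pattern as the top-level resources. A hedged sketch, assuming the usual `.with_raw_response` / `.parse()` flow applies to the beta path as well:

```python
from anthropic import Anthropic

client = Anthropic()

response = client.beta.prompt_caching.messages.with_raw_response.create(
    model="claude-3-5-sonnet-20240620",  # illustrative model id
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello, Claude"}],
)
print(response.headers.get("request-id"))
message = response.parse()  # -> PromptCachingBetaMessage
```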
diff --git a/src/anthropic/resources/beta/prompt_caching/__init__.py b/src/anthropic/resources/beta/prompt_caching/__init__.py
new file mode 100644
index 00000000..ccf0b0a8
--- /dev/null
+++ b/src/anthropic/resources/beta/prompt_caching/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .messages import (
+ Messages,
+ AsyncMessages,
+ MessagesWithRawResponse,
+ AsyncMessagesWithRawResponse,
+ MessagesWithStreamingResponse,
+ AsyncMessagesWithStreamingResponse,
+)
+from .prompt_caching import (
+ PromptCaching,
+ AsyncPromptCaching,
+ PromptCachingWithRawResponse,
+ AsyncPromptCachingWithRawResponse,
+ PromptCachingWithStreamingResponse,
+ AsyncPromptCachingWithStreamingResponse,
+)
+
+__all__ = [
+ "Messages",
+ "AsyncMessages",
+ "MessagesWithRawResponse",
+ "AsyncMessagesWithRawResponse",
+ "MessagesWithStreamingResponse",
+ "AsyncMessagesWithStreamingResponse",
+ "PromptCaching",
+ "AsyncPromptCaching",
+ "PromptCachingWithRawResponse",
+ "AsyncPromptCachingWithRawResponse",
+ "PromptCachingWithStreamingResponse",
+ "AsyncPromptCachingWithStreamingResponse",
+]
diff --git a/src/anthropic/resources/beta/prompt_caching/messages.py b/src/anthropic/resources/beta/prompt_caching/messages.py
new file mode 100644
index 00000000..53e94ecd
--- /dev/null
+++ b/src/anthropic/resources/beta/prompt_caching/messages.py
@@ -0,0 +1,1893 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable, overload
+from functools import partial
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import (
+ is_given,
+ required_args,
+ maybe_transform,
+ async_maybe_transform,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...._constants import DEFAULT_TIMEOUT
+from ...._streaming import Stream, AsyncStream
+from ...._base_client import make_request_options
+from ....lib.streaming import PromptCachingBetaMessageStreamManager, AsyncPromptCachingBetaMessageStreamManager
+from ....types.model_param import ModelParam
+from ....types.beta.prompt_caching import message_create_params
+from ....types.beta.prompt_caching.prompt_caching_beta_message import PromptCachingBetaMessage
+from ....types.beta.prompt_caching.prompt_caching_beta_tool_param import PromptCachingBetaToolParam
+from ....types.beta.prompt_caching.prompt_caching_beta_message_param import PromptCachingBetaMessageParam
+from ....types.beta.prompt_caching.prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam
+from ....types.beta.prompt_caching.raw_prompt_caching_beta_message_stream_event import (
+ RawPromptCachingBetaMessageStreamEvent,
+)
+
+__all__ = ["Messages", "AsyncMessages"]
+
+
+class Messages(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> MessagesWithRawResponse:
+ return MessagesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> MessagesWithStreamingResponse:
+ return MessagesWithStreamingResponse(self)
+
+ @overload
+ def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[PromptCachingBetaMessageParam],
+ model: ModelParam,
+ metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
+ stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+ stream: Literal[False] | NotGiven = NOT_GIVEN,
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+ tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
+ top_k: int | NotGiven = NOT_GIVEN,
+ top_p: float | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PromptCachingBetaMessage:
+ """
+ Create a Message.
+
+ Send a structured list of input messages with text and/or image content, and the
+ model will generate the next message in the conversation.
+
+ The Messages API can be used for either single queries or stateless multi-turn
+ conversations.
+
+ Args:
+ max_tokens: The maximum number of tokens to generate before stopping.
+
+ Note that our models may stop _before_ reaching this maximum. This parameter
+ only specifies the absolute maximum number of tokens to generate.
+
+ Different models have different maximum values for this parameter. See
+ [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+ messages: Input messages.
+
+ Our models are trained to operate on alternating `user` and `assistant`
+ conversational turns. When creating a new `Message`, you specify the prior
+ conversational turns with the `messages` parameter, and the model then generates
+ the next `Message` in the conversation.
+
+ Each input message must be an object with a `role` and `content`. You can
+ specify a single `user`-role message, or you can include multiple `user` and
+ `assistant` messages. The first message must always use the `user` role.
+
+ If the final message uses the `assistant` role, the response content will
+ continue immediately from the content in that message. This can be used to
+ constrain part of the model's response.
+
+ Example with a single `user` message:
+
+ ```json
+ [{ "role": "user", "content": "Hello, Claude" }]
+ ```
+
+ Example with multiple conversational turns:
+
+ ```json
+ [
+ { "role": "user", "content": "Hello there." },
+ { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+ { "role": "user", "content": "Can you explain LLMs in plain English?" }
+ ]
+ ```
+
+ Example with a partially-filled response from Claude:
+
+ ```json
+ [
+ {
+ "role": "user",
+ "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+ },
+ { "role": "assistant", "content": "The best answer is (" }
+ ]
+ ```
+
+ Each input message `content` may be either a single `string` or an array of
+ content blocks, where each block has a specific `type`. Using a `string` for
+ `content` is shorthand for an array of one content block of type `"text"`. The
+ following input messages are equivalent:
+
+ ```json
+ { "role": "user", "content": "Hello, Claude" }
+ ```
+
+ ```json
+ { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+ ```
+
+ Starting with Claude 3 models, you can also send image content blocks:
+
+ ```json
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": "/9j/4AAQSkZJRg..."
+ }
+ },
+ { "type": "text", "text": "What is in this image?" }
+ ]
+ }
+ ```
+
+ We currently support the `base64` source type for images, and the `image/jpeg`,
+ `image/png`, `image/gif`, and `image/webp` media types.
+
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
+
+ Note that if you want to include a
+ [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+ the top-level `system` parameter — there is no `"system"` role for input
+ messages in the Messages API.
+
+ model: The model that will complete your prompt.\n\nSee
+ [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+ details and options.
+
+ metadata: An object describing metadata about the request.
+
+ stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+ Our models will normally stop when they have naturally completed their turn,
+ which will result in a response `stop_reason` of `"end_turn"`.
+
+ If you want the model to stop generating when it encounters custom strings of
+ text, you can use the `stop_sequences` parameter. If the model encounters one of
+ the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+ and the response `stop_sequence` value will contain the matched stop sequence.
+
+ stream: Whether to incrementally stream the response using server-sent events.
+
+ See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+ details.
+
+ system: System prompt.
+
+ A system prompt is a way of providing context and instructions to Claude, such
+ as specifying a particular goal or role. See our
+ [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+ temperature: Amount of randomness injected into the response.
+
+ Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+ for analytical / multiple choice, and closer to `1.0` for creative and
+ generative tasks.
+
+ Note that even with `temperature` of `0.0`, the results will not be fully
+ deterministic.
+
+ tool_choice: How the model should use the provided tools. The model can use a specific tool,
+ any available tool, or decide by itself.
+
+ tools: Definitions of tools that the model may use.
+
+ If you include `tools` in your API request, the model may return `tool_use`
+ content blocks that represent the model's use of those tools. You can then run
+ those tools using the tool input generated by the model and then optionally
+ return results back to the model using `tool_result` content blocks.
+
+ Each tool definition includes:
+
+ - `name`: Name of the tool.
+ - `description`: Optional, but strongly-recommended description of the tool.
+ - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
+ shape that the model will produce in `tool_use` output content blocks.
+
+ For example, if you defined `tools` as:
+
+ ```json
+ [
+ {
+ "name": "get_stock_price",
+ "description": "Get the current stock price for a given ticker symbol.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "ticker": {
+ "type": "string",
+ "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+ }
+ },
+ "required": ["ticker"]
+ }
+ }
+ ]
+ ```
+
+ And then asked the model "What's the S&P 500 at today?", the model might produce
+ `tool_use` content blocks in the response like this:
+
+ ```json
+ [
+ {
+ "type": "tool_use",
+ "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "name": "get_stock_price",
+ "input": { "ticker": "^GSPC" }
+ }
+ ]
+ ```
+
+ You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+ input, and return the following back to the model in a subsequent `user`
+ message:
+
+ ```json
+ [
+ {
+ "type": "tool_result",
+ "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "content": "259.75 USD"
+ }
+ ]
+ ```
+
+ Tools can be used for workflows that include running client-side tools and
+ functions, or more generally whenever you want the model to produce a particular
+ JSON structure of output.
+
+ See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+ top_k: Only sample from the top K options for each subsequent token.
+
+ Used to remove "long tail" low probability responses.
+ [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ top_p: Use nucleus sampling.
+
+ In nucleus sampling, we compute the cumulative distribution over all the options
+ for each subsequent token in decreasing probability order and cut it off once it
+ reaches a particular probability specified by `top_p`. You should either alter
+ `temperature` or `top_p`, but not both.
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[PromptCachingBetaMessageParam],
+ model: ModelParam,
+ stream: Literal[True],
+ metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
+ stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+ tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
+ top_k: int | NotGiven = NOT_GIVEN,
+ top_p: float | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> Stream[RawPromptCachingBetaMessageStreamEvent]:
+ """
+ Create a Message.
+
+ Send a structured list of input messages with text and/or image content, and the
+ model will generate the next message in the conversation.
+
+ The Messages API can be used for either single queries or stateless multi-turn
+ conversations.
+
+ Args:
+ max_tokens: The maximum number of tokens to generate before stopping.
+
+ Note that our models may stop _before_ reaching this maximum. This parameter
+ only specifies the absolute maximum number of tokens to generate.
+
+ Different models have different maximum values for this parameter. See
+ [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+ messages: Input messages.
+
+ Our models are trained to operate on alternating `user` and `assistant`
+ conversational turns. When creating a new `Message`, you specify the prior
+ conversational turns with the `messages` parameter, and the model then generates
+ the next `Message` in the conversation.
+
+ Each input message must be an object with a `role` and `content`. You can
+ specify a single `user`-role message, or you can include multiple `user` and
+ `assistant` messages. The first message must always use the `user` role.
+
+ If the final message uses the `assistant` role, the response content will
+ continue immediately from the content in that message. This can be used to
+ constrain part of the model's response.
+
+ Example with a single `user` message:
+
+ ```json
+ [{ "role": "user", "content": "Hello, Claude" }]
+ ```
+
+ Example with multiple conversational turns:
+
+ ```json
+ [
+ { "role": "user", "content": "Hello there." },
+ { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+ { "role": "user", "content": "Can you explain LLMs in plain English?" }
+ ]
+ ```
+
+ Example with a partially-filled response from Claude:
+
+ ```json
+ [
+ {
+ "role": "user",
+ "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+ },
+ { "role": "assistant", "content": "The best answer is (" }
+ ]
+ ```
+
+ Each input message `content` may be either a single `string` or an array of
+ content blocks, where each block has a specific `type`. Using a `string` for
+ `content` is shorthand for an array of one content block of type `"text"`. The
+ following input messages are equivalent:
+
+ ```json
+ { "role": "user", "content": "Hello, Claude" }
+ ```
+
+ ```json
+ { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+ ```
+
+ Starting with Claude 3 models, you can also send image content blocks:
+
+ ```json
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": "/9j/4AAQSkZJRg..."
+ }
+ },
+ { "type": "text", "text": "What is in this image?" }
+ ]
+ }
+ ```
+
+ We currently support the `base64` source type for images, and the `image/jpeg`,
+ `image/png`, `image/gif`, and `image/webp` media types.
+
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
+
+ Note that if you want to include a
+ [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+ the top-level `system` parameter — there is no `"system"` role for input
+ messages in the Messages API.
+
+ model: The model that will complete your prompt.\n\nSee
+ [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+ details and options.
+
+ stream: Whether to incrementally stream the response using server-sent events.
+
+ See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+ details.
+
+ metadata: An object describing metadata about the request.
+
+ stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+ Our models will normally stop when they have naturally completed their turn,
+ which will result in a response `stop_reason` of `"end_turn"`.
+
+ If you want the model to stop generating when it encounters custom strings of
+ text, you can use the `stop_sequences` parameter. If the model encounters one of
+ the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+ and the response `stop_sequence` value will contain the matched stop sequence.
+
+ system: System prompt.
+
+ A system prompt is a way of providing context and instructions to Claude, such
+ as specifying a particular goal or role. See our
+ [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+ temperature: Amount of randomness injected into the response.
+
+ Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+ for analytical / multiple choice, and closer to `1.0` for creative and
+ generative tasks.
+
+ Note that even with `temperature` of `0.0`, the results will not be fully
+ deterministic.
+
+ tool_choice: How the model should use the provided tools. The model can use a specific tool,
+ any available tool, or decide by itself.
+
+ tools: Definitions of tools that the model may use.
+
+ If you include `tools` in your API request, the model may return `tool_use`
+ content blocks that represent the model's use of those tools. You can then run
+ those tools using the tool input generated by the model and then optionally
+ return results back to the model using `tool_result` content blocks.
+
+ Each tool definition includes:
+
+ - `name`: Name of the tool.
+ - `description`: Optional, but strongly-recommended description of the tool.
+ - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
+ shape that the model will produce in `tool_use` output content blocks.
+
+ For example, if you defined `tools` as:
+
+ ```json
+ [
+ {
+ "name": "get_stock_price",
+ "description": "Get the current stock price for a given ticker symbol.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "ticker": {
+ "type": "string",
+ "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+ }
+ },
+ "required": ["ticker"]
+ }
+ }
+ ]
+ ```
+
+ And then asked the model "What's the S&P 500 at today?", the model might produce
+ `tool_use` content blocks in the response like this:
+
+ ```json
+ [
+ {
+ "type": "tool_use",
+ "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "name": "get_stock_price",
+ "input": { "ticker": "^GSPC" }
+ }
+ ]
+ ```
+
+ You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+ input, and return the following back to the model in a subsequent `user`
+ message:
+
+ ```json
+ [
+ {
+ "type": "tool_result",
+ "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "content": "259.75 USD"
+ }
+ ]
+ ```
+
+ Tools can be used for workflows that include running client-side tools and
+ functions, or more generally whenever you want the model to produce a particular
+ JSON structure of output.
+
+ See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+ top_k: Only sample from the top K options for each subsequent token.
+
+ Used to remove "long tail" low probability responses.
+ [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ top_p: Use nucleus sampling.
+
+ In nucleus sampling, we compute the cumulative distribution over all the options
+ for each subsequent token in decreasing probability order and cut it off once it
+ reaches a particular probability specified by `top_p`. You should either alter
+ `temperature` or `top_p`, but not both.
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[PromptCachingBetaMessageParam],
+ model: ModelParam,
+ stream: bool,
+ metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
+ stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+ tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
+ top_k: int | NotGiven = NOT_GIVEN,
+ top_p: float | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PromptCachingBetaMessage | Stream[RawPromptCachingBetaMessageStreamEvent]:
+ """
+ Create a Message.
+
+ Send a structured list of input messages with text and/or image content, and the
+ model will generate the next message in the conversation.
+
+ The Messages API can be used for either single queries or stateless multi-turn
+ conversations.
+
+ Args:
+ max_tokens: The maximum number of tokens to generate before stopping.
+
+ Note that our models may stop _before_ reaching this maximum. This parameter
+ only specifies the absolute maximum number of tokens to generate.
+
+ Different models have different maximum values for this parameter. See
+ [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+ messages: Input messages.
+
+ Our models are trained to operate on alternating `user` and `assistant`
+ conversational turns. When creating a new `Message`, you specify the prior
+ conversational turns with the `messages` parameter, and the model then generates
+ the next `Message` in the conversation.
+
+ Each input message must be an object with a `role` and `content`. You can
+ specify a single `user`-role message, or you can include multiple `user` and
+ `assistant` messages. The first message must always use the `user` role.
+
+ If the final message uses the `assistant` role, the response content will
+ continue immediately from the content in that message. This can be used to
+ constrain part of the model's response.
+
+ Example with a single `user` message:
+
+ ```json
+ [{ "role": "user", "content": "Hello, Claude" }]
+ ```
+
+ Example with multiple conversational turns:
+
+ ```json
+ [
+ { "role": "user", "content": "Hello there." },
+ { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+ { "role": "user", "content": "Can you explain LLMs in plain English?" }
+ ]
+ ```
+
+ Example with a partially-filled response from Claude:
+
+ ```json
+ [
+ {
+ "role": "user",
+ "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+ },
+ { "role": "assistant", "content": "The best answer is (" }
+ ]
+ ```
+
+ Each input message `content` may be either a single `string` or an array of
+ content blocks, where each block has a specific `type`. Using a `string` for
+ `content` is shorthand for an array of one content block of type `"text"`. The
+ following input messages are equivalent:
+
+ ```json
+ { "role": "user", "content": "Hello, Claude" }
+ ```
+
+ ```json
+ { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+ ```
+
+ Starting with Claude 3 models, you can also send image content blocks:
+
+ ```json
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": "/9j/4AAQSkZJRg..."
+ }
+ },
+ { "type": "text", "text": "What is in this image?" }
+ ]
+ }
+ ```
+
+ We currently support the `base64` source type for images, and the `image/jpeg`,
+ `image/png`, `image/gif`, and `image/webp` media types.
+
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
+
+ Note that if you want to include a
+ [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+ the top-level `system` parameter — there is no `"system"` role for input
+ messages in the Messages API.
+
+ model: The model that will complete your prompt.\n\nSee
+ [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+ details and options.
+
+ stream: Whether to incrementally stream the response using server-sent events.
+
+ See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+ details.
+
+ metadata: An object describing metadata about the request.
+
+ stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+ Our models will normally stop when they have naturally completed their turn,
+ which will result in a response `stop_reason` of `"end_turn"`.
+
+ If you want the model to stop generating when it encounters custom strings of
+ text, you can use the `stop_sequences` parameter. If the model encounters one of
+ the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+ and the response `stop_sequence` value will contain the matched stop sequence.
+
+ system: System prompt.
+
+ A system prompt is a way of providing context and instructions to Claude, such
+ as specifying a particular goal or role. See our
+ [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+ temperature: Amount of randomness injected into the response.
+
+ Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+ for analytical / multiple choice, and closer to `1.0` for creative and
+ generative tasks.
+
+ Note that even with `temperature` of `0.0`, the results will not be fully
+ deterministic.
+
+ tool_choice: How the model should use the provided tools. The model can use a specific tool,
+ any available tool, or decide by itself.
+
+ tools: Definitions of tools that the model may use.
+
+ If you include `tools` in your API request, the model may return `tool_use`
+ content blocks that represent the model's use of those tools. You can then run
+ those tools using the tool input generated by the model and then optionally
+ return results back to the model using `tool_result` content blocks.
+
+ Each tool definition includes:
+
+ - `name`: Name of the tool.
+ - `description`: Optional, but strongly-recommended description of the tool.
+ - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
+ shape that the model will produce in `tool_use` output content blocks.
+
+ For example, if you defined `tools` as:
+
+ ```json
+ [
+ {
+ "name": "get_stock_price",
+ "description": "Get the current stock price for a given ticker symbol.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "ticker": {
+ "type": "string",
+ "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+ }
+ },
+ "required": ["ticker"]
+ }
+ }
+ ]
+ ```
+
+ And then asked the model "What's the S&P 500 at today?", the model might produce
+ `tool_use` content blocks in the response like this:
+
+ ```json
+ [
+ {
+ "type": "tool_use",
+ "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "name": "get_stock_price",
+ "input": { "ticker": "^GSPC" }
+ }
+ ]
+ ```
+
+ You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+ input, and return the following back to the model in a subsequent `user`
+ message:
+
+ ```json
+ [
+ {
+ "type": "tool_result",
+ "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "content": "259.75 USD"
+ }
+ ]
+ ```
+
+ Tools can be used for workflows that include running client-side tools and
+ functions, or more generally whenever you want the model to produce a particular
+ JSON structure of output.
+
+ See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+ top_k: Only sample from the top K options for each subsequent token.
+
+ Used to remove "long tail" low probability responses.
+ [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ top_p: Use nucleus sampling.
+
+ In nucleus sampling, we compute the cumulative distribution over all the options
+ for each subsequent token in decreasing probability order and cut it off once it
+ reaches a particular probability specified by `top_p`. You should either alter
+ `temperature` or `top_p`, but not both.
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+ def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[PromptCachingBetaMessageParam],
+ model: ModelParam,
+ metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
+ stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+ stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+ tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
+ top_k: int | NotGiven = NOT_GIVEN,
+ top_p: float | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PromptCachingBetaMessage | Stream[RawPromptCachingBetaMessageStreamEvent]:
+ if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+ timeout = 600
+ extra_headers = {"anthropic-beta": "prompt-caching-2024-07-31", **(extra_headers or {})}
+ return self._post(
+ "/v1/messages?beta=prompt_caching",
+ body=maybe_transform(
+ {
+ "max_tokens": max_tokens,
+ "messages": messages,
+ "model": model,
+ "metadata": metadata,
+ "stop_sequences": stop_sequences,
+ "stream": stream,
+ "system": system,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_k": top_k,
+ "top_p": top_p,
+ },
+ message_create_params.MessageCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PromptCachingBetaMessage,
+ stream=stream or False,
+ stream_cls=Stream[RawPromptCachingBetaMessageStreamEvent],
+ )
+
+ def stream(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[PromptCachingBetaMessageParam],
+ model: ModelParam,
+ metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
+ stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+ tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
+ top_k: int | NotGiven = NOT_GIVEN,
+ top_p: float | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PromptCachingBetaMessageStreamManager:
+ """Create a Message stream"""
+ if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+ timeout = 600
+
+ extra_headers = {
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ "X-Stainless-Stream-Helper": "beta.prompt_caching.messages",
+ **(extra_headers or {}),
+ }
+ request = partial(
+ self._post,
+ "/v1/messages?beta=prompt_caching",
+ body=maybe_transform(
+ {
+ "max_tokens": max_tokens,
+ "messages": messages,
+ "model": model,
+ "metadata": metadata,
+ "stop_sequences": stop_sequences,
+ "stream": True,
+ "system": system,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_k": top_k,
+ "top_p": top_p,
+ },
+ message_create_params.MessageCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PromptCachingBetaMessage,
+ stream=True,
+ stream_cls=Stream[RawPromptCachingBetaMessageStreamEvent],
+ )
+ return PromptCachingBetaMessageStreamManager(request)
+
+
+class AsyncMessages(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncMessagesWithRawResponse:
+ return AsyncMessagesWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
+ return AsyncMessagesWithStreamingResponse(self)
+
+ @overload
+ async def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[PromptCachingBetaMessageParam],
+ model: ModelParam,
+ metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
+ stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+ stream: Literal[False] | NotGiven = NOT_GIVEN,
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+ tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
+ top_k: int | NotGiven = NOT_GIVEN,
+ top_p: float | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PromptCachingBetaMessage:
+ """
+ Create a Message.
+
+ Send a structured list of input messages with text and/or image content, and the
+ model will generate the next message in the conversation.
+
+ The Messages API can be used for either single queries or stateless multi-turn
+ conversations.
+
+ Args:
+ max_tokens: The maximum number of tokens to generate before stopping.
+
+ Note that our models may stop _before_ reaching this maximum. This parameter
+ only specifies the absolute maximum number of tokens to generate.
+
+ Different models have different maximum values for this parameter. See
+ [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+ messages: Input messages.
+
+ Our models are trained to operate on alternating `user` and `assistant`
+ conversational turns. When creating a new `Message`, you specify the prior
+ conversational turns with the `messages` parameter, and the model then generates
+ the next `Message` in the conversation.
+
+ Each input message must be an object with a `role` and `content`. You can
+ specify a single `user`-role message, or you can include multiple `user` and
+ `assistant` messages. The first message must always use the `user` role.
+
+ If the final message uses the `assistant` role, the response content will
+ continue immediately from the content in that message. This can be used to
+ constrain part of the model's response.
+
+ Example with a single `user` message:
+
+ ```json
+ [{ "role": "user", "content": "Hello, Claude" }]
+ ```
+
+ Example with multiple conversational turns:
+
+ ```json
+ [
+ { "role": "user", "content": "Hello there." },
+ { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+ { "role": "user", "content": "Can you explain LLMs in plain English?" }
+ ]
+ ```
+
+ Example with a partially-filled response from Claude:
+
+ ```json
+ [
+ {
+ "role": "user",
+ "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+ },
+ { "role": "assistant", "content": "The best answer is (" }
+ ]
+ ```
+
+ Each input message `content` may be either a single `string` or an array of
+ content blocks, where each block has a specific `type`. Using a `string` for
+ `content` is shorthand for an array of one content block of type `"text"`. The
+ following input messages are equivalent:
+
+ ```json
+ { "role": "user", "content": "Hello, Claude" }
+ ```
+
+ ```json
+ { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+ ```
+
+ Starting with Claude 3 models, you can also send image content blocks:
+
+ ```json
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": "/9j/4AAQSkZJRg..."
+ }
+ },
+ { "type": "text", "text": "What is in this image?" }
+ ]
+ }
+ ```
+
+ We currently support the `base64` source type for images, and the `image/jpeg`,
+ `image/png`, `image/gif`, and `image/webp` media types.
+
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
+
+ Note that if you want to include a
+ [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+ the top-level `system` parameter — there is no `"system"` role for input
+ messages in the Messages API.
+
+ model: The model that will complete your prompt.\n\nSee
+ [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+ details and options.
+
+ metadata: An object describing metadata about the request.
+
+ stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+ Our models will normally stop when they have naturally completed their turn,
+ which will result in a response `stop_reason` of `"end_turn"`.
+
+ If you want the model to stop generating when it encounters custom strings of
+ text, you can use the `stop_sequences` parameter. If the model encounters one of
+ the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+ and the response `stop_sequence` value will contain the matched stop sequence.
+
+ stream: Whether to incrementally stream the response using server-sent events.
+
+ See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+ details.
+
+ system: System prompt.
+
+ A system prompt is a way of providing context and instructions to Claude, such
+ as specifying a particular goal or role. See our
+ [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+ temperature: Amount of randomness injected into the response.
+
+ Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+ for analytical / multiple choice, and closer to `1.0` for creative and
+ generative tasks.
+
+ Note that even with `temperature` of `0.0`, the results will not be fully
+ deterministic.
+
+ tool_choice: How the model should use the provided tools. The model can use a specific tool,
+ any available tool, or decide by itself.
+
+ tools: Definitions of tools that the model may use.
+
+ If you include `tools` in your API request, the model may return `tool_use`
+ content blocks that represent the model's use of those tools. You can then run
+ those tools using the tool input generated by the model and then optionally
+ return results back to the model using `tool_result` content blocks.
+
+ Each tool definition includes:
+
+ - `name`: Name of the tool.
+ - `description`: Optional, but strongly-recommended description of the tool.
+ - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
+ shape that the model will produce in `tool_use` output content blocks.
+
+ For example, if you defined `tools` as:
+
+ ```json
+ [
+ {
+ "name": "get_stock_price",
+ "description": "Get the current stock price for a given ticker symbol.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "ticker": {
+ "type": "string",
+ "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+ }
+ },
+ "required": ["ticker"]
+ }
+ }
+ ]
+ ```
+
+ And then asked the model "What's the S&P 500 at today?", the model might produce
+ `tool_use` content blocks in the response like this:
+
+ ```json
+ [
+ {
+ "type": "tool_use",
+ "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "name": "get_stock_price",
+ "input": { "ticker": "^GSPC" }
+ }
+ ]
+ ```
+
+ You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+ input, and return the following back to the model in a subsequent `user`
+ message:
+
+ ```json
+ [
+ {
+ "type": "tool_result",
+ "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "content": "259.75 USD"
+ }
+ ]
+ ```
+
+ Tools can be used for workflows that include running client-side tools and
+ functions, or more generally whenever you want the model to produce a particular
+ JSON structure of output.
+
+ See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+ top_k: Only sample from the top K options for each subsequent token.
+
+ Used to remove "long tail" low probability responses.
+ [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ top_p: Use nucleus sampling.
+
+ In nucleus sampling, we compute the cumulative distribution over all the options
+ for each subsequent token in decreasing probability order and cut it off once it
+ reaches a particular probability specified by `top_p`. You should either alter
+ `temperature` or `top_p`, but not both.
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[PromptCachingBetaMessageParam],
+ model: ModelParam,
+ stream: Literal[True],
+ metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
+ stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+ tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
+ top_k: int | NotGiven = NOT_GIVEN,
+ top_p: float | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncStream[RawPromptCachingBetaMessageStreamEvent]:
+ """
+ Create a Message.
+
+ Send a structured list of input messages with text and/or image content, and the
+ model will generate the next message in the conversation.
+
+ The Messages API can be used for either single queries or stateless multi-turn
+ conversations.
+
+ Args:
+ max_tokens: The maximum number of tokens to generate before stopping.
+
+ Note that our models may stop _before_ reaching this maximum. This parameter
+ only specifies the absolute maximum number of tokens to generate.
+
+ Different models have different maximum values for this parameter. See
+ [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+ messages: Input messages.
+
+ Our models are trained to operate on alternating `user` and `assistant`
+ conversational turns. When creating a new `Message`, you specify the prior
+ conversational turns with the `messages` parameter, and the model then generates
+ the next `Message` in the conversation.
+
+ Each input message must be an object with a `role` and `content`. You can
+ specify a single `user`-role message, or you can include multiple `user` and
+ `assistant` messages. The first message must always use the `user` role.
+
+ If the final message uses the `assistant` role, the response content will
+ continue immediately from the content in that message. This can be used to
+ constrain part of the model's response.
+
+ Example with a single `user` message:
+
+ ```json
+ [{ "role": "user", "content": "Hello, Claude" }]
+ ```
+
+ Example with multiple conversational turns:
+
+ ```json
+ [
+ { "role": "user", "content": "Hello there." },
+ { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+ { "role": "user", "content": "Can you explain LLMs in plain English?" }
+ ]
+ ```
+
+ Example with a partially-filled response from Claude:
+
+ ```json
+ [
+ {
+ "role": "user",
+ "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+ },
+ { "role": "assistant", "content": "The best answer is (" }
+ ]
+ ```
+
+ Each input message `content` may be either a single `string` or an array of
+ content blocks, where each block has a specific `type`. Using a `string` for
+ `content` is shorthand for an array of one content block of type `"text"`. The
+ following input messages are equivalent:
+
+ ```json
+ { "role": "user", "content": "Hello, Claude" }
+ ```
+
+ ```json
+ { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+ ```
+
+ Starting with Claude 3 models, you can also send image content blocks:
+
+ ```json
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": "/9j/4AAQSkZJRg..."
+ }
+ },
+ { "type": "text", "text": "What is in this image?" }
+ ]
+ }
+ ```
+
+ We currently support the `base64` source type for images, and the `image/jpeg`,
+ `image/png`, `image/gif`, and `image/webp` media types.
+
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
+
+ Note that if you want to include a
+ [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+ the top-level `system` parameter — there is no `"system"` role for input
+ messages in the Messages API.
+
+ model: The model that will complete your prompt.\n\nSee
+ [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+ details and options.
+
+ stream: Whether to incrementally stream the response using server-sent events.
+
+ See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+ details.
+
+ metadata: An object describing metadata about the request.
+
+ stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+ Our models will normally stop when they have naturally completed their turn,
+ which will result in a response `stop_reason` of `"end_turn"`.
+
+ If you want the model to stop generating when it encounters custom strings of
+ text, you can use the `stop_sequences` parameter. If the model encounters one of
+ the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+ and the response `stop_sequence` value will contain the matched stop sequence.
+
+ system: System prompt.
+
+ A system prompt is a way of providing context and instructions to Claude, such
+ as specifying a particular goal or role. See our
+ [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+ temperature: Amount of randomness injected into the response.
+
+ Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+ for analytical / multiple choice, and closer to `1.0` for creative and
+ generative tasks.
+
+ Note that even with `temperature` of `0.0`, the results will not be fully
+ deterministic.
+
+ tool_choice: How the model should use the provided tools. The model can use a specific tool,
+ any available tool, or decide by itself.
+
+ tools: Definitions of tools that the model may use.
+
+ If you include `tools` in your API request, the model may return `tool_use`
+ content blocks that represent the model's use of those tools. You can then run
+ those tools using the tool input generated by the model and then optionally
+ return results back to the model using `tool_result` content blocks.
+
+ Each tool definition includes:
+
+ - `name`: Name of the tool.
+ - `description`: Optional, but strongly-recommended description of the tool.
+ - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
+ shape that the model will produce in `tool_use` output content blocks.
+
+ For example, if you defined `tools` as:
+
+ ```json
+ [
+ {
+ "name": "get_stock_price",
+ "description": "Get the current stock price for a given ticker symbol.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "ticker": {
+ "type": "string",
+ "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+ }
+ },
+ "required": ["ticker"]
+ }
+ }
+ ]
+ ```
+
+ And then asked the model "What's the S&P 500 at today?", the model might produce
+ `tool_use` content blocks in the response like this:
+
+ ```json
+ [
+ {
+ "type": "tool_use",
+ "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "name": "get_stock_price",
+ "input": { "ticker": "^GSPC" }
+ }
+ ]
+ ```
+
+ You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+ input, and return the following back to the model in a subsequent `user`
+ message:
+
+ ```json
+ [
+ {
+ "type": "tool_result",
+ "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "content": "259.75 USD"
+ }
+ ]
+ ```
+
+ Tools can be used for workflows that include running client-side tools and
+ functions, or more generally whenever you want the model to produce a particular
+ JSON structure of output.
+
+ See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+ top_k: Only sample from the top K options for each subsequent token.
+
+ Used to remove "long tail" low probability responses.
+ [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ top_p: Use nucleus sampling.
+
+ In nucleus sampling, we compute the cumulative distribution over all the options
+ for each subsequent token in decreasing probability order and cut it off once it
+ reaches a particular probability specified by `top_p`. You should either alter
+ `temperature` or `top_p`, but not both.
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[PromptCachingBetaMessageParam],
+ model: ModelParam,
+ stream: bool,
+ metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
+ stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+ tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
+ top_k: int | NotGiven = NOT_GIVEN,
+ top_p: float | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PromptCachingBetaMessage | AsyncStream[RawPromptCachingBetaMessageStreamEvent]:
+ """
+ Create a Message.
+
+ Send a structured list of input messages with text and/or image content, and the
+ model will generate the next message in the conversation.
+
+ The Messages API can be used for either single queries or stateless multi-turn
+ conversations.
+
+ Args:
+ max_tokens: The maximum number of tokens to generate before stopping.
+
+ Note that our models may stop _before_ reaching this maximum. This parameter
+ only specifies the absolute maximum number of tokens to generate.
+
+ Different models have different maximum values for this parameter. See
+ [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+
+ messages: Input messages.
+
+ Our models are trained to operate on alternating `user` and `assistant`
+ conversational turns. When creating a new `Message`, you specify the prior
+ conversational turns with the `messages` parameter, and the model then generates
+ the next `Message` in the conversation.
+
+ Each input message must be an object with a `role` and `content`. You can
+ specify a single `user`-role message, or you can include multiple `user` and
+ `assistant` messages. The first message must always use the `user` role.
+
+ If the final message uses the `assistant` role, the response content will
+ continue immediately from the content in that message. This can be used to
+ constrain part of the model's response.
+
+ Example with a single `user` message:
+
+ ```json
+ [{ "role": "user", "content": "Hello, Claude" }]
+ ```
+
+ Example with multiple conversational turns:
+
+ ```json
+ [
+ { "role": "user", "content": "Hello there." },
+ { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+ { "role": "user", "content": "Can you explain LLMs in plain English?" }
+ ]
+ ```
+
+ Example with a partially-filled response from Claude:
+
+ ```json
+ [
+ {
+ "role": "user",
+ "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+ },
+ { "role": "assistant", "content": "The best answer is (" }
+ ]
+ ```
+
+ Each input message `content` may be either a single `string` or an array of
+ content blocks, where each block has a specific `type`. Using a `string` for
+ `content` is shorthand for an array of one content block of type `"text"`. The
+ following input messages are equivalent:
+
+ ```json
+ { "role": "user", "content": "Hello, Claude" }
+ ```
+
+ ```json
+ { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+ ```
+
+ Starting with Claude 3 models, you can also send image content blocks:
+
+ ```json
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": "/9j/4AAQSkZJRg..."
+ }
+ },
+ { "type": "text", "text": "What is in this image?" }
+ ]
+ }
+ ```
+
+ We currently support the `base64` source type for images, and the `image/jpeg`,
+ `image/png`, `image/gif`, and `image/webp` media types.
+
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
+
+ Note that if you want to include a
+ [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+ the top-level `system` parameter — there is no `"system"` role for input
+ messages in the Messages API.
+
+ model: The model that will complete your prompt.\n\nSee
+ [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+ details and options.
+
+ stream: Whether to incrementally stream the response using server-sent events.
+
+ See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+ details.
+
+ metadata: An object describing metadata about the request.
+
+ stop_sequences: Custom text sequences that will cause the model to stop generating.
+
+ Our models will normally stop when they have naturally completed their turn,
+ which will result in a response `stop_reason` of `"end_turn"`.
+
+ If you want the model to stop generating when it encounters custom strings of
+ text, you can use the `stop_sequences` parameter. If the model encounters one of
+ the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+ and the response `stop_sequence` value will contain the matched stop sequence.
+
+ system: System prompt.
+
+ A system prompt is a way of providing context and instructions to Claude, such
+ as specifying a particular goal or role. See our
+ [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+
+ temperature: Amount of randomness injected into the response.
+
+ Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+ for analytical / multiple choice, and closer to `1.0` for creative and
+ generative tasks.
+
+ Note that even with `temperature` of `0.0`, the results will not be fully
+ deterministic.
+
+ tool_choice: How the model should use the provided tools. The model can use a specific tool,
+ any available tool, or decide by itself.
+
+ tools: Definitions of tools that the model may use.
+
+ If you include `tools` in your API request, the model may return `tool_use`
+ content blocks that represent the model's use of those tools. You can then run
+ those tools using the tool input generated by the model and then optionally
+ return results back to the model using `tool_result` content blocks.
+
+ Each tool definition includes:
+
+ - `name`: Name of the tool.
+ - `description`: Optional, but strongly-recommended description of the tool.
+ - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
+ shape that the model will produce in `tool_use` output content blocks.
+
+ For example, if you defined `tools` as:
+
+ ```json
+ [
+ {
+ "name": "get_stock_price",
+ "description": "Get the current stock price for a given ticker symbol.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "ticker": {
+ "type": "string",
+ "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+ }
+ },
+ "required": ["ticker"]
+ }
+ }
+ ]
+ ```
+
+ And then asked the model "What's the S&P 500 at today?", the model might produce
+ `tool_use` content blocks in the response like this:
+
+ ```json
+ [
+ {
+ "type": "tool_use",
+ "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "name": "get_stock_price",
+ "input": { "ticker": "^GSPC" }
+ }
+ ]
+ ```
+
+ You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+ input, and return the following back to the model in a subsequent `user`
+ message:
+
+ ```json
+ [
+ {
+ "type": "tool_result",
+ "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "content": "259.75 USD"
+ }
+ ]
+ ```
+
+ Tools can be used for workflows that include running client-side tools and
+ functions, or more generally whenever you want the model to produce a particular
+ JSON structure of output.
+
+ See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+
+ top_k: Only sample from the top K options for each subsequent token.
+
+ Used to remove "long tail" low probability responses.
+ [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ top_p: Use nucleus sampling.
+
+ In nucleus sampling, we compute the cumulative distribution over all the options
+ for each subsequent token in decreasing probability order and cut it off once it
+ reaches a particular probability specified by `top_p`. You should either alter
+ `temperature` or `top_p`, but not both.
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["max_tokens", "messages", "model"], ["max_tokens", "messages", "model", "stream"])
+ async def create(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[PromptCachingBetaMessageParam],
+ model: ModelParam,
+ metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
+ stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+ stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+ tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
+ top_k: int | NotGiven = NOT_GIVEN,
+ top_p: float | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PromptCachingBetaMessage | AsyncStream[RawPromptCachingBetaMessageStreamEvent]:
+ if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+ timeout = 600
+ extra_headers = {"anthropic-beta": "prompt-caching-2024-07-31", **(extra_headers or {})}
+ return await self._post(
+ "/v1/messages?beta=prompt_caching",
+ body=await async_maybe_transform(
+ {
+ "max_tokens": max_tokens,
+ "messages": messages,
+ "model": model,
+ "metadata": metadata,
+ "stop_sequences": stop_sequences,
+ "stream": stream,
+ "system": system,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_k": top_k,
+ "top_p": top_p,
+ },
+ message_create_params.MessageCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PromptCachingBetaMessage,
+ stream=stream or False,
+ stream_cls=AsyncStream[RawPromptCachingBetaMessageStreamEvent],
+ )
+
+ def stream(
+ self,
+ *,
+ max_tokens: int,
+ messages: Iterable[PromptCachingBetaMessageParam],
+ model: ModelParam,
+ metadata: message_create_params.Metadata | NotGiven = NOT_GIVEN,
+ stop_sequences: List[str] | NotGiven = NOT_GIVEN,
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]] | NotGiven = NOT_GIVEN,
+ temperature: float | NotGiven = NOT_GIVEN,
+ tool_choice: message_create_params.ToolChoice | NotGiven = NOT_GIVEN,
+ tools: Iterable[PromptCachingBetaToolParam] | NotGiven = NOT_GIVEN,
+ top_k: int | NotGiven = NOT_GIVEN,
+ top_p: float | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPromptCachingBetaMessageStreamManager:
+ """Create a Message stream"""
+ if not is_given(timeout) and self._client.timeout == DEFAULT_TIMEOUT:
+ timeout = 600
+
+ extra_headers = {
+ "anthropic-beta": "prompt-caching-2024-07-31",
+ "X-Stainless-Stream-Helper": "beta.prompt_caching.messages",
+ **(extra_headers or {}),
+ }
+ request = self._post(
+ "/v1/messages?beta=prompt_caching",
+ body=maybe_transform(
+ {
+ "max_tokens": max_tokens,
+ "messages": messages,
+ "model": model,
+ "metadata": metadata,
+ "stop_sequences": stop_sequences,
+ "stream": True,
+ "system": system,
+ "temperature": temperature,
+ "tool_choice": tool_choice,
+ "tools": tools,
+ "top_k": top_k,
+ "top_p": top_p,
+ },
+ message_create_params.MessageCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PromptCachingBetaMessage,
+ stream=True,
+ stream_cls=AsyncStream[RawPromptCachingBetaMessageStreamEvent],
+ )
+ return AsyncPromptCachingBetaMessageStreamManager(request)
+
+
+class MessagesWithRawResponse:
+ def __init__(self, messages: Messages) -> None:
+ self._messages = messages
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ messages.create,
+ )
+
+
+class AsyncMessagesWithRawResponse:
+ def __init__(self, messages: AsyncMessages) -> None:
+ self._messages = messages
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ messages.create,
+ )
+
+
+class MessagesWithStreamingResponse:
+ def __init__(self, messages: Messages) -> None:
+ self._messages = messages
+
+ self.create = to_streamed_response_wrapper(
+ messages.create,
+ )
+
+
+class AsyncMessagesWithStreamingResponse:
+ def __init__(self, messages: AsyncMessages) -> None:
+ self._messages = messages
+
+ self.create = async_to_streamed_response_wrapper(
+ messages.create,
+ )
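
The resource surface added above mirrors the stable Messages API: `create` for one-shot or streamed requests, and `stream` for the context-manager helper. A minimal usage sketch, assuming the resource is mounted at `client.beta.prompt_caching.messages` and that the returned stream manager mirrors the stable `MessageStream` helpers such as `text_stream`; the model id is only an example:

```python
from anthropic import Anthropic

client = Anthropic()  # reads ANTHROPIC_API_KEY from the environment

# Non-streaming: returns a PromptCachingBetaMessage.
message = client.beta.prompt_caching.messages.create(
    model="claude-3-5-sonnet-20240620",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello, Claude"}],
)
print(message.content)

# Streaming helper: the context manager handles SSE parsing and cleanup.
with client.beta.prompt_caching.messages.stream(
    model="claude-3-5-sonnet-20240620",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Tell me a short story"}],
) as stream:
    for text in stream.text_stream:  # assumed to mirror MessageStream.text_stream
        print(text, end="", flush=True)
```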
diff --git a/src/anthropic/resources/beta/prompt_caching/prompt_caching.py b/src/anthropic/resources/beta/prompt_caching/prompt_caching.py
new file mode 100644
index 00000000..f15180e1
--- /dev/null
+++ b/src/anthropic/resources/beta/prompt_caching/prompt_caching.py
@@ -0,0 +1,80 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .messages import (
+ Messages,
+ AsyncMessages,
+ MessagesWithRawResponse,
+ AsyncMessagesWithRawResponse,
+ MessagesWithStreamingResponse,
+ AsyncMessagesWithStreamingResponse,
+)
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["PromptCaching", "AsyncPromptCaching"]
+
+
+class PromptCaching(SyncAPIResource):
+ @cached_property
+ def messages(self) -> Messages:
+ return Messages(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> PromptCachingWithRawResponse:
+ return PromptCachingWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> PromptCachingWithStreamingResponse:
+ return PromptCachingWithStreamingResponse(self)
+
+
+class AsyncPromptCaching(AsyncAPIResource):
+ @cached_property
+ def messages(self) -> AsyncMessages:
+ return AsyncMessages(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncPromptCachingWithRawResponse:
+ return AsyncPromptCachingWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncPromptCachingWithStreamingResponse:
+ return AsyncPromptCachingWithStreamingResponse(self)
+
+
+class PromptCachingWithRawResponse:
+ def __init__(self, prompt_caching: PromptCaching) -> None:
+ self._prompt_caching = prompt_caching
+
+ @cached_property
+ def messages(self) -> MessagesWithRawResponse:
+ return MessagesWithRawResponse(self._prompt_caching.messages)
+
+
+class AsyncPromptCachingWithRawResponse:
+ def __init__(self, prompt_caching: AsyncPromptCaching) -> None:
+ self._prompt_caching = prompt_caching
+
+ @cached_property
+ def messages(self) -> AsyncMessagesWithRawResponse:
+ return AsyncMessagesWithRawResponse(self._prompt_caching.messages)
+
+
+class PromptCachingWithStreamingResponse:
+ def __init__(self, prompt_caching: PromptCaching) -> None:
+ self._prompt_caching = prompt_caching
+
+ @cached_property
+ def messages(self) -> MessagesWithStreamingResponse:
+ return MessagesWithStreamingResponse(self._prompt_caching.messages)
+
+
+class AsyncPromptCachingWithStreamingResponse:
+ def __init__(self, prompt_caching: AsyncPromptCaching) -> None:
+ self._prompt_caching = prompt_caching
+
+ @cached_property
+ def messages(self) -> AsyncMessagesWithStreamingResponse:
+ return AsyncMessagesWithStreamingResponse(self._prompt_caching.messages)
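
These wrapper classes only forward `with_raw_response` / `with_streaming_response` access down to the messages resource. A hedged sketch of what that enables, assuming the parent `Beta` resource exposes this subtree as `client.beta.prompt_caching` and that raw-response calls follow the SDK's usual convention of returning an object with `headers` and `parse()`:

```python
from anthropic import Anthropic

client = Anthropic()

# Same arguments as a normal create() call, but the wrapper returns the raw
# HTTP response instead of the parsed model.
raw = client.beta.prompt_caching.with_raw_response.messages.create(
    model="claude-3-5-sonnet-20240620",
    max_tokens=256,
    messages=[{"role": "user", "content": "ping"}],
)
print(raw.headers.get("request-id"))  # inspect response headers
message = raw.parse()                 # parse into a PromptCachingBetaMessage
print(message.stop_reason)
```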
diff --git a/src/anthropic/resources/messages.py b/src/anthropic/resources/messages.py
index 8b6ab106..8f303984 100644
--- a/src/anthropic/resources/messages.py
+++ b/src/anthropic/resources/messages.py
@@ -162,8 +162,8 @@ def create(
We currently support the `base64` source type for images, and the `image/jpeg`,
`image/png`, `image/gif`, and `image/webp` media types.
- See [examples](https://docs.anthropic.com/en/api/messages-examples) for more
- input examples.
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
Note that if you want to include a
[system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
@@ -425,8 +425,8 @@ def create(
We currently support the `base64` source type for images, and the `image/jpeg`,
`image/png`, `image/gif`, and `image/webp` media types.
- See [examples](https://docs.anthropic.com/en/api/messages-examples) for more
- input examples.
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
Note that if you want to include a
[system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
@@ -688,8 +688,8 @@ def create(
We currently support the `base64` source type for images, and the `image/jpeg`,
`image/png`, `image/gif`, and `image/webp` media types.
- See [examples](https://docs.anthropic.com/en/api/messages-examples) for more
- input examples.
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
Note that if you want to include a
[system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
@@ -1068,8 +1068,8 @@ async def create(
We currently support the `base64` source type for images, and the `image/jpeg`,
`image/png`, `image/gif`, and `image/webp` media types.
- See [examples](https://docs.anthropic.com/en/api/messages-examples) for more
- input examples.
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
Note that if you want to include a
[system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
@@ -1331,8 +1331,8 @@ async def create(
We currently support the `base64` source type for images, and the `image/jpeg`,
`image/png`, `image/gif`, and `image/webp` media types.
- See [examples](https://docs.anthropic.com/en/api/messages-examples) for more
- input examples.
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
Note that if you want to include a
[system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
@@ -1594,8 +1594,8 @@ async def create(
We currently support the `base64` source type for images, and the `image/jpeg`,
`image/png`, `image/gif`, and `image/webp` media types.
- See [examples](https://docs.anthropic.com/en/api/messages-examples) for more
- input examples.
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
Note that if you want to include a
[system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
diff --git a/src/anthropic/types/beta/__init__.py b/src/anthropic/types/beta/__init__.py
new file mode 100644
index 00000000..f8ee8b14
--- /dev/null
+++ b/src/anthropic/types/beta/__init__.py
@@ -0,0 +1,3 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
diff --git a/src/anthropic/types/beta/prompt_caching/__init__.py b/src/anthropic/types/beta/prompt_caching/__init__.py
new file mode 100644
index 00000000..3b4004fc
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/__init__.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .message_create_params import MessageCreateParams as MessageCreateParams
+from .prompt_caching_beta_usage import PromptCachingBetaUsage as PromptCachingBetaUsage
+from .prompt_caching_beta_message import PromptCachingBetaMessage as PromptCachingBetaMessage
+from .prompt_caching_beta_tool_param import PromptCachingBetaToolParam as PromptCachingBetaToolParam
+from .prompt_caching_beta_message_param import PromptCachingBetaMessageParam as PromptCachingBetaMessageParam
+from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam as PromptCachingBetaTextBlockParam
+from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam as PromptCachingBetaImageBlockParam
+from .prompt_caching_beta_tool_use_block_param import (
+ PromptCachingBetaToolUseBlockParam as PromptCachingBetaToolUseBlockParam,
+)
+from .prompt_caching_beta_tool_result_block_param import (
+ PromptCachingBetaToolResultBlockParam as PromptCachingBetaToolResultBlockParam,
+)
+from .raw_prompt_caching_beta_message_start_event import (
+ RawPromptCachingBetaMessageStartEvent as RawPromptCachingBetaMessageStartEvent,
+)
+from .raw_prompt_caching_beta_message_stream_event import (
+ RawPromptCachingBetaMessageStreamEvent as RawPromptCachingBetaMessageStreamEvent,
+)
+from .prompt_caching_beta_cache_control_ephemeral_param import (
+ PromptCachingBetaCacheControlEphemeralParam as PromptCachingBetaCacheControlEphemeralParam,
+)
diff --git a/src/anthropic/types/beta/prompt_caching/message_create_params.py b/src/anthropic/types/beta/prompt_caching/message_create_params.py
new file mode 100644
index 00000000..9baf68e3
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/message_create_params.py
@@ -0,0 +1,311 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ...model_param import ModelParam
+from .prompt_caching_beta_tool_param import PromptCachingBetaToolParam
+from .prompt_caching_beta_message_param import PromptCachingBetaMessageParam
+from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam
+
+__all__ = [
+ "MessageCreateParamsBase",
+ "Metadata",
+ "ToolChoice",
+ "ToolChoiceToolChoiceAuto",
+ "ToolChoiceToolChoiceAny",
+ "ToolChoiceToolChoiceTool",
+ "MessageCreateParamsNonStreaming",
+ "MessageCreateParamsStreaming",
+]
+
+
+class MessageCreateParamsBase(TypedDict, total=False):
+ max_tokens: Required[int]
+ """The maximum number of tokens to generate before stopping.
+
+ Note that our models may stop _before_ reaching this maximum. This parameter
+ only specifies the absolute maximum number of tokens to generate.
+
+ Different models have different maximum values for this parameter. See
+ [models](https://docs.anthropic.com/en/docs/models-overview) for details.
+ """
+
+ messages: Required[Iterable[PromptCachingBetaMessageParam]]
+ """Input messages.
+
+ Our models are trained to operate on alternating `user` and `assistant`
+ conversational turns. When creating a new `Message`, you specify the prior
+ conversational turns with the `messages` parameter, and the model then generates
+ the next `Message` in the conversation.
+
+ Each input message must be an object with a `role` and `content`. You can
+ specify a single `user`-role message, or you can include multiple `user` and
+ `assistant` messages. The first message must always use the `user` role.
+
+ If the final message uses the `assistant` role, the response content will
+ continue immediately from the content in that message. This can be used to
+ constrain part of the model's response.
+
+ Example with a single `user` message:
+
+ ```json
+ [{ "role": "user", "content": "Hello, Claude" }]
+ ```
+
+ Example with multiple conversational turns:
+
+ ```json
+ [
+ { "role": "user", "content": "Hello there." },
+ { "role": "assistant", "content": "Hi, I'm Claude. How can I help you?" },
+ { "role": "user", "content": "Can you explain LLMs in plain English?" }
+ ]
+ ```
+
+ Example with a partially-filled response from Claude:
+
+ ```json
+ [
+ {
+ "role": "user",
+ "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+ },
+ { "role": "assistant", "content": "The best answer is (" }
+ ]
+ ```
+
+ Each input message `content` may be either a single `string` or an array of
+ content blocks, where each block has a specific `type`. Using a `string` for
+ `content` is shorthand for an array of one content block of type `"text"`. The
+ following input messages are equivalent:
+
+ ```json
+ { "role": "user", "content": "Hello, Claude" }
+ ```
+
+ ```json
+ { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude" }] }
+ ```
+
+ Starting with Claude 3 models, you can also send image content blocks:
+
+ ```json
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/jpeg",
+ "data": "/9j/4AAQSkZJRg..."
+ }
+ },
+ { "type": "text", "text": "What is in this image?" }
+ ]
+ }
+ ```
+
+ We currently support the `base64` source type for images, and the `image/jpeg`,
+ `image/png`, `image/gif`, and `image/webp` media types.
+
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
+
+ Note that if you want to include a
+ [system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
+ the top-level `system` parameter — there is no `"system"` role for input
+ messages in the Messages API.
+ """
+
+ model: Required[ModelParam]
+ """
+ The model that will complete your prompt.\n\nSee
+ [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+ details and options.
+ """
+
+ metadata: Metadata
+ """An object describing metadata about the request."""
+
+ stop_sequences: List[str]
+ """Custom text sequences that will cause the model to stop generating.
+
+ Our models will normally stop when they have naturally completed their turn,
+ which will result in a response `stop_reason` of `"end_turn"`.
+
+ If you want the model to stop generating when it encounters custom strings of
+ text, you can use the `stop_sequences` parameter. If the model encounters one of
+ the custom sequences, the response `stop_reason` value will be `"stop_sequence"`
+ and the response `stop_sequence` value will contain the matched stop sequence.
+ """
+
+ system: Union[str, Iterable[PromptCachingBetaTextBlockParam]]
+ """System prompt.
+
+ A system prompt is a way of providing context and instructions to Claude, such
+ as specifying a particular goal or role. See our
+ [guide to system prompts](https://docs.anthropic.com/en/docs/system-prompts).
+ """
+
+ temperature: float
+ """Amount of randomness injected into the response.
+
+ Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
+ for analytical / multiple choice, and closer to `1.0` for creative and
+ generative tasks.
+
+ Note that even with `temperature` of `0.0`, the results will not be fully
+ deterministic.
+ """
+
+ tool_choice: ToolChoice
+ """How the model should use the provided tools.
+
+ The model can use a specific tool, any available tool, or decide by itself.
+ """
+
+ tools: Iterable[PromptCachingBetaToolParam]
+ """Definitions of tools that the model may use.
+
+ If you include `tools` in your API request, the model may return `tool_use`
+ content blocks that represent the model's use of those tools. You can then run
+ those tools using the tool input generated by the model and then optionally
+ return results back to the model using `tool_result` content blocks.
+
+ Each tool definition includes:
+
+ - `name`: Name of the tool.
+ - `description`: Optional, but strongly-recommended description of the tool.
+ - `input_schema`: [JSON schema](https://json-schema.org/) for the tool `input`
+ shape that the model will produce in `tool_use` output content blocks.
+
+ For example, if you defined `tools` as:
+
+ ```json
+ [
+ {
+ "name": "get_stock_price",
+ "description": "Get the current stock price for a given ticker symbol.",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "ticker": {
+ "type": "string",
+ "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
+ }
+ },
+ "required": ["ticker"]
+ }
+ }
+ ]
+ ```
+
+ And then asked the model "What's the S&P 500 at today?", the model might produce
+ `tool_use` content blocks in the response like this:
+
+ ```json
+ [
+ {
+ "type": "tool_use",
+ "id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "name": "get_stock_price",
+ "input": { "ticker": "^GSPC" }
+ }
+ ]
+ ```
+
+ You might then run your `get_stock_price` tool with `{"ticker": "^GSPC"}` as an
+ input, and return the following back to the model in a subsequent `user`
+ message:
+
+ ```json
+ [
+ {
+ "type": "tool_result",
+ "tool_use_id": "toolu_01D7FLrfh4GYq7yT1ULFeyMV",
+ "content": "259.75 USD"
+ }
+ ]
+ ```
+
+ Tools can be used for workflows that include running client-side tools and
+ functions, or more generally whenever you want the model to produce a particular
+ JSON structure of output.
+
+ See our [guide](https://docs.anthropic.com/en/docs/tool-use) for more details.
+ """
+
+ top_k: int
+ """Only sample from the top K options for each subsequent token.
+
+ Used to remove "long tail" low probability responses.
+ [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+ """
+
+ top_p: float
+ """Use nucleus sampling.
+
+ In nucleus sampling, we compute the cumulative distribution over all the options
+ for each subsequent token in decreasing probability order and cut it off once it
+ reaches a particular probability specified by `top_p`. You should either alter
+ `temperature` or `top_p`, but not both.
+
+ Recommended for advanced use cases only. You usually only need to use
+ `temperature`.
+ """
+
+
+class Metadata(TypedDict, total=False):
+ user_id: Optional[str]
+ """An external identifier for the user who is associated with the request.
+
+ This should be a uuid, hash value, or other opaque identifier. Anthropic may use
+ this id to help detect abuse. Do not include any identifying information such as
+ name, email address, or phone number.
+ """
+
+
+class ToolChoiceToolChoiceAuto(TypedDict, total=False):
+ type: Required[Literal["auto"]]
+
+
+class ToolChoiceToolChoiceAny(TypedDict, total=False):
+ type: Required[Literal["any"]]
+
+
+class ToolChoiceToolChoiceTool(TypedDict, total=False):
+ name: Required[str]
+ """The name of the tool to use."""
+
+ type: Required[Literal["tool"]]
+
+
+ToolChoice: TypeAlias = Union[ToolChoiceToolChoiceAuto, ToolChoiceToolChoiceAny, ToolChoiceToolChoiceTool]
+
+
+class MessageCreateParamsNonStreaming(MessageCreateParamsBase):
+ stream: Literal[False]
+ """Whether to incrementally stream the response using server-sent events.
+
+ See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+ details.
+ """
+
+
+class MessageCreateParamsStreaming(MessageCreateParamsBase):
+ stream: Required[Literal[True]]
+ """Whether to incrementally stream the response using server-sent events.
+
+ See [streaming](https://docs.anthropic.com/en/api/messages-streaming) for
+ details.
+ """
+
+
+MessageCreateParams = Union[MessageCreateParamsNonStreaming, MessageCreateParamsStreaming]
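
Because these params are plain TypedDicts, a request body can be assembled and type-checked before it is splatted into `create()`. A sketch; the model id and the `get_weather` tool are placeholders:

```python
from anthropic.types.beta.prompt_caching.message_create_params import (
    MessageCreateParamsNonStreaming,
)

params: MessageCreateParamsNonStreaming = {
    "model": "claude-3-5-sonnet-20240620",
    "max_tokens": 1024,
    "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
    "tools": [
        {
            "name": "get_weather",
            "description": "Get the current weather for a city.",
            "input_schema": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        }
    ],
    # Any of the three ToolChoice shapes is accepted:
    #   {"type": "auto"}, {"type": "any"}, or {"type": "tool", "name": "get_weather"}
    "tool_choice": {"type": "auto"},
}

# message = client.beta.prompt_caching.messages.create(**params)
```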
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py
new file mode 100644
index 00000000..8370b938
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_cache_control_ephemeral_param.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["PromptCachingBetaCacheControlEphemeralParam"]
+
+
+class PromptCachingBetaCacheControlEphemeralParam(TypedDict, total=False):
+ type: Required[Literal["ephemeral"]]
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py
new file mode 100644
index 00000000..02dfb0bc
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_image_block_param.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Optional
+from typing_extensions import Literal, Required, Annotated, TypedDict
+
+from ...._types import Base64FileInput
+from ...._utils import PropertyInfo
+from ...._models import set_pydantic_config
+from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam
+
+__all__ = ["PromptCachingBetaImageBlockParam", "Source"]
+
+
+class Source(TypedDict, total=False):
+ data: Required[Annotated[Union[str, Base64FileInput], PropertyInfo(format="base64")]]
+
+ media_type: Required[Literal["image/jpeg", "image/png", "image/gif", "image/webp"]]
+
+ type: Required[Literal["base64"]]
+
+
+set_pydantic_config(Source, {"arbitrary_types_allowed": True})
+
+
+class PromptCachingBetaImageBlockParam(TypedDict, total=False):
+ source: Required[Source]
+
+ type: Required[Literal["image"]]
+
+ cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam]
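
A sketch of building one of these image blocks by hand from a local file (`photo.jpg` is illustrative); `data` accepts a base64 string, as here, or a `Base64FileInput`:

```python
import base64
from pathlib import Path

from anthropic.types.beta.prompt_caching import PromptCachingBetaImageBlockParam

image_bytes = Path("photo.jpg").read_bytes()  # illustrative local file

image_block: PromptCachingBetaImageBlockParam = {
    "type": "image",
    "source": {
        "type": "base64",
        "media_type": "image/jpeg",
        "data": base64.b64encode(image_bytes).decode("utf-8"),
    },
    # cache_control is optional; include it to mark the block as cacheable:
    # "cache_control": {"type": "ephemeral"},
}
```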
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py
new file mode 100644
index 00000000..2cc49a2c
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message.py
@@ -0,0 +1,109 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...model import Model
+from ...._models import BaseModel
+from ...content_block import ContentBlock
+from .prompt_caching_beta_usage import PromptCachingBetaUsage
+
+__all__ = ["PromptCachingBetaMessage"]
+
+
+class PromptCachingBetaMessage(BaseModel):
+ id: str
+ """Unique object identifier.
+
+ The format and length of IDs may change over time.
+ """
+
+ content: List[ContentBlock]
+ """Content generated by the model.
+
+ This is an array of content blocks, each of which has a `type` that determines
+ its shape.
+
+ Example:
+
+ ```json
+ [{ "type": "text", "text": "Hi, I'm Claude." }]
+ ```
+
+ If the request input `messages` ended with an `assistant` turn, then the
+ response `content` will continue directly from that last turn. You can use this
+ to constrain the model's output.
+
+ For example, if the input `messages` were:
+
+ ```json
+ [
+ {
+ "role": "user",
+ "content": "What's the Greek name for Sun? (A) Sol (B) Helios (C) Sun"
+ },
+ { "role": "assistant", "content": "The best answer is (" }
+ ]
+ ```
+
+ Then the response `content` might be:
+
+ ```json
+ [{ "type": "text", "text": "B)" }]
+ ```
+ """
+
+ model: Model
+ """
+ The model that will complete your prompt.\n\nSee
+ [models](https://docs.anthropic.com/en/docs/models-overview) for additional
+ details and options.
+ """
+
+ role: Literal["assistant"]
+ """Conversational role of the generated message.
+
+ This will always be `"assistant"`.
+ """
+
+ stop_reason: Optional[Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"]] = None
+ """The reason that we stopped.
+
+ This may be one of the following values:
+
+ - `"end_turn"`: the model reached a natural stopping point
+ - `"max_tokens"`: we exceeded the requested `max_tokens` or the model's maximum
+ - `"stop_sequence"`: one of your provided custom `stop_sequences` was generated
+ - `"tool_use"`: the model invoked one or more tools
+
+ In non-streaming mode this value is always non-null. In streaming mode, it is
+ null in the `message_start` event and non-null otherwise.
+ """
+
+ stop_sequence: Optional[str] = None
+ """Which custom stop sequence was generated, if any.
+
+ This value will be a non-null string if one of your custom stop sequences was
+ generated.
+ """
+
+ type: Literal["message"]
+ """Object type.
+
+ For Messages, this is always `"message"`.
+ """
+
+ usage: PromptCachingBetaUsage
+ """Billing and rate-limit usage.
+
+ Anthropic's API bills and rate-limits by token counts, as tokens represent the
+ underlying cost to our systems.
+
+ Under the hood, the API transforms requests into a format suitable for the
+ model. The model's output then goes through a parsing stage before becoming an
+ API response. As a result, the token counts in `usage` will not match one-to-one
+ with the exact visible content of an API request or response.
+
+ For example, `output_tokens` will be non-zero, even for an empty string response
+ from Claude.
+ """
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py
new file mode 100644
index 00000000..f88093e2
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_message_param.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+from ...content_block import ContentBlock
+from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam
+from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam
+from .prompt_caching_beta_tool_use_block_param import PromptCachingBetaToolUseBlockParam
+from .prompt_caching_beta_tool_result_block_param import PromptCachingBetaToolResultBlockParam
+
+__all__ = ["PromptCachingBetaMessageParam"]
+
+
+class PromptCachingBetaMessageParam(TypedDict, total=False):
+ content: Required[
+ Union[
+ str,
+ Iterable[
+ Union[
+ PromptCachingBetaTextBlockParam,
+ PromptCachingBetaImageBlockParam,
+ PromptCachingBetaToolUseBlockParam,
+ PromptCachingBetaToolResultBlockParam,
+ ContentBlock,
+                ]
+            ],
+        ]
+    ]
+
+ role: Required[Literal["user", "assistant"]]

diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py
new file mode 100644
index 00000000..cbb463d2
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_text_block_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam
+
+__all__ = ["PromptCachingBetaTextBlockParam"]
+
+
+class PromptCachingBetaTextBlockParam(TypedDict, total=False):
+ text: Required[str]
+
+ type: Required[Literal["text"]]
+
+ cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam]
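
A typical use of this text block is a long, reusable system prompt marked with `cache_control`, mirroring the `system` fixture used in the tests below. The prompt text here is a placeholder; this is only a sketch:

```python
from anthropic import Anthropic

client = Anthropic()

LONG_SYSTEM_PROMPT = "You are a meticulous legal assistant. ..."  # placeholder text

message = client.beta.prompt_caching.messages.create(
    max_tokens=1024,
    model="claude-3-5-sonnet-20240620",
    system=[
        {
            "type": "text",
            "text": LONG_SYSTEM_PROMPT,
            # Later requests sharing this prefix can reuse the cache entry.
            "cache_control": {"type": "ephemeral"},
        }
    ],
    messages=[{"role": "user", "content": "Summarize clause 4."}],
)
print(message.usage.cache_creation_input_tokens, message.usage.cache_read_input_tokens)
```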
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py
new file mode 100644
index 00000000..f9feb37f
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_param.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam
+
+__all__ = ["PromptCachingBetaToolParam", "InputSchema"]
+
+
+class InputSchemaTyped(TypedDict, total=False):
+ type: Required[Literal["object"]]
+
+ properties: Optional[object]
+
+
+InputSchema: TypeAlias = Union[InputSchemaTyped, Dict[str, object]]
+
+
+class PromptCachingBetaToolParam(TypedDict, total=False):
+ input_schema: Required[InputSchema]
+ """[JSON schema](https://json-schema.org/) for this tool's input.
+
+ This defines the shape of the `input` that your tool accepts and that the model
+ will produce.
+ """
+
+ name: Required[str]
+
+ cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam]
+
+ description: str
+ """Description of what this tool does.
+
+ Tool descriptions should be as detailed as possible. The more information that
+ the model has about what the tool is and how to use it, the better it will
+ perform. You can use natural language descriptions to reinforce important
+ aspects of the tool input JSON schema.
+ """
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py
new file mode 100644
index 00000000..6c1ca718
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_result_block_param.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .prompt_caching_beta_text_block_param import PromptCachingBetaTextBlockParam
+from .prompt_caching_beta_image_block_param import PromptCachingBetaImageBlockParam
+from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam
+
+__all__ = ["PromptCachingBetaToolResultBlockParam", "Content"]
+
+Content: TypeAlias = Union[PromptCachingBetaTextBlockParam, PromptCachingBetaImageBlockParam]
+
+
+class PromptCachingBetaToolResultBlockParam(TypedDict, total=False):
+ tool_use_id: Required[str]
+
+ type: Required[Literal["tool_result"]]
+
+ cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam]
+
+ content: Union[str, Iterable[Content]]
+
+ is_error: bool
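
When the model emits a `tool_use` block, the follow-up user turn carries a matching `tool_result`. A minimal sketch of that block; the `tool_use_id` is a placeholder that would normally be copied from the model's `tool_use` block:

```python
from anthropic.types.beta.prompt_caching import PromptCachingBetaToolResultBlockParam

tool_result: PromptCachingBetaToolResultBlockParam = {
    "type": "tool_result",
    "tool_use_id": "toolu_placeholder_id",  # copied from the model's tool_use block
    "content": [{"type": "text", "text": "72 degrees and sunny"}],
    "is_error": False,
}
```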
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py
new file mode 100644
index 00000000..35ccf446
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_tool_use_block_param.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .prompt_caching_beta_cache_control_ephemeral_param import PromptCachingBetaCacheControlEphemeralParam
+
+__all__ = ["PromptCachingBetaToolUseBlockParam"]
+
+
+class PromptCachingBetaToolUseBlockParam(TypedDict, total=False):
+ id: Required[str]
+
+ input: Required[object]
+
+ name: Required[str]
+
+ type: Required[Literal["tool_use"]]
+
+ cache_control: Optional[PromptCachingBetaCacheControlEphemeralParam]
diff --git a/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py
new file mode 100644
index 00000000..20d23004
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/prompt_caching_beta_usage.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ...._models import BaseModel
+
+__all__ = ["PromptCachingBetaUsage"]
+
+
+class PromptCachingBetaUsage(BaseModel):
+ cache_creation_input_tokens: Optional[int] = None
+ """The number of input tokens used to create the cache entry."""
+
+ cache_read_input_tokens: Optional[int] = None
+ """The number of input tokens read from the cache."""
+
+ input_tokens: int
+ """The number of input tokens which were used."""
+
+ output_tokens: int
+ """The number of output tokens which were used."""
diff --git a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py
new file mode 100644
index 00000000..9d055e22
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_start_event.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from .prompt_caching_beta_message import PromptCachingBetaMessage
+
+__all__ = ["RawPromptCachingBetaMessageStartEvent"]
+
+
+class RawPromptCachingBetaMessageStartEvent(BaseModel):
+ message: PromptCachingBetaMessage
+
+ type: Literal["message_start"]
diff --git a/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py
new file mode 100644
index 00000000..58099baf
--- /dev/null
+++ b/src/anthropic/types/beta/prompt_caching/raw_prompt_caching_beta_message_stream_event.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ...._utils import PropertyInfo
+from ...raw_message_stop_event import RawMessageStopEvent
+from ...raw_message_delta_event import RawMessageDeltaEvent
+from ...raw_content_block_stop_event import RawContentBlockStopEvent
+from ...raw_content_block_delta_event import RawContentBlockDeltaEvent
+from ...raw_content_block_start_event import RawContentBlockStartEvent
+from .raw_prompt_caching_beta_message_start_event import RawPromptCachingBetaMessageStartEvent
+
+__all__ = ["RawPromptCachingBetaMessageStreamEvent"]
+
+RawPromptCachingBetaMessageStreamEvent: TypeAlias = Annotated[
+ Union[
+ RawPromptCachingBetaMessageStartEvent,
+ RawMessageDeltaEvent,
+ RawMessageStopEvent,
+ RawContentBlockStartEvent,
+ RawContentBlockDeltaEvent,
+ RawContentBlockStopEvent,
+ ],
+ PropertyInfo(discriminator="type"),
+]
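
The union above is discriminated on `type`, so a raw stream can be handled with a simple dispatch. A sketch assuming `stream=True` on the beta create call, as exercised in the tests below; the delta attributes follow the non-beta raw event types re-used here:

```python
from anthropic import Anthropic

client = Anthropic()

stream = client.beta.prompt_caching.messages.create(
    max_tokens=1024,
    model="claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "Hello, world"}],
    stream=True,
)

for event in stream:  # each item is a RawPromptCachingBetaMessageStreamEvent member
    if event.type == "message_start":
        print("usage so far:", event.message.usage)
    elif event.type == "content_block_delta" and event.delta.type == "text_delta":
        print(event.delta.text, end="", flush=True)
    elif event.type == "message_stop":
        print()
```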
diff --git a/src/anthropic/types/message_create_params.py b/src/anthropic/types/message_create_params.py
index 4b593038..76efa7f1 100644
--- a/src/anthropic/types/message_create_params.py
+++ b/src/anthropic/types/message_create_params.py
@@ -112,8 +112,8 @@ class MessageCreateParamsBase(TypedDict, total=False):
We currently support the `base64` source type for images, and the `image/jpeg`,
`image/png`, `image/gif`, and `image/webp` media types.
- See [examples](https://docs.anthropic.com/en/api/messages-examples) for more
- input examples.
+ See [examples](https://docs.anthropic.com/en/api/messages-examples#vision) for
+ more input examples.
Note that if you want to include a
[system prompt](https://docs.anthropic.com/en/docs/system-prompts), you can use
diff --git a/tests/api_resources/beta/__init__.py b/tests/api_resources/beta/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/beta/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/beta/prompt_caching/__init__.py b/tests/api_resources/beta/prompt_caching/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/beta/prompt_caching/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/beta/prompt_caching/test_messages.py b/tests/api_resources/beta/prompt_caching/test_messages.py
new file mode 100644
index 00000000..44e298df
--- /dev/null
+++ b/tests/api_resources/beta/prompt_caching/test_messages.py
@@ -0,0 +1,570 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from anthropic import Anthropic, AsyncAnthropic
+from tests.utils import assert_matches_type
+from anthropic.types.beta.prompt_caching import PromptCachingBetaMessage
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestMessages:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create_overload_1(self, client: Anthropic) -> None:
+ message = client.beta.prompt_caching.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ )
+ assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> None:
+ message = client.beta.prompt_caching.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
+ stop_sequences=["string", "string", "string"],
+ stream=False,
+ system=[
+ {
+ "text": "Today's date is 2024-06-01.",
+ "type": "text",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ temperature=1,
+ tool_choice={"type": "auto"},
+ tools=[
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ ],
+ top_k=5,
+ top_p=0.7,
+ )
+ assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
+
+ @parametrize
+ def test_raw_response_create_overload_1(self, client: Anthropic) -> None:
+ response = client.beta.prompt_caching.messages.with_raw_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ message = response.parse()
+ assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create_overload_1(self, client: Anthropic) -> None:
+ with client.beta.prompt_caching.messages.with_streaming_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ message = response.parse()
+ assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_create_overload_2(self, client: Anthropic) -> None:
+ message_stream = client.beta.prompt_caching.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ stream=True,
+ )
+ message_stream.response.close()
+
+ @parametrize
+ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> None:
+ message_stream = client.beta.prompt_caching.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ stream=True,
+ metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
+ stop_sequences=["string", "string", "string"],
+ system=[
+ {
+ "text": "Today's date is 2024-06-01.",
+ "type": "text",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ temperature=1,
+ tool_choice={"type": "auto"},
+ tools=[
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ ],
+ top_k=5,
+ top_p=0.7,
+ )
+ message_stream.response.close()
+
+ @parametrize
+ def test_raw_response_create_overload_2(self, client: Anthropic) -> None:
+ response = client.beta.prompt_caching.messages.with_raw_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ stream=True,
+ )
+
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ stream = response.parse()
+ stream.close()
+
+ @parametrize
+ def test_streaming_response_create_overload_2(self, client: Anthropic) -> None:
+ with client.beta.prompt_caching.messages.with_streaming_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ stream=True,
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ stream = response.parse()
+ stream.close()
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncMessages:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create_overload_1(self, async_client: AsyncAnthropic) -> None:
+ message = await async_client.beta.prompt_caching.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ )
+ assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params_overload_1(self, async_client: AsyncAnthropic) -> None:
+ message = await async_client.beta.prompt_caching.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
+ stop_sequences=["string", "string", "string"],
+ stream=False,
+ system=[
+ {
+ "text": "Today's date is 2024-06-01.",
+ "type": "text",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ temperature=1,
+ tool_choice={"type": "auto"},
+ tools=[
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ ],
+ top_k=5,
+ top_p=0.7,
+ )
+ assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create_overload_1(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.beta.prompt_caching.messages.with_raw_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ message = response.parse()
+ assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create_overload_1(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.beta.prompt_caching.messages.with_streaming_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ message = await response.parse()
+ assert_matches_type(PromptCachingBetaMessage, message, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_create_overload_2(self, async_client: AsyncAnthropic) -> None:
+ message_stream = await async_client.beta.prompt_caching.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ stream=True,
+ )
+ await message_stream.response.aclose()
+
+ @parametrize
+ async def test_method_create_with_all_params_overload_2(self, async_client: AsyncAnthropic) -> None:
+ message_stream = await async_client.beta.prompt_caching.messages.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ stream=True,
+ metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
+ stop_sequences=["string", "string", "string"],
+ system=[
+ {
+ "text": "Today's date is 2024-06-01.",
+ "type": "text",
+ "cache_control": {"type": "ephemeral"},
+ }
+ ],
+ temperature=1,
+ tool_choice={"type": "auto"},
+ tools=[
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ {
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "description": "The city and state, e.g. San Francisco, CA",
+ "type": "string",
+ },
+ "unit": {
+ "description": "Unit for the output - one of (celsius, fahrenheit)",
+ "type": "string",
+ },
+ },
+ },
+ "name": "x",
+ "cache_control": {"type": "ephemeral"},
+ "description": "Get the current weather in a given location",
+ },
+ ],
+ top_k=5,
+ top_p=0.7,
+ )
+ await message_stream.response.aclose()
+
+ @parametrize
+ async def test_raw_response_create_overload_2(self, async_client: AsyncAnthropic) -> None:
+ response = await async_client.beta.prompt_caching.messages.with_raw_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ stream=True,
+ )
+
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ stream = response.parse()
+ await stream.close()
+
+ @parametrize
+ async def test_streaming_response_create_overload_2(self, async_client: AsyncAnthropic) -> None:
+ async with async_client.beta.prompt_caching.messages.with_streaming_response.create(
+ max_tokens=1024,
+ messages=[
+ {
+ "content": "Hello, world",
+ "role": "user",
+ }
+ ],
+ model="claude-3-5-sonnet-20240620",
+ stream=True,
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ stream = await response.parse()
+ await stream.close()
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_messages.py b/tests/api_resources/test_messages.py
index 94dc8cc6..e3685408 100644
--- a/tests/api_resources/test_messages.py
+++ b/tests/api_resources/test_messages.py
@@ -23,8 +23,8 @@ def test_method_create_overload_1(self, client: Anthropic) -> None:
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -37,8 +37,8 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -47,16 +47,14 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No
stream=False,
system=[
{
- "type": "text",
"text": "Today's date is 2024-06-01.",
+ "type": "text",
}
],
temperature=1,
tool_choice={"type": "auto"},
tools=[
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -70,10 +68,10 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -87,10 +85,10 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -104,6 +102,8 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
],
top_k=5,
@@ -117,8 +117,8 @@ def test_raw_response_create_overload_1(self, client: Anthropic) -> None:
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -135,8 +135,8 @@ def test_streaming_response_create_overload_1(self, client: Anthropic) -> None:
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -155,8 +155,8 @@ def test_method_create_overload_2(self, client: Anthropic) -> None:
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -170,8 +170,8 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -180,16 +180,14 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No
stop_sequences=["string", "string", "string"],
system=[
{
- "type": "text",
"text": "Today's date is 2024-06-01.",
+ "type": "text",
}
],
temperature=1,
tool_choice={"type": "auto"},
tools=[
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -203,10 +201,10 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -220,10 +218,10 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -237,6 +235,8 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
],
top_k=5,
@@ -250,8 +250,8 @@ def test_raw_response_create_overload_2(self, client: Anthropic) -> None:
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -268,8 +268,8 @@ def test_streaming_response_create_overload_2(self, client: Anthropic) -> None:
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -293,8 +293,8 @@ async def test_method_create_overload_1(self, async_client: AsyncAnthropic) -> N
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -307,8 +307,8 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -317,16 +317,14 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
stream=False,
system=[
{
- "type": "text",
"text": "Today's date is 2024-06-01.",
+ "type": "text",
}
],
temperature=1,
tool_choice={"type": "auto"},
tools=[
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -340,10 +338,10 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -357,10 +355,10 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -374,6 +372,8 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
],
top_k=5,
@@ -387,8 +387,8 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncAnthropic
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -405,8 +405,8 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncAnt
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -425,8 +425,8 @@ async def test_method_create_overload_2(self, async_client: AsyncAnthropic) -> N
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -440,8 +440,8 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -450,16 +450,14 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
stop_sequences=["string", "string", "string"],
system=[
{
- "type": "text",
"text": "Today's date is 2024-06-01.",
+ "type": "text",
}
],
temperature=1,
tool_choice={"type": "auto"},
tools=[
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -473,10 +471,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -490,10 +488,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
{
- "description": "Get the current weather in a given location",
- "name": "x",
"input_schema": {
"type": "object",
"properties": {
@@ -507,6 +505,8 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
},
},
},
+ "name": "x",
+ "description": "Get the current weather in a given location",
},
],
top_k=5,
@@ -520,8 +520,8 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncAnthropic
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -538,8 +538,8 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncAnt
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
diff --git a/tests/test_client.py b/tests/test_client.py
index 23364599..e20da702 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -890,8 +890,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -922,8 +922,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -1790,8 +1790,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",
@@ -1823,8 +1823,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
max_tokens=1024,
messages=[
{
- "role": "user",
"content": "Hello, world",
+ "role": "user",
}
],
model="claude-3-5-sonnet-20240620",