Draft: Support execution limits in run_ functions
#374
base: main
Changes from all commits
First changed file (the agent module):
@@ -22,7 +22,7 @@
     result,
 )
 from .result import ResultData
-from .settings import ModelSettings, merge_model_settings
+from .settings import ExecutionLimitSettings, ModelSettings, merge_model_settings
 from .tools import (
     AgentDeps,
     RunContext,
@@ -191,6 +191,7 @@ async def run(
         model: models.Model | models.KnownModelName | None = None,
         deps: AgentDeps = None,
         model_settings: ModelSettings | None = None,
+        execution_limit_settings: ExecutionLimitSettings | None = None,
         infer_name: bool = True,
     ) -> result.RunResult[ResultData]:
         """Run the agent with a user prompt in async mode.
@@ -211,8 +212,9 @@ async def run(
             message_history: History of the conversation so far.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
             deps: Optional dependencies to use for this run.
-            infer_name: Whether to try to infer the agent name from the call frame if it's not set.
             model_settings: Optional settings to use for this model's request.
+            execution_limit_settings: Optional settings to use in order to limit model request or cost (token usage).
+            infer_name: Whether to try to infer the agent name from the call frame if it's not set.

         Returns:
             The result of the run.
@@ -238,8 +240,8 @@ async def run(
                 tool.current_retry = 0

         cost = result.Cost()

         model_settings = merge_model_settings(self.model_settings, model_settings)
+        execution_limit_settings = execution_limit_settings or ExecutionLimitSettings(request_limit=50)

         run_step = 0
         while True:

Review comment on the added line: Is this where we want to set the default?
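One possible answer to that question: put the default on the dataclass itself instead of repeating it in run, run_sync and run_stream. A minimal sketch of that alternative (the field names and the default of 50 requests come from this diff; the placement is only a suggestion, not something the PR does):

```python
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class ExecutionLimitSettings:
    """Settings to configure an agent run (counter fields omitted for brevity)."""

    # Carrying the default here would let every run_* method simply do:
    #   execution_limit_settings = execution_limit_settings or ExecutionLimitSettings()
    request_limit: int | None = 50
    request_tokens_limit: int | None = None
    response_tokens_limit: int | None = None
    total_tokens_limit: int | None = None
```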
@@ -254,6 +256,8 @@ async def run(

             messages.append(model_response)
             cost += request_cost
+            # TODO: is this the right location? Should we move this earlier in the logic?
+            execution_limit_settings.increment(request_cost)

             with _logfire.span('handle model response', run_step=run_step) as handle_span:
                 final_result, tool_responses = await self._handle_model_response(model_response, deps, messages)

Review comment on the increment call: I personally would prefer if we added a […]. I'd also prefer we rename […].
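On the TODO about where the increment belongs: one hypothetical option is a separate pre-request check so the run stops before issuing a request it is not allowed to make. The method below is invented for illustration and is not part of this PR; it assumes the ExecutionLimitSettings fields shown later in the diff.

```python
    # Hypothetical extra method on ExecutionLimitSettings (not in this PR):
    def check_before_request(self) -> None:
        """Raise before the model call if one more request would exceed request_limit."""
        if self.request_limit is not None and self._request_count >= self.request_limit:
            raise UnexpectedModelBehavior(
                f'the next request would exceed the request count limit of {self.request_limit}'
            )
```

The run loop would call check_before_request() just before the model request and increment() right after it, so the token limits are still enforced against the actual reported cost.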
@@ -284,6 +288,7 @@ def run_sync(
         model: models.Model | models.KnownModelName | None = None,
         deps: AgentDeps = None,
         model_settings: ModelSettings | None = None,
+        execution_limit_settings: ExecutionLimitSettings | None = None,
         infer_name: bool = True,
     ) -> result.RunResult[ResultData]:
         """Run the agent with a user prompt synchronously.
@@ -308,8 +313,9 @@ async def main():
             message_history: History of the conversation so far.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
             deps: Optional dependencies to use for this run.
-            infer_name: Whether to try to infer the agent name from the call frame if it's not set.
             model_settings: Optional settings to use for this model's request.
+            execution_limit_settings: Optional settings to use in order to limit model request or cost (token usage).
+            infer_name: Whether to try to infer the agent name from the call frame if it's not set.

         Returns:
             The result of the run.
@@ -322,8 +328,9 @@ async def main():
                 message_history=message_history,
                 model=model,
                 deps=deps,
-                infer_name=False,
+                execution_limit_settings=execution_limit_settings,
                 model_settings=model_settings,
+                infer_name=False,
             )
         )
@@ -336,6 +343,7 @@ async def run_stream(
         model: models.Model | models.KnownModelName | None = None,
         deps: AgentDeps = None,
         model_settings: ModelSettings | None = None,
+        execution_limit_settings: ExecutionLimitSettings | None = None,
         infer_name: bool = True,
     ) -> AsyncIterator[result.StreamedRunResult[AgentDeps, ResultData]]:
         """Run the agent with a user prompt in async mode, returning a streamed response.
@@ -357,8 +365,9 @@ async def main():
             message_history: History of the conversation so far.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
             deps: Optional dependencies to use for this run.
-            infer_name: Whether to try to infer the agent name from the call frame if it's not set.
             model_settings: Optional settings to use for this model's request.
+            execution_limit_settings: Optional settings to use in order to limit model request or cost (token usage).
+            infer_name: Whether to try to infer the agent name from the call frame if it's not set.

         Returns:
             The result of the run.
@@ -387,6 +396,7 @@ async def main():

         cost = result.Cost()
         model_settings = merge_model_settings(self.model_settings, model_settings)
+        execution_limit_settings = execution_limit_settings or ExecutionLimitSettings(request_limit=50)

         run_step = 0
         while True:
@@ -456,7 +466,9 @@ async def on_complete():
                     tool_responses_str = ' '.join(r.part_kind for r in tool_responses)
                     handle_span.message = f'handle model response -> {tool_responses_str}'
                     # the model_response should have been fully streamed by now, we can add it's cost
-                    cost += model_response.cost()
+                    model_response_cost = model_response.cost()
+                    execution_limit_settings.increment(model_response_cost)
+                    cost += model_response_cost

     @contextmanager
     def override(
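Taken together, the changes to this first file would let a caller cap a run roughly like this. The Agent construction and model name are illustrative, and the import path for ExecutionLimitSettings assumes the settings module shown below is pydantic_ai.settings; only the execution_limit_settings argument and ExecutionLimitSettings itself come from this PR.

```python
from pydantic_ai import Agent
from pydantic_ai.settings import ExecutionLimitSettings  # assumed import path

agent = Agent('openai:gpt-4o', system_prompt='Be concise.')  # illustrative agent

# Cap this run at 5 model requests and 10_000 total tokens; increment() raises
# once a counter passes its limit, which aborts the run.
result = agent.run_sync(
    'Summarise the latest release notes.',
    execution_limit_settings=ExecutionLimitSettings(request_limit=5, total_tokens_limit=10_000),
)
print(result.data)
```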
Second changed file (the settings module):
@@ -1,8 +1,16 @@
 from __future__ import annotations

+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
 from httpx import Timeout
 from typing_extensions import TypedDict

+from .exceptions import UnexpectedModelBehavior
+
+if TYPE_CHECKING:
+    from .result import Cost
+

 class ModelSettings(TypedDict, total=False):
     """Settings to configure an LLM.
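The Cost import is guarded by TYPE_CHECKING, presumably because Cost is only needed for the annotation on increment() and importing .result from .settings at runtime could be circular or simply unnecessary; with `from __future__ import annotations` the annotation is never evaluated. A generic illustration of the pattern (module names here are placeholders):

```python
from __future__ import annotations  # all annotations become strings, evaluated lazily

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only seen by type checkers; never executed at runtime, so it cannot
    # create an import cycle or add import-time cost.
    from mypackage.result import Cost  # placeholder module


def increment(cost: Cost) -> None:  # 'Cost' stays a string at runtime
    ...
```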
@@ -70,3 +78,35 @@ def merge_model_settings(base: ModelSettings | None, overrides: ModelSettings | None
         return base | overrides
     else:
         return base or overrides
+
+
+@dataclass
+class ExecutionLimitSettings:
+    """Settings to configure an agent run."""
+
+    request_limit: int | None = None
+    request_tokens_limit: int | None = None
+    response_tokens_limit: int | None = None
+    total_tokens_limit: int | None = None
+
+    _request_count: int = 0
+    _request_tokens_count: int = 0
+    _response_tokens_count: int = 0
+    _total_tokens_count: int = 0
Review thread on lines +92 to +95 (the private counter fields):

- Should these be public?
- I would say yes if we want to also include this structure in […].
- I don't like the idea that the settings object also holds state; it feels to me like there should be a separate object for tracking state, and we can check the state against the settings. If I were a user I'd be inclined to reuse an instance of […]. I would imagine we make a private type […].
- If we want to put the usage state on the RunContext we can make it public, but I feel like we can do that later/separately. I'll note that I could imagine Samuel disagreeing with all this, and I wouldn't find that unreasonable.
+    def increment(self, cost: Cost) -> None:
+        self._request_count += 1
+        self._check_limit(self.request_limit, self._request_count, 'request count')
+
+        self._request_tokens_count += cost.request_tokens or 0
+        self._check_limit(self.request_tokens_limit, self._request_tokens_count, 'request tokens count')
+
+        self._response_tokens_count += cost.response_tokens or 0
+        self._check_limit(self.response_tokens_limit, self._response_tokens_count, 'response tokens count')
+
+        self._total_tokens_count += cost.total_tokens or 0
+        self._check_limit(self.total_tokens_limit, self._total_tokens_count, 'total tokens count')
+
+    def _check_limit(self, limit: int | None, count: int, limit_name: str) -> None:
+        if limit and limit < count:
+            raise UnexpectedModelBehavior(f'Exceeded {limit_name} limit of {limit} by {count - limit}')
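A sketch of the separation the reviewers describe in the thread above: ExecutionLimitSettings stays a stateless, reusable configuration object, and a private per-run state object accumulates usage and is checked against it. Everything here except ExecutionLimitSettings, Cost and UnexpectedModelBehavior (which appear in this diff) is invented for illustration, including the import paths:

```python
from dataclasses import dataclass

from pydantic_ai.exceptions import UnexpectedModelBehavior  # assumed import path
from pydantic_ai.result import Cost  # assumed import path
from pydantic_ai.settings import ExecutionLimitSettings  # assumed import path


@dataclass
class _ExecutionState:
    """Per-run usage counters; a fresh instance is created for each run_* call."""

    request_count: int = 0
    request_tokens: int = 0
    response_tokens: int = 0
    total_tokens: int = 0

    def add(self, cost: Cost) -> None:
        self.request_count += 1
        self.request_tokens += cost.request_tokens or 0
        self.response_tokens += cost.response_tokens or 0
        self.total_tokens += cost.total_tokens or 0


def check_limits(settings: ExecutionLimitSettings, state: _ExecutionState) -> None:
    """Compare accumulated per-run state against the reusable, stateless settings.

    Only two of the four limits are checked here to keep the sketch short.
    """
    if settings.request_limit is not None and state.request_count > settings.request_limit:
        raise UnexpectedModelBehavior(f'Exceeded request count limit of {settings.request_limit}')
    if settings.total_tokens_limit is not None and state.total_tokens > settings.total_tokens_limit:
        raise UnexpectedModelBehavior(f'Exceeded total tokens limit of {settings.total_tokens_limit}')
```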
Review thread on the raise in _check_limit:

- I feel like this deserves its own exception, and probably one that doesn't inherit from UnexpectedModelBehavior (as this is more or less expected behavior).
- This would be my preference.
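A sketch of the dedicated exception suggested above; the name UsageLimitExceeded is invented here, not taken from this PR:

```python
class UsageLimitExceeded(Exception):
    """Raised when an agent run exceeds one of its configured execution limits.

    Deliberately not a subclass of UnexpectedModelBehavior: hitting a
    user-configured limit is expected behaviour, not a model fault.
    """


# _check_limit would then become:
#     if limit and limit < count:
#         raise UsageLimitExceeded(f'Exceeded {limit_name} limit of {limit} by {count - limit}')
```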