From 03d4f4e7ccf7a9596538f289fc5d47808fa8d858 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Thu, 17 Oct 2024 20:30:03 +0700 Subject: [PATCH 1/2] feat(backend): Track LLM token usage + LLM blocks cleanup --- .../backend/backend/blocks/__init__.py | 5 +- .../backend/backend/blocks/llm.py | 256 ++++++++++-------- .../backend/backend/data/block.py | 29 +- .../backend/backend/data/graph.py | 2 +- .../backend/backend/executor/manager.py | 2 + .../backend/backend/server/rest_api.py | 4 +- .../backend/test/block/test_block.py | 6 +- 7 files changed, 178 insertions(+), 126 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/__init__.py b/autogpt_platform/backend/backend/blocks/__init__.py index 1fd85aef4630..92591f921e17 100644 --- a/autogpt_platform/backend/backend/blocks/__init__.py +++ b/autogpt_platform/backend/backend/blocks/__init__.py @@ -2,6 +2,7 @@ import os import re from pathlib import Path +from typing import Type from backend.data.block import Block @@ -24,7 +25,7 @@ AVAILABLE_MODULES.append(module) # Load all Block instances from the available modules -AVAILABLE_BLOCKS = {} +AVAILABLE_BLOCKS: dict[str, Type[Block]] = {} def all_subclasses(clz): @@ -76,6 +77,6 @@ def all_subclasses(clz): if block.disabled: continue - AVAILABLE_BLOCKS[block.id] = block + AVAILABLE_BLOCKS[block.id] = cls __all__ = ["AVAILABLE_MODULES", "AVAILABLE_BLOCKS"] diff --git a/autogpt_platform/backend/backend/blocks/llm.py b/autogpt_platform/backend/backend/blocks/llm.py index f38b5f5da72d..a31fbb19719b 100644 --- a/autogpt_platform/backend/backend/blocks/llm.py +++ b/autogpt_platform/backend/backend/blocks/llm.py @@ -122,6 +122,17 @@ def cost_factor(self) -> int: raise ValueError(f"Missing MODEL_METADATA metadata for model: {model}") +class MessageRole(str, Enum): + SYSTEM = "system" + USER = "user" + ASSISTANT = "assistant" + + +class Message(BlockSchema): + role: MessageRole + content: str + + class AIStructuredResponseGeneratorBlock(Block): class Input(BlockSchema): prompt: str @@ -137,10 +148,19 @@ class Input(BlockSchema): ) api_key: BlockSecret = SecretField(value="") sys_prompt: str = "" + conversation_history: list[Message] = SchemaField( + default=[], + description="The conversation history to provide context for the prompt.", + ) retry: int = 3 prompt_values: dict[str, str] = SchemaField( advanced=False, default={}, description="Values used to fill in the prompt." ) + max_tokens: int | None = SchemaField( + advanced=True, + default=None, + description="The maximum number of tokens to generate in the chat completion.", + ) class Output(BlockSchema): response: dict[str, Any] @@ -164,26 +184,47 @@ def __init__(self): }, test_output=("response", {"key1": "key1Value", "key2": "key2Value"}), test_mock={ - "llm_call": lambda *args, **kwargs: json.dumps( - { - "key1": "key1Value", - "key2": "key2Value", - } + "llm_call": lambda *args, **kwargs: ( + json.dumps( + { + "key1": "key1Value", + "key2": "key2Value", + } + ), + 0, + 0, ) }, ) @staticmethod def llm_call( - api_key: str, model: LlmModel, prompt: list[dict], json_format: bool - ) -> str: - provider = model.metadata.provider + api_key: str, + llm_model: LlmModel, + prompt: list[dict], + json_format: bool, + max_tokens: int | None = None, + ) -> tuple[str, int, int]: + """ + Args: + api_key: API key for the LLM provider. + llm_model: The LLM model to use. + prompt: The prompt to send to the LLM. + json_format: Whether the response should be in JSON format. + max_tokens: The maximum number of tokens to generate in the chat completion. + + Returns: + The response from the LLM. + The number of tokens used in the prompt. + The number of tokens used in the completion. + """ + provider = llm_model.metadata.provider if provider == "openai": openai.api_key = api_key response_format = None - if model in [LlmModel.O1_MINI, LlmModel.O1_PREVIEW]: + if llm_model in [LlmModel.O1_MINI, LlmModel.O1_PREVIEW]: sys_messages = [p["content"] for p in prompt if p["role"] == "system"] usr_messages = [p["content"] for p in prompt if p["role"] != "system"] prompt = [ @@ -194,11 +235,17 @@ def llm_call( response_format = {"type": "json_object"} response = openai.chat.completions.create( - model=model.value, + model=llm_model.value, messages=prompt, # type: ignore response_format=response_format, # type: ignore + max_completion_tokens=max_tokens, + ) + + return ( + response.choices[0].message.content or "", + response.usage.prompt_tokens if response.usage else 0, + response.usage.completion_tokens if response.usage else 0, ) - return response.choices[0].message.content or "" elif provider == "anthropic": system_messages = [p["content"] for p in prompt if p["role"] == "system"] sysprompt = " ".join(system_messages) @@ -216,13 +263,18 @@ def llm_call( client = anthropic.Anthropic(api_key=api_key) try: - response = client.messages.create( - model=model.value, - max_tokens=4096, + resp = client.messages.create( + model=llm_model.value, system=sysprompt, messages=messages, + max_tokens=max_tokens or 8192, + ) + + return ( + resp.content[0].text if resp.content else "", + resp.usage.input_tokens, + resp.usage.output_tokens, ) - return response.content[0].text if response.content else "" except anthropic.APIError as e: error_message = f"Anthropic API error: {str(e)}" logger.error(error_message) @@ -231,23 +283,32 @@ def llm_call( client = Groq(api_key=api_key) response_format = {"type": "json_object"} if json_format else None response = client.chat.completions.create( - model=model.value, + model=llm_model.value, messages=prompt, # type: ignore response_format=response_format, # type: ignore + max_tokens=max_tokens, + ) + return ( + response.choices[0].message.content or "", + response.usage.prompt_tokens if response.usage else 0, + response.usage.completion_tokens if response.usage else 0, ) - return response.choices[0].message.content or "" elif provider == "ollama": + sys_messages = [p["content"] for p in prompt if p["role"] == "system"] + usr_messages = [p["content"] for p in prompt if p["role"] != "system"] response = ollama.generate( - model=model.value, - prompt=prompt[0]["content"], + model=llm_model.value, + prompt=f"{sys_messages}\n\n{usr_messages}", + stream=False, ) - return response["response"] + # TODO: calculate/fetch Ollama's input/output token. + return response["response"], 0, 0 else: raise ValueError(f"Unsupported LLM provider: {provider}") def run(self, input_data: Input, **kwargs) -> BlockOutput: logger.debug(f"Calling LLM with input data: {input_data}") - prompt = [] + prompt = [p.model_dump() for p in input_data.conversation_history] def trim_prompt(s: str) -> str: lines = s.strip().split("\n") @@ -276,7 +337,8 @@ def trim_prompt(s: str) -> str: ) prompt.append({"role": "system", "content": sys_prompt}) - prompt.append({"role": "user", "content": input_data.prompt}) + if input_data.prompt: + prompt.append({"role": "user", "content": input_data.prompt}) def parse_response(resp: str) -> tuple[dict[str, Any], str | None]: try: @@ -292,19 +354,26 @@ def parse_response(resp: str) -> tuple[dict[str, Any], str | None]: logger.info(f"LLM request: {prompt}") retry_prompt = "" - model = input_data.model + llm_model = input_data.model api_key = ( input_data.api_key.get_secret_value() - or LlmApiKeys[model.metadata.provider].get_secret_value() + or LlmApiKeys[llm_model.metadata.provider].get_secret_value() ) for retry_count in range(input_data.retry): try: - response_text = self.llm_call( + response_text, input_token, output_token = self.llm_call( api_key=api_key, - model=model, + llm_model=llm_model, prompt=prompt, json_format=bool(input_data.expected_format), + max_tokens=input_data.max_tokens, + ) + self.merge_stats( + { + "input_token_count": input_token, + "output_token_count": output_token, + } ) logger.info(f"LLM attempt-{retry_count} response: {response_text}") @@ -341,8 +410,15 @@ def parse_response(resp: str) -> tuple[dict[str, Any], str | None]: ) prompt.append({"role": "user", "content": retry_prompt}) except Exception as e: - logger.error(f"Error calling LLM: {e}") + logger.exception(f"Error calling LLM: {e}") retry_prompt = f"Error calling LLM: {e}" + finally: + self.merge_stats( + { + "llm_call_count": retry_count + 1, + "llm_retry_count": retry_count, + } + ) raise RuntimeError(retry_prompt) @@ -362,6 +438,11 @@ class Input(BlockSchema): prompt_values: dict[str, str] = SchemaField( advanced=False, default={}, description="Values used to fill in the prompt." ) + max_tokens: int | None = SchemaField( + advanced=True, + default=None, + description="The maximum number of tokens to generate in the chat completion.", + ) class Output(BlockSchema): response: str @@ -379,15 +460,11 @@ def __init__(self): test_mock={"llm_call": lambda *args, **kwargs: "Response text"}, ) - @staticmethod - def llm_call(input_data: AIStructuredResponseGeneratorBlock.Input) -> str: - object_block = AIStructuredResponseGeneratorBlock() - for output_name, output_data in object_block.run(input_data): - if output_name == "response": - return output_data["response"] - else: - raise RuntimeError(output_data) - raise ValueError("Failed to get a response from the LLM.") + def llm_call(self, input_data: AIStructuredResponseGeneratorBlock.Input) -> str: + block = AIStructuredResponseGeneratorBlock() + response = block.run_once(input_data, "response") + self.merge_stats(block.execution_stats) + return response["response"] def run(self, input_data: Input, **kwargs) -> BlockOutput: object_input_data = AIStructuredResponseGeneratorBlock.Input( @@ -416,7 +493,7 @@ class Input(BlockSchema): style: SummaryStyle = SummaryStyle.CONCISE api_key: BlockSecret = SecretField(value="") # TODO: Make this dynamic - max_tokens: int = 4000 # Adjust based on the model's context window + max_tokens: int = 4096 # Adjust based on the model's context window chunk_overlap: int = 100 # Overlap between chunks to maintain context class Output(BlockSchema): @@ -470,15 +547,11 @@ def _split_text(text: str, max_tokens: int, overlap: int) -> list[str]: return chunks - @staticmethod - def llm_call( - input_data: AIStructuredResponseGeneratorBlock.Input, - ) -> dict[str, str]: - llm_block = AIStructuredResponseGeneratorBlock() - for output_name, output_data in llm_block.run(input_data): - if output_name == "response": - return output_data - raise ValueError("Failed to get a response from the LLM.") + def llm_call(self, input_data: AIStructuredResponseGeneratorBlock.Input) -> dict: + block = AIStructuredResponseGeneratorBlock() + response = block.run_once(input_data, "response") + self.merge_stats(block.execution_stats) + return response def _summarize_chunk(self, chunk: str, input_data: Input) -> str: prompt = f"Summarize the following text in a {input_data.style} form. Focus your summary on the topic of `{input_data.focus}` if present, otherwise just provide a general summary:\n\n```{chunk}```" @@ -527,17 +600,6 @@ def _combine_summaries(self, summaries: list[str], input_data: Input) -> str: ] # Get the first yielded value -class MessageRole(str, Enum): - SYSTEM = "system" - USER = "user" - ASSISTANT = "assistant" - - -class Message(BlockSchema): - role: MessageRole - content: str - - class AIConversationBlock(Block): class Input(BlockSchema): messages: List[Message] = SchemaField( @@ -552,9 +614,9 @@ class Input(BlockSchema): value="", description="API key for the chosen language model provider." ) max_tokens: int | None = SchemaField( + advanced=True, default=None, description="The maximum number of tokens to generate in the chat completion.", - ge=1, ) class Output(BlockSchema): @@ -592,62 +654,22 @@ def __init__(self): }, ) - @staticmethod - def llm_call( - api_key: str, - model: LlmModel, - messages: List[dict[str, str]], - max_tokens: int | None = None, - ) -> str: - provider = model.metadata.provider - - if provider == "openai": - openai.api_key = api_key - response = openai.chat.completions.create( - model=model.value, - messages=messages, # type: ignore - max_tokens=max_tokens, - ) - return response.choices[0].message.content or "" - elif provider == "anthropic": - client = anthropic.Anthropic(api_key=api_key) - response = client.messages.create( - model=model.value, - max_tokens=max_tokens or 4096, - messages=messages, # type: ignore - ) - return response.content[0].text if response.content else "" - elif provider == "groq": - client = Groq(api_key=api_key) - response = client.chat.completions.create( - model=model.value, - messages=messages, # type: ignore - max_tokens=max_tokens, - ) - return response.choices[0].message.content or "" - elif provider == "ollama": - response = ollama.chat( - model=model.value, - messages=messages, # type: ignore - stream=False, # type: ignore - ) - return response["message"]["content"] - else: - raise ValueError(f"Unsupported LLM provider: {provider}") + def llm_call(self, input_data: AIStructuredResponseGeneratorBlock.Input) -> str: + block = AIStructuredResponseGeneratorBlock() + response = block.run_once(input_data, "response") + self.merge_stats(block.execution_stats) + return response["response"] def run(self, input_data: Input, **kwargs) -> BlockOutput: - api_key = ( - input_data.api_key.get_secret_value() - or LlmApiKeys[input_data.model.metadata.provider].get_secret_value() - ) - - messages = [message.model_dump() for message in input_data.messages] - response = self.llm_call( - api_key=api_key, - model=input_data.model, - messages=messages, - max_tokens=input_data.max_tokens, + AIStructuredResponseGeneratorBlock.Input( + prompt="", + api_key=input_data.api_key, + model=input_data.model, + conversation_history=input_data.messages, + max_tokens=input_data.max_tokens, + expected_format={}, + ) ) yield "response", response @@ -680,6 +702,11 @@ class Input(BlockSchema): ge=1, le=5, ) + max_tokens: int | None = SchemaField( + advanced=True, + default=None, + description="The maximum number of tokens to generate in the chat completion.", + ) class Output(BlockSchema): generated_list: List[str] = SchemaField(description="The generated list.") @@ -734,11 +761,8 @@ def llm_call( input_data: AIStructuredResponseGeneratorBlock.Input, ) -> dict[str, str]: llm_block = AIStructuredResponseGeneratorBlock() - for output_name, output_data in llm_block.run(input_data): - if output_name == "response": - logger.debug(f"Received response from LLM: {output_data}") - return output_data - raise ValueError("Failed to get a response from the LLM.") + response = llm_block.run_once(input_data, "response") + return response @staticmethod def string_to_list(string): diff --git a/autogpt_platform/backend/backend/data/block.py b/autogpt_platform/backend/backend/data/block.py index 594fd10e7681..d1f9eb36f740 100644 --- a/autogpt_platform/backend/backend/data/block.py +++ b/autogpt_platform/backend/backend/data/block.py @@ -230,6 +230,7 @@ def __init__( self.disabled = disabled self.static_output = static_output self.block_type = block_type + self.execution_stats = {} @abstractmethod def run(self, input_data: BlockSchemaInputType, **kwargs) -> BlockOutput: @@ -244,6 +245,26 @@ def run(self, input_data: BlockSchemaInputType, **kwargs) -> BlockOutput: """ pass + def run_once(self, input_data: BlockSchemaInputType, output: str, **kwargs) -> Any: + for name, data in self.run(input_data, **kwargs): + if name == output: + return data + raise ValueError(f"{self.name} did not produce any output for {output}") + + def merge_stats(self, stats: dict[str, Any]) -> dict[str, Any]: + for key, value in stats.items(): + if isinstance(value, dict): + self.execution_stats.setdefault(key, {}).update(value) + elif isinstance(value, (int, float)): + self.execution_stats.setdefault(key, 0) + self.execution_stats[key] += value + elif isinstance(value, list): + self.execution_stats.setdefault(key, []) + self.execution_stats[key].extend(value) + else: + self.execution_stats[key] = value + return self.execution_stats + @property def name(self): return self.__class__.__name__ @@ -282,14 +303,15 @@ def execute(self, input_data: BlockInput, **kwargs) -> BlockOutput: # ======================= Block Helper Functions ======================= # -def get_blocks() -> dict[str, Block]: +def get_blocks() -> dict[str, Type[Block]]: from backend.blocks import AVAILABLE_BLOCKS # noqa: E402 return AVAILABLE_BLOCKS async def initialize_blocks() -> None: - for block in get_blocks().values(): + for cls in get_blocks().values(): + block = cls() existing_block = await AgentBlock.prisma().find_first( where={"OR": [{"id": block.id}, {"name": block.name}]} ) @@ -324,4 +346,5 @@ async def initialize_blocks() -> None: def get_block(block_id: str) -> Block | None: - return get_blocks().get(block_id) + cls = get_blocks().get(block_id) + return cls() if cls else None diff --git a/autogpt_platform/backend/backend/data/graph.py b/autogpt_platform/backend/backend/data/graph.py index 4f1be1de1ed8..b4f8f8aeb739 100644 --- a/autogpt_platform/backend/backend/data/graph.py +++ b/autogpt_platform/backend/backend/data/graph.py @@ -257,7 +257,7 @@ def is_input_output_block(nid: str) -> bool: block = get_block(node.block_id) if not block: - blocks = {v.id: v.name for v in get_blocks().values()} + blocks = {v().id: v().name for v in get_blocks().values()} raise ValueError( f"{suffix}, {node.block_id} is invalid block id, available blocks: {blocks}" ) diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index d756dfc6ecf8..dd6f6f75772b 100644 --- a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -104,6 +104,7 @@ def execute_node( Args: db_client: The client to send execution updates to the server. + creds_manager: The manager to acquire and release credentials. data: The execution data for executing the current node. execution_stats: The execution statistics to be updated. @@ -209,6 +210,7 @@ def update_execution(status: ExecutionStatus) -> ExecutionResult: if creds_lock: creds_lock.release() if execution_stats is not None: + execution_stats.update(node_block.execution_stats) execution_stats["input_size"] = input_size execution_stats["output_size"] = output_size diff --git a/autogpt_platform/backend/backend/server/rest_api.py b/autogpt_platform/backend/backend/server/rest_api.py index 6860f3f60fee..ba095e90eac5 100644 --- a/autogpt_platform/backend/backend/server/rest_api.py +++ b/autogpt_platform/backend/backend/server/rest_api.py @@ -329,9 +329,9 @@ async def get_or_create_user_route(cls, user_data: dict = Depends(auth_middlewar @classmethod def get_graph_blocks(cls) -> list[dict[Any, Any]]: - blocks = block.get_blocks() + blocks = [cls() for cls in block.get_blocks().values()] costs = get_block_costs() - return [{**b.to_dict(), "costs": costs.get(b.id, [])} for b in blocks.values()] + return [{**b.to_dict(), "costs": costs.get(b.id, [])} for b in blocks] @classmethod def execute_graph_block( diff --git a/autogpt_platform/backend/test/block/test_block.py b/autogpt_platform/backend/test/block/test_block.py index be16a0b1a76e..48d2616f613e 100644 --- a/autogpt_platform/backend/test/block/test_block.py +++ b/autogpt_platform/backend/test/block/test_block.py @@ -1,3 +1,5 @@ +from typing import Type + import pytest from backend.data.block import Block, get_blocks @@ -5,5 +7,5 @@ @pytest.mark.parametrize("block", get_blocks().values(), ids=lambda b: b.name) -def test_available_blocks(block: Block): - execute_block_test(type(block)()) +def test_available_blocks(block: Type[Block]): + execute_block_test(block()) From 3ac6ff52a47d3531400a6796e8f201923158c324 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Sat, 19 Oct 2024 12:26:19 +0700 Subject: [PATCH 2/2] Address comments --- .../backend/backend/blocks/__init__.py | 23 +++++++++++-------- .../backend/backend/data/block.py | 4 ++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/__init__.py b/autogpt_platform/backend/backend/blocks/__init__.py index d2c1b7ccb76d..4fb89e3957ff 100644 --- a/autogpt_platform/backend/backend/blocks/__init__.py +++ b/autogpt_platform/backend/backend/blocks/__init__.py @@ -2,7 +2,7 @@ import os import re from pathlib import Path -from typing import Type +from typing import Type, TypeVar from backend.data.block import Block @@ -28,25 +28,28 @@ AVAILABLE_BLOCKS: dict[str, Type[Block]] = {} -def all_subclasses(clz): - subclasses = clz.__subclasses__() +T = TypeVar("T") + + +def all_subclasses(cls: Type[T]) -> list[Type[T]]: + subclasses = cls.__subclasses__() for subclass in subclasses: subclasses += all_subclasses(subclass) return subclasses -for cls in all_subclasses(Block): - name = cls.__name__ +for block_cls in all_subclasses(Block): + name = block_cls.__name__ - if cls.__name__.endswith("Base"): + if block_cls.__name__.endswith("Base"): continue - if not cls.__name__.endswith("Block"): + if not block_cls.__name__.endswith("Block"): raise ValueError( - f"Block class {cls.__name__} does not end with 'Block', If you are creating an abstract class, please name the class with 'Base' at the end" + f"Block class {block_cls.__name__} does not end with 'Block', If you are creating an abstract class, please name the class with 'Base' at the end" ) - block = cls() + block = block_cls.create() if not isinstance(block.id, str) or len(block.id) != 36: raise ValueError(f"Block ID {block.name} error: {block.id} is not a valid UUID") @@ -88,6 +91,6 @@ def all_subclasses(clz): if block.disabled: continue - AVAILABLE_BLOCKS[block.id] = cls + AVAILABLE_BLOCKS[block.id] = block_cls __all__ = ["AVAILABLE_MODULES", "AVAILABLE_BLOCKS"] diff --git a/autogpt_platform/backend/backend/data/block.py b/autogpt_platform/backend/backend/data/block.py index d1f9eb36f740..e89013b3b3f5 100644 --- a/autogpt_platform/backend/backend/data/block.py +++ b/autogpt_platform/backend/backend/data/block.py @@ -232,6 +232,10 @@ def __init__( self.block_type = block_type self.execution_stats = {} + @classmethod + def create(cls: Type["Block"]) -> "Block": + return cls() + @abstractmethod def run(self, input_data: BlockSchemaInputType, **kwargs) -> BlockOutput: """