Skip to content

Commit

Permalink
24 llm cost tracking (#25)
Browse files Browse the repository at this point in the history
* state cleanup

* models+actions

* update version

* fixed throwing action error inside try block

* fixed tests

* fixed tests

* added logging instead of print
  • Loading branch information
areibman authored Sep 22, 2023
1 parent 9dfc70d commit 3b7378e
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 14 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Build your next agent with evals, observability, and replay analytics. Agentops
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

## Latest Release 📦
`version: 0.0.3`
`version: 0.0.4`
This is an alpha release for early testers.

Agentops is still in closed alpha. You can sign up for an API key [here](https://forms.gle/mFAP4XEoaiKXb2Xh9).
Expand Down
1 change: 1 addition & 0 deletions agentops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .event import Event, EventState
from .session import SessionState
from .logger import AgentOpsLogger
from .helpers import Models, ActionType
108 changes: 99 additions & 9 deletions agentops/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
"""

from .event import Event, EventState
from .helpers import Models, ActionType
from .session import Session, SessionState
from .worker import Worker
from uuid import uuid4
from typing import Optional, List
import functools
import logging
import inspect
import atexit
import signal
Expand Down Expand Up @@ -50,8 +52,26 @@ def __init__(self, api_key: str, tags: Optional[List[str]] = None,
signal.signal(signal.SIGINT, self.signal_handler)
signal.signal(signal.SIGTERM, self.signal_handler)

# Override sys.excepthook
sys.excepthook = self.handle_exception

self.start_session(tags)

def handle_exception(self, exc_type, exc_value, exc_traceback):
    """
    Handle uncaught exceptions before they result in program termination.

    Installed as ``sys.excepthook`` in ``__init__``, so this runs for any
    exception that propagates to the top of the main thread.

    Args:
        exc_type (Type[BaseException]): The type of the exception.
        exc_value (BaseException): The exception instance.
        exc_traceback (TracebackType): A traceback object encapsulating the call stack at the point where the exception originally occurred.
    """
    # Perform cleanup first, while the process is still alive — presumably
    # this ends/flushes the active session; confirm against Client.cleanup().
    self.cleanup()

    # Then call the default excepthook to exit the program
    # (it prints the traceback and lets the interpreter terminate normally).
    sys.__excepthook__(exc_type, exc_value, exc_traceback)

def signal_handler(self, signal, frame):
"""
Signal handler for SIGINT (Ctrl+C) and SIGTERM. Ends the session and exits the program.
Expand All @@ -60,11 +80,13 @@ def signal_handler(self, signal, frame):
signal (int): The signal number.
frame: The current stack frame.
"""
print('Signal SIGTERM or SIGINT detected. Ending session...')
logging.info('Signal SIGTERM or SIGINT detected. Ending session...')
self.end_session(end_state=EventState.FAIL)
sys.exit(0)

def record(self, event: Event):
def record(self, event: Event,
action_type: ActionType = ActionType.ACTION,
model: Optional[Models] = None):
"""
Record an event with the AgentOps service.
Expand All @@ -76,13 +98,53 @@ def record(self, event: Event):
self.worker.add_event(
{'session_id': self.session.session_id, **event.__dict__})
else:
print("This event was not recorded because the previous session has been ended. Start a new session to record again.")
logging.info("This event was not recorded because the previous session has been ended" +
" Start a new session to record again.")

def record_action(self, event_name: str, tags: Optional[List[str]] = None):
def record_action(self, event_name: str,
action_type: ActionType = ActionType.ACTION,
model: Optional[Models] = None,
tags: Optional[List[str]] = None):
"""
Decorator to record an event before and after a function call.
Usage:
- Actions: Records function parameters and return statements of the
function being decorated. Specify the action_type = 'action'
- LLM Calls: Records prompt, model, and output of a function that
calls an LLM. Specify the action_type = 'llm'
Note: This requires that the function being decorated is passed a "prompt"
parameter when either defined or called. For example:
```
# Decorate function definition
@ao_client.record_action(..., action_type='llm')
def openai_call(prompt):
...
openai_call(prompt='...')
```
For decorated functions without the "prompt" param, this decorator
grants an overloaded "prompt" arg that automatically works. For example:
```
# Decorate function definition
@ao_client.record_action(..., action_type='llm')
def openai_call(foo):
...
# This will work
openai_call(foo='...', prompt='...')
```
- API Calls: Records input, headers, and response status for API calls.
TODO: Currently not implemented; coming soon.
Args:
event_name (str): The name of the event to record.
action_type (ActionType, optional): The type of the event being recorded.
Events default to 'action'. Other options include 'api' and 'llm'.
model (Models, optional): The model used during the event if an LLM is used (i.e. GPT-4).
For models, see the types available in the Models enum.
If a model is set but an action_type is not, the action_type will be coerced to 'llm'.
Defaults to None.
tags (List[str], optional): Any tags associated with the event. Defaults to None.
"""
def decorator(func):
Expand All @@ -97,6 +159,26 @@ def wrapper(*args, **kwargs):
# Update with positional arguments
arg_values.update(dict(zip(arg_names, args)))
arg_values.update(kwargs)

# Get prompt from function arguments
prompt = arg_values.get('prompt')

# 1) Coerce action type to 'llm' if model is set
# 2) Throw error if no prompt is set. This is required for
# calculating price
action = action_type
if bool(model):
action = ActionType.LLM
if not bool(prompt):
raise ValueError(
"Prompt is required when model is provided.")

# Throw error if action type is 'llm' but no model is specified
if action == ActionType.LLM and not bool(model):
raise ValueError(
f"`model` is a required parameter if `action_type` is set as {ActionType.LLM}. " +
f"Model can be set as: {list([mod.value for mod in Models])}")

try:
returns = func(*args, **kwargs)

Expand All @@ -108,15 +190,21 @@ def wrapper(*args, **kwargs):
self.record(Event(event_type=event_name,
params=arg_values,
returns=returns,
result="Success",
result=EventState.SUCCESS,
action_type=action,
model=model,
prompt=prompt,
tags=tags))

except Exception as e:
# Record the event after the function call
self.record(Event(event_type=event_name,
params=arg_values,
returns=None,
result='Fail',
result=EventState.FAIL,
action_type=action,
model=model,
prompt=prompt,
tags=tags))

# Re-raise the exception
Expand All @@ -133,13 +221,15 @@ def start_session(self, tags: Optional[List[str]] = None):
Start a new session for recording events.
Args:
tags (List[str], optional): Tags that can be used for grouping or sorting later. Examples could be ["GPT-4"].
tags (List[str], optional): Tags that can be used for grouping or sorting later.
e.g. ["test_run"].
"""
self.session = Session(str(uuid4()), tags)
self.worker = Worker(self.config)
self.worker.start_session(self.session)

def end_session(self, end_state: SessionState = SessionState.INDETERMINATE, rating: Optional[str] = None):
def end_session(self, end_state: SessionState = SessionState.INDETERMINATE,
rating: Optional[str] = None):
"""
End the current session with the AgentOps service.
Expand All @@ -155,7 +245,7 @@ def end_session(self, end_state: SessionState = SessionState.INDETERMINATE, rati
self.session.end_session(end_state, rating)
self.worker.end_session(self.session)
else:
print("Warning: The session has already been ended.")
logging.info("Warning: The session has already been ended.")

def cleanup(self):
# Only run cleanup function if session is created
Expand Down
16 changes: 14 additions & 2 deletions agentops/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Classes:
Event: Represents discrete events to be recorded.
"""
from .helpers import get_ISO_time
from .helpers import get_ISO_time, ActionType, Models
from typing import Optional, List


Expand All @@ -23,7 +23,13 @@ class Event:
params (str, optional): The parameters passed to the operation.
returns (str, optional): The output of the operation.
result (str, optional): Result of the operation, e.g., "Success", "Fail", "Indeterminate".
tags (List[str], optional): Tags that can be used for grouping or sorting later. e.g. ["GPT-4"].
action_type (ActionType, optional): Type of action of the event, e.g. 'action', 'llm', 'api'.
model (Models, optional): The model used during the event if an LLM is used (i.e. GPT-4).
For models, see the types available in the Models enum.
If a model is set but an action_type is not, the action_type will be coerced to 'llm'.
Defaults to None.
prompt (str, optional): The input prompt for an LLM call when an LLM is being used.
tags (List[str], optional): Tags that can be used for grouping or sorting later. e.g. ["my_tag"].
Attributes:
Expand All @@ -34,11 +40,17 @@ def __init__(self, event_type: str,
params: Optional[str] = None,
returns: Optional[str] = None,
result: EventState = EventState.INDETERMINATE,
action_type: Optional[ActionType] = ActionType.ACTION,
model: Optional[Models] = None,
prompt: Optional[str] = None,
tags: Optional[List[str]] = None
):
self.event_type = event_type
self.params = params
self.returns = returns
self.result = result
self.tags = tags
self.action_type = action_type
self.model = model
self.prompt = prompt
self.timestamp = get_ISO_time()
24 changes: 23 additions & 1 deletion agentops/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
import time
from datetime import datetime, timezone
from enum import Enum


def get_ISO_time():
    """Return the current UTC time as an ISO-8601 string with millisecond
    precision and a trailing 'Z' (e.g. ``2023-09-22T12:34:56.789Z``).

    Returns:
        str: Timezone-correct ISO-8601 UTC timestamp.
    """
    # Use an aware UTC datetime. The previous implementation used
    # datetime.fromtimestamp(time.time()), which yields *local* time,
    # yet still appended 'Z' — mislabelling the timestamp as UTC.
    return datetime.now(timezone.utc).isoformat(timespec='milliseconds').replace('+00:00', 'Z')


class Models(Enum):
    """OpenAI model identifiers recognised for LLM cost tracking.

    Each member's value is the model-name string as passed around the
    client (e.g. ``model='gpt-4'`` in ``record_action``). Member order is
    significant: it is iterated to build user-facing error messages.
    """
    # GPT-3.5 family
    GPT_3_5_TURBO = "gpt-3.5-turbo"
    GPT_3_5_TURBO_0301 = "gpt-3.5-turbo-0301"
    GPT_3_5_TURBO_0613 = "gpt-3.5-turbo-0613"
    GPT_3_5_TURBO_16K = "gpt-3.5-turbo-16k"
    GPT_3_5_TURBO_16K_0613 = "gpt-3.5-turbo-16k-0613"
    # GPT-4 family
    GPT_4_0314 = "gpt-4-0314"
    GPT_4 = "gpt-4"
    GPT_4_32K = "gpt-4-32k"
    GPT_4_32K_0314 = "gpt-4-32k-0314"
    GPT_4_0613 = "gpt-4-0613"
    # Embedding models
    TEXT_EMBEDDING_ADA_002 = "text-embedding-ada-002"


class ActionType(str, Enum):
    """The kind of event recorded by ``Client.record_action``.

    Previously a bare class of string constants; made a ``str``-mixin Enum
    for consistency with ``Models`` (iterable, real type). The ``str`` mixin
    keeps full backward compatibility: members compare equal to their plain
    string values (so call sites passing ``action_type='llm'`` still work)
    and serialise to the bare string in JSON payloads.
    """
    LLM = "llm"        # a call to a large language model (requires model + prompt)
    API = "api"        # an external API call (per record_action docs: not yet implemented)
    ACTION = "action"  # default: a generic recorded action

    def __str__(self):
        # Preserve the old behaviour where str(ActionType.LLM) was "llm"
        # (a plain Enum member would render as "ActionType.LLM" in f-strings).
        return self.value
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "agentops"
version = "0.0.3"
version = "0.0.4"
authors = [
{ name="Alex Reibman", email="areibman@gmail.com" },
{ name="Shawn Qiu", email="siyangqiu@gmail.com" }
Expand Down
62 changes: 62 additions & 0 deletions tests/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,65 @@ def add_three(x, y, z=3):
assert request_json['events'][0]['returns'] == 6
assert request_json['events'][0]['result'] == EventState.SUCCESS
assert request_json['events'][0]['tags'] == ['foo', 'bar']

def test_llm_call(self, mock_req):
    """Happy path: action_type='llm' with a model and a prompt records one
    event carrying the prompt, return value, and a success result.

    mock_req: requests-mock fixture capturing outgoing HTTP calls
    (defined elsewhere in the test suite).
    """
    # Arrange
    prompt = 'prompt'

    @self.client.record_action(event_name=self.event_type, action_type='llm', model='gpt-4')
    def llm_call(prompt=prompt):
        return 'output'

    # Act
    llm_call()
    time.sleep(0.1)  # presumably lets the background worker flush the event — TODO confirm

    # Assert — exactly one request, whose payload mirrors the call
    assert len(mock_req.request_history) == 1
    request_json = mock_req.last_request.json()
    assert request_json['events'][0]['action_type'] == 'llm'
    assert request_json['events'][0]['prompt'] == prompt
    assert request_json['events'][0]['returns'] == 'output'
    assert request_json['events'][0]['result'] == EventState.SUCCESS

def test_llm_call_no_prompt(self, mock_req):
    """An action_type='llm' call must raise ValueError when the decorated
    function receives no 'prompt' argument (the prompt is required for
    cost calculation)."""
    # Arrange — note: llm_call takes no 'prompt' parameter at all
    @self.client.record_action(event_name=self.event_type,
                               action_type='llm', model='gpt-4')
    def llm_call():
        return 'output'

    # Act and Assert — the wrapper raises before the function body runs
    with pytest.raises(ValueError):
        llm_call()

def test_llm_call_no_model(self, mock_req):
    """action_type='llm' without a `model` argument must raise ValueError:
    the model is required to price the call."""
    # Arrange
    prompt = 'prompt'

    @self.client.record_action(event_name=self.event_type, action_type='llm')
    def llm_call(prompt=prompt):
        return 'output'

    # Act and Assert
    with pytest.raises(ValueError):
        llm_call()

def test_llm_call_no_action_type(self, mock_req):
    """Supplying only `model` (no explicit action_type) coerces the
    recorded action_type to 'llm'."""
    # Arrange
    prompt = 'prompt'

    @self.client.record_action(event_name=self.event_type, model='gpt-4')
    def llm_call(prompt=prompt):
        return 'output'

    # Act
    llm_call()
    time.sleep(0.1)  # presumably lets the background worker flush the event — TODO confirm

    # Assert — event was coerced to 'llm' and recorded successfully
    assert len(mock_req.request_history) == 1
    request_json = mock_req.last_request.json()
    assert request_json['events'][0]['action_type'] == 'llm'
    assert request_json['events'][0]['prompt'] == prompt
    assert request_json['events'][0]['returns'] == 'output'
    assert request_json['events'][0]['result'] == EventState.SUCCESS

0 comments on commit 3b7378e

Please sign in to comment.