fix: Fix summarizer for Anthropic and add integration tests (#2046)

letta-ai · Nov 16, 2024 · 82058d2
1 parent f6abf60
commit 82058d2
Show file tree

Hide file tree

Showing 16 changed files with 218 additions and 68 deletions.
diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
@@ -0,0 +1,75 @@
+name: Integration Tests  # runs the matrix of integration suites against Postgres and Qdrant service containers
+
+env:  # provider credentials, exported to every job and step in this workflow
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  COMPOSIO_API_KEY: ${{ secrets.COMPOSIO_API_KEY }}
+  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+  GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+  GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
+  AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:  # NOTE(review): repository secrets are not passed to runs triggered from forks, so fork PRs get empty API keys
+    branches: [ main ]
+
+jobs:
+  run-integration-tests:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    strategy:
+      fail-fast: false  # let every suite in the matrix finish even if another one fails
+      matrix:
+        integration_test_suite:  # file names under tests/; each entry becomes its own job
+          - "integration_test_summarizer.py"
+    services:
+      qdrant:
+        image: qdrant/qdrant
+        ports:
+          - "6333:6333"
+      postgres:
+        image: pgvector/pgvector:pg17
+        ports:
+          - "5432:5432"
+        env:
+          POSTGRES_HOST_AUTH_METHOD: trust  # passwordless auth; lets the psql call in the migrate step connect without PGPASSWORD
+          POSTGRES_DB: postgres
+          POSTGRES_USER: postgres
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python, Poetry, and Dependencies
+        uses: packetcoders/action-setup-cache-python-poetry@main  # NOTE(review): tracks a moving branch; pin to a release tag or commit SHA
+        with:
+          python-version: "3.12"
+          poetry-version: "1.8.2"
+          install-args: "-E dev -E postgres -E milvus -E external-tools -E tests"
+      - name: Migrate database
+        env:
+          LETTA_PG_PORT: 5432
+          LETTA_PG_USER: postgres
+          LETTA_PG_PASSWORD: postgres
+          LETTA_PG_DB: postgres
+          LETTA_PG_HOST: localhost
+        run: |
+          psql -h localhost -U postgres -d postgres -c 'CREATE EXTENSION IF NOT EXISTS vector'
+          poetry run alembic upgrade head
+      - name: Run integration tests
+        env:
+          LETTA_PG_PORT: 5432
+          LETTA_PG_USER: postgres
+          LETTA_PG_PASSWORD: postgres
+          LETTA_PG_DB: postgres
+          LETTA_PG_HOST: localhost
+          LETTA_SERVER_PASS: test_server_token
+        run: |
+          poetry run pytest -s -vv tests/${{ matrix.integration_test_suite }}
diff --git a/.github/workflows/test_anthropic.yml b/.github/workflows/test_anthropic.yml
@@ -29,7 +29,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_returns_valid_first_message
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_returns_valid_first_message
         echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -38,7 +38,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_returns_keyword
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_returns_keyword
         echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -47,7 +47,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_uses_external_tool
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_uses_external_tool
         echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -56,7 +56,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_recall_chat_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_recall_chat_memory
         echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -65,7 +65,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_archival_memory_retrieval
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_archival_memory_retrieval
         echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -74,7 +74,7 @@ jobs:
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_claude_opus_3_edit_core_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_claude_opus_3_edit_core_memory
         echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 

diff --git a/.github/workflows/test_azure.yml b/.github/workflows/test_azure.yml
@@ -31,7 +31,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_valid_first_message
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_returns_valid_first_message
         echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -41,7 +41,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_returns_keyword
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_returns_keyword
         echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -51,7 +51,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_uses_external_tool
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_uses_external_tool
         echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -61,7 +61,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_recall_chat_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_recall_chat_memory
         echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -71,7 +71,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_archival_memory_retrieval
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_archival_memory_retrieval
         echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -81,7 +81,7 @@ jobs:
         AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
         AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_azure_gpt_4o_mini_edit_core_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_azure_gpt_4o_mini_edit_core_memory
         echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 

diff --git a/.github/workflows/test_groq.yml b/.github/workflows/test_groq.yml
@@ -29,7 +29,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_valid_first_message
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_returns_valid_first_message
         echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -38,7 +38,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_keyword
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_returns_keyword
         echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -47,7 +47,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_uses_external_tool
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_uses_external_tool
         echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -56,7 +56,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_recall_chat_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_recall_chat_memory
         echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -65,7 +65,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_archival_memory_retrieval
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_archival_memory_retrieval
         echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 
@@ -74,7 +74,7 @@ jobs:
       env:
         GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_edit_core_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_groq_llama31_70b_edit_core_memory
         echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
       continue-on-error: true
 

diff --git a/.github/workflows/test_memgpt_hosted.yml b/.github/workflows/test_memgpt_hosted.yml
@@ -23,9 +23,9 @@ jobs:
 
     - name: Test LLM endpoint
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_llm_endpoint_letta_hosted
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_llm_endpoint_letta_hosted
       continue-on-error: true
 
     - name: Test embedding endpoint
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_letta_hosted
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_letta_hosted
diff --git a/.github/workflows/test_ollama.yml b/.github/workflows/test_ollama.yml
@@ -34,11 +34,11 @@ jobs:
 
     - name: Test LLM endpoint
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_llm_endpoint_ollama
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_llm_endpoint_ollama
 
     - name: Test embedding endpoint
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_ollama
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_ollama
 
     - name: Test provider
       run: |

diff --git a/.github/workflows/test_openai.yml b/.github/workflows/test_openai.yml
@@ -29,53 +29,53 @@ jobs:
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_returns_valid_first_message
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_returns_valid_first_message
 
     - name: Test model sends message with keyword
       id: test_keyword_message
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_returns_keyword
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_returns_keyword
 
     - name: Test model uses external tool correctly
       id: test_external_tool
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_uses_external_tool
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_uses_external_tool
 
     - name: Test model recalls chat memory
       id: test_chat_memory
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_recall_chat_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_recall_chat_memory
 
     - name: Test model uses 'archival_memory_search' to find secret
       id: test_archival_memory_search
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_archival_memory_retrieval
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_archival_memory_retrieval
 
     - name: Test model uses 'archival_memory_insert' to insert archival memories
       id: test_archival_memory_insert
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_archival_memory_insert
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_archival_memory_insert
 
     - name: Test model can edit core memories
       id: test_core_memory
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4o_edit_core_memory
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_openai_gpt_4o_edit_core_memory
 
     - name: Test embedding endpoint
       id: test_embedding_endpoint
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       run: |
-        poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai
+        poetry run pytest -s -vv tests/test_model_letta_perfomance.py::test_embedding_endpoint_openai
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -131,4 +131,4 @@ jobs:
           LETTA_SERVER_PASS: test_server_token
           PYTHONPATH: ${{ github.workspace }}:${{ env.PYTHONPATH }}
         run: |
-          poetry run pytest -s -vv -k "not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_tools.py and not test_concurrent_connections.py and not test_quickstart and not test_endpoints and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client.py" tests
+          poetry run pytest -s -vv -k "not integration_test_summarizer.py and not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_tools.py and not test_concurrent_connections.py and not test_quickstart and not test_model_letta_perfomance and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client.py" tests
diff --git a/.gitignore b/.gitignore
@@ -1018,3 +1018,8 @@ pgdata/
 letta/.pytest_cache/
 memgpy/pytest.ini
 **/**/pytest_cache
+
+
+# local sandbox venvs
+letta/services/tool_sandbox_env/*
+tests/test_tool_sandbox/*
diff --git a/examples/tool_rule_usage.py b/examples/tool_rule_usage.py
@@ -9,7 +9,7 @@
     setup_agent,
 )
 from tests.helpers.utils import cleanup
-from tests.test_endpoints import llm_config_dir
+from tests.test_model_letta_perfomance import llm_config_dir
 
 """
 This example shows how you can constrain tool calls in your agent.

diff --git a/letta/agent.py b/letta/agent.py
@@ -48,6 +48,7 @@
 from letta.schemas.usage import LettaUsageStatistics
 from letta.services.source_manager import SourceManager
 from letta.services.user_manager import UserManager
+from letta.streaming_interface import StreamingRefreshCLIInterface
 from letta.system import (
     get_heartbeat,
     get_initial_boot_messages,
@@ -229,7 +230,7 @@ def update_state(self) -> AgentState:
 class Agent(BaseAgent):
     def __init__(
         self,
-        interface: Optional[AgentInterface],
+        interface: Optional[Union[AgentInterface, StreamingRefreshCLIInterface]],
         # agents can be created from providing agent_state
         agent_state: AgentState,
         tools: List[Tool],