Fix #487 (summarize call uses OpenAI even with local LLM config) (#488)
* use new chat_completion function that takes the agent config inside of summarize

* patch issue with "model" now missing
cpacker authored Nov 19, 2023
1 parent eda5864 commit d67a919
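
The root cause: summarize_messages built its request through completions_with_backoff (imported as create), an OpenAI-only code path, so an agent configured for a local LLM endpoint was ignored during summarization. A hedged before/after sketch of the call path, condensed from the diff below:

    # Before: "model" and "context_window" threaded by hand into the
    # OpenAI-only wrapper.
    response = create(
        model=model,
        messages=message_sequence,
        context_window=context_window,
    )

    # After: the agent's config travels with the call, and
    # chat_completion_with_backoff routes on agent_config.model_endpoint_type
    # ("openai" vs. a local backend).
    response = chat_completion_with_backoff(
        agent_config=agent_config,
        messages=message_sequence,
    )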
Showing 3 changed files with 17 additions and 16 deletions.
memgpt/agent.py (1 addition & 3 deletions)
@@ -716,9 +716,7 @@ def summarize_messages_inplace(self, cutoff=None, preserve_last_N_messages=True)
             if (self.model is not None and self.model in LLM_MAX_TOKENS)
             else str(LLM_MAX_TOKENS["DEFAULT"])
         )
-        summary = summarize_messages(
-            model=self.model, context_window=int(self.config.context_window), message_sequence_to_summarize=message_sequence_to_summarize
-        )
+        summary = summarize_messages(agent_config=self.config, message_sequence_to_summarize=message_sequence_to_summarize)
         printd(f"Got summary: {summary}")
 
         # Metadata that's useful for the agent to see
memgpt/memory.py (11 additions & 13 deletions)
@@ -4,14 +4,11 @@
 import re
 from typing import Optional, List, Tuple
 
-from .constants import MESSAGE_SUMMARY_WARNING_FRAC, MEMGPT_DIR
-from .utils import cosine_similarity, get_local_time, printd, count_tokens
-from .prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM
+from memgpt.constants import MESSAGE_SUMMARY_WARNING_FRAC, MEMGPT_DIR
+from memgpt.utils import cosine_similarity, get_local_time, printd, count_tokens
+from memgpt.prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM
 from memgpt import utils
-from .openai_tools import (
-    get_embedding_with_backoff,
-    completions_with_backoff as create,
-)
+from memgpt.openai_tools import get_embedding_with_backoff, chat_completion_with_backoff
 from llama_index import (
     VectorStoreIndex,
     EmptyIndex,
@@ -119,11 +116,12 @@ def edit_replace(self, field, old_content, new_content):


 def summarize_messages(
-    model,
-    context_window,
+    agent_config,
     message_sequence_to_summarize,
 ):
     """Summarize a message sequence using GPT"""
+    # we need the context_window
+    context_window = agent_config.context_window
 
     summary_prompt = SUMMARY_PROMPT_SYSTEM
     summary_input = str(message_sequence_to_summarize)
@@ -132,17 +130,17 @@
         trunc_ratio = (MESSAGE_SUMMARY_WARNING_FRAC * context_window / summary_input_tkns) * 0.8  # For good measure...
         cutoff = int(len(message_sequence_to_summarize) * trunc_ratio)
         summary_input = str(
-            [summarize_messages(model, context_window, message_sequence_to_summarize[:cutoff])] + message_sequence_to_summarize[cutoff:]
+            [summarize_messages(agent_config, context_window, message_sequence_to_summarize[:cutoff])]
+            + message_sequence_to_summarize[cutoff:]
         )
     message_sequence = [
         {"role": "system", "content": summary_prompt},
         {"role": "user", "content": summary_input},
     ]
 
-    response = create(
-        model=model,
+    response = chat_completion_with_backoff(
+        agent_config=agent_config,
         messages=message_sequence,
-        context_window=context_window,
     )
 
     printd(f"summarize_messages gpt reply: {response.choices[0]}")
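The truncation guard in the hunk above is easier to see with numbers. A minimal worked sketch, assuming illustrative values (the real MESSAGE_SUMMARY_WARNING_FRAC lives in memgpt/constants.py; 0.75 below is an assumption, not the actual constant):

    # Illustrative values only:
    MESSAGE_SUMMARY_WARNING_FRAC = 0.75  # assumption; see memgpt/constants.py
    context_window = 8192                # e.g. agent_config.context_window
    summary_input_tkns = 12000           # the sequence overflows the window

    # Same arithmetic as the diff, scaled by 0.8 "for good measure":
    trunc_ratio = (MESSAGE_SUMMARY_WARNING_FRAC * context_window / summary_input_tkns) * 0.8
    # (0.75 * 8192 / 12000) * 0.8 = 0.4096

    message_sequence_to_summarize = list(range(100))  # stand-in for 100 messages
    cutoff = int(len(message_sequence_to_summarize) * trunc_ratio)  # 40

    # The first 40 messages are compressed recursively into one summary entry;
    # the remaining 60 are passed through verbatim.

So when the input itself is too large to summarize in one call, the head of the sequence is compressed first and the tail is preserved.
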
memgpt/openai_tools.py (5 additions & 0 deletions)
@@ -85,6 +85,11 @@ def chat_completion_with_backoff(agent_config, **kwargs):
     from memgpt.utils import printd
     from memgpt.config import AgentConfig, MemGPTConfig
 
+    # both "model" and "messages" are required for base OpenAI calls
+    # also required for local LLM Ollama, but not others
+    if "model" not in kwargs:
+        kwargs["model"] = agent_config.model
+
     printd(f"Using model {agent_config.model_endpoint_type}, endpoint: {agent_config.model_endpoint}")
     if agent_config.model_endpoint_type == "openai":
         # openai
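A minimal sketch of what the added guard does, assuming only what the hunk shows (the endpoint branches below are elided in the diff; the non-OpenAI branch is inferred from the commit's stated purpose, not shown in the hunk):

    def chat_completion_with_backoff(agent_config, **kwargs):
        # Default "model" from the agent's config so callers such as
        # summarize_messages no longer have to thread it through themselves.
        if "model" not in kwargs:
            kwargs["model"] = agent_config.model

        if agent_config.model_endpoint_type == "openai":
            ...  # OpenAI code path (shown starting in the hunk above)
        else:
            ...  # local-LLM code path (elided; selected by model_endpoint_type)

With the summarize fix in memgpt/memory.py, agent_config now reaches this function, so summarization respects a configured local-LLM endpoint instead of silently defaulting to OpenAI.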
