Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update langchain and expand available llms #155

Merged
merged 9 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 14 additions & 16 deletions mdagent/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from dotenv import load_dotenv
from langchain.agents import AgentExecutor, OpenAIFunctionsAgent
from langchain.agents.structured_chat.base import StructuredChatAgent
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOpenAI

from ..tools import get_tools, make_all_tools
from ..utils import PathRegistry, SetCheckpoint, _make_llm
Expand Down Expand Up @@ -38,20 +36,26 @@ def __init__(
tools=None,
agent_type="OpenAIFunctionsAgent", # this can also be structured_chat
model="gpt-4-1106-preview", # current name for gpt-4 turbo
tools_model="gpt-4-1106-preview",
tools_model=None,
temp=0.1,
verbose=True,
streaming=True,
verbose=False,
ckpt_dir="ckpt",
top_k_tools=20, # set "all" if you want to use all tools
use_human_tool=False,
uploaded_files=[], # user input files to add to path registry
run_id="",
use_memory=False,
):
self.llm = _make_llm(model, temp, streaming)
if tools_model is None:
tools_model = model
self.tools_llm = _make_llm(tools_model, temp, streaming)

self.use_memory = use_memory
self.path_registry = PathRegistry.get_instance(ckpt_dir=ckpt_dir)
self.ckpt_dir = self.path_registry.ckpt_dir
self.memory = MemoryManager(self.path_registry, run_id=run_id)
self.memory = MemoryManager(self.path_registry, self.tools_llm, run_id=run_id)
self.run_id = self.memory.run_id

self.uploaded_files = uploaded_files
Expand All @@ -60,18 +64,10 @@ def __init__(

self.agent = None
self.agent_type = agent_type
self.user_tools = tools
self.tools_llm = _make_llm(tools_model, temp, verbose)
self.top_k_tools = top_k_tools
self.use_human_tool = use_human_tool

self.llm = ChatOpenAI(
temperature=temp,
model=model,
client=None,
streaming=True,
callbacks=[StreamingStdOutCallbackHandler()],
)
self.user_tools = tools
self.verbose = verbose

def _initialize_tools_and_agent(self, user_input=None):
"""Retrieve tools and initialize the agent."""
Expand All @@ -83,6 +79,7 @@ def _initialize_tools_and_agent(self, user_input=None):
self.tools = get_tools(
query=user_input,
llm=self.tools_llm,
top_k_tools=self.top_k_tools,
human=self.use_human_tool,
)
else:
Expand All @@ -97,6 +94,7 @@ def _initialize_tools_and_agent(self, user_input=None):
self.llm,
self.tools,
),
verbose=self.verbose,
handle_parsing_errors=True,
)

Expand All @@ -107,7 +105,7 @@ def run(self, user_input, callbacks=None):
elif self.agent_type == "OpenAIFunctionsAgent":
self.prompt = openaifxn_prompt.format(input=user_input, context=run_memory)
self.agent = self._initialize_tools_and_agent(user_input)
model_output = self.agent.run(self.prompt, callbacks=callbacks)
model_output = self.agent.invoke(self.prompt, callbacks=callbacks)
if self.use_memory:
self.memory.generate_agent_summary(model_output)
print("Your run id is: ", self.run_id)
Expand Down
18 changes: 4 additions & 14 deletions mdagent/agent/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
import random
import string

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

from mdagent.utils import PathRegistry

Expand All @@ -32,8 +30,7 @@ class MemoryManager:
def __init__(
self,
path_registry: PathRegistry,
model="gpt-3.5-turbo",
temp=0.1,
llm,
run_id="",
):
self.path_registry = path_registry
Expand All @@ -46,14 +43,7 @@ def __init__(
else:
pull_mem = True

llm = ChatOpenAI(
temperature=temp,
model=model,
client=None,
streaming=True,
callbacks=[StreamingStdOutCallbackHandler()],
)
self.llm_agent_trace = LLMChain(llm=llm, prompt=agent_summary_template)
self.llm_agent_trace = agent_summary_template | llm | StrOutputParser()

self._make_all_dirs()
if pull_mem:
Expand Down Expand Up @@ -138,7 +128,7 @@ def generate_agent_summary(self, agent_trace):
Returns:
- None
"""
llm_out = self.llm_agent_trace({"agent_trace": agent_trace})["text"]
llm_out = self.llm_agent_trace.invoke({"agent_trace": agent_trace})
key_str = f"{self.run_id}.{self.get_summary_number()}"
run_summary = {key_str: llm_out}
self._write_to_json(run_summary, self.agent_trace_summary)
Expand Down
92 changes: 13 additions & 79 deletions mdagent/agent/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,35 @@
structured_prompt = PromptTemplate(
    input_variables=["input", "context"],
template="""
You are an expert molecular dynamics scientist and
You are an expert molecular dynamics scientist, and
your task is to respond to the question or
solve the problem to the best of your ability using
the provided tools.

You can only respond with a single complete
Thought, Action, Action Input' format
'Thought, Action, Action Input' format
OR a single 'Final Answer' format.

Complete format:
Thought: (reflect on your progress and decide what " "to do next)
Action: (the action name, should be the name of a tool)
Action Input: (the input string to the action)
Thought: (reflect on your progress and decide what to do next)
Action:
```
{{
"action": (the action name, it should be the name of a tool),
"action_input": (the input string for the action)
}}
    ```

OR

Final Answer: (the final response to the original input
question, when all steps are complete)
question, once all steps are complete)

You are required to use the tools provided,
using the most specific tool
available for each action.
Your final answer should contain all information
necessary to answer the question and subquestions.
necessary to answer the question and its subquestions.
Before you finish, reflect on your progress and make
sure you have addressed the question in its entirety.

Expand All @@ -41,81 +46,10 @@
Question: {input} """,
)


modular_analysis_prompt = PromptTemplate(
input_variables=[
"Main_Task",
"Subtask_types",
"Proteins",
"Parameters",
"UserProposedPlan",
"context",
],
template="""
Approach the molecular dynamics inquiry by dissecting it into its modular
components:
Main Task: {Main_Task}
Subtasks: {Subtask_types}
Target Proteins: {Proteins}
Parameters: {Parameters}
Initial Plan Proposed by User: {UserProposedPlan}

The Main Task is the user's request.

The Subtasks are (some of/all) the individual steps that may need to be taken
to complete the Main Task; Preprocessing/Preparation usually involves
cleaning the initial pdb file (adding hydrogens, removing/adding water, etc.)
or making the required box for the simulation, Simulation involves running the
simulation and/or modifying the simulation script, Postprocessing involves
analyzing the results of the simulation (either using provided tools or figuring
it out on your own). Finally, Question is used if the user query is more
of a question than a request for a specific task.

the Target Proteins are the protein(s) that the user wants to focus on,
the Parameters are the 'special' conditions that the user wants to set and use
for the simulation, preprocessing and or analysis.

Sometimes users already have an idea of what is needed to be done.
Initial Plan Proposed by User is the user's initial plan for the simulation. You
can use this as a guide to understand what the user wants to do. You can also
modify it if you think is necessary.

You can only respond with a single complete
'Thought, Action, Action Input' format
OR a single 'Final Answer' format.

Complete format:
Thought: (reflect on your progress and decide what " "to do next)
Action: (the action name, should be the name of a tool)
Action Input: (the input string to the action)

OR

Final Answer: (the final answer to the original input
question)

Use the tools provided, using the most specific tool
available for each action.
Your final answer should contain all information
necessary to answer the question and subquestions.
Your thought process should be clean and clear,
and you must explicitly state the actions you are taking.

If you are asked to continue
or reference previous runs,
the context will be provided to you.
If context is provided, you should assume
you are continuing a chat.

Here is the input:
Previous Context: {context}
""",
)

openaifxn_prompt = PromptTemplate(
input_variables=["input", "context"],
template="""
You are an expert molecular dynamics scientist and your
You are an expert molecular dynamics scientist, and your
task is to respond to the question or
solve the problem to the best of your ability using
the provided tools. Once you map a path to a short name,
Expand Down
8 changes: 1 addition & 7 deletions mdagent/tools/base_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,7 @@
UniprotID2Name,
)
from .simulation_tools.create_simulation import ModifyBaseSimulationScriptTool
from .simulation_tools.setup_and_run import (
SetUpandRunFunction,
SetUpAndRunTool,
SimulationFunctions,
)
from .simulation_tools.setup_and_run import SetUpandRunFunction
from .util_tools.git_issues_tool import SerpGitTool
from .util_tools.registry_tools import ListRegistryPaths, MapPath2Name
from .util_tools.search_tools import Scholar2ResultLLM
Expand Down Expand Up @@ -92,9 +88,7 @@
"RMSDCalculator",
"Scholar2ResultLLM",
"SerpGitTool",
"SetUpAndRunTool",
"SetUpandRunFunction",
"SimulationFunctions",
"SimulationOutputFigures",
"SmallMolPDB",
"UniprotID2Name",
Expand Down
4 changes: 2 additions & 2 deletions mdagent/tools/base_tools/preprocess_tools/pdb_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def get_pdb(query_string: str, path_registry: PathRegistry):
}
r = requests.post(url, json=query)
if r.status_code == 204:
return None
return None, None
if "cif" in query_string or "CIF" in query_string:
filetype = "cif"
else:
Expand Down Expand Up @@ -57,7 +57,7 @@ def get_pdb(query_string: str, path_registry: PathRegistry):
)

return filename, file_id
return None
return None, None


class ProteinName2PDBTool(BaseTool):
Expand Down
4 changes: 1 addition & 3 deletions mdagent/tools/base_tools/simulation_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from .create_simulation import ModifyBaseSimulationScriptTool
from .setup_and_run import SetUpandRunFunction, SetUpAndRunTool, SimulationFunctions
from .setup_and_run import SetUpandRunFunction

__all__ = [
"ModifyBaseSimulationScriptTool",
"SetUpandRunFunction",
"SetUpAndRunTool",
"SimulationFunctions",
]
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
from typing import Optional

from langchain.base_language import BaseLanguageModel
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.tools import BaseTool
from langchain_core.output_parsers import StrOutputParser
from pydantic import BaseModel, Field

from mdagent.utils import FileType, PathRegistry
Expand Down Expand Up @@ -48,7 +48,7 @@ def _prompt_summary(self, query: str):
prompt = PromptTemplate(
template=prompt_template, input_variables=["base_script", "query"]
)
llm_chain = LLMChain(prompt=prompt, llm=self.llm)
llm_chain = prompt | self.llm | StrOutputParser()

return llm_chain.invoke(query)

Expand Down
Loading
Loading