Skip to content

Commit

Permalink
Add in the async methods and link the run id
Browse files Browse the repository at this point in the history
  • Loading branch information
vowelparrot committed Jun 7, 2023
1 parent a0d847f commit 6c1ad42
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 160 deletions.
41 changes: 38 additions & 3 deletions langchain/evaluation/run_evaluators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
from langchainplus_sdk import EvaluationResult, RunEvaluator
from langchainplus_sdk.schemas import Example, Run

from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.callbacks.manager import (
AsyncCallbackManagerForChainRun,
CallbackManagerForChainRun,
)
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.schema import BaseOutputParser
from langchain.schema import RUN_KEY, BaseOutputParser


class RunEvalInputMapper:
Expand Down Expand Up @@ -59,12 +62,44 @@ def _call(
example: Optional[Example] = inputs.get("example")
chain_input = self.input_mapper.map(run, example)
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
chain_output = self.eval_chain(chain_input, callbacks=_run_manager.get_child())
callbacks = _run_manager.get_child()
chain_output = self.eval_chain(
chain_input, callbacks=callbacks, include_run_info=True
)
run_info = chain_output[RUN_KEY]
feedback = self.output_parser.parse_chain_output(chain_output)
feedback.evaluator_info[RUN_KEY] = run_info
return {"feedback": feedback}

async def _acall(
    self,
    inputs: Dict[str, Any],
    run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
    """Asynchronously evaluate a traced run against an optional example.

    Args:
        inputs: Must contain a "run" (the traced Run to evaluate) and may
            contain an "example" (a reference Example, or None).
        run_manager: Optional async callback manager; a no-op manager is
            used when none is supplied.

    Returns:
        A dict with a single "feedback" key holding the parsed feedback,
        whose ``evaluator_info`` carries the evaluation chain's run info
        so the feedback can be linked back to the evaluator's own run.
    """
    # NOTE: use Optional[...] rather than the PEP 604 `X | None` form —
    # `X | None` is evaluated at definition time and raises TypeError on
    # Python < 3.10, and Optional[...] matches the sync `_call` signature.
    run: Run = inputs["run"]
    example: Optional[Example] = inputs.get("example")
    chain_input = self.input_mapper.map(run, example)
    _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
    callbacks = _run_manager.get_child()
    # include_run_info=True makes the eval chain return its run info under
    # RUN_KEY so we can attach the evaluator's run id to the feedback.
    chain_output = await self.eval_chain.acall(
        chain_input,
        callbacks=callbacks,
        include_run_info=True,
    )
    run_info = chain_output[RUN_KEY]
    feedback = self.output_parser.parse_chain_output(chain_output)
    feedback.evaluator_info[RUN_KEY] = run_info
    return {"feedback": feedback}

def evaluate_run(
    self, run: Run, example: Optional[Example] = None
) -> EvaluationResult:
    """Synchronously evaluate a run (with an optional reference example)
    and return the resulting feedback."""
    outputs = self({"run": run, "example": example})
    return outputs["feedback"]

async def aevaluate_run(
    self, run: Run, example: Optional[Example] = None
) -> EvaluationResult:
    """Asynchronously evaluate a run (with an optional reference example)
    and return the resulting feedback."""
    outputs = await self.acall({"run": run, "example": example})
    return outputs["feedback"]
2 changes: 1 addition & 1 deletion langchain/evaluation/run_evaluators/implementations.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Config:
def map(self, run: Run, example: Optional[Example] = None) -> Dict[str, str]:
"""Maps the Run and Optional[Example] to a dictionary"""
if run.outputs is None:
raise ValueError("Run outputs cannot be None.")
raise ValueError(f"Run {run.id} has no outputs.")

data = {
value: run.outputs.get(key) for key, value in self.prediction_map.items()
Expand Down
169 changes: 35 additions & 134 deletions langchain/experimental/client/tracing_datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@
},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.agents import initialize_agent, load_tools\n",
"from langchain.agents import AgentType\n",
"\n",
"llm = OpenAI(temperature=0)\n",
"llm = ChatOpenAI(temperature=0)\n",
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
"agent = initialize_agent(\n",
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False\n",
Expand All @@ -138,51 +138,7 @@
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n",
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n",
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n",
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n",
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n",
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n",
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n",
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n",
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n",
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n",
"Retrying langchain.llms.openai.acompletion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"unknown format from LLM: This question cannot be answered using the numexpr library, as it does not involve any mathematical expressions.\n"
]
},
{
"data": {
"text/plain": [
"['39,566,248 people live in Canada as of 2023.',\n",
" \"Romain Gavras is Dua Lipa's boyfriend and his age raised to the .43 power is 4.9373857399466665.\",\n",
" '3.991298452658078',\n",
" 'The shortest distance (air line) between Boston and Paris is 3,437.00 mi (5,531.32 km).',\n",
" 'The total number of points scored in the 2023 Super Bowl raised to the .23 power is 2.3086081644669734.',\n",
" ValueError('unknown format from LLM: This question cannot be answered using the numexpr library, as it does not involve any mathematical expressions.'),\n",
" 'The 2023 Super Bowl scored 3 more points than the 2022 Super Bowl.',\n",
" '1.9347796717823205',\n",
" 'Devin Booker, Kendall Jenner\\'s boyfriend, is 6\\' 5\" tall and his height raised to the .13 power is 1.27335715306192.',\n",
" '1213 divided by 4345 is 0.2791714614499425']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"import asyncio\n",
"\n",
Expand All @@ -206,13 +162,12 @@
" return await agent.arun(input_example)\n",
" except Exception as e:\n",
" # The agent sometimes makes mistakes! These will be captured by the tracing.\n",
" print(e)\n",
" return e\n",
"\n",
"\n",
"for input_example in inputs:\n",
" results.append(arun(agent, input_example))\n",
"await asyncio.gather(*results)"
"results = await asyncio.gather(*results)"
]
},
{
Expand Down Expand Up @@ -479,27 +434,6 @@
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Chain failed for example fb07a1d4-e96e-45fe-a3cd-5113e174b017. Error: unknown format from LLM: Sorry, I cannot answer this question as it requires information that is not currently available.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processed examples: 2\r"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Chain failed for example f088cda6-3745-4f83-b8fa-e5c1038e81b2. Error: unknown format from LLM: Sorry, as an AI language model, I do not have access to personal information such as someone's age. Please provide a different math problem.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
Expand All @@ -511,36 +445,16 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Chain failed for example abb7259c-8136-4903-80b3-04644eebcc82. Error: Parsing LLM output produced both a final answer and a parse-able action: I need to use the search engine to find out who Dua Lipa's boyfriend is and then use the calculator to raise his age to the .43 power.\n",
"Action 1: Search\n",
"Action Input 1: \"Dua Lipa boyfriend\"\n",
"Observation 1: Anwar Hadid is Dua Lipa's boyfriend.\n",
"Action 2: Calculator\n",
"Action Input 2: 21^0.43\n",
"Observation 2: Anwar Hadid's age raised to the 0.43 power is approximately 3.87.\n",
"Thought: I now know the final answer.\n",
"Final Answer: Anwar Hadid is Dua Lipa's boyfriend and his age raised to the 0.43 power is approximately 3.87.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processed examples: 7\r"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Chain failed for example 2123b7f1-3d3d-4eca-ba30-faf0dff75399. Error: Could not parse LLM output: `I need to subtract the score of the`\n"
"Chain failed for example 59fb1b4d-d935-4e43-b2a7-bc33fde841bb. Error: LLMMathChain._evaluate(\"\n",
"round(0.2791714614499425, 2)\n",
"\") raised error: 'VariableNode' object is not callable. Please try again with a valid numerical expression\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processed examples: 9\r"
"Processed examples: 5\r"
]
}
],
Expand Down Expand Up @@ -622,7 +536,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 14,
"id": "35db4025-9183-4e5f-ba14-0b1b380f49c7",
"metadata": {
"tags": []
Expand All @@ -644,52 +558,21 @@
},
{
"cell_type": "code",
"execution_count": 17,
"id": "20ab5a84-1d34-4532-8b4f-b12407f42a0e",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<a href=\"https://dev.langchain.plus\", target=\"_blank\" rel=\"noopener\">LangChain+ Client</a>"
],
"text/plain": [
"LangChainPlusClient (API URL: https://dev.api.langchain.plus)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TODO: Use this one above as well\n",
"from langchainplus_sdk import LangChainPlusClient\n",
"\n",
"client = LangChainPlusClient()\n",
"runs = list(client.list_runs(session_name=evaluation_session_name, execution_order=1, error=False))\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58c23a51-1e0a-46d8-b04b-0e0627983232",
"execution_count": 27,
"id": "4c94a738-dcd3-442e-b8e7-dd36459f56e3",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ddf4e207965345c7b1ac27a5e3e677e8",
"model_id": "a185493c1af74cbaa0f9b10f32cf81c6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/44 [00:00<?, ?it/s]"
"0it [00:00, ?it/s]"
]
},
"metadata": {},
Expand All @@ -698,27 +581,45 @@
],
"source": [
"from tqdm.notebook import tqdm\n",
"feedbacks = []\n",
"runs = client.list_runs(session_name=evaluation_session_name, execution_order=1, error=False)\n",
"for run in tqdm(runs):\n",
" eval_feedback = []\n",
" for evaluator in evaluators:\n",
" feedback = client.evaluate_run(run, evaluator)"
" eval_feedback.append(client.aevaluate_run(run, evaluator))\n",
" feedbacks.extend(await asyncio.gather(*eval_feedback)) "
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 29,
"id": "8696f167-dc75-4ef8-8bb3-ac1ce8324f30",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<a href=\"https://dev.langchain.plus\", target=\"_blank\" rel=\"noopener\">LangChain+ Client</a>"
],
"text/plain": [
"LangChainPlusClient (API URL: https://dev.api.langchain.plus)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "daf7dc7f-a5b0-49be-a695-2a87e283e588",
"id": "a5037e54-2c5a-4993-9b46-2a98773d3079",
"metadata": {},
"outputs": [],
"source": []
Expand Down
Loading

0 comments on commit 6c1ad42

Please sign in to comment.