Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return session name in runner response #6112

Merged
merged 1 commit into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions langchain/client/runner_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,21 +422,25 @@ async def arun_on_dataset(
client will be created using the credentials in the environment.

Returns:
- A dictionary mapping example ids to the model outputs.
+ A dictionary containing the run's session name and the resulting model outputs.
"""
client_ = client or LangChainPlusClient()
session_name = _get_session_name(session_name, llm_or_chain_factory, dataset_name)
dataset = client_.read_dataset(dataset_name=dataset_name)
examples = client_.list_examples(dataset_id=str(dataset.id))

- return await arun_on_examples(
+ results = await arun_on_examples(
examples,
llm_or_chain_factory,
concurrency_level=concurrency_level,
num_repetitions=num_repetitions,
session_name=session_name,
verbose=verbose,
)
return {
"session_name": session_name,
"results": results,
}


def run_on_dataset(
Expand Down Expand Up @@ -466,16 +470,20 @@ def run_on_dataset(
will be created using the credentials in the environment.

Returns:
- A dictionary mapping example ids to the model outputs.
+ A dictionary containing the run's session name and the resulting model outputs.
"""
client_ = client or LangChainPlusClient()
session_name = _get_session_name(session_name, llm_or_chain_factory, dataset_name)
dataset = client_.read_dataset(dataset_name=dataset_name)
examples = client_.list_examples(dataset_id=str(dataset.id))
- return run_on_examples(
+ results = run_on_examples(
examples,
llm_or_chain_factory,
num_repetitions=num_repetitions,
session_name=session_name,
verbose=verbose,
)
return {
"session_name": session_name,
"results": results,
}
38 changes: 12 additions & 26 deletions langchain/experimental/client/tracing_datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@
" error=False, # Only runs that succeed\n",
")\n",
"for run in runs:\n",
" if run.outputs is None:\n",
" continue\n",
" try:\n",
" client.create_example(\n",
" inputs=run.inputs, outputs=run.outputs, dataset_id=dataset.id\n",
Expand Down Expand Up @@ -388,7 +390,7 @@
" client will be created using the credentials in the environment.\n",
"\n",
"Returns:\n",
" A dictionary mapping example ids to the model outputs.\n",
" A dictionary containing the run's session name and the resulting model outputs.\n",
"\u001b[0;31mFile:\u001b[0m ~/code/lc/lckg/langchain/client/runner_utils.py\n",
"\u001b[0;31mType:\u001b[0m function"
]
Expand Down Expand Up @@ -438,16 +440,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Processed examples: 3\r"
"Processed examples: 4\r"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Chain failed for example 59fb1b4d-d935-4e43-b2a7-bc33fde841bb. Error: LLMMathChain._evaluate(\"\n",
"round(0.2791714614499425, 2)\n",
"\") raised error: 'VariableNode' object is not callable. Please try again with a valid numerical expression\n"
"Chain failed for example c855f923-4165-4fe0-a909-360749f3f764. Error: Could not parse LLM output: `The final answer is that there were no more points scored in the 2023 Super Bowl than in the 2022 Super Bowl.`\n"
]
},
{
Expand All @@ -459,13 +459,11 @@
}
],
"source": [
"evaluation_session_name = \"Search + Calculator Agent Evaluation\"\n",
"chain_results = await arun_on_dataset(\n",
" dataset_name=dataset_name,\n",
" llm_or_chain_factory=chain_factory,\n",
" concurrency_level=5, # Optional, sets the number of examples to run at a time\n",
" verbose=True,\n",
" session_name=evaluation_session_name, # Optional, a unique session name will be generated if not provided\n",
" client=client,\n",
")\n",
"\n",
Expand Down Expand Up @@ -558,7 +556,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 16,
"id": "4c94a738-dcd3-442e-b8e7-dd36459f56e3",
"metadata": {
"tags": []
Expand All @@ -567,7 +565,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a185493c1af74cbaa0f9b10f32cf81c6",
"model_id": "9989f6507cd04ea7a09ea3c5723dc984",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -582,8 +580,10 @@
"source": [
"from tqdm.notebook import tqdm\n",
"feedbacks = []\n",
"runs = client.list_runs(session_name=evaluation_session_name, execution_order=1, error=False)\n",
"runs = client.list_runs(session_name=chain_results[\"session_name\"], execution_order=1, error=False)\n",
"for run in tqdm(runs):\n",
" if run.outputs is None:\n",
" continue\n",
" eval_feedback = []\n",
" for evaluator in evaluators:\n",
" eval_feedback.append(client.aevaluate_run(run, evaluator))\n",
Expand All @@ -592,26 +592,12 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": null,
"id": "8696f167-dc75-4ef8-8bb3-ac1ce8324f30",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<a href=\"https://dev.langchain.plus\", target=\"_blank\" rel=\"noopener\">LangChain+ Client</a>"
],
"text/plain": [
"LangChainPlusClient (API URL: https://dev.api.langchain.plus)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"client"
]
Expand Down
2 changes: 1 addition & 1 deletion tests/unit_tests/client/test_runner_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,4 @@ async def mock_arun_chain(
]
for uuid_ in uuids
}
- assert results == expected
+ assert results["results"] == expected