Skip to content

Commit

Permalink
fmt
Browse files Browse the repository at this point in the history
  • Loading branch information
isahers1 committed Jul 9, 2024
1 parent ea10c07 commit a601b6d
Showing 1 changed file with 117 additions and 82 deletions.
199 changes: 117 additions & 82 deletions scripts/tool_benchmarks.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,45 @@
import datetime
import uuid
from langchain_anthropic import ChatAnthropic
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_fireworks import ChatFireworks
from typing import List, cast

from langchain_benchmarks.tool_usage.agents import StandardAgentFactory
from langchain.tools import BaseTool, tool
from langchain_anthropic import ChatAnthropic
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.messages.utils import convert_to_messages
import datetime
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import tool
from langchain_fireworks import ChatFireworks
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langsmith.client import Client
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_chroma import Chroma
from langchain_core.documents import Document

from langchain_benchmarks import (
__version__,
registry,
)
from langchain_benchmarks.rate_limiting import RateLimiter

from typing import List, cast
from langchain.tools import BaseTool, tool

from langchain_benchmarks.tool_usage.agents import StandardAgentFactory
from langchain_benchmarks.tool_usage.tasks.multiverse_math import *


# Wrap each multiverse-math function as a LangChain tool. The function names
# are presumably provided by the star import from
# langchain_benchmarks.tool_usage.tasks.multiverse_math -- confirm there.
# The cast only tells static type checkers that the list holds BaseTool
# objects; it performs no conversion at runtime.
tools = cast(
    List[BaseTool],
    [
        tool(func)
        for func in [
            multiply,
            add,
            divide,
            subtract,
            power,
            log,
            negate,
            sin,
            cos,
            pi,
        ]
    ],
)

tests = [
(
Expand All @@ -53,25 +50,31 @@
"claude-3-sonnet-20240229",
ChatAnthropic(model="claude-3-sonnet-20240229", temperature=0),
),
(
"gpt-3.5-turbo-0125",
ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)),
("gpt-3.5-turbo-0125", ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)),
(
"gpt-4-turbo-2024-04-09",
ChatOpenAI(model="gpt-4-turbo-2024-04-09", temperature=0),
),
(
"accounts/fireworks/models/firefunction-v2",
ChatFireworks(model="accounts/fireworks/models/firefunction-v2", temperature=0)
)
ChatFireworks(model="accounts/fireworks/models/firefunction-v2", temperature=0),
),
]


def semantic_similar_few_shots(question, retriever, examples):
    """Collect few-shot messages from the examples most similar to a question.

    Args:
        question: Text passed to the retriever as the query.
        retriever: Object exposing ``get_relevant_documents``; every document
            it returns must carry an ``"index"`` metadata entry that is a
            valid position in ``examples``.
        examples: Indexable collection whose items expose
            ``outputs["output"]`` in a form ``convert_to_messages`` accepts.

    Returns:
        A flat list with the non-system messages of at most three retrieved
        examples, in retrieval order.
    """
    ans = []
    # Only the first three retrieved documents contribute examples.
    for doc in retriever.get_relevant_documents(question)[:3]:
        example = examples[doc.metadata["index"]]
        # Each example's conversation is added exactly once, with system
        # messages dropped.
        ans.extend(
            m
            for m in convert_to_messages(example.outputs["output"])
            if not isinstance(m, SystemMessage)
        )
    return ans


client = Client() # Launch langsmith client for cloning datasets
# GPT-4 Turbo chat model at temperature 0 with the tool list bound to it.
llm = ChatOpenAI(model="gpt-4-turbo-2024-04-09", temperature=0).bind_tools(tools)
# First four hex characters of a random UUID4; presumably a short tag for this
# run's experiments -- its use is outside the visible lines.
experiment_uuid = uuid.uuid4().hex[:4]
Expand All @@ -87,73 +90,106 @@ def semantic_similar_few_shots(question, retriever, examples):

dataset_name = task.name

# Fetch the few-shot conversations from the LangSmith dataset.
examples = list(
    client.list_examples(dataset_name="multiverse-math-examples-for-few-shot")
)
few_shot_messages = []
questions = []
for i, example in enumerate(examples):
    converted_messages = convert_to_messages(example.outputs["output"])
    # One retrieval document per example: the content of the message at
    # position 1 (presumably the human question following a system message --
    # confirm against the dataset), tagged with the example's position so it
    # can be looked up in `examples` again.
    questions.append(
        Document(page_content=converted_messages[1].content, metadata={"index": i})
    )
    few_shot_messages += converted_messages

# System messages are dropped from the pooled few-shot messages.
few_shot_messages = [
    m for m in few_shot_messages if not isinstance(m, SystemMessage)
]

# Flatten the few-shot messages into a single plain-text transcript.
# NOTE(review): this view carries no indentation and shows several statements
# in two formatting variants, so the nesting of the newline appends below
# cannot be confirmed from here.
few_shot_message = ""
for m in few_shot_messages:
if isinstance(m.content,list):
if isinstance(m.content, list):
few_shot_message += "AI message: "
# List-valued content: entries with a "name" key are rendered as tool calls,
# every other entry contributes its "text" value.
for tool_use in m.content:
if 'name' in tool_use:
if "name" in tool_use:
few_shot_message += f"Use tool {tool_use['name']}, input: {', '.join(f'{k}:{v}' for k,v in tool_use['input'].items())}"
else:
few_shot_message += tool_use['text']
few_shot_message += tool_use["text"]
few_shot_message += "\n"
else:
# Non-list content is prefixed according to whether the message is a
# HumanMessage; anything else is labelled as an AI message.
if isinstance(m, HumanMessage):
few_shot_message += f"Human message: {m.content}"
else:
few_shot_message += f"AI message: {m.content}"

few_shot_message += "\n"

# Embed the example questions with OpenAI embeddings into a Chroma vector
# store and expose the store as a retriever.
vectorstore = Chroma.from_documents(documents=questions, embedding=OpenAIEmbeddings())
vectorstore = Chroma.from_documents(
documents=questions, embedding=OpenAIEmbeddings()
)

retriever = vectorstore.as_retriever()

# Prompt variants to benchmark, each paired with the short name used to label
# it. All variants end with the human question followed by the agent
# scratchpad placeholder.
prompts = [
    # Baseline: instructions and question only.
    (
        ChatPromptTemplate.from_messages(
            [
                ("system", "{instructions}"),
                ("human", "{question}"),
                MessagesPlaceholder("agent_scratchpad"),  # Workspace for the agent
            ]
        ),
        "no-few-shot",
    ),
    # All few-shot conversations inserted as individual chat messages.
    (
        ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "{instructions} Here are some example conversations of the user interacting with the AI until the correct answer is reached: ",
                ),
            ]
            + few_shot_messages
            + [
                ("human", "{question}"),
                MessagesPlaceholder("agent_scratchpad"),  # Workspace for the agent
            ]
        ),
        "few-shot-message",
    ),
    # All few-shot conversations flattened into the system message text.
    (
        ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "{instructions} Here are some example conversations of the user interacting with the AI until the correct answer is reached: "
                    + few_shot_message,
                ),
                ("human", "{question}"),
                MessagesPlaceholder("agent_scratchpad"),  # Workspace for the agent
            ]
        ),
        "few-shot-string",
    ),
    # Few-shot messages selected through the retriever.
    # NOTE(review): semantic_similar_few_shots is called once, here, with the
    # literal string "{question}", so the examples are chosen by similarity to
    # that placeholder text rather than to each benchmark question. Selecting
    # per question would require building this prompt at invocation time.
    (
        ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "{instructions} Here are some example conversations of the user interacting with the AI until the correct answer is reached: ",
                ),
            ]
            + semantic_similar_few_shots("{question}", retriever, examples)
            + [
                ("human", "{question}"),
                MessagesPlaceholder("agent_scratchpad"),  # Workspace for the agent
            ]
        ),
        "few-shot-semantic",
    ),
]

for model_name, model in tests[:-1]:
Expand All @@ -162,7 +198,6 @@ def semantic_similar_few_shots(question, retriever, examples):
print(f"Benchmarking {task.name} with model: {model_name}")
eval_config = task.get_eval_config()


for prompt, prompt_name in prompts:
agent_factory = StandardAgentFactory(
task, model, prompt, rate_limiter=rate_limiter
Expand All @@ -182,4 +217,4 @@ def semantic_similar_few_shots(question, retriever, examples):
"date": today,
"langchain_benchmarks_version": __version__,
},
)
)

0 comments on commit a601b6d

Please sign in to comment.