Format Templates (langchain-ai#12396)

hoanq1811 · Feb 2, 2024 · 6ebc39b · 6ebc39b
1 parent c60f161
commit 6ebc39b
Show file tree

Hide file tree

Showing 59 changed files with 791 additions and 432 deletions.
diff --git a/templates/Makefile b/templates/Makefile
@@ -1,2 +1,8 @@
 lint lint_diff:
-	poetry run ruff .
+	poetry run poe lint
+
+test:
+	poetry run poe test
+
+format:
+	poetry run poe format
diff --git a/templates/anthropic-iterative-search/anthropic_iterative_search/__init__.py b/templates/anthropic-iterative-search/anthropic_iterative_search/__init__.py
@@ -1,7 +1,7 @@
 from langchain.schema.runnable import ConfigurableField
 
+from .chain import chain
 from .retriever_agent import executor
-from .chain import chain 
 
 final_chain = chain.configurable_alternatives(
     ConfigurableField(id="chain"),

diff --git a/templates/anthropic-iterative-search/anthropic_iterative_search/chain.py b/templates/anthropic-iterative-search/anthropic_iterative_search/chain.py
@@ -1,5 +1,5 @@
-from langchain.prompts import ChatPromptTemplate
 from langchain.chat_models import ChatAnthropic
+from langchain.prompts import ChatPromptTemplate
 from langchain.schema.output_parser import StrOutputParser
 
 from .prompts import answer_prompt

diff --git a/templates/anthropic-iterative-search/anthropic_iterative_search/output_parser.py b/templates/anthropic-iterative-search/anthropic_iterative_search/output_parser.py
@@ -1,6 +1,7 @@
-from langchain.schema.agent import AgentAction, AgentFinish
 import re
 
+from langchain.schema.agent import AgentAction, AgentFinish
+
 from .agent_scratchpad import _format_docs
 
 
@@ -14,18 +15,23 @@ def extract_between_tags(tag: str, string: str, strip: bool = True) -> str:
         # Only return the first one
         return ext_list[0]
 
+
 def parse_output(outputs):
     partial_completion = outputs["partial_completion"]
     steps = outputs["intermediate_steps"]
-    search_query = extract_between_tags('search_query', partial_completion + '</search_query>') 
+    search_query = extract_between_tags(
+        "search_query", partial_completion + "</search_query>"
+    )
     if search_query is None:
         docs = []
         str_output = ""
         for action, observation in steps:
             docs.extend(observation)
             str_output += action.log
-            str_output += '</search_query>' + _format_docs(observation)
+            str_output += "</search_query>" + _format_docs(observation)
         str_output += partial_completion
         return AgentFinish({"docs": docs, "output": str_output}, log=partial_completion)
     else:
-        return AgentAction(tool="search", tool_input=search_query, log=partial_completion)
+        return AgentAction(
+            tool="search", tool_input=search_query, log=partial_completion
+        )
diff --git a/templates/anthropic-iterative-search/anthropic_iterative_search/prompts.py b/templates/anthropic-iterative-search/anthropic_iterative_search/prompts.py
@@ -2,6 +2,6 @@
 
 After each call to the Search Engine Tool, reflect briefly inside <search_quality></search_quality> tags about whether you now have enough information to answer, or whether more information is needed. If you have all the relevant information, write it in <information></information> tags, WITHOUT actually answering the question. Otherwise, issue a new search.
 
-Here is the user's question: <question>{query}</question> Remind yourself to make short queries in your scratchpad as you plan out your strategy."""
+Here is the user's question: <question>{query}</question> Remind yourself to make short queries in your scratchpad as you plan out your strategy."""  # noqa: E501
 
-answer_prompt = "Here is a user query: <query>{query}</query>. Here is some relevant information: <information>{information}</information>. Please answer the question using the relevant information."
+answer_prompt = "Here is a user query: <query>{query}</query>. Here is some relevant information: <information>{information}</information>. Please answer the question using the relevant information."  # noqa: E501
diff --git a/templates/anthropic-iterative-search/anthropic_iterative_search/retriever.py b/templates/anthropic-iterative-search/anthropic_iterative_search/retriever.py
@@ -3,13 +3,14 @@
 
 # This is used to tell the model how to best use the retriever.
 
-retriever_description = """You will be asked a question by a human user. You have access to the following tool to help answer the question. <tool_description> Search Engine Tool * The search engine will exclusively search over Wikipedia for pages similar to your query. It returns for each page its title and full page content. Use this tool if you want to get up-to-date and comprehensive information on a topic to help answer queries. Queries should be as atomic as possible -- they only need to address one part of the user's question. For example, if the user's query is "what is the color of a basketball?", your search query should be "basketball". Here's another example: if the user's question is "Who created the first neural network?", your first query should be "neural network". As you can see, these queries are quite short. Think keywords, not phrases. * At any time, you can make a call to the search engine using the following syntax: <search_query>query_word</search_query>. * You'll then get results back in <search_result> tags.</tool_description>"""
+retriever_description = """You will be asked a question by a human user. You have access to the following tool to help answer the question. <tool_description> Search Engine Tool * The search engine will exclusively search over Wikipedia for pages similar to your query. It returns for each page its title and full page content. Use this tool if you want to get up-to-date and comprehensive information on a topic to help answer queries. Queries should be as atomic as possible -- they only need to address one part of the user's question. For example, if the user's query is "what is the color of a basketball?", your search query should be "basketball". Here's another example: if the user's question is "Who created the first neural network?", your first query should be "neural network". As you can see, these queries are quite short. Think keywords, not phrases. * At any time, you can make a call to the search engine using the following syntax: <search_query>query_word</search_query>. * You'll then get results back in <search_result> tags.</tool_description>"""  # noqa: E501
 
 retriever = WikipediaRetriever()
 
 # This should be the same as the function name below
 RETRIEVER_TOOL_NAME = "search"
 
+
 @tool
 def search(query):
     """Search with the retriever."""

diff --git a/templates/anthropic-iterative-search/anthropic_iterative_search/retriever_agent.py b/templates/anthropic-iterative-search/anthropic_iterative_search/retriever_agent.py
@@ -1,13 +1,13 @@
+from langchain.agents import AgentExecutor
 from langchain.chat_models import ChatAnthropic
 from langchain.prompts import ChatPromptTemplate
-from langchain.schema.runnable import RunnablePassthrough, RunnableMap
 from langchain.schema.output_parser import StrOutputParser
-from langchain.agents import AgentExecutor
+from langchain.schema.runnable import RunnableMap, RunnablePassthrough
 
-from .retriever import search, RETRIEVER_TOOL_NAME, retriever_description
-from .prompts import retrieval_prompt
 from .agent_scratchpad import format_agent_scratchpad
 from .output_parser import parse_output
+from .prompts import retrieval_prompt
+from .retriever import retriever_description, search
 
 prompt = ChatPromptTemplate.from_messages([
     ("user", retrieval_prompt),

diff --git a/templates/anthropic-iterative-search/main.py b/templates/anthropic-iterative-search/main.py
@@ -1,6 +1,12 @@
-from anthropic_iterative_search import final_chain 
-
+from anthropic_iterative_search import final_chain
 
 if __name__ == "__main__":
-	query = "Which movie came out first: Oppenheimer, or Are You There God It's Me Margaret?"
-	print(final_chain.with_config(configurable={"chain": "retrieve"}).invoke({"query": query}))
+    query = (
+        "Which movie came out first: Oppenheimer, or "
+        "Are You There God It's Me Margaret?"
+    )
+    print(
+        final_chain.with_config(configurable={"chain": "retrieve"}).invoke(
+            {"query": query}
+        )
+    )
diff --git a/templates/cassandra-entomology-rag/cassandra_entomology_rag/__init__.py b/templates/cassandra-entomology-rag/cassandra_entomology_rag/__init__.py
@@ -1,14 +1,12 @@
 import os
 
 import cassio
-
 from langchain.chat_models import ChatOpenAI
 from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import Cassandra
 from langchain.prompts import ChatPromptTemplate
-from langchain.schema.runnable import RunnablePassthrough
 from langchain.schema.output_parser import StrOutputParser
-
+from langchain.schema.runnable import RunnablePassthrough
+from langchain.vectorstores import Cassandra
 
 use_cassandra = int(os.environ.get("USE_CASSANDRA_CLUSTER", "0"))
 if use_cassandra:

diff --git a/templates/cassandra-entomology-rag/cassandra_entomology_rag/cassandra_cluster_init.py b/templates/cassandra-entomology-rag/cassandra_entomology_rag/cassandra_cluster_init.py
@@ -1,13 +1,13 @@
 import os
 
-from cassandra.cluster import Cluster
 from cassandra.auth import PlainTextAuthProvider
+from cassandra.cluster import Cluster
 
 
 def get_cassandra_connection():
     contact_points = [
         cp.strip()
-        for cp in os.environ.get("CASSANDRA_CONTACT_POINTS", "").split(',')
+        for cp in os.environ.get("CASSANDRA_CONTACT_POINTS", "").split(",")
         if cp.strip()
     ]
     CASSANDRA_KEYSPACE = os.environ["CASSANDRA_KEYSPACE"]
@@ -22,6 +22,8 @@ def get_cassandra_connection():
     else:
         auth_provider = None
 
-    c_cluster = Cluster(contact_points if contact_points else None, auth_provider=auth_provider)
+    c_cluster = Cluster(
+        contact_points if contact_points else None, auth_provider=auth_provider
+    )
     session = c_cluster.connect()
     return (session, CASSANDRA_KEYSPACE)
diff --git a/templates/cassandra-entomology-rag/setup.py b/templates/cassandra-entomology-rag/setup.py
@@ -1,14 +1,13 @@
 import os
 
 import cassio
-
-from langchain.vectorstores import Cassandra
 from langchain.embeddings import OpenAIEmbeddings
-
+from langchain.vectorstores import Cassandra
 
 use_cassandra = int(os.environ.get("USE_CASSANDRA_CLUSTER", "0"))
 if use_cassandra:
     from cassandra_entomology_rag.cassandra_cluster_init import get_cassandra_connection
+
     session, keyspace = get_cassandra_connection()
     cassio.init(
         session=session,
@@ -22,7 +21,7 @@
     )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     embeddings = OpenAIEmbeddings()
     vector_store = Cassandra(
         session=None,
@@ -32,16 +31,13 @@
     )
     #
     lines = [
-        l.strip()
-        for l in open("sources.txt").readlines()
-        if l.strip()
-        if l[0] != "#"
+        line.strip()
+        for line in open("sources.txt").readlines()
+        if line.strip()
+        if line[0] != "#"
     ]
     # deterministic IDs to prevent duplicates on multiple runs
-    ids = [
-        "_".join(l.split(" ")[:2]).lower().replace(":", "")
-        for l in lines
-    ]
+    ids = ["_".join(line.split(" ")[:2]).lower().replace(":", "") for line in lines]
     #
     vector_store.add_texts(texts=lines, ids=ids)
     print(f"Done ({len(lines)} lines inserted).")
diff --git a/templates/cassandra-synonym-caching/cassandra_synonym_caching/__init__.py b/templates/cassandra-synonym-caching/cassandra_synonym_caching/__init__.py
@@ -1,13 +1,12 @@
 import os
 
 import cassio
-
 import langchain
-from langchain.schema import BaseMessage
-from langchain.prompts import ChatPromptTemplate
+from langchain.cache import CassandraCache
 from langchain.chat_models import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+from langchain.schema import BaseMessage
 from langchain.schema.runnable import RunnableLambda
-from langchain.cache import CassandraCache
 
 use_cassandra = int(os.environ.get("USE_CASSANDRA_CLUSTER", "0"))
 if use_cassandra:

diff --git a/templates/cassandra-synonym-caching/cassandra_synonym_caching/cassandra_cluster_init.py b/templates/cassandra-synonym-caching/cassandra_synonym_caching/cassandra_cluster_init.py
@@ -1,13 +1,13 @@
 import os
 
-from cassandra.cluster import Cluster
 from cassandra.auth import PlainTextAuthProvider
+from cassandra.cluster import Cluster
 
 
 def get_cassandra_connection():
     contact_points = [
         cp.strip()
-        for cp in os.environ.get("CASSANDRA_CONTACT_POINTS", "").split(',')
+        for cp in os.environ.get("CASSANDRA_CONTACT_POINTS", "").split(",")
         if cp.strip()
     ]
     CASSANDRA_KEYSPACE = os.environ["CASSANDRA_KEYSPACE"]
@@ -22,6 +22,8 @@ def get_cassandra_connection():
     else:
         auth_provider = None
 
-    c_cluster = Cluster(contact_points if contact_points else None, auth_provider=auth_provider)
+    c_cluster = Cluster(
+        contact_points if contact_points else None, auth_provider=auth_provider
+    )
     session = c_cluster.connect()
     return (session, CASSANDRA_KEYSPACE)
diff --git a/templates/csv-agent/csv_agent/agent.py b/templates/csv-agent/csv_agent/agent.py
@@ -1,24 +1,25 @@
-from langchain.agents import OpenAIFunctionsAgent, AgentExecutor
-from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain_experimental.tools import PythonAstREPLTool
+from pathlib import Path
+
 import pandas as pd
+from langchain.agents import AgentExecutor, OpenAIFunctionsAgent
 from langchain.chat_models import ChatOpenAI
-from langsmith import Client
-from langchain.smith import RunEvalConfig, run_on_dataset
-from pydantic import BaseModel, Field
 from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import FAISS
+from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain.tools.retriever import create_retriever_tool
-from pathlib import Path
+from langchain.vectorstores import FAISS
+from langchain_experimental.tools import PythonAstREPLTool
+from pydantic import BaseModel, Field
 
 MAIN_DIR = Path(__file__).parents[1]
 
-pd.set_option('display.max_rows', 20)
-pd.set_option('display.max_columns', 20)
+pd.set_option("display.max_rows", 20)
+pd.set_option("display.max_columns", 20)
 
 embedding_model = OpenAIEmbeddings()
 vectorstore = FAISS.load_local(MAIN_DIR / "titanic_data", embedding_model)
-retriever_tool = create_retriever_tool(vectorstore.as_retriever(), "person_name_search", "Search for a person by name")
+retriever_tool = create_retriever_tool(
+    vectorstore.as_retriever(), "person_name_search", "Search for a person by name"
+)
 
 
 TEMPLATE = """You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
@@ -41,8 +42,7 @@
 
 <question>Who has id 320</question>
 <logic>Use `python_repl` since even though the question is about a person, you don't know their name so you can't include it.</logic>
-"""
-
+"""  # noqa: E501
 
 
 class PythonInputs(BaseModel):
@@ -52,15 +52,24 @@ class PythonInputs(BaseModel):
 df = pd.read_csv("titanic.csv")
 template = TEMPLATE.format(dhead=df.head().to_markdown())
 
-prompt = ChatPromptTemplate.from_messages([
-    ("system", template),
-    MessagesPlaceholder(variable_name="agent_scratchpad"),
-    ("human", "{input}")
-])
-
-repl = PythonAstREPLTool(locals={"df": df}, name="python_repl",
-                         description="Runs code and returns the output of the final line",
-                         args_schema=PythonInputs)
+prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", template),
+        MessagesPlaceholder(variable_name="agent_scratchpad"),
+        ("human", "{input}"),
+    ]
+)
+
+repl = PythonAstREPLTool(
+    locals={"df": df},
+    name="python_repl",
+    description="Runs code and returns the output of the final line",
+    args_schema=PythonInputs,
+)
 tools = [repl, retriever_tool]
-agent = OpenAIFunctionsAgent(llm=ChatOpenAI(temperature=0, model="gpt-4"), prompt=prompt, tools=tools)
-agent_executor = AgentExecutor(agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate")
+agent = OpenAIFunctionsAgent(
+    llm=ChatOpenAI(temperature=0, model="gpt-4"), prompt=prompt, tools=tools
+)
+agent_executor = AgentExecutor(
+    agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate"
+)
diff --git a/templates/csv-agent/ingest.py b/templates/csv-agent/ingest.py
@@ -1,5 +1,4 @@
 from langchain.document_loaders import CSVLoader
-from langchain.tools.retriever import create_retriever_tool
 from langchain.indexes import VectorstoreIndexCreator
 from langchain.vectorstores import FAISS
 

diff --git a/templates/elastic-query-generator/elastic_query_generator/chain.py b/templates/elastic-query-generator/elastic_query_generator/chain.py
@@ -1,11 +1,12 @@
 import os
+from pathlib import Path
+
+from elasticsearch import Elasticsearch
 from langchain.chat_models import ChatOpenAI
 from langchain.output_parsers.json import SimpleJsonOutputParser
-from elasticsearch import Elasticsearch
-from pathlib import Path
 
-from .prompts import DSL_PROMPT
 from .elastic_index_info import get_indices_infos
+from .prompts import DSL_PROMPT
 
 es_host = os.environ["ELASTIC_SEARCH_SERVER"]
 es_password = os.environ["ELASTIC_PASSWORD"]

diff --git a/templates/elastic-query-generator/elastic_query_generator/elastic_index_info.py b/templates/elastic-query-generator/elastic_query_generator/elastic_index_info.py
@@ -1,5 +1,6 @@
 from typing import List
 
+
 def _list_indices(database, include_indices=None, ignore_indices=None) -> List[str]:
     all_indices = [
         index["index"] for index in database.cat.indices(format="json")

diff --git a/templates/elastic-query-generator/elastic_query_generator/prompts.py b/templates/elastic-query-generator/elastic_query_generator/prompts.py
@@ -16,6 +16,6 @@
 
 Question: Question here
 ESQuery: Elasticsearch Query formatted as json
-"""
+"""  # noqa: E501
 
 DSL_PROMPT = PromptTemplate.from_template(DEFAULT_DSL_TEMPLATE + PROMPT_SUFFIX)
diff --git a/templates/elastic-query-generator/ingest.py b/templates/elastic-query-generator/ingest.py
@@ -1,4 +1,5 @@
 import os
+
 from elasticsearch import Elasticsearch
 
 es_host = os.environ["ELASTIC_SEARCH_SERVER"]

diff --git a/templates/elastic-query-generator/main.py b/templates/elastic-query-generator/main.py
@@ -1,5 +1,4 @@
 from elastic_query_generator.chain import chain
 
-
 if __name__ == "__main__":
     print(chain.invoke({"input": "how many customers named Carol"}))