From 3d7b763046834f275b83e86562d0cec58c2a3ec3 Mon Sep 17 00:00:00 2001 From: Tibor Reiss Date: Sat, 15 Jun 2024 21:58:17 +0200 Subject: [PATCH] Add StuffDocumentsChain --- .../instrumentation/langchain/__init__.py | 10 +- .../test_sequential_chain.yaml | 133 ++++++++++++++++++ .../tests/conftest.py | 2 + .../tests/test_documents_chains.py | 55 ++++++++ 4 files changed, 198 insertions(+), 2 deletions(-) create mode 100644 packages/opentelemetry-instrumentation-langchain/tests/cassettes/test_documents_chains/test_sequential_chain.yaml create mode 100644 packages/opentelemetry-instrumentation-langchain/tests/test_documents_chains.py diff --git a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/__init__.py b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/__init__.py index b27578ed0..f91bef27b 100644 --- a/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/__init__.py +++ b/packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/__init__.py @@ -37,6 +37,12 @@ _instruments = ("langchain >= 0.0.346", "langchain-core > 0.1.0") WRAPPED_METHODS = [ + { + "package": "langchain.chains.base", + "class": "Chain", + "is_callback": True, + "kind": TraceloopSpanKindValues.TASK.value, + }, { "package": "langchain.chains.llm", "class": "LLMChain", @@ -44,8 +50,8 @@ "kind": TraceloopSpanKindValues.TASK.value, }, { - "package": "langchain.chains.base", - "class": "Chain", + "package": "langchain.chains.combine_documents.stuff", + "class": "StuffDocumentsChain", "is_callback": True, "kind": TraceloopSpanKindValues.TASK.value, }, diff --git a/packages/opentelemetry-instrumentation-langchain/tests/cassettes/test_documents_chains/test_sequential_chain.yaml b/packages/opentelemetry-instrumentation-langchain/tests/cassettes/test_documents_chains/test_sequential_chain.yaml new file mode 100644 index 000000000..777db5ce3 --- /dev/null +++ 
b/packages/opentelemetry-instrumentation-langchain/tests/cassettes/test_documents_chains/test_sequential_chain.yaml @@ -0,0 +1,133 @@ +interactions: +- request: + body: '{"message": "Write a concise summary of the following:\n\n\n\"Today, all + ridges and faces of the Matterhorn have been ascended in all seasons,\n and + mountain guides take a large number of people up the northeast H\u00f6rnli route\n each + summer. In total, up to 150 climbers attempt the Matterhorn each day during\n summer. + By modern standards, the climb is fairly difficult (AD Difficulty rating),\n but + not hard for skilled mountaineers according to French climbing grades. There\n are + fixed ropes on parts of the route to help. Still, it should be remembered that\n several + climbers may die on the mountain each year.\n The usual pattern of ascent + is to take the Schwarzsee cable car up from Zermatt,\n hike up to the H\u00f6rnli + Hut elev. 3,260 m (10,700 ft), a large stone building at the\n base of the + main ridge, and spend the night. The next day, climbers rise at 3:30 am\n so + as to reach the summit and descend before the regular afternoon clouds and storms\n come + in. The Solvay Hut located on the ridge at 4,003 m (13,133 ft) can be used only\n in + a case of emergency.\n Other popular routes on the mountain include the Italian + (Lion) ridge (AD+ Difficulty\n rating) and the Zmutt ridge (D Difficulty + rating). The four faces, as well as the\n Furggen ridge, constitute the most + challenging routes to the summit. 
The north face\n is amongst the six most + difficult faces of the Alps, as well as \u2018The Trilogy\u2019, the\n three + hardest of the six, along with the north faces of the Eiger and the Grandes\n Jorasses + (TD+ Difficulty rating).\"\n\n\nCONCISE SUMMARY:", "stream": false, "model": + "command", "chat_history": [], "temperature": 0.75}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '1709' + content-type: + - application/json + host: + - api.cohere.com + user-agent: + - python-httpx/0.27.0 + x-client-name: + - langchain:partner + x-fern-language: + - Python + x-fern-sdk-name: + - cohere + x-fern-sdk-version: + - 5.5.3 + method: POST + uri: https://api.cohere.com/v1/chat + response: + body: + string: "{\"response_id\":\"01829b9a-651a-46b1-9f1e-54d419e57550\",\"text\":\"The + Matterhorn, a mountain in the Alps, is a popular climbing destination, with + up to 150 climbers attempting the summit each summer day. Several climbing + routes are available, ranging from the relatively difficult H\xF6rnli route + to the more challenging Italian (Lion) ridge and Zmutt ridge. Climbers typically + spend the night at the H\xF6rnli Hut and rise early to reach the summit and + descend before afternoon clouds and storms set in. While the climb is feasible + for skilled mountaineers, multiple climber deaths occur on the mountain each + year. Overall, the Matterhorn remains a challenging and dangerous mountaineering + destination.\",\"generation_id\":\"197447c0-3166-475b-8f06-0f29f5f3b5f5\",\"chat_history\":[{\"role\":\"USER\",\"message\":\"Write + a concise summary of the following:\\n\\n\\n\\\"Today, all ridges and faces + of the Matterhorn have been ascended in all seasons,\\n and mountain guides + take a large number of people up the northeast H\xF6rnli route\\n each + summer. In total, up to 150 climbers attempt the Matterhorn each day during\\n + \ summer. 
By modern standards, the climb is fairly difficult (AD Difficulty + rating),\\n but not hard for skilled mountaineers according to French climbing + grades. There\\n are fixed ropes on parts of the route to help. Still, + it should be remembered that\\n several climbers may die on the mountain + each year.\\n The usual pattern of ascent is to take the Schwarzsee cable + car up from Zermatt,\\n hike up to the H\xF6rnli Hut elev. 3,260 m (10,700 + ft), a large stone building at the\\n base of the main ridge, and spend + the night. The next day, climbers rise at 3:30 am\\n so as to reach the + summit and descend before the regular afternoon clouds and storms\\n come + in. The Solvay Hut located on the ridge at 4,003 m (13,133 ft) can be used + only\\n in a case of emergency.\\n Other popular routes on the mountain + include the Italian (Lion) ridge (AD+ Difficulty\\n rating) and the Zmutt + ridge (D Difficulty rating). The four faces, as well as the\\n Furggen + ridge, constitute the most challenging routes to the summit. The north face\\n + \ is amongst the six most difficult faces of the Alps, as well as \u2018The + Trilogy\u2019, the\\n three hardest of the six, along with the north faces + of the Eiger and the Grandes\\n Jorasses (TD+ Difficulty rating).\\\"\\n\\n\\nCONCISE + SUMMARY:\"},{\"role\":\"CHATBOT\",\"message\":\"The Matterhorn, a mountain + in the Alps, is a popular climbing destination, with up to 150 climbers attempting + the summit each summer day. Several climbing routes are available, ranging + from the relatively difficult H\xF6rnli route to the more challenging Italian + (Lion) ridge and Zmutt ridge. Climbers typically spend the night at the H\xF6rnli + Hut and rise early to reach the summit and descend before afternoon clouds + and storms set in. While the climb is feasible for skilled mountaineers, multiple + climber deaths occur on the mountain each year. 
Overall, the Matterhorn remains + a challenging and dangerous mountaineering destination.\"}],\"finish_reason\":\"COMPLETE\",\"meta\":{\"api_version\":{\"version\":\"1\"},\"billed_units\":{\"input_tokens\":420,\"output_tokens\":124},\"tokens\":{\"input_tokens\":431,\"output_tokens\":125}}}" + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Via: + - 1.1 google + access-control-expose-headers: + - X-Debug-Trace-ID + cache-control: + - no-cache, no-store, no-transform, must-revalidate, private, max-age=0 + content-type: + - application/json + date: + - Sat, 15 Jun 2024 19:55:54 GMT + expires: + - Thu, 01 Jan 1970 00:00:00 UTC + num_chars: + - '1863' + num_tokens: + - '544' + pragma: + - no-cache + server: + - envoy + transfer-encoding: + - chunked + vary: + - Origin + x-accel-expires: + - '0' + x-debug-trace-id: + - b1ccb3e5ac3ec19b4a29046c0bd3acf5 + x-endpoint-monthly-call-limit: + - '1000' + x-envoy-upstream-service-time: + - '4356' + x-trial-endpoint-call-limit: + - '10' + x-trial-endpoint-call-remaining: + - '9' + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-langchain/tests/conftest.py b/packages/opentelemetry-instrumentation-langchain/tests/conftest.py index 05f6db0d5..1e8055ec0 100644 --- a/packages/opentelemetry-instrumentation-langchain/tests/conftest.py +++ b/packages/opentelemetry-instrumentation-langchain/tests/conftest.py @@ -40,6 +40,8 @@ def environment(): os.environ["OPENAI_API_KEY"] = "test_api_key" if not os.environ.get("ANTHROPIC_API_KEY"): os.environ["ANTHROPIC_API_KEY"] = "test" + if not os.environ.get("COHERE_API_KEY"): + os.environ["COHERE_API_KEY"] = "test" @pytest.fixture(scope="module") diff --git a/packages/opentelemetry-instrumentation-langchain/tests/test_documents_chains.py b/packages/opentelemetry-instrumentation-langchain/tests/test_documents_chains.py new file mode 100644 index 000000000..b1adbf78a --- /dev/null +++ 
b/packages/opentelemetry-instrumentation-langchain/tests/test_documents_chains.py @@ -0,0 +1,55 @@ +import json + +import pytest +from langchain.chains.summarize import load_summarize_chain +from langchain.text_splitter import CharacterTextSplitter +from langchain_cohere import ChatCohere +from opentelemetry.semconv.ai import SpanAttributes + + +# source: wikipedia +INPUT_TEXT = """ + Today, all ridges and faces of the Matterhorn have been ascended in all seasons, + and mountain guides take a large number of people up the northeast Hörnli route + each summer. In total, up to 150 climbers attempt the Matterhorn each day during + summer. By modern standards, the climb is fairly difficult (AD Difficulty rating), + but not hard for skilled mountaineers according to French climbing grades. There + are fixed ropes on parts of the route to help. Still, it should be remembered that + several climbers may die on the mountain each year. + The usual pattern of ascent is to take the Schwarzsee cable car up from Zermatt, + hike up to the Hörnli Hut elev. 3,260 m (10,700 ft), a large stone building at the + base of the main ridge, and spend the night. The next day, climbers rise at 3:30 am + so as to reach the summit and descend before the regular afternoon clouds and storms + come in. The Solvay Hut located on the ridge at 4,003 m (13,133 ft) can be used only + in a case of emergency. + Other popular routes on the mountain include the Italian (Lion) ridge (AD+ Difficulty + rating) and the Zmutt ridge (D Difficulty rating). The four faces, as well as the + Furggen ridge, constitute the most challenging routes to the summit. The north face + is amongst the six most difficult faces of the Alps, as well as ‘The Trilogy’, the + three hardest of the six, along with the north faces of the Eiger and the Grandes + Jorasses (TD+ Difficulty rating). 
+"""
+
+
+@pytest.mark.vcr
+def test_sequential_chain(exporter):
+    """Run a "stuff" summarize chain on INPUT_TEXT and check the finished spans."""
+    stuff_span_name = "StuffDocumentsChain.langchain.task"
+    documents = CharacterTextSplitter().create_documents(texts=[INPUT_TEXT])
+    model = ChatCohere(model="command", temperature=0.75)
+    load_summarize_chain(model, chain_type="stuff").run(documents)
+
+    finished = exporter.get_finished_spans()
+    # Compared as an ordered list, so the order of the finished spans matters.
+    expected_names = ["ChatCohere.langchain.task", "LLMChain.langchain.task", stuff_span_name]
+    assert expected_names == [item.name for item in finished]
+
+    stuff_span = next(item for item in finished if item.name == stuff_span_name)
+    attributes = stuff_span.attributes
+
+    recorded_input = json.loads(attributes[SpanAttributes.TRACELOOP_ENTITY_INPUT])
+    assert recorded_input["inputs"].keys() == {"input_documents"}
+    assert recorded_input["kwargs"]["name"] == "StuffDocumentsChain"
+
+    recorded_output = json.loads(attributes[SpanAttributes.TRACELOOP_ENTITY_OUTPUT])
+    assert recorded_output["outputs"].keys() == {"output_text"}