Skip to content

Commit

Permalink
Merge pull request #230 from nulib/5074-source-documents
Browse files Browse the repository at this point in the history
Add source documents (api urls) to logged metrics
  • Loading branch information
mbklein authored Jul 22, 2024
2 parents 2c467c0 + 5bd47bc commit df48b0f
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 42 deletions.
2 changes: 1 addition & 1 deletion chat/src/handlers/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# Maps a response verbosity level to the payload fields emitted for it.
# "base"  - minimal fields returned to the client
# "debug" - everything, including Azure endpoint/config internals
# "log"   - fields persisted to logged metrics (includes the source-document
#           API URLs and deployment name added for metrics logging)
# NOTE: the diff residue contained two "log" entries; only the newer one is
# kept here — a duplicate dict key would silently discard the first entry.
RESPONSE_TYPES = {
    "base": ["answer", "ref"],
    "debug": ["answer", "attributes", "azure_endpoint", "deployment_name", "is_superuser", "k", "openai_api_version", "prompt", "question", "ref", "temperature", "text_key", "token_counts"],
    "log": ["answer", "deployment_name", "is_superuser", "k", "openai_api_version", "prompt", "question", "ref", "source_documents", "temperature", "token_counts"]
}

def handler(event, context):
Expand Down
19 changes: 19 additions & 0 deletions chat/src/helpers/metrics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
import tiktoken

def debug_response(config, response, original_question):
    """Assemble the full debug payload for one chat exchange.

    Args:
        config: event configuration object (attributes, model settings,
            api_token, prompt text, etc.).
        response: the model's answer text.
        original_question: dict carrying the user question and the retrieved
            "source_documents" (each expected to have an "api_link" key —
            assumed from usage here; confirm against the retriever's output).

    Returns:
        dict of every debug/log field; RESPONSE_TYPES elsewhere selects which
        subset is actually emitted.
    """
    # Collect the API URL of each retrieved source document.
    api_links = []
    for document in original_question.get("source_documents", []):
        api_links.append(document["api_link"])

    payload = {
        "answer": response,
        "attributes": config.attributes,
        "azure_endpoint": config.azure_endpoint,
        "deployment_name": config.deployment_name,
        "is_superuser": config.api_token.is_superuser(),
        "k": config.k,
        "openai_api_version": config.openai_api_version,
        "prompt": config.prompt_text,
        "question": config.question,
        "ref": config.ref,
        "source_documents": api_links,
        "temperature": config.temperature,
        "text_key": config.text_key,
        "token_counts": token_usage(config, response, original_question),
    }
    return payload

def token_usage(config, response, original_question):
data = {
Expand Down
19 changes: 1 addition & 18 deletions chat/src/helpers/response.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from helpers.metrics import token_usage
from helpers.metrics import debug_response
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

Expand All @@ -16,23 +16,6 @@ def __init__(self, config):
self.store = {}

def debug_response_passthrough(self):
    """Return a RunnableLambda that maps a model response to the debug payload.

    The previous inline ``debug_response`` definition shadowed the shared
    ``helpers.metrics.debug_response`` (imported at the top of this module)
    and duplicated its field list while omitting ``source_documents``.
    Delegating to the module-level helper keeps the payload definition in
    exactly one place.
    """
    return RunnableLambda(lambda x: debug_response(self.config, x, self.original_question))

def original_question_passthrough(self):
Expand Down
95 changes: 72 additions & 23 deletions chat/test/helpers/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,53 +5,102 @@
sys.path.append('./src')

from unittest import TestCase, mock
from helpers.metrics import count_tokens, token_usage
from helpers.metrics import count_tokens, debug_response, token_usage
from event_config import EventConfig



@mock.patch.dict(
os.environ,
{
"AZURE_OPENAI_RESOURCE_NAME": "test",
"WEAVIATE_URL": "http://test",
"WEAVIATE_API_KEY": "test"
},
)
class TestMetrics(TestCase):
@mock.patch.dict(
    os.environ,
    {
        "AZURE_OPENAI_RESOURCE_NAME": "test",
        "WEAVIATE_URL": "http://test",
        "WEAVIATE_API_KEY": "test"
    },
)
def setUp(self):
    """Build shared fixtures: a question, five source documents, an event
    payload, the EventConfig derived from it, and a canned model response.

    NOTE(review): this span was interleaved diff residue (old ``event =`` /
    ``"k": 1`` lines mixed with the new ``self.event`` / ``"k": 5`` lines and
    a stray old ``def test_token_usage`` fragment); it is reconstructed here
    as the post-commit version, which test_debug_response's ``k == 5``
    assertion corroborates.
    """
    self.question = "What is your name?"
    # (ordinal, title word, work UUID) for each fixture source document;
    # api_link and canonical_link share the same UUID per document.
    sources = [
        ("1", "One", "881e8cae-67be-4e04-9970-7eafb52b2c5c"),
        ("2", "Two", "ac0b2a0d-8f80-420a-b1a1-63b6ac2299f1"),
        ("3", "Three", "11569bb5-1b89-4fa9-bdfb-2caf2ded5aa5"),
        ("4", "Four", "211eeeca-d56e-4c6e-9123-1612d72258f9"),
        ("5", "Five", "10e45e7a-8011-4ac5-97df-efa6a5439d0e"),
    ]
    self.original_question = {
        "question": self.question,
        "source_documents": [
            {
                "accession_number": f"SourceDoc:{num}",
                "api_link": f"https://api.dc.library.northwestern.edu/api/v2/works/{uid}",
                "canonical_link": f"https://dc.library.northwestern.edu/items/{uid}",
                "title": f"Source Document {word}!"
            }
            for num, word, uid in sources
        ],
    }
    self.event = {
        "body": json.dumps({
            "deployment_name": "test",
            "index": "test",
            "k": 5,
            "openai_api_version": "2019-05-06",
            "prompt": "This is a test prompt.",
            "question": self.question,
            "ref": "test",
            "temperature": 0.5,
            "text_key": "text",
            "auth": "test123"
        })
    }
    self.config = EventConfig(event=self.event)
    self.response = {
        "output_text": "This is a test response.",
    }

def test_debug_response(self):
    """debug_response surfaces k, question, ref, and each document's api_link."""
    payload = debug_response(self.config, self.response, self.original_question)

    expected_links = [
        "https://api.dc.library.northwestern.edu/api/v2/works/881e8cae-67be-4e04-9970-7eafb52b2c5c",
        "https://api.dc.library.northwestern.edu/api/v2/works/ac0b2a0d-8f80-420a-b1a1-63b6ac2299f1",
        "https://api.dc.library.northwestern.edu/api/v2/works/11569bb5-1b89-4fa9-bdfb-2caf2ded5aa5",
        "https://api.dc.library.northwestern.edu/api/v2/works/211eeeca-d56e-4c6e-9123-1612d72258f9",
        "https://api.dc.library.northwestern.edu/api/v2/works/10e45e7a-8011-4ac5-97df-efa6a5439d0e"
    ]

    self.assertEqual(payload["k"], 5)
    self.assertEqual(payload["question"], self.question)
    self.assertEqual(payload["ref"], "test")
    self.assertEqual(payload["source_documents"], expected_links)

result = token_usage(config, response, original_question)
def test_token_usage(self):
    """token_usage reports per-component token counts plus their total.

    The diff residue left both old and new expected values interleaved in
    one dict (duplicate keys and a missing comma after ``"total": 342`` —
    a syntax error); only the new values are kept. Sanity check:
    12 + 314 + 5 + 527 == 858.
    """
    result = token_usage(self.config, self.response, self.original_question)

    expected_result = {
        "answer": 12,
        "prompt": 314,
        "question": 5,
        "source_documents": 527,
        "total": 858
    }

    self.assertEqual(result, expected_result)
Expand Down

0 comments on commit df48b0f

Please sign in to comment.