-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #163 from nulib/prototype-streaming
Add websocket-based streaming chat to prototype
- Loading branch information
Showing
14 changed files
with
640 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -222,6 +222,7 @@ $RECYCLE.BIN/ | |
|
||
.vscode | ||
/samconfig.toml | ||
/samconfig.yaml | ||
/env.json | ||
/env.*.json | ||
/*.parameters | ||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
import boto3 | ||
import json | ||
import os | ||
import setup | ||
from helpers.apitoken import ApiToken | ||
from helpers.prompts import document_template, prompt_template | ||
from langchain.callbacks.base import BaseCallbackHandler | ||
from langchain.chains.qa_with_sources import load_qa_with_sources_chain | ||
from langchain.prompts import PromptTemplate | ||
from openai.error import InvalidRequestError | ||
|
||
# Fallbacks used when the request payload omits "index" / "text_key".
DEFAULT_INDEX = "Work"
DEFAULT_KEY = "title"

# Comma-separated attribute names used when the payload omits "attributes".
DEFAULT_ATTRIBUTES = ",".join((
    "title",
    "alternate_title",
    "collection",
    "contributor",
    "creator",
    "date_created",
    "description",
    "genre",
    "language",
    "library_unit",
    "location",
    "physical_description_material",
    "physical_description_size",
    "published",
    "rights_statement",
    "scope_and_contents",
    "series",
    "source",
    "style_period",
    "subject",
    "table_of_contents",
    "technique",
    "visibility",
    "work_type",
))
|
||
class Websocket:
    """Send JSON messages to a single API Gateway websocket connection.

    Every outgoing message is tagged with the ``ref`` supplied at
    construction time.
    """

    def __init__(self, endpoint_url, connection_id, ref):
        """Create the management-API client for ``endpoint_url``.

        Args:
            endpoint_url: Endpoint passed to the ``apigatewaymanagementapi``
                boto3 client.
            connection_id: Connection that ``send`` posts to.
            ref: Value stored under the ``'ref'`` key of every message sent.
        """
        self.client = boto3.client('apigatewaymanagementapi', endpoint_url=endpoint_url)
        self.connection_id = connection_id
        self.ref = ref

    def send(self, data):
        """Post ``data`` plus this socket's ``ref`` as UTF-8 encoded JSON.

        Args:
            data: JSON-serialisable mapping. It is NOT modified; the ``ref``
                is added to a copy so callers can safely reuse their dict.
        """
        # Build a new dict instead of writing into the caller's object.
        message = {**data, 'ref': self.ref}
        encoded = json.dumps(message).encode('utf-8')
        self.client.post_to_connection(Data=encoded, ConnectionId=self.connection_id)
|
||
class StreamingSocketCallbackHandler(BaseCallbackHandler):
    """Callback handler that forwards each new LLM token to a websocket."""

    def __init__(self, socket: Websocket):
        # Destination for every token received in on_llm_new_token.
        self.socket = socket

    def on_llm_new_token(self, token: str, **kwargs):
        """Send ``token`` to the socket as a ``{'token': ...}`` message."""
        message = {'token': token}
        self.socket.send(message)
|
||
def _requested_attributes(raw, text_key):
    """Split a comma-separated attribute string into a de-duplicated list.

    Names keep their order of first appearance so the generated document
    prompt is identical from one invocation to the next. Surrounding
    whitespace is stripped; blank entries, ``text_key`` and ``"source"`` are
    dropped (the caller adds those two back where it needs them).
    """
    stripped = (name.strip() for name in raw.split(","))
    excluded = (text_key, "source")
    return [name for name in dict.fromkeys(stripped) if name and name not in excluded]


def handler(event, context):
    """Answer a question received over a websocket, streaming the reply.

    The JSON ``body`` of ``event`` may carry ``ref``, ``auth``, ``question``,
    ``index``, ``text_key`` and ``attributes``. For an authorised request the
    socket receives, in order: a ``question``/``source_documents`` message,
    one message per generated token (via the streaming callback), and a final
    ``answer`` message. An ``InvalidRequestError`` raised while answering is
    reported to the socket as the answer text instead of propagating.

    Args:
        event: Invocation event; ``requestContext`` supplies ``connectionId``,
            ``domainName`` and ``stage``.
        context: Unused.

    Returns:
        ``{'statusCode': 200}`` on completion, or a 401 dict when the token
        in ``auth`` is not logged in.

    Raises:
        Exception: any other error is re-raised after the event is printed.
    """
    try:
        # `.get('body', '{}')` only covers a missing key; a key that is
        # present but None or empty would make json.loads fail.
        payload = json.loads(event.get('body') or '{}')

        request_context = event.get('requestContext', {})
        connection_id = request_context.get('connectionId')
        endpoint_url = f'https://{request_context.get("domainName")}/{request_context.get("stage")}'
        ref = payload.get('ref')
        socket = Websocket(connection_id=connection_id, endpoint_url=endpoint_url, ref=ref)

        api_token = ApiToken(signed_token=payload.get("auth"))
        if not api_token.is_logged_in():
            socket.send({"statusCode": 401, "body": "Unauthorized"})
            return {
                "statusCode": 401,
                "body": "Unauthorized"
            }

        question = payload.get("question")
        index_name = payload.get("index", DEFAULT_INDEX)
        text_key = payload.get("text_key", DEFAULT_KEY)

        raw_attributes = payload.get("attributes")
        if raw_attributes is None:
            raw_attributes = DEFAULT_ATTRIBUTES
        attributes = _requested_attributes(raw_attributes, text_key)

        # "source" is filtered out above and appended exactly once here.
        weaviate = setup.weaviate_vector_store(index_name=index_name,
                                               text_key=text_key,
                                               attributes=attributes + ["source"])

        client = setup.openai_chat_client(callbacks=[StreamingSocketCallbackHandler(socket)],
                                          streaming=True)

        prompt = PromptTemplate(
            template=prompt_template(),
            input_variables=["question", "context"]
        )

        document_prompt = PromptTemplate(
            template=document_template(attributes),
            input_variables=["page_content", "source"] + attributes,
        )

        docs = weaviate.similarity_search(question, k=10, additional="certainty")
        chain = load_qa_with_sources_chain(
            client,
            chain_type="stuff",
            prompt=prompt,
            document_prompt=document_prompt,
            document_variable_name="context",
            verbose=to_bool(os.getenv("VERBOSE"))
        )

        try:
            doc_response = [doc.__dict__ for doc in docs]
            socket.send({"question": question, "source_documents": doc_response})
            result = chain({"question": question, "input_documents": docs})
            socket.send({"answer": result["output_text"]})
        except InvalidRequestError as err:
            socket.send({
                "question": question,
                "answer": str(err),
                "source_documents": []
            })

        return {'statusCode': 200}
    except Exception:
        # NOTE(review): the printed event includes the request body, and
        # therefore the caller's "auth" value — consider redacting it.
        print(event)
        raise
|
||
def to_bool(val):
    """Interpret ``val`` as a boolean.

    Strings are false only when, ignoring case, they are one of ``""``,
    ``"no"``, ``"false"`` or ``"0"``; every other string is true. Non-string
    values use ordinary truthiness.
    """
    if not isinstance(val, str):
        return bool(val)
    falsy_words = ("", "no", "false", "0")
    return val.lower() not in falsy_words
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from datetime import datetime | ||
import jwt | ||
import os | ||
|
||
class ApiToken:
    """Decoded API token claims, with an anonymous fallback.

    ``self.token`` holds the claims of a successfully decoded HS256 JWT, or
    the result of ``empty_token()`` when no token is given or decoding fails.
    """

    @classmethod
    def empty_token(cls):
        """Return claims for a not-logged-in token valid for 12 hours."""
        issued_at = int(datetime.now().timestamp())
        lifetime = 12 * 60 * 60  # 12 hours
        # The datetime round trip yields a float for "exp".
        expires_at = datetime.fromtimestamp(issued_at + lifetime).timestamp()
        return {
            'iss': os.getenv('DC_API_ENDPOINT'),
            'exp': expires_at,
            'iat': issued_at,
            'entitlements': [],
            'isLoggedIn': False,
        }

    def __init__(self, signed_token=None):
        """Decode ``signed_token`` with the ``API_TOKEN_SECRET`` env secret.

        Any failure while decoding leaves the instance with the empty,
        not-logged-in token instead of raising.
        """
        if signed_token is not None:
            try:
                secret = os.getenv("API_TOKEN_SECRET")
                self.token = jwt.decode(signed_token, secret, algorithms=["HS256"])
                return
            except Exception:
                # Deliberate: an undecodable token is treated as anonymous.
                pass
        self.token = ApiToken.empty_token()

    def is_logged_in(self):
        """Return the token's ``isLoggedIn`` claim, or ``False`` if absent."""
        return self.token.get("isLoggedIn", False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
# ruff: noqa: E501 | ||
def prompt_template():
    """Return the question-answering prompt template.

    The template holds the instructions, one worked example (four source
    documents, a question and its answer), and two placeholders to be filled
    by the caller: ``{context}`` and ``{question}``. Literal braces inside
    the example are doubled so they survive ``str.format``-style rendering.
    """
    template = """Using all of the provided source documents, create a helpful and thorough answer to the supplied question.
If you don't know the answer, just say that you don't know. Don't try to make up an answer, but you should use the documents provided in order to ground your response.
It may be helpful to explain why a provided document does not pertain to the query as well.
Feel free to reference various aspects of the sources in your explanation, but please don't include the full sources in the answer.
The Content field represents the title of each document, and the Metadata fields are the attributes. The Source field is the unique identifier for each document.
'certainty' is an opinionated measure of the distance between the query vector and the document embedding vector. Certainty always returns a number between 0 and 1, with 1 indicating identical vectors and 0 indicating opposing angles.
Content: Purchase order and note
Metadata:
_additional: {{'certainty': 0.8744078576564789, 'id': '29389b8d-a85d-46d1-9a6d-a738c6f81c88'}}
alternate_title: None
collection: Berkeley Folk Music Festival
contributor: ['University of California, Berkeley. Associated Students', 'Berkeley Folk Music Festival']
creator: None
date_created: ['October 7, 1970', '1970?']
description: ['Purchase order for costs related to security for the 1970 Berkeley Folk Music Festival and a handwritten note containing calculations and the heading "Police"']
genre: ['notes (documents)', 'purchase orders']
language: ['English']
library_unit: Charles Deering McCormick Library of Special Collections
location: None
physical_description_material: None
physical_description_size: ['5 inches (height) x 3 inches (width)', '7 inches (height) x 8.5 inches (width)']
published: True
rights_statement: In Copyright
scope_and_contents: None
series: ['Berkeley Folk Music Festival Archive--3. Festivals: Records, Budgets, Publicity']
source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88
style_period: None
subject: ['Berkeley Folk Music Festival (15th : 1970 : Berkeley, Calif.)']
table_of_contents: None
technique: None
visibility: Public
work_type: Image
Source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88
Content: Berkeley Folk Music Festival, 1966 June 26-30
Metadata:
_additional: {{'certainty': 0.869585394859314, 'id': '477e3f63-fc06-4bfc-8734-0b6100c0d1c3'}}
alternate_title: None
collection: Berkeley Folk Music Festival
contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students']
creator: None
date_created: ['1966']
description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.']
genre: ['posters']
language: ['English']
library_unit: Charles Deering McCormick Library of Special Collections
location: None
physical_description_material: None
physical_description_size: ['12.75 inches (height) x 12.75 inches (width)']
published: True
rights_statement: In Copyright
scope_and_contents: None
series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters']
source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3
style_period: None
subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Hawes, Bess Lomax, 1921-2009']
table_of_contents: None
technique: None
visibility: Public
work_type: Image
Source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3
Content: Berkeley Folk Music Festival, 1966 June 26-30
Metadata:
_additional: {{'certainty': 0.8694239258766174, 'id': 'bddeb375-762b-45e3-9e4e-5a4084ac5955'}}
alternate_title: None
collection: Berkeley Folk Music Festival
contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students']
creator: None
date_created: ['1966']
description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.']
genre: ['posters']
language: ['English']
library_unit: Charles Deering McCormick Library of Special Collections
location: None
physical_description_material: None
physical_description_size: ['13.75 inches (height) x 21.75 inches (width)']
published: True
rights_statement: In Copyright
scope_and_contents: None
series: ['Berkeley Folk Music Festival Archive--9. Posters of Berkeley Folk Music Festivals']
source: bddeb375-762b-45e3-9e4e-5a4084ac5955
style_period: None
subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival']
table_of_contents: None
technique: None
visibility: Public
work_type: Image
Source: bddeb375-762b-45e3-9e4e-5a4084ac5955
Content: Berkeley Folk Music Festival, 1966 June 30-July 4
Metadata:
_additional: {{'certainty': 0.8693937957286835, 'id': 'aab0bb76-ab02-429a-843a-5be56e31ba67'}}
alternate_title: None
collection: Berkeley Folk Music Festival
contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students']
creator: None
date_created: ['1966']
description: ['Poster for the 9th Annual Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger. Originally found in box 28, folder 3.']
genre: ['posters']
language: ['English']
library_unit: Charles Deering McCormick Library of Special Collections
location: None
physical_description_material: None
physical_description_size: ['24.25 inches (height) x 37.5 inches (width)']
published: True
rights_statement: In Copyright
scope_and_contents: None
series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters']
source: aab0bb76-ab02-429a-843a-5be56e31ba67
style_period: None
subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival']
table_of_contents: None
technique: None
visibility: Public
work_type: Image
Source: aab0bb76-ab02-429a-843a-5be56e31ba67
QUESTION: Which musicians played at the Berkeley Folk Music Festival?
HELPFUL ANSWER: For the 1966 Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, the following musicians and groups were listed as performers:
Pete Seeger
Jefferson Airplane
Sam Hinton
Greenbriar Boys
Shlomo Carlebach
John Fahey
Los Halcones de Salitrillos
Charley Marshall
Phil Ochs
Ralph J. Gleason
Malvina Reynolds
Robert Pete Williams
Alice Stuart Thomas
Bess Lomax Hawes
Charles Seeger
Unfortunately, the documents provided do not include information about musicians who performed at the Berkeley Folk Music Festival in other years during the 1960s or 1970s. Therefore, I can only confirm the musicians for the 1966 festival.
{context}
QUESTION: {question}
=========
HELPFUL ANSWER:"""
    return template
|
||
def document_template(attributes):
    """Build the per-document prompt template.

    The result has a ``{page_content}`` line, a ``Metadata:`` heading, one
    ``name: {name}`` placeholder line per entry of ``attributes``, and a
    closing ``{source}`` line, joined with newlines.
    """
    parts = ["Content: {page_content}", "Metadata:"]
    parts.extend(" {0}: {{{0}}}".format(name) for name in attributes)
    parts.append("Source: {source}")
    return "\n".join(parts)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
langchain~=0.0.208 | ||
nbformat~=5.9.0 | ||
openai~=0.27.8 | ||
pandas~=2.0.2 | ||
pyjwt~=2.6.0 | ||
python-dotenv~=1.0.0 | ||
tiktoken~=0.4.0 | ||
weaviate-client~=3.19.2 | ||
wheel~=0.40.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import os
from typing import List, Optional

import weaviate
from langchain.chat_models import AzureChatOpenAI
from langchain.vectorstores import Weaviate
|
||
def openai_chat_client(**kwargs):
    """Create an ``AzureChatOpenAI`` client configured from the environment.

    Reads ``AZURE_OPENAI_LLM_DEPLOYMENT_ID``, ``AZURE_OPENAI_API_KEY`` and
    ``AZURE_OPENAI_RESOURCE_NAME``; the API version is fixed. Any extra
    keyword arguments are forwarded to the ``AzureChatOpenAI`` constructor.
    """
    resource_name = os.getenv("AZURE_OPENAI_RESOURCE_NAME")
    api_base = f"https://{resource_name}.openai.azure.com/"
    return AzureChatOpenAI(
        deployment_name=os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_ID"),
        openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        openai_api_base=api_base,
        openai_api_version="2023-07-01-preview",
        **kwargs,
    )
|
||
|
||
|
||
def weaviate_vector_store(index_name: str, text_key: str,
                          attributes: Optional[List[str]] = None):
    """Build a LangChain ``Weaviate`` vector store using API-key auth.

    Args:
        index_name: Passed through as the store's ``index_name``.
        text_key: Passed through as the store's ``text_key``.
        attributes: Extra attribute names for the store. ``None`` (the
            default) means none; the list is copied, so neither a shared
            default nor the caller's list is handed to the store.

    Returns:
        The configured ``Weaviate`` vector store.

    Raises:
        KeyError: if ``WEAVIATE_URL`` or ``WEAVIATE_API_KEY`` is not set.
    """
    weaviate_url = os.environ['WEAVIATE_URL']
    weaviate_api_key = os.environ['WEAVIATE_API_KEY']

    auth_config = weaviate.AuthApiKey(api_key=weaviate_api_key)
    client = weaviate.Client(
        url=weaviate_url,
        auth_client_secret=auth_config
    )
    return Weaviate(client=client,
                    index_name=index_name,
                    text_key=text_key,
                    attributes=[] if attributes is None else list(attributes))
Oops, something went wrong.