From 1cba4ee2f77ab6350473b127ea5074305f88a2ad Mon Sep 17 00:00:00 2001 From: "Michael B. Klein" Date: Thu, 31 Aug 2023 19:30:58 +0000 Subject: [PATCH] Integrate chat websocket stack --- .gitignore | 1 + chat/src/__init__.py | 0 chat/src/handlers/chat.py | 117 ++++++++++ chat/src/helpers/apitoken.py | 28 +++ chat/src/helpers/prompts.py | 153 ++++++++++++ chat/src/requirements.txt | 9 + chat/src/setup.py | 35 +++ chat/template.yaml | 221 ++++++++++++++++++ node/src/handlers/get-chat-endpoint.js | 21 ++ .../integration/get-chat-endpoint.test.js | 32 +++ node/test/test-helpers/index.js | 1 + python/src/handlers/chat.py | 14 +- python/src/helpers/prompts.py | 44 ---- template.yaml | 23 +- 14 files changed, 641 insertions(+), 58 deletions(-) create mode 100644 chat/src/__init__.py create mode 100644 chat/src/handlers/chat.py create mode 100644 chat/src/helpers/apitoken.py create mode 100644 chat/src/helpers/prompts.py create mode 100644 chat/src/requirements.txt create mode 100644 chat/src/setup.py create mode 100644 chat/template.yaml create mode 100644 node/src/handlers/get-chat-endpoint.js create mode 100644 node/test/integration/get-chat-endpoint.test.js diff --git a/.gitignore b/.gitignore index cfbf6386..6c650ad7 100644 --- a/.gitignore +++ b/.gitignore @@ -222,6 +222,7 @@ $RECYCLE.BIN/ .vscode /samconfig.toml +/samconfig.yaml /env.json /env.*.json /*.parameters diff --git a/chat/src/__init__.py b/chat/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/chat/src/handlers/chat.py b/chat/src/handlers/chat.py new file mode 100644 index 00000000..ff916767 --- /dev/null +++ b/chat/src/handlers/chat.py @@ -0,0 +1,117 @@ +import boto3 +import json +import os +import setup +from helpers.apitoken import ApiToken +from helpers.prompts import document_template, prompt_template +from langchain.callbacks.base import BaseCallbackHandler +from langchain.chains.qa_with_sources import load_qa_with_sources_chain +from langchain.prompts import PromptTemplate +from openai.error import InvalidRequestError + +DEFAULT_INDEX = "Work" +DEFAULT_KEY = "title" +DEFAULT_ATTRIBUTES = ("title,alternate_title,collection,contributor,creator," + "date_created,description,genre,language,library_unit," + "location,physical_description_material,physical_description_size," + "published,rights_statement,scope_and_contents,series,source," + "style_period,subject,table_of_contents,technique,visibility," + "work_type") + +class Websocket: + def __init__(self, endpoint_url, connection_id, ref): + self.client = boto3.client('apigatewaymanagementapi', endpoint_url=endpoint_url) + self.connection_id = connection_id + self.ref = ref + + def send(self, data): + data['ref'] = self.ref + data_as_bytes = bytes(json.dumps(data), 'utf-8') + self.client.post_to_connection(Data=data_as_bytes, ConnectionId=self.connection_id) + +class StreamingSocketCallbackHandler(BaseCallbackHandler): + def __init__(self, socket: Websocket): + self.socket = socket + + def on_llm_new_token(self, token: str, **kwargs): + self.socket.send({'token': token}); + +def handler(event, context): + try: + payload = json.loads(event.get('body', '{}')) + + request_context = event.get('requestContext', {}) + connection_id = request_context.get('connectionId') + endpoint_url = f'https://{request_context.get("domainName")}/{request_context.get("stage")}' + ref = payload.get('ref') + socket = Websocket(connection_id=connection_id, endpoint_url=endpoint_url, ref=ref) + + + api_token = ApiToken(signed_token=payload.get("auth")) + if not api_token.is_logged_in(): + socket.send({ "statusCode": 401, "body": "Unauthorized" }) + return { + "statusCode": 401, + "body": "Unauthorized" + } + + question = payload.get("question") + index_name = payload.get("index", DEFAULT_INDEX) + text_key = payload.get("text_key", DEFAULT_KEY) + attributes = [ + item for item + in set(payload.get("attributes", DEFAULT_ATTRIBUTES).split(",")) + if item not in [text_key, "source"] + ] + + weaviate = setup.weaviate_vector_store(index_name=index_name, + text_key=text_key, + attributes=attributes + ["source"]) + + client = setup.openai_chat_client(callbacks=[StreamingSocketCallbackHandler(socket)], streaming=True) + + prompt = PromptTemplate( + template=prompt_template(), + input_variables=["question", "context"] + ) + + document_prompt = PromptTemplate( + template=document_template(attributes), + input_variables=["page_content", "source"] + attributes, + ) + + docs = weaviate.similarity_search(question, k=10, additional="certainty") + chain = load_qa_with_sources_chain( + client, + chain_type="stuff", + prompt=prompt, + document_prompt=document_prompt, + document_variable_name="context", + verbose=to_bool(os.getenv("VERBOSE")) + ) + + try: + doc_response = [doc.__dict__ for doc in docs] + socket.send({"question": question, "source_documents": doc_response}) + response = chain({"question": question, "input_documents": docs}) + response = { + "answer": response["output_text"], + } + socket.send(response) + except InvalidRequestError as err: + response = { + "question": question, + "answer": str(err), + "source_documents": [] + } + socket.send(response) + + return {'statusCode': 200} + except Exception as err: + print(event) + raise err + +def to_bool(val): + if isinstance(val, str): + return val.lower() not in ["", "no", "false", "0"] + return bool(val) diff --git a/chat/src/helpers/apitoken.py b/chat/src/helpers/apitoken.py new file mode 100644 index 00000000..4c6ecbd4 --- /dev/null +++ b/chat/src/helpers/apitoken.py @@ -0,0 +1,28 @@ +from datetime import datetime +import jwt +import os + +class ApiToken: + @classmethod + def empty_token(cls): + time = int(datetime.now().timestamp()) + return { + 'iss': os.getenv('DC_API_ENDPOINT'), + 'exp': datetime.fromtimestamp(time + 12 * 60 * 60).timestamp(), # 12 hours + 'iat': time, + 'entitlements': [], + 'isLoggedIn': False, + } + + def __init__(self, signed_token=None): + if signed_token is None: + self.token = ApiToken.empty_token() + else: + try: + secret = os.getenv("API_TOKEN_SECRET") + self.token = jwt.decode(signed_token, secret, algorithms=["HS256"]) + except Exception: + self.token = ApiToken.empty_token() + + def is_logged_in(self): + return self.token.get("isLoggedIn", False) diff --git a/chat/src/helpers/prompts.py b/chat/src/helpers/prompts.py new file mode 100644 index 00000000..b79510e8 --- /dev/null +++ b/chat/src/helpers/prompts.py @@ -0,0 +1,153 @@ +# ruff: noqa: E501 +def prompt_template(): + return """Using all of the provided source documents, create a helpful and thorough answer to the supplied question. + If you don't know the answer, just say that you don't know. Don't try to make up an answer, but you should use the documents provided in order to ground your response. + It may be helpful to explain why a provided document does not pertain to the query as well. + Feel free to reference various aspects of the sources in your explanation, but please don't include the full sources in the answer. + The Content field represents the title of each document, and the Metadata fields are the attributes. The Source field is the unique identifier for each document. + 'certainty' is an opinionated measure of the distance between the query vector and the document embedding vector. Certainty always returns a number between 0 and 1, with 1 indicating identical vectors and 0 indicating opposing angles. + + Content: Purchase order and note + Metadata: + _additional: {{'certainty': 0.8744078576564789, 'id': '29389b8d-a85d-46d1-9a6d-a738c6f81c88'}} + alternate_title: None + collection: Berkeley Folk Music Festival + contributor: ['University of California, Berkeley. Associated Students', 'Berkeley Folk Music Festival'] + creator: None + date_created: ['October 7, 1970', '1970?'] + description: ['Purchase order for costs related to security for the 1970 Berkeley Folk Music Festival and a handwritten note containing calculations and the heading "Police"'] + genre: ['notes (documents)', 'purchase orders'] + language: ['English'] + library_unit: Charles Deering McCormick Library of Special Collections + location: None + physical_description_material: None + physical_description_size: ['5 inches (height) x 3 inches (width)', '7 inches (height) x 8.5 inches (width)'] + published: True + rights_statement: In Copyright + scope_and_contents: None + series: ['Berkeley Folk Music Festival Archive--3. Festivals: Records, Budgets, Publicity'] + source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88 + style_period: None + subject: ['Berkeley Folk Music Festival (15th : 1970 : Berkeley, Calif.)'] + table_of_contents: None + technique: None + visibility: Public + work_type: Image + Source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88 + + Content: Berkeley Folk Music Festival, 1966 June 26-30 + Metadata: + _additional: {{'certainty': 0.869585394859314, 'id': '477e3f63-fc06-4bfc-8734-0b6100c0d1c3'}} + alternate_title: None + collection: Berkeley Folk Music Festival + contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] + creator: None + date_created: ['1966'] + description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.'] + genre: ['posters'] + language: ['English'] + library_unit: Charles Deering McCormick Library of Special Collections + location: None + physical_description_material: None + physical_description_size: ['12.75 inches (height) x 12.75 inches (width)'] + published: True + rights_statement: In Copyright + scope_and_contents: None + series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters'] + source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3 + style_period: None + subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Hawes, Bess Lomax, 1921-2009'] + table_of_contents: None + technique: None + visibility: Public + work_type: Image + Source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3 + + Content: Berkeley Folk Music Festival, 1966 June 26-30 + Metadata: + _additional: {{'certainty': 0.8694239258766174, 'id': 'bddeb375-762b-45e3-9e4e-5a4084ac5955'}} + alternate_title: None + collection: Berkeley Folk Music Festival + contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] + creator: None + date_created: ['1966'] + description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.'] + genre: ['posters'] + language: ['English'] + library_unit: Charles Deering McCormick Library of Special Collections + location: None + physical_description_material: None + physical_description_size: ['13.75 inches (height) x 21.75 inches (width)'] + published: True + rights_statement: In Copyright + scope_and_contents: None + series: ['Berkeley Folk Music Festival Archive--9. Posters of Berkeley Folk Music Festivals'] + source: bddeb375-762b-45e3-9e4e-5a4084ac5955 + style_period: None + subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival'] + table_of_contents: None + technique: None + visibility: Public + work_type: Image + Source: bddeb375-762b-45e3-9e4e-5a4084ac5955 + + Content: Berkeley Folk Music Festival, 1966 June 30-July 4 + Metadata: + _additional: {{'certainty': 0.8693937957286835, 'id': 'aab0bb76-ab02-429a-843a-5be56e31ba67'}} + alternate_title: None + collection: Berkeley Folk Music Festival + contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] + creator: None + date_created: ['1966'] + description: ['Poster for the 9th Annual Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger. Originally found in box 28, folder 3.'] + genre: ['posters'] + language: ['English'] + library_unit: Charles Deering McCormick Library of Special Collections + location: None + physical_description_material: None + physical_description_size: ['24.25 inches (height) x 37.5 inches (width)'] + published: True + rights_statement: In Copyright + scope_and_contents: None + series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters'] + source: aab0bb76-ab02-429a-843a-5be56e31ba67 + style_period: None + subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival'] + table_of_contents: None + technique: None + visibility: Public + work_type: Image + Source: aab0bb76-ab02-429a-843a-5be56e31ba67 + + QUESTION: Which musicians played at the Berkeley Folk Music Festival? + HELPFUL ANSWER: For the 1966 Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, the following musicians and groups were listed as performers: + + Pete Seeger + Jefferson Airplane + Sam Hinton + Greenbriar Boys + Shlomo Carlebach + John Fahey + Los Halcones de Salitrillos + Charley Marshall + Phil Ochs + Ralph J. Gleason + Malvina Reynolds + Robert Pete Williams + Alice Stuart Thomas + Bess Lomax Hawes + Charles Seeger + + Unfortunately, the documents provided do not include information about musicians who performed at the Berkeley Folk Music Festival in other years during the 1960s or 1970s. Therefore, I can only confirm the musicians for the 1966 festival. + + {context} + + QUESTION: {question} + ========= + HELPFUL ANSWER:""" + +def document_template(attributes): + lines = (["Content: {page_content}", "Metadata:"] + + [f" {attribute}: {{{attribute}}}" for attribute in attributes] + + ["Source: {source}"]) + return "\n".join(lines) diff --git a/chat/src/requirements.txt b/chat/src/requirements.txt new file mode 100644 index 00000000..68ec56c3 --- /dev/null +++ b/chat/src/requirements.txt @@ -0,0 +1,9 @@ +langchain~=0.0.208 +nbformat~=5.9.0 +openai~=0.27.8 +pandas~=2.0.2 +pyjwt~=2.6.0 +python-dotenv~=1.0.0 +tiktoken~=0.4.0 +weaviate-client~=3.19.2 +wheel~=0.40.0 \ No newline at end of file diff --git a/chat/src/setup.py b/chat/src/setup.py new file mode 100644 index 00000000..184b856b --- /dev/null +++ b/chat/src/setup.py @@ -0,0 +1,35 @@ +from langchain.chat_models import AzureChatOpenAI +from langchain.vectorstores import Weaviate +from typing import List +import os +import weaviate + +def openai_chat_client(**kwargs): + deployment = os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_ID") + key = os.getenv("AZURE_OPENAI_API_KEY") + resource = os.getenv("AZURE_OPENAI_RESOURCE_NAME") + version = "2023-07-01-preview" + + return AzureChatOpenAI(deployment_name=deployment, + openai_api_key=key, + openai_api_base=f"https://{resource}.openai.azure.com/", + openai_api_version=version, + **kwargs) + + + +def weaviate_vector_store(index_name: str, text_key: str, attributes: List[str] = []): + weaviate_url = os.environ['WEAVIATE_URL'] + weaviate_api_key = os.environ['WEAVIATE_API_KEY'] + # openai_api_key = os.environ['AZURE_OPENAI_API_KEY'] + + auth_config = weaviate.AuthApiKey(api_key=weaviate_api_key) + + client = weaviate.Client( + url=weaviate_url, + auth_client_secret=auth_config + ) + return Weaviate(client=client, + index_name=index_name, + text_key=text_key, + attributes=attributes) diff --git a/chat/template.yaml b/chat/template.yaml new file mode 100644 index 00000000..f97e3e16 --- /dev/null +++ b/chat/template.yaml @@ -0,0 +1,221 @@ +AWSTemplateFormatVersion: "2010-09-09" +Transform: AWS::Serverless-2016-10-31 +Description: Websocket Chat API for dc-api-v2 +Parameters: + ApiTokenSecret: + Type: String + Description: Secret Key for Encrypting JWTs (must match IIIF server) + AzureOpenaiApiKey: + Type: String + Description: Azure OpenAI API Key + AzureOpenaiEmbeddingDeploymentId: + Type: String + Description: Azure OpenAI Embedding Deployment ID + AzureOpenaiLlmDeploymentId: + Type: String + Description: Azure OpenAI LLM Deployment ID + AzureOpenaiResourceName: + Type: String + Description: Azure OpenAI Resource Name + WeaviateApiKey: + Type: String + Description: Weaviate API Key + WeaviateUrl: + Type: String + Description: Weaviate URL +Resources: + ApiGwAccountConfig: + Type: "AWS::ApiGateway::Account" + Properties: + CloudWatchRoleArn: !GetAtt "ApiGatewayLoggingRole.Arn" + ApiGatewayLoggingRole: + Type: "AWS::IAM::Role" + Properties: + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: + Service: + - "apigateway.amazonaws.com" + Action: "sts:AssumeRole" + Path: "/" + ManagedPolicyArns: + - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AmazonAPIGatewayPushToCloudWatchLogs" + ChatWebSocket: + Type: AWS::ApiGatewayV2::Api + Properties: + Name: ChatWebSocket + ProtocolType: WEBSOCKET + RouteSelectionExpression: "$request.body.message" + ConnectRoute: + Type: AWS::ApiGatewayV2::Route + Properties: + ApiId: !Ref ChatWebSocket + RouteKey: $connect + RouteResponseSelectionExpression: '$default' + AuthorizationType: NONE + ApiKeyRequired: false + OperationName: ConnectRoute + Target: !Sub 'integrations/${ConnectInteg}' + ConnectInteg: + Type: AWS::ApiGatewayV2::Integration + Properties: + ApiId: !Ref ChatWebSocket + Description: Connect Integration + IntegrationType: MOCK + RequestTemplates: + "200" : '{"statusCode" : 200}' + TemplateSelectionExpression: '200' + PassthroughBehavior: 'WHEN_NO_MATCH' + ConnectIntegResp: + Type: AWS::ApiGatewayV2::IntegrationResponse + Properties: + ApiId: !Ref ChatWebSocket + IntegrationId: !Ref ConnectInteg + IntegrationResponseKey: '$default' + ResponseTemplates: + "200" : '{"statusCode" : 200}' + ConnectRouteResponse: + Type: AWS::ApiGatewayV2::RouteResponse + Properties: + RouteId: !Ref ConnectRoute + ApiId: !Ref ChatWebSocket + RouteResponseKey: $default + DisconnectRoute: + Type: AWS::ApiGatewayV2::Route + Properties: + ApiId: !Ref ChatWebSocket + RouteKey: $disconnect + RouteResponseSelectionExpression: '$default' + AuthorizationType: NONE + OperationName: DisconnectRoute + Target: !Sub 'integrations/${DisconnectInteg}' + DisconnectInteg: + Type: AWS::ApiGatewayV2::Integration + Properties: + ApiId: !Ref ChatWebSocket + Description: Disconnect Integration + IntegrationType: MOCK + RequestTemplates: + "200" : '{"statusCode" : 200}' + TemplateSelectionExpression: '200' + PassthroughBehavior: 'WHEN_NO_MATCH' + DisconnectIntegResp: + Type: AWS::ApiGatewayV2::IntegrationResponse + Properties: + ApiId: !Ref ChatWebSocket + IntegrationId: !Ref DisconnectInteg + IntegrationResponseKey: '$default' + ResponseTemplates: + "200" : '{"statusCode" : 200}' + DisconnectRouteResponse: + Type: AWS::ApiGatewayV2::RouteResponse + Properties: + RouteId: !Ref DisconnectRoute + ApiId: !Ref ChatWebSocket + RouteResponseKey: $default + DefaultRoute: + Type: AWS::ApiGatewayV2::Route + Properties: + ApiId: !Ref ChatWebSocket + RouteKey: $default + RouteResponseSelectionExpression: '$default' + AuthorizationType: NONE + OperationName: DefaultRoute + Target: !Sub 'integrations/${DefaultInteg}' + DefaultInteg: + Type: AWS::ApiGatewayV2::Integration + Properties: + ApiId: !Ref ChatWebSocket + Description: Default Integration + IntegrationType: MOCK + RequestTemplates: + "200" : '{"statusCode" : 200}' + TemplateSelectionExpression: '200' + DefaultIntegResp: + Type: AWS::ApiGatewayV2::IntegrationResponse + Properties: + ApiId: !Ref ChatWebSocket + IntegrationId: !Ref DefaultInteg + IntegrationResponseKey: $default + ResponseTemplates: + "200" : '{"statusCode" : 200, "connectionId" : "$context.connectionId"}' + TemplateSelectionExpression: '200' + DefaultRouteResponse: + Type: AWS::ApiGatewayV2::RouteResponse + Properties: + RouteId: !Ref DefaultRoute + ApiId: !Ref ChatWebSocket + RouteResponseKey: $default + ChatRoute: + Type: AWS::ApiGatewayV2::Route + Properties: + ApiId: !Ref ChatWebSocket + RouteKey: chat + AuthorizationType: NONE + OperationName: ChatRoute + Target: !Sub 'integrations/${ChatIntegration}' + ChatIntegration: + Type: AWS::ApiGatewayV2::Integration + Properties: + ApiId: !Ref ChatWebSocket + Description: Chat Integration + IntegrationType: AWS_PROXY + IntegrationUri: !Sub "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/${ChatFunction.Arn}/invocations" + ChatPermission: + Type: AWS::Lambda::Permission + DependsOn: + - ChatWebSocket + Properties: + Action: lambda:InvokeFunction + FunctionName: !Ref ChatFunction + Principal: apigateway.amazonaws.com + ChatFunction: + Type: AWS::Serverless::Function + Properties: + CodeUri: ./src + Runtime: python3.9 + Architectures: + - x86_64 + MemorySize: 128 + Handler: handlers/chat.handler + Timeout: 300 + Environment: + Variables: + API_TOKEN_SECRET: !Ref ApiTokenSecret + AZURE_OPENAI_API_KEY: !Ref AzureOpenaiApiKey + AZURE_OPENAI_EMBEDDING_DEPLOYMENT_ID: !Ref AzureOpenaiEmbeddingDeploymentId + AZURE_OPENAI_LLM_DEPLOYMENT_ID: !Ref AzureOpenaiLlmDeploymentId + AZURE_OPENAI_RESOURCE_NAME: !Ref AzureOpenaiResourceName + WEAVIATE_API_KEY: !Ref WeaviateApiKey + WEAVIATE_URL: !Ref WeaviateUrl + Policies: + - Statement: + - Effect: Allow + Action: + - 'execute-api:ManageConnections' + Resource: + - !Sub 'arn:aws:execute-api:${AWS::Region}:${AWS::AccountId}:${ChatWebSocket}/*' + Deployment: + Type: AWS::ApiGatewayV2::Deployment + DependsOn: + - ConnectRoute + - DisconnectRoute + - DefaultRoute + - ChatRoute + Properties: + ApiId: !Ref ChatWebSocket + Stage: + Type: AWS::ApiGatewayV2::Stage + Properties: + StageName: latest + DeploymentId: !Ref Deployment + ApiId: !Ref ChatWebSocket + DefaultRouteSettings: + DetailedMetricsEnabled: true + LoggingLevel: INFO +Outputs: + WebSocketURI: + Description: "The WSS Protocol URI to connect to" + Value: !Sub 'wss://${ChatWebSocket}.execute-api.${AWS::Region}.amazonaws.com/${Stage}' diff --git a/node/src/handlers/get-chat-endpoint.js b/node/src/handlers/get-chat-endpoint.js new file mode 100644 index 00000000..c40ee77b --- /dev/null +++ b/node/src/handlers/get-chat-endpoint.js @@ -0,0 +1,21 @@ +const { wrap } = require("./middleware"); + +const handler = wrap(async (event) => { + if (!event.userToken.isLoggedIn()) { + return { + statusCode: 401, + headers: { "Content-Type": "text/plain" }, + body: "Authorization Required", + }; + } + + return { + statusCode: 200, + body: JSON.stringify({ + endpoint: process.env.WEBSOCKET_URI, + auth: event.userToken.sign(), + }), + }; +}); + +module.exports = { handler }; diff --git a/node/test/integration/get-chat-endpoint.test.js b/node/test/integration/get-chat-endpoint.test.js new file mode 100644 index 00000000..38e3aea7 --- /dev/null +++ b/node/test/integration/get-chat-endpoint.test.js @@ -0,0 +1,32 @@ +"use strict"; + +const chai = require("chai"); +const expect = chai.expect; +const jwt = require("jsonwebtoken"); + +const getChatEndpointHandler = requireSource("handlers/get-chat-endpoint"); +const ApiToken = requireSource("api/api-token"); + +describe("GET /chat-endpoint", function () { + helpers.saveEnvironment(); + + it("returns the websocket URI and token to a logged in user", async () => { + const token = new ApiToken().user({ uid: "abc123" }).sign(); + + const event = helpers + .mockEvent("GET", "/chat-endpoint") + .headers({ + Authorization: `Bearer ${token}`, + }) + .render(); + + const result = await getChatEndpointHandler.handler(event); + + expect(result.statusCode).to.eq(200); + const response = JSON.parse(result.body); + expect(response).to.contain({ + endpoint: "wss://thisisafakewebsocketapiurl", + auth: token, + }); + }); +}); diff --git a/node/test/test-helpers/index.js b/node/test/test-helpers/index.js index a90397fb..8045b1ed 100644 --- a/node/test/test-helpers/index.js +++ b/node/test/test-helpers/index.js @@ -13,6 +13,7 @@ const TestEnvironment = { DC_API_ENDPOINT: "https://thisisafakeapiurl", NUSSO_BASE_URL: "https://nusso-base.com/", NUSSO_API_KEY: "abc123", + WEBSOCKET_URI: "wss://thisisafakewebsocketapiurl", }; for (const v in TestEnvironment) delete process.env[v]; diff --git a/python/src/handlers/chat.py b/python/src/handlers/chat.py index d16aa5f6..fd8703e4 100644 --- a/python/src/handlers/chat.py +++ b/python/src/handlers/chat.py @@ -16,14 +16,12 @@ "X-Requested-With") DEFAULT_INDEX = "Work" DEFAULT_KEY = "title" -DEFAULT_ATTRIBUTES = ("title,accession_number,alternate_title,api_model," - "catalog_key,collection,contributor,create_date," - "creator,date_created,description,genre,identifier_descriptive," - "keywords,language,library_unit,location,physical_description_material," - "physical_description_size,preservation_level,published," - "related_material,related_url,rights_holder,rights_statement," - "scope_and_contents,series,source,status,style_period," - "subject,table_of_contents,technique,visibility,work_type") +DEFAULT_ATTRIBUTES = ("title,alternate_title,collection,contributor,creator," + "date_created,description,genre,language,library_unit," + "location,physical_description_material,physical_description_size," + "published,rights_statement,scope_and_contents,series,source," + "style_period,subject,table_of_contents,technique,visibility," + "work_type") def handler(event, context): event = Event(event) diff --git a/python/src/helpers/prompts.py b/python/src/helpers/prompts.py index 2ab775ac..b79510e8 100644 --- a/python/src/helpers/prompts.py +++ b/python/src/helpers/prompts.py @@ -10,34 +10,23 @@ def prompt_template(): Content: Purchase order and note Metadata: _additional: {{'certainty': 0.8744078576564789, 'id': '29389b8d-a85d-46d1-9a6d-a738c6f81c88'}} - accession_number: BFMF_B24_F03_016 alternate_title: None - api_model: Work - catalog_key: None collection: Berkeley Folk Music Festival contributor: ['University of California, Berkeley. Associated Students', 'Berkeley Folk Music Festival'] - create_date: 2021-03-19T22:22:33.902972Z creator: None date_created: ['October 7, 1970', '1970?'] description: ['Purchase order for costs related to security for the 1970 Berkeley Folk Music Festival and a handwritten note containing calculations and the heading "Police"'] genre: ['notes (documents)', 'purchase orders'] - identifier_descriptive: ['MS 63'] - keywords: None language: ['English'] library_unit: Charles Deering McCormick Library of Special Collections location: None physical_description_material: None physical_description_size: ['5 inches (height) x 3 inches (width)', '7 inches (height) x 8.5 inches (width)'] - preservation_level: Level 1 published: True - related_material: None - related_url: None - rights_holder: None rights_statement: In Copyright scope_and_contents: None series: ['Berkeley Folk Music Festival Archive--3. Festivals: Records, Budgets, Publicity'] source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88 - status: Done style_period: None subject: ['Berkeley Folk Music Festival (15th : 1970 : Berkeley, Calif.)'] table_of_contents: None @@ -49,34 +38,23 @@ def prompt_template(): Content: Berkeley Folk Music Festival, 1966 June 26-30 Metadata: _additional: {{'certainty': 0.869585394859314, 'id': '477e3f63-fc06-4bfc-8734-0b6100c0d1c3'}} - accession_number: BFMF_P34_09 alternate_title: None - api_model: Work - catalog_key: None collection: Berkeley Folk Music Festival contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] - create_date: 2021-03-15T20:20:06.428594Z creator: None date_created: ['1966'] description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.'] genre: ['posters'] - identifier_descriptive: None - keywords: None language: ['English'] library_unit: Charles Deering McCormick Library of Special Collections location: None physical_description_material: None physical_description_size: ['12.75 inches (height) x 12.75 inches (width)'] - preservation_level: Level 1 published: True - related_material: None - related_url: None - rights_holder: None rights_statement: In Copyright scope_and_contents: None series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters'] source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3 - status: Done style_period: None subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Hawes, Bess Lomax, 1921-2009'] table_of_contents: None @@ -88,34 +66,23 @@ def prompt_template(): Content: Berkeley Folk Music Festival, 1966 June 26-30 Metadata: _additional: {{'certainty': 0.8694239258766174, 'id': 'bddeb375-762b-45e3-9e4e-5a4084ac5955'}} - accession_number: BFMF_P13_01 alternate_title: None - api_model: Work - catalog_key: None collection: Berkeley Folk Music Festival contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] - create_date: 2021-03-16T15:32:04.982281Z creator: None date_created: ['1966'] description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.'] genre: ['posters'] - identifier_descriptive: None - keywords: None language: ['English'] library_unit: Charles Deering McCormick Library of Special Collections location: None physical_description_material: None physical_description_size: ['13.75 inches (height) x 21.75 inches (width)'] - preservation_level: Level 1 published: True - related_material: None - related_url: None - rights_holder: None rights_statement: In Copyright scope_and_contents: None series: ['Berkeley Folk Music Festival Archive--9. Posters of Berkeley Folk Music Festivals'] source: bddeb375-762b-45e3-9e4e-5a4084ac5955 - status: Done style_period: None subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival'] table_of_contents: None @@ -127,34 +94,23 @@ def prompt_template(): Content: Berkeley Folk Music Festival, 1966 June 30-July 4 Metadata: _additional: {{'certainty': 0.8693937957286835, 'id': 'aab0bb76-ab02-429a-843a-5be56e31ba67'}} - accession_number: BFMF_P22_16 alternate_title: None - api_model: Work - catalog_key: None collection: Berkeley Folk Music Festival contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] - create_date: 2021-03-16T13:46:47.559482Z creator: None date_created: ['1966'] description: ['Poster for the 9th Annual Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger. Originally found in box 28, folder 3.'] genre: ['posters'] - identifier_descriptive: None - keywords: None language: ['English'] library_unit: Charles Deering McCormick Library of Special Collections location: None physical_description_material: None physical_description_size: ['24.25 inches (height) x 37.5 inches (width)'] - preservation_level: Level 1 published: True - related_material: None - related_url: None - rights_holder: None rights_statement: In Copyright scope_and_contents: None series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters'] source: aab0bb76-ab02-429a-843a-5be56e31ba67 - status: Done style_period: None subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival'] table_of_contents: None diff --git a/template.yaml b/template.yaml index b51cdd99..65e5998e 100644 --- a/template.yaml +++ b/template.yaml @@ -535,21 +535,32 @@ Resources: ApiId: !Ref dcApi Path: /oai Method: POST - helloWorldFunction: + chatWebsocket: + Type: AWS::Serverless::Application + Properties: + Location: ./chat/template.yaml + Parameters: + ApiTokenSecret: !Ref ApiTokenSecret + AzureOpenaiApiKey: !Ref AzureOpenaiApiKey + AzureOpenaiEmbeddingDeploymentId: !Ref AzureOpenaiEmbeddingDeploymentId + AzureOpenaiLlmDeploymentId: !Ref AzureOpenaiLlmDeploymentId + AzureOpenaiResourceName: !Ref AzureOpenaiResourceName + WeaviateApiKey: !Ref WeaviateApiKey + WeaviateUrl: !Ref WeaviateUrl + chatWebsocketEndpoint: Type: AWS::Serverless::Function Properties: - CodeUri: ./python/src - Runtime: python3.9 - Handler: handlers/hello.lambda_handler + Handler: handlers/get-chat-endpoint.handler + Description: Returns the URI of the chat websocket API. Environment: Variables: - DEFAULT_NAME: "World" + WEBSOCKET_URI: !GetAtt chatWebsocket.Outputs.WebSocketURI Events: GetApiGet: Type: HttpApi Properties: ApiId: !Ref dcApi - Path: /hello + Path: /chat-endpoint Method: GET chatFunction: Type: AWS::Serverless::Function