Amazon Nova Models Support
bigadsoleiman committed Dec 3, 2024
1 parent 6c5302e commit 5588764
Showing 44 changed files with 1,591 additions and 495 deletions.
30 changes: 21 additions & 9 deletions README.md
@@ -10,11 +10,29 @@

![sample](docs/about/assets/chabot-sample.gif "AWS GenAI Chatbot")


## 🚀 NEW! Support for Amazon Nova Models 🚀
### Deploy this chatbot to use the latest Amazon Nova models!
### These powerful models can __understand__ and __generate__ images and videos.

Deploy this chatbot to experiment with:
- `Amazon Nova Micro`
- `Amazon Nova Lite`
- `Amazon Nova Pro`
- `Amazon Nova Canvas`
- `Amazon Nova Reel`



Make sure to request access to the new models [here](https://aws-samples.github.io/aws-genai-llm-chatbot/documentation/model-requirements.html#amazon-bedrock-requirements).
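
For a quick smoke test outside the chatbot, the snippet below is a minimal sketch of calling a Nova model through the Bedrock Converse API, the same API the chatbot's multimodal adapter uses. The region and model ID are assumptions; use identifiers enabled in your own account.

```
import boto3

# Assumes model access has already been granted in this region.
client = boto3.client("bedrock-runtime", region_name="us-east-1")

response = client.converse(
    # Example inference-profile ID; check the Bedrock console for yours.
    modelId="us.amazon.nova-lite-v1:0",
    messages=[
        {"role": "user", "content": [{"text": "Summarize Amazon Nova in one sentence."}]}
    ],
    inferenceConfig={"maxTokens": 256, "temperature": 0.7},
)
print(response["output"]["message"]["content"][0]["text"])
```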
---


This solution provides ready-to-use code so you can start **experimenting with a variety of Large Language Models and Multimodal Language Models, settings, and prompts** in your own AWS account.

Supported model providers:

- [Amazon Bedrock](https://aws.amazon.com/bedrock/), which supports a wide range of models from AWS, Anthropic, Cohere, and Mistral, including the latest Amazon Nova models. See [Recent announcements](#) for more details.
- [Amazon SageMaker](https://aws.amazon.com/sagemaker/) self-hosted models from Foundation Models, JumpStart, and Hugging Face.
- Third-party providers via API such as Anthropic, Cohere, AI21 Labs, OpenAI, etc. [See available langchain integrations](https://python.langchain.com/docs/integrations/llms/) for a comprehensive list.

@@ -42,18 +60,12 @@ Roadmap is available through the [GitHub Project](https://github.com/orgs/aws-sa
# License

This library is licensed under the MIT-0 License. See the LICENSE file.

- [Changelog](CHANGELOG.md) of the project.
- [License](LICENSE) of the project.
- [Code of Conduct](CODE_OF_CONDUCT.md) of the project.
- [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.

Although this repository is released under the MIT-0 license, its front-end and SQL implementation use the following third party projects:
- [psycopg2-binary](https://github.com/psycopg/psycopg2)
- [jackspeak](https://github.com/isaacs/jackspeak)
- [package-json-from-dist](https://github.com/isaacs/package-json-from-dist)
- [path-scurry](https://github.com/isaacs/path-scurry)

These projects' licensing includes the LGPL v3 and BlueOak-1.0.0 licenses.

# Legal Disclaimer

You should consider doing your own independent assessment before using the content in this sample for production purposes. This may include (amongst other things) testing, securing, and optimizing the content provided in this sample, based on your specific quality control practices and standards.
3 changes: 0 additions & 3 deletions docs/guide/deploy.md
@@ -21,9 +21,6 @@ The default EBS volume created with the Cloud9 instance is too small and you need
To do this, run the following command from the Cloud9 terminal:

```
git clone https://github.com/aws-samples/aws-genai-llm-chatbot.git
cd aws-genai-llm-chatbot/
chmod +x scripts/cloud9-resize.sh
./scripts/cloud9-resize.sh
```

1 change: 0 additions & 1 deletion integtests/chatbot-api/multi_modal_test.py
@@ -10,7 +10,6 @@


def test_multi_modal(client, default_multimodal_model, default_provider):
key = "INTEG_TEST" + str(uuid.uuid4()) + ".jpeg"
result = client.add_file(
input={
1 change: 1 addition & 0 deletions lib/chatbot-api/functions/api-handler/common/constant.py
@@ -12,3 +12,4 @@
SAFE_SHORT_STR_VALIDATION_OPTIONAL = Field(
min_length=1, max_length=100, pattern=SAFE_STR_REGEX, default=None
)
SAFE_FILE_NAME_REGEX = r"^[A-Za-z0-9-_./\\ ]*$"
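
For reference, a quick standalone sketch of what the new file-name pattern admits and rejects, using plain `re` with the constant from the diff above:

```
import re

SAFE_FILE_NAME_REGEX = r"^[A-Za-z0-9-_./\\ ]*$"

# Letters, digits, dash, underscore, dot, slash, backslash, and spaces are allowed.
assert re.match(SAFE_FILE_NAME_REGEX, "reports/Q4 summary_v2.pdf")
assert re.match(SAFE_FILE_NAME_REGEX, "clip.mp4")
# Anything else (here a pipe and a question mark) is rejected.
assert not re.match(SAFE_FILE_NAME_REGEX, "bad|name?.txt")
```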
11 changes: 9 additions & 2 deletions lib/chatbot-api/functions/api-handler/routes/documents.py
@@ -137,6 +137,7 @@ class DocumentSubscriptionStatusRequest(BaseModel):
".jpg",
".jpeg",
".png",
".mp4",
]
)

@@ -149,14 +150,20 @@ def file_upload(input: dict):

if "workspaceId" in input:
if extension not in allowed_workspace_extensions:
            raise genai_core.types.CommonError(
                f"""Invalid file extension {extension}.
                Allowed extensions: {allowed_workspace_extensions}."""
            )

result = genai_core.presign.generate_workspace_presigned_post(
request.workspaceId, request.fileName
)
else:
if extension not in allowed_session_extensions:
            raise genai_core.types.CommonError(
                f"""Invalid file extension {extension}.
                Allowed extensions: {allowed_session_extensions}."""
            )

user_id = genai_core.auth.get_user_id(router)
result = genai_core.presign.generate_user_presigned_post(
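As context for these routes, the presigned POST they return is consumed client-side roughly as below. This is a hypothetical sketch assuming the standard S3 presigned-POST response shape (`url` plus `fields`):

```
import requests

# `result` stands for the presigned POST returned by the file_upload resolver above.
response = requests.post(
    result["url"],
    data=result["fields"],  # includes key, policy, and signature fields
    files={"file": open("clip.mp4", "rb")},
)
response.raise_for_status()  # S3 answers 204 No Content on success
```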
4 changes: 2 additions & 2 deletions lib/chatbot-api/functions/api-handler/routes/sessions.py
@@ -1,5 +1,5 @@
from pydantic import BaseModel, Field
from common.constant import SAFE_FILE_NAME_REGEX
from common.validation import WorkspaceIdValidation
import genai_core.presign
import genai_core.sessions
@@ -16,7 +16,7 @@


class FileURequestValidation(BaseModel):
fileName: str = Field(min_length=1, max_length=500, pattern=SAFE_FILE_NAME_REGEX)


@router.resolver(field_name="getFileURL")
3 changes: 1 addition & 2 deletions lib/layer/index.ts
@@ -34,9 +34,8 @@ export class Layer extends Construct {
[
`pip install -r requirements.txt ${args.join(" ")}`,
`cd /asset-output/python`,
      // Remove sqlalchemy, used by Langchain when storing the memory using sql
      `rm -rf sqlalchemy*`,
// Main impact of cold start is the file size. (faster to have the lambda regenerate them)
`find . -name "*.pyc" -type f -delete`,
`cd -`,
@@ -1,3 +1,4 @@
# flake8: noqa
from .claude import Claude3
from .idefics import Idefics
from .nova import Nova
@@ -1,12 +1,194 @@
import json
import mimetypes
import os
import uuid
from abc import abstractmethod
from dataclasses import dataclass, field
from typing import Any, Optional

import boto3
from aws_lambda_powertools import Logger
from genai_core.clients import get_bedrock_client
from genai_core.types import ChatbotMessageType

logger = Logger()
s3 = boto3.resource("s3")


@dataclass
class MultiModalModelBase:
model_id: str
session_id: Optional[str]
user_id: Optional[str]
disable_streaming: Optional[bool] = False
model_kwargs: Optional[dict] = field(default_factory=dict)
mode: Optional[str] = None
client: Optional[Any] = get_bedrock_client()

@abstractmethod
def handle_run(
    self, input: dict, model_kwargs: dict, files: Optional[list] = None
) -> str:
    ...

@abstractmethod
def on_llm_new_token(self, user_id: str, session_id: str, chunk: str) -> None:
    ...

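# Persist raw media to the chatbot files bucket under a per-user prefix and
# return a descriptor that can be attached to a message.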
def upload_file_message(self, content: bytes, file_type: str):
key = str(uuid.uuid4())
s3_path = "private/" + self.user_id + "/" + key
s3.Object(os.environ["CHATBOT_FILES_BUCKET_NAME"], s3_path).put(Body=content)
return {
"provider": "s3",
"key": key,
"type": file_type,
}

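# Resolve a stored file descriptor into a Converse-API content block,
# either inlining the bytes or pointing at the S3 object.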
def get_file_message(self, file: dict, use_s3_path: Optional[bool] = False):
if file["key"] is None:
raise Exception("Invalid S3 key")

key = "private/" + self.user_id + "/" + file["key"]
logger.info(
"Fetching file", bucket=os.environ["CHATBOT_FILES_BUCKET_NAME"], key=key
)
extension = mimetypes.guess_extension(file["key"]) or file["key"].split(".")[-1]
mime_type = mimetypes.guess_type(file["key"])[0]
file_type = mime_type.split("/")[0]
logger.info("File type", file_type=file_type)
logger.info("File extension", extension=extension)
logger.info("File mime type", mime_type=mime_type)
format = mime_type.split("/")[-1] or extension

response = s3.Object(os.environ["CHATBOT_FILES_BUCKET_NAME"], key)
logger.info("File response", response=response)
media_bytes = response.get()["Body"].read()

source = {}
if use_s3_path:
source["s3Location"] = {
"uri": f"s3://{os.environ['CHATBOT_FILES_BUCKET_NAME']}/{key}",
}
else:
source["bytes"] = media_bytes

return {
file_type: {
"format": format,
"source": source,
}
}

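# Rebuild the Converse messages array from the chat history, interleaving
# each turn's attached files with its text.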
def format_prompt(self, prompt: str, messages: list, files: list) -> dict:
prompts = []

# Chat history
for message in messages:
if message.type.lower() == ChatbotMessageType.Human.value.lower():
user_msg = {
"role": "user",
"content": [],
}
prompts.append(user_msg)
message_files = message.additional_kwargs.get("files", [])

for message_file in message_files:
user_msg["content"].append(self.get_file_message(message_file))

user_msg["content"].append({"text": message.content})

if message.type.lower() == ChatbotMessageType.AI.value.lower():
prompts.append(
{
"role": "assistant",
"content": [{"text": message.content or "<EMPTY>"}],
}
)

# User prompt
user_msg = {
"role": "user",
"content": [],
}
prompts.append(user_msg)
for file in files:
user_msg["content"].append(self.get_file_message(file))

user_msg["content"].append({"text": prompt})

return {
"messages": prompts,
"last_message": prompt,
}

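# Blank out inlined media bytes so the payload can be logged or stored
# without the raw file content.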
def clean_prompt(self, input: dict) -> str:
for m in input["messages"]:
if m["role"] == "user" and type(m["content"]) == type([]): # noqa: E721
for c in m["content"]:
if "video" in c:
c["video"]["source"]["bytes"] = ""
if "image" in c:
c["image"]["source"]["bytes"] = ""
return json.dumps(input)

@abstractmethod
def generate_image(self, input: dict, model_kwargs: dict):
...

@abstractmethod
def generate_video(self, input: dict, model_kwargs: dict):
...

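# Shared Converse-API path: one-shot call or token stream, depending on model_kwargs.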
def converse(self, input: dict, model_kwargs: dict):
logger.info("Incoming request for nova", model_kwargs=model_kwargs)
logger.info("Mode", mode=self.mode)
streaming = model_kwargs.get("streaming", False)

complete_response = ""
inf_params = {}

if "temperature" in model_kwargs:
inf_params["temperature"] = model_kwargs["temperature"]
if "topP" in model_kwargs:
inf_params["topP"] = model_kwargs["topP"]
if "maxTokens" in model_kwargs:
inf_params["maxTokens"] = model_kwargs["maxTokens"]

stream_params = {
"modelId": self.model_id,
"messages": input["messages"],
"inferenceConfig": inf_params,
}
logger.info("Stream params", stream_params=stream_params)

if streaming:
logger.info("Calling converse_stream")
mlm_response = self.client.converse_stream(
**stream_params,
)
logger.info("Stream response", mlm_response=mlm_response)
stream = mlm_response.get("stream")
if stream:
logger.info("Sending stream events to on_llm_new_token")
for event in stream:
if "contentBlockDelta" in event:
chunk = event["contentBlockDelta"]["delta"]["text"]
complete_response += chunk
                    self.on_llm_new_token(self.user_id, self.session_id, chunk)

logger.info("Complete response", complete_response=complete_response)
return {
"content": complete_response,
}

logger.info("Calling converse")
mlm_response = self.client.converse(
**stream_params,
)
logger.info("Response from nova", mlm_response=mlm_response)
content = mlm_response["output"]["message"]["content"][0]["text"]

return {
"content": content,
}
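
To see how the pieces fit, here is a minimal, hypothetical adapter built on the base class above; everything except `MultiModalModelBase` and the methods it defines is illustrative (the real Nova adapter ships in `nova.py` in this commit):

```
# Hypothetical adapter sketch, not the shipped nova.py.
class EchoAdapter(MultiModalModelBase):
    def on_llm_new_token(self, user_id: str, session_id: str, chunk: str) -> None:
        # A real adapter would push the chunk to the client (e.g. over AppSync).
        print(chunk, end="", flush=True)

    def handle_run(self, input: dict, model_kwargs: dict, files: Optional[list] = None):
        # Build a Converse-style payload from the incoming prompt (no chat history here).
        payload = self.format_prompt(input["prompt"], [], files or [])
        return self.converse(payload, model_kwargs)
```

With `model_kwargs={"streaming": True}` the base class calls `converse_stream` and forwards every `contentBlockDelta` chunk to `on_llm_new_token`; otherwise it makes a single `converse` call and returns the full response text.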