
feat: implement chat history #106

Merged · 13 commits · Dec 16, 2024
11 changes: 6 additions & 5 deletions README.md
@@ -11,7 +11,7 @@
[![dev.to blog post walkthrough](https://img.shields.io/badge/Blog%20post-black?style=flat-square&logo=dev.to)](https://dev.to/azure/build-a-serverless-chatgpt-with-rag-using-langchainjs-3487)
[![Build Status](https://img.shields.io/github/actions/workflow/status/Azure-Samples/serverless-chat-langchainjs/build-test.yaml?style=flat-square&label=Build)](https://github.com/Azure-Samples/serverless-chat-langchainjs/actions)
![Node version](https://img.shields.io/badge/Node.js->=20-3c873a?style=flat-square)
[![Ollama + Mistral](https://img.shields.io/badge/Ollama-Mistral-ff7000?style=flat-square)](https://ollama.com/library/mistral)
[![Ollama + Llama3.1](https://img.shields.io/badge/Ollama-Llama3.1-ff7000?style=flat-square)](https://ollama.com/library/llama3.1)
[![TypeScript](https://img.shields.io/badge/TypeScript-blue?style=flat-square&logo=typescript&logoColor=white)](https://www.typescriptlang.org)
[![License](https://img.shields.io/badge/License-MIT-yellow?style=flat-square)](LICENSE)

@@ -44,7 +44,7 @@ This application is made from multiple components:

- A serverless API built with [Azure Functions](https://learn.microsoft.com/azure/azure-functions/functions-overview?pivots=programming-language-javascript) and using [LangChain.js](https://js.langchain.com/) to ingest the documents and generate responses to the user chat queries. The code is located in the `packages/api` folder.

- A database to store the text extracted from the documents and the vectors generated by LangChain.js, using [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/azure/cosmos-db/nosql/).
- A database to store chat sessions, the text extracted from the documents, and the vectors generated by LangChain.js, using [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/azure/cosmos-db/nosql/).

- A file storage to store the source documents, using [Azure Blob Storage](https://learn.microsoft.com/azure/storage/blobs/storage-blobs-introduction).

@@ -54,6 +54,7 @@ We use the [HTTP protocol for AI chat apps](https://aka.ms/chatprotocol) to comm

- **Serverless Architecture**: Utilizes Azure Functions and Azure Static Web Apps for a fully serverless deployment.
- **Retrieval-Augmented Generation (RAG)**: Combines the power of Azure Cosmos DB and LangChain.js to provide relevant and accurate responses.
- **Chat Sessions History**: Maintains a personal chat history for each user, allowing them to revisit previous conversations.
- **Scalable and Cost-Effective**: Leverages Azure's serverless offerings to provide a scalable and cost-effective solution.
- **Local Development**: Supports local development using Ollama for testing without any cloud costs.

@@ -154,12 +155,12 @@ The resource group and all the resources will be deleted.
If you have a machine with enough resources, you can run this sample entirely locally without using any cloud resources. To do that, you first have to install [Ollama](https://ollama.com) and then run the following commands to download the models on your machine:

```bash
ollama pull mistral:v0.2
ollama pull all-minilm:l6-v2
ollama pull llama3.1:latest
ollama pull nomic-embed-text:latest
```

> [!NOTE]
> The `mistral` model will download a few gigabytes of data, so it can take some time depending on your internet connection.
> The `llama3.1` model will download a few gigabytes of data, so it can take some time depending on your internet connection.

After that, you have to install the NPM dependencies. Assuming the standard npm workflow, the command is:
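
```bash
# Assumed standard install; the repository may use a lockfile-based install (npm ci) instead.
npm install
```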

4 changes: 2 additions & 2 deletions docs/faq.md
@@ -93,8 +93,8 @@ You may also need to adjust the capacity in `infra/main.bicep` file, depending o
To change the local models used by Ollama, you can edit the file `packages/api/src/constants.ts`:

```typescript
export const ollamaEmbeddingsModel = 'all-minilm:l6-v2';
export const ollamaChatModel = 'mistral:v0.2';
export const ollamaEmbeddingsModel = 'nomic-embed-text:latest';
export const ollamaChatModel = 'llama3.1:latest';
```

You can see the complete list of available models at https://ollama.ai/models.
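
After changing these constants, make sure the new models are pulled locally so Ollama can serve them. A minimal sketch, with placeholder model names:

```bash
# Replace the placeholders with the model names you set in constants.ts.
ollama pull <chat-model>
ollama pull <embeddings-model>
```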
Binary file modified docs/images/architecture-local.drawio.png
Binary file modified docs/images/architecture.drawio.png
6 changes: 3 additions & 3 deletions docs/readme.md
@@ -35,7 +35,7 @@ This application is made from multiple components:

- A serverless API built with [Azure Functions](https://learn.microsoft.com/azure/azure-functions/functions-overview?pivots=programming-language-javascript) and using [LangChain.js](https://js.langchain.com/) to ingest the documents and generate responses to the user chat queries. The code is located in the `packages/api` folder.

- A database to store the text extracted from the documents and the vectors generated by LangChain.js, using [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/azure/cosmos-db/nosql/).
- A database to store chat sessions, the text extracted from the documents, and the vectors generated by LangChain.js, using [Azure Cosmos DB for NoSQL](https://learn.microsoft.com/azure/cosmos-db/nosql/).

- A file storage to store the source documents, using [Azure Blob Storage](https://learn.microsoft.com/azure/storage/blobs/storage-blobs-introduction).

@@ -77,9 +77,9 @@ You can now open the web app in your browser and start chatting with the bot.

Our API is composed of two main endpoints:

- `/documents`: This endpoint allows uploading PDF documents into the database. Using LangChain.js, we extract the text from the PDF file, split it into smaller chunks, and generate vectors for each chunk. We store the text and the vectors in the database for later use.
- `POST /documents`: This endpoint allows uploading PDF documents into the database. Using LangChain.js, we extract the text from the PDF file, split it into smaller chunks, and generate vectors for each chunk. We store the text and the vectors in the database for later use.

- `/chat`: This endpoint receives a list of messages, the last being the user query, and returns a response generated by the LLM. It uses the documents stored in the database to generate the response. We use LangChain.js components to connect to the database, load the documents, and perform a vector search after vectorizing the user query. After that, the most relevant documents are injected into the prompt, and we generate the response. While this process seems complex, LangChain.js does all the heavy lifting for us so we can focus on the application flow.
- `POST /chats`: This endpoint receives a list of messages, the last being the user query, and returns a response generated by the LLM. It uses the documents stored in the database to generate the response. We use LangChain.js components to connect to the database, load the documents, and perform a vector search after vectorizing the user query. After that, the most relevant documents are injected into the prompt, and we generate the response. While this process seems complex, LangChain.js does all the heavy lifting for us so we can focus on the application flow.

After the application is deployed, the `/documents` endpoint is used to ingest documents by uploading the PDFs, either with `curl` commands or with the Node.js script we built (have a look at the `postup` hook in the `azure.yaml` file).
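
For illustration, here is a minimal sketch of calling both endpoints with `curl`. The base URL, route prefix, and the multipart field name `file` are assumptions and may differ in your deployment:

```bash
# Hypothetical base URL; replace with your deployed Function App URL.
API_URL="http://localhost:7071/api"

# Ingest a PDF document (assumes a multipart form field named "file").
curl -F "file=@./data/sample.pdf" "$API_URL/documents"

# Ask a question; the last message in the list is the user query.
curl -X POST "$API_URL/chats" \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "What is this document about?"}]}'
```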

26 changes: 13 additions & 13 deletions docs/tutorial/04-preparing-understanding-language-models.md
@@ -1,12 +1,12 @@
# Preparing and Understanding Language Models: Configuring Azure OpenAI Service and Installing Ollama with Mistral 7B
# Preparing and Understanding Language Models: Configuring Azure OpenAI Service and Installing Ollama with Llama3.1 8B

In this section, we will cover the language models used in the project. Throughout the tutorial, we will also learn how to generate the environment variables needed to use the Azure services, including the **[Azure OpenAI Service](https://learn.microsoft.com/azure/ai-services/openai/overview)**.

We will also teach you how to use **[Ollama](https://ollama.com/)** with **[Mistral 7B](https://mistral.ai/)**, an open-source language model, if you want to use it locally.
We will also teach you how to use **[Ollama](https://ollama.com/)** with **[Llama3.1 8B](https://www.llama.com/)**, an open-source language model, if you want to use it locally.

## Models to be used in the project

We will teach you how to use two different language models: GPT-3.5 Turbo integrated with _Azure OpenAI Service_ (on Azure) and _Ollama with Mistral 7B_ (if you decide to use a model locally). Let's take a look at each of them.
We will teach you how to use two different language models: GPT-3.5 Turbo integrated with _Azure OpenAI Service_ (on Azure) and _Ollama with Llama3.1 8B_ (if you decide to use a model locally). Let's take a look at each of them.

### GPT-3.5 Turbo Integrated with Azure OpenAI Service

@@ -18,17 +18,17 @@ You have the choice to use either **[OpenAI Service](https://openai.com/)** or *

Azure OpenAI Service provides REST API access and client libraries for many programming languages, including Python, Node.js, and C#. Additionally, it offers advanced language models like GPT-4 and GPT-4 Turbo with Vision, which are versatile and adaptable to various tasks such as content generation, summarization, image recognition, semantic search, and text-to-code translation.

### Ollama with Mistral 7B
### Ollama with Llama3.1 8B

![Ollama Page](./images/ollama-page.png)

**[Ollama](https://ollama.com/)** presents itself as an open-source solution, offering a transparent and modifiable platform. The Mistral 7B model has 7 billion parameters and is designed to be effective, cost-efficient, and scalable.
**[Ollama](https://ollama.com/)** presents itself as an open-source solution, offering a transparent and modifiable platform. The Llama3.1 8B model has 8 billion parameters and is designed to be effective, cost-efficient, and scalable.

Ollama's openness encourages innovation and collaboration within the developer community. Users can adapt the model to their specific needs, experiment with innovative ideas, or integrate the model in ways that proprietary services might not allow.

Additionally, using an open-source language model can decrease expenses, which is a crucial factor for projects with restricted budgets or for those who only wish to experiment with language models.

![Mistral 7B Page](./images/mistral-7b-page.png)
![Llama3.1 8B Page](./images/mistral-7b-page.png)

## Creating Azure resources

@@ -83,31 +83,31 @@ Before installing Ollama, please ensure you meet the prerequisites, which includ
To begin, download the necessary models for this project by running the following commands in your terminal:

```bash
ollama pull mistral:v0.2
ollama pull all-minilm:l6-v2
ollama pull llama3.1:latest
ollama pull nomic-embed-text:latest
```

We will use the Mistral 7B model, a powerful language model, and the All-MiniLM model, a small embedding model, to generate vectors from the text for the chatbot.
We will use the Llama3.1 8B model, a powerful language model, and the `nomic-embed-text` model, a small embedding model, to generate vectors from the text for the chatbot.

> **Note:** The Mistral model will download several gigabytes of data, so the process may take some time depending on your internet connection.
> **Note:** The Llama3.1 model will download several gigabytes of data, so the process may take some time depending on your internet connection.

After downloading the models, you can verify the proper functioning of the Ollama server by executing the following command:

```bash
ollama run mistral:v0.2
ollama run llama3.1:latest
```

A prompt will be displayed in your terminal, allowing you to communicate directly with the AI model in a chat-like format.

![Ollama Mistral](./images/ollama-mistra-cli.png)
![Ollama Llama3.1 8B](./images/ollama-mistra-cli.png)

Ask the model a few questions and review its answers. This will give you insight into the model's capabilities and how to interact with it.

After you finish testing the Ollama server, you can stop it by pressing **Ctrl+D** in your terminal.

## Next Steps

This tutorial covers language models that will be used in the project. Choose the best model to suit your needs. To use the Azure OpenAI Service, follow the instructions to set up the service in Azure. To use Ollama with Mistral 7B, follow the instructions to install Ollama and the local models.
This tutorial covers language models that will be used in the project. Choose the best model to suit your needs. To use the Azure OpenAI Service, follow the instructions to set up the service in Azure. To use Ollama with Llama3.1 8B, follow the instructions to install Ollama and the local models.

To begin developing the application, we first need to create some configuration files for the project. We'll cover this in the next section!

40 changes: 0 additions & 40 deletions infra/core/database/cosmos/cosmos-account.bicep

This file was deleted.

21 changes: 0 additions & 21 deletions infra/core/database/cosmos/sql/cosmos-sql-account.bicep

This file was deleted.

73 changes: 0 additions & 73 deletions infra/core/database/cosmos/sql/cosmos-sql-db.bicep

This file was deleted.

69 changes: 55 additions & 14 deletions infra/main.bicep
@@ -220,7 +220,7 @@ module openAi 'core/ai/cognitiveservices.bicep' = if (empty(openAiUrl)) {
version: chatModelVersion
}
sku: {
name: 'Standard'
name: 'GlobalStandard'
capacity: chatDeploymentCapacity
}
}
@@ -237,22 +237,63 @@
}
}

module cosmosDb './core/database/cosmos/sql/cosmos-sql-db.bicep' = {
module cosmosDb 'br/public:avm/res/document-db/database-account:0.9.0' = {
name: 'cosmosDb'
scope: resourceGroup
params: {
accountName: !empty(cosmosDbServiceName) ? cosmosDbServiceName : '${abbrs.documentDBDatabaseAccounts}${resourceToken}'
location: location
name: !empty(cosmosDbServiceName) ? cosmosDbServiceName : '${abbrs.documentDBDatabaseAccounts}${resourceToken}'
tags: tags
containers: [
locations: [
{
name: 'vectorSearchContainer'
id: 'vectorSearchContainer'
partitionKey: '/id'
locationName: location
failoverPriority: 0
isZoneRedundant: false
}
]
databaseName: 'vectorSearchDB'
disableLocalAuth: true
managedIdentities: {
systemAssigned: true
}
capabilitiesToAdd: [
'EnableServerless'
'EnableNoSQLVectorSearch'
]
networkRestrictions: {
ipRules: []
virtualNetworkRules: []
publicNetworkAccess: 'Enabled'
}
sqlDatabases: [
{
containers: [
{
name: 'vectorSearchContainer'
paths: [
'/id'
]
}
]
name: 'vectorSearchDB'
}
{
containers: [
{
name: 'chatHistoryContainer'
paths: [
'/userId'
]
}
]
name: 'chatHistoryDB'
}
]
}
}

module dbRoleDefinition './core/database/cosmos/sql/cosmos-sql-role-def.bicep' = {
scope: resourceGroup
name: 'db-contrib-role-definition'
params: {
accountName: cosmosDb.outputs.name
}
}

@@ -287,10 +328,10 @@ module dbContribRoleUser './core/database/cosmos/sql/cosmos-sql-role-assign.bice
scope: resourceGroup
name: 'db-contrib-role-user'
params: {
accountName: cosmosDb.outputs.accountName
accountName: cosmosDb.outputs.name
principalId: principalId
// Cosmos DB Data Contributor
roleDefinitionId: cosmosDb.outputs.roleDefinitionId
roleDefinitionId: dbRoleDefinition.outputs.id
}
}

@@ -321,10 +362,10 @@ module dbContribRoleApi './core/database/cosmos/sql/cosmos-sql-role-assign.bicep
scope: resourceGroup
name: 'db-contrib-role-api'
params: {
accountName: cosmosDb.outputs.accountName
accountName: cosmosDb.outputs.name
principalId: api.outputs.identityPrincipalId
// Cosmos DB Data Contributor
roleDefinitionId: cosmosDb.outputs.roleDefinitionId
roleDefinitionId: dbRoleDefinition.outputs.id
}
}
