Update variable format for chatbot-rag-memory example #573

Merged
merged 1 commit into from Oct 13, 2023
4 changes: 3 additions & 1 deletion examples/applications/chatbot-rag-memory/README.md
@@ -17,7 +17,9 @@ to improve the quality of the response.
Create an S3 bucket; it will contain only a metadata file for the WebCrawler.

Create a database in Astra DB. The required tables (one for the vector embeddings, one for the chat history) will be created automatically as
specified in the `assets` part of the configuration.
specified in the `assets` part of the configuration. Note that the conversation history table is indexed by
session and timestamp to allow for a query to get the last N messages in the conversation. The table also
has TTL set to 1 hour so the table doesn't grow indefinitely.

The gateway in this example is configured for GitHub authentication. You will need a GitHub OAuth application
to connect to the gateway. See the `gateway-authentication` example for details.
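The README note above describes the conversation-history table. Following the asset format used in chatbot.yaml below, a sketch of what that table definition amounts to might look like this; the column list comes from the visible diff lines and the sink mapping, while the WITH clause (clustering order and one-hour TTL) is inferred from the README note rather than copied from the collapsed create statement:

# Sketch only, not part of this PR; WITH clause inferred from the README note.
- name: "convo-memory-table"
  asset-type: "cassandra-table"
  creation-mode: create-if-not-exists
  config:
    table-name: "${globals.chatTable}"
    keyspace: "${globals.vectorKeyspace}"
    datasource: "AstraDatasource"
    create-statements:
      - >
        CREATE TABLE IF NOT EXISTS ${globals.vectorKeyspace}.${globals.chatTable} (
          sessionId TEXT,
          timestamp TIMEUUID,
          question TEXT,
          answer TEXT,
          prompt TEXT,
          PRIMARY KEY (sessionId, timestamp)
        ) WITH CLUSTERING ORDER BY (timestamp DESC)
          AND default_time_to_live = 3600;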
81 changes: 41 additions & 40 deletions examples/applications/chatbot-rag-memory/chatbot.yaml
@@ -15,32 +15,32 @@
#

topics:
- name: "{{ globals.questionsTopic }}"
- name: "${globals.questionsTopic}"
creation-mode: create-if-not-exists
deletion-mode: delete
- name: "{{ globals.answersTopic }}"
- name: "${globals.answersTopic}"
creation-mode: create-if-not-exists
deletion-mode: delete
- name: "{{ globals.logTopic }}"
- name: "${globals.logTopic}"
creation-mode: create-if-not-exists
deletion-mode: delete
assets:
- name: "keyspace-chat"
asset-type: "astra-keyspace"
creation-mode: create-if-not-exists
config:
keyspace: "{{ globals.vectorKeyspace}}"
keyspace: "${globals.vectorKeyspace}"
datasource: "AstraDatasource"
- name: "vector-table-chat"
asset-type: "cassandra-table"
creation-mode: create-if-not-exists
config:
table-name: "{{ globals.vectorTable}}"
keyspace: "{{ globals.vectorKeyspace}}"
table-name: "${globals.vectorTable}"
keyspace: "${globals.vectorKeyspace}"
datasource: "AstraDatasource"
create-statements:
- >
CREATE TABLE IF NOT EXISTS {{ globals.vectorKeyspace}}.{{ globals.vectorTable }} (
CREATE TABLE IF NOT EXISTS ${globals.vectorKeyspace}.${globals.vectorTable} (
filename TEXT,
chunk_id int,
num_tokens int,
@@ -50,18 +50,18 @@ assets:
PRIMARY KEY (filename, chunk_id)
);
- >
CREATE CUSTOM INDEX IF NOT EXISTS {{ globals.vectorIndex }}
ON {{ globals.vectorKeyspace}}.{{ globals.vectorTable }}(embeddings_vector) USING 'StorageAttachedIndex';
CREATE CUSTOM INDEX IF NOT EXISTS ${globals.vectorIndex}
ON ${globals.vectorKeyspace}.${globals.vectorTable}(embeddings_vector) USING 'StorageAttachedIndex';
- name: "convo-memory-table"
asset-type: "cassandra-table"
creation-mode: create-if-not-exists
config:
table-name: "{{ globals.chatTable}}"
keyspace: "{{ globals.vectorKeyspace}}"
table-name: "${globals.chatTable}"
keyspace: "${globals.vectorKeyspace}"
datasource: "AstraDatasource"
create-statements:
- >
CREATE TABLE IF NOT EXISTS {{ globals.vectorKeyspace}}.{{ globals.chatTable }} (
CREATE TABLE IF NOT EXISTS ${globals.vectorKeyspace}.${globals.chatTable} (
sessionId TEXT,
timestamp TIMEUUID,
question TEXT,
@@ -77,34 +77,34 @@ pipeline:
- name: "convert-to-structure"
id: "convert-to-structure"
type: "document-to-json"
input: "{{ globals.questionsTopic }}"
input: "${globals.questionsTopic}"
configuration:
text-field: "questionNoContext"
- name: "Query Chat History"
id: query-chat-history
type: "query"
configuration:
datasource: "AstraDatasource"
query: "select question,answer from {{ globals.vectorKeyspace}}.{{ globals.chatTable}} where sessionid = ? limit 3"
query: "select question,answer from ${globals.vectorKeyspace}.${globals.chatTable} where sessionid = ? limit 3"
output-field: "value.history"
fields:
- "value.sessionid"
- name: "Update question based on chat history"
type: "ai-chat-completions"
configuration:
model: "{{ globals.chatModelName }}"
model: "${globals.chatModelName}"
completion-field: "value.question"
log-field: "value.chatHistoryPrompt"
stream: false
messages:
- role: system
content: |
You are a conversational interpreter for a conversation between a user and
a bot who is an expert on {{ globals.assistantType}}.
a bot who is an expert on ${globals.assistantType}.

The user will give you a question without context. You will reformulate the question
to take into account the context of the conversation. You should assume the question
is related to {{ globals.assistantType}}. You should also consult with the Chat History
is related to ${globals.assistantType}. You should also consult with the Chat History
below when reformulating the question. For example,
you will substitute pronouns for the most likely noun in the conversation
history.
@@ -113,51 +113,52 @@ pipeline:
in the Chat History. The chat history is in reverse chronological order, so the most
recent exchange is at the top.

Only respond with the reformulated question. If there is no chat history, then respond only with the question unchanaged.
Only respond with the reformulated question. If there is no chat history, then respond
only with the question unchanged.

Chat History:
=============
{{%# value.history}}
User: {{% question}} Assistant: {{% answer}}
{{# value.history}}
User: {{ question}} Assistant: {{ answer}}
-----------------------------------------------
{{%/ value.history}}
{{/ value.history}}
- role: user
content: "{{% value.questionNoContext}}"
content: "{{ value.questionNoContext}}"
- name: "compute-embeddings"
id: "compute-embeddings"
type: "compute-ai-embeddings"
configuration:
model: "text-embedding-ada-002" # This needs to match the name of the model deployment, not the base model
embeddings-field: "value.question_embeddings"
text: "{{% value.question }}"
text: "{{ value.question }}"
- name: "lookup-related-documents-in-llm"
type: "query"
configuration:
datasource: "AstraDatasource"
query: "SELECT text FROM {{ globals.vectorKeyspace}}.{{ globals.vectorTable}} ORDER BY embeddings_vector ANN OF ? LIMIT 4"
query: "SELECT text FROM ${globals.vectorKeyspace}.${globals.vectorTable} ORDER BY embeddings_vector ANN OF ? LIMIT 4"
fields:
- "value.question_embeddings"
output-field: "value.related_documents"
- name: "ai-chat-completions"
type: "ai-chat-completions"
output: "{{ globals.logTopic }}"
output: "${globals.logTopic}"
configuration:
model: "{{ globals.chatModelName }}" # This needs to be set to the model deployment name, not the base name
model: "${globals.chatModelName}" # This needs to be set to the model deployment name, not the base name
completion-field: "value.answer"
log-field: "value.prompt"
stream-to-topic: "{{ globals.answersTopic }}"
stream-to-topic: "${globals.answersTopic}"
stream-response-completion-field: "value"
min-chunks-per-message: 20
messages:
- role: system
content: |
You are a helpful assistant for {{ globals.assistantType}}.
You are a helpful assistant for ${globals.assistantType}.

A user is going to ask a questions. Refer to the Related Documents below
when answering to their questions. Use them as much as possible
A user is going to ask a question. Refer to the Related Documents below
when answering their question. Use them as much as possible
when answering the question. If you do not know the answer, say so.

Do not answer questions not related to {{ globals.assistantType}}.
Do not answer questions not related to ${globals.assistantType}.

When answering questions, take into consideration the history of the
chat conversation, which is listed below under Chat History. The chat history
@@ -166,22 +167,22 @@
Related Documents:
==================

{{%# value.related_documents}}
{{% text}}
{{%/ value.related_documents}}
{{# value.related_documents}}
{{ text}}
{{/ value.related_documents}}

Chat History:
=============
{{%# value.history}}
User: {{% question }} Assistant: {{% answer}}
{{# value.history}}
User: {{ question }} Assistant: {{ answer}}
-----------------------------------------------
{{%/ value.history}}
{{/ value.history}}
- role: user
content: "{{% value.question}}"
content: "{{ value.question}}"
- name: "Write conversation history to Astra"
type: "vector-db-sink"
input: "{{ globals.logTopic }}"
input: "${globals.logTopic}"
configuration:
datasource: "AstraVector"
table: "{{ globals.vectorKeyspace}}.{{ globals.chatTable}}"
table: "${globals.vectorKeyspace}.${globals.chatTable}"
mapping: "sessionid=value.sessionid,question=value.questionNoContext,answer=value.answer,prompt=value.prompt,timestamp=now()"
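The core of this change in chatbot.yaml is the split between two substitution styles: ${globals.x} and ${secrets.x} replace the old {{ globals.x }} / {{{ secrets.x }}} references that are resolved when the application is deployed, while the prompt templates drop the % and use plain Mustache ({{ value.x }}, {{# value.history }}) evaluated against each record at runtime. A condensed illustration of the resulting convention (not part of the diff):

# Illustration only: ${...} resolves at deploy time, {{ ... }} per record at runtime.
- name: "ai-chat-completions"
  type: "ai-chat-completions"
  output: "${globals.logTopic}"          # deploy-time global
  configuration:
    model: "${globals.chatModelName}"    # deploy-time global
    completion-field: "value.answer"
    messages:
      - role: user
        content: "{{ value.question }}"  # per-record Mustache template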
45 changes: 34 additions & 11 deletions examples/applications/chatbot-rag-memory/configuration.yaml
@@ -16,25 +16,48 @@
#

configuration:
defaults:
globals:
assistantType: "LangStream project"
vectorKeyspace: "chatbot"
vectorTable: "langstreamdocs"
chatTable: "lsdocshistory"
vectorIndex: "annlangstream"
chunksTopic: "langstream-chunks"
questionsTopic: "langstream-questions"
answersTopic: "langstream-answers"
chatModelName: "gpt-35-turbo"
logTopic: "langstream-logs"
seedUrls:
- "https://docs.langstream.ai/"
- "https://langstream.ai/"
allowedUrls:
- "https://docs.langstream.ai/"
- "https://langstream.ai/"
forbiddenPaths: []
vectorDb: ""
resources:
- type: "open-ai-configuration"
name: "OpenAI Azure configuration"
configuration:
url: "{{ secrets.open-ai.url }}"
access-key: "{{ secrets.open-ai.access-key }}"
url: "${secrets.open-ai.url}"
access-key: "${secrets.open-ai.access-key}"
provider: "azure"
- type: "datasource"
name: "AstraDatasource"
configuration:
service: "astra"
clientId: "{{{ secrets.astra.clientId }}}"
secret: "{{{ secrets.astra.secret }}}"
token: "{{{ secrets.astra.token }}}"
database: "{{{ secrets.astra.database }}}"
service: "astra"
clientId: "${secrets.astra.clientId}"
secret: "${secrets.astra.secret}"
token: "${secrets.astra.token}"
database: "${secrets.astra.database}"
environment: "${secrets.astra.environment}"
- type: "vector-database"
name: "AstraVector"
configuration:
service: "astra"
username: "{{{ secrets.astra.username }}}"
password: "{{{ secrets.astra.password }}}"
secureBundle: "{{{ secrets.astra.secureBundle }}}"
service: "astra"
clientId: "${secrets.astra.clientId}"
secret: "${secrets.astra.secret}"
token: "${secrets.astra.token}"
database: "${secrets.astra.database}"
environment: "${secrets.astra.environment}"
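The datasource and vector-database resources now take their credentials from ${secrets.astra.*} references. These would typically resolve from a secrets file; the fragment below is a hypothetical sketch with placeholder values, and its layout is an assumption based on other LangStream examples rather than something shown in this PR:

# Hypothetical secrets fragment; placeholder values, layout assumed.
secrets:
  - id: astra
    data:
      clientId: "astra-client-id"
      secret: "astra-client-secret"
      token: "AstraCS:placeholder-token"
      database: "my-database"
      environment: "PROD"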
34 changes: 11 additions & 23 deletions examples/applications/chatbot-rag-memory/crawler.yaml
@@ -16,38 +16,26 @@

name: "Crawl a website"
topics:
- name: "{{ globals.chunksTopic }}"
- name: "${globals.chunksTopic}"
creation-mode: create-if-not-exists
deletion-mode: delete
pipeline:
- name: "Crawl the WebSite"
type: "webcrawler-source"
configuration:
seed-urls:
- "{{{ globals.seedUrl1 }}}"
- "{{{ globals.seedUrl2 }}}"
allowed-domains:
- "{{{ globals.allowedUrl1 }}}"
- "{{{ globals.allowedUrl2 }}}"
# forbidden-paths:
# - "{{{ globals.forbiddenUrl1 }}}"
# - "{{{ globals.forbiddenUrl2 }}}"
# - "{{{ globals.forbiddenUrl3 }}}"
# - "{{{ globals.forbiddenUrl4 }}}"
# - "{{{ globals.forbiddenUrl5 }}}"
# - "{{{ globals.forbiddenUrl6 }}}"
# - "{{{ globals.forbiddenUrl7 }}}"
configuration:
seed-urls: "${globals.seedUrls}"
allowed-domains: "${globals.allowedUrls}"
forbidden-paths: "${globals.forbiddenPaths}"
min-time-between-requests: 500
max-unflushed-pages: 100
max-depth: 50
max-urls: 5000
user-agent: "langstream.ai-webcrawler/1.0"
bucketName: "{{{secrets.s3-credentials.bucket-name}}}"
endpoint: "{{{secrets.s3-credentials.endpoint}}}"
access-key: "{{{secrets.s3-credentials.access-key}}}"
secret-key: "{{{secrets.s3-credentials.secret}}}"
region: "{{{secrets.s3-credentials.region}}}"
idle-time: 5
bucketName: "${secrets.s3-credentials.bucket-name}"
endpoint: "${secrets.s3-credentials.endpoint}"
access-key: "${secrets.s3-credentials.access-key}"
secret-key: "${secrets.s3-credentials.secret}"
region: "${secrets.s3-credentials.region}"
- name: "Extract text"
type: "text-extractor"
- name: "Normalise text"
@@ -93,7 +81,7 @@ pipeline:
- name: "compute-embeddings"
id: "step1"
type: "compute-ai-embeddings"
output: "{{ globals.chunksTopic }}"
output: "${globals.chunksTopic}"
configuration:
model: "text-embedding-ada-002" # This needs to match the name of the model deployment, not the base model
embeddings-field: "value.embeddings_vector"
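The crawler source now reads its seed, allowed, and forbidden URL lists from the list-valued globals defined in configuration.yaml instead of numbered per-URL globals. Assuming a ${...} reference to a list-valued global expands to the whole list, the source's effective configuration with the shipped defaults would be roughly:

# Effective values after substitution of the defaults from configuration.yaml (illustration only).
configuration:
  seed-urls:
    - "https://docs.langstream.ai/"
    - "https://langstream.ai/"
  allowed-domains:
    - "https://docs.langstream.ai/"
    - "https://langstream.ai/"
  forbidden-paths: []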
8 changes: 4 additions & 4 deletions examples/applications/chatbot-rag-memory/gateways.yaml
@@ -18,27 +18,27 @@
gateways:
- id: "user-input"
type: produce
topic: "{{ globals.questionsTopic }}"
topic: "${globals.questionsTopic}"
parameters:
- sessionId
authentication:
provider: github
configuration:
clientId: "{{ secrets.github.client-id }}"
clientId: "${secrets.github.client-id}"
produceOptions:
headers:
- key: sessionid
valueFromParameters: sessionId

- id: "bot-output"
type: consume
topic: "{{ globals.answersTopic }}"
topic: "${globals.answersTopic}"
parameters:
- sessionId
authentication:
provider: github
configuration:
clientId: "{{ secrets.github.client-id }}"
clientId: "${secrets.github.client-id}"
consumeOptions:
filters:
headers:
6 changes: 3 additions & 3 deletions examples/applications/chatbot-rag-memory/write-to-astra.yaml
@@ -16,14 +16,14 @@

name: "Write to AstraDB vector database"
topics:
- name: "{{ globals.chunksTopic }}"
- name: "${globals.chunksTopic}"
creation-mode: create-if-not-exists
deletion-mode: delete
pipeline:
- name: "Write to AstraDB"
type: "vector-db-sink"
input: "{{ globals.chunksTopic }}"
input: "${globals.chunksTopic}"
configuration:
datasource: "AstraVector"
table: "{{ globals.vectorKeyspace}}.{{ globals.vectorTable}}"
table: "${globals.vectorKeyspace}.${globals.vectorTable}"
mapping: "filename=value.filename, chunk_id=value.chunk_id, language=value.language, text=value.text, embeddings_vector=value.embeddings_vector, num_tokens=value.chunk_num_tokens"