chainlit example #1238

Merged: 1 commit, Nov 2, 2023
16 changes: 16 additions & 0 deletions examples/chainlit/Dockerfile
@@ -0,0 +1,16 @@
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the dependency list first so the pip install layer can be cached
COPY requirements.txt /app

# Install any needed packages specified in requirements.txt
RUN pip install -r requirements.txt

# Copy the rest of the application source into the container
COPY . /app

# Run main.py headlessly when the container launches (-h skips opening a browser)
CMD ["chainlit", "run", "main.py", "-h", "--host", "0.0.0.0"]
25 changes: 25 additions & 0 deletions examples/chainlit/README.md
@@ -0,0 +1,25 @@
# LocalAI Demonstration with Embeddings and Chainlit

This demonstration shows how to query existing data in `LocalAI` using embeddings, and how to wrap that in Chainlit for an interactive querying experience. The `llama_index` library drives the embedding and querying processes, `chainlit` provides the interactive interface, and `Weaviate` serves as the vector store holding the embedded data.

## Prerequisites

Before proceeding, make sure you have the following installed:
- A running Weaviate instance (the Python client is installed from `requirements.txt`)
- A running LocalAI endpoint and its dependencies
- Chainlit and its dependencies (also installed from `requirements.txt`)

## Getting Started

1. Clone this repository: `git clone https://github.com/mudler/LocalAI.git`
2. Navigate to the example directory: `cd LocalAI/examples/chainlit`
3. Install the dependencies: `pip install -r requirements.txt`
4. Run the example: `chainlit run main.py` (or build the Docker image, as described below)
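Alternatively, the included `Dockerfile` packages the example; something like `docker build -t chainlit-example .` followed by `docker run -p 8000:8000 chainlit-example` should serve the app on Chainlit's default port 8000. The image tag and port mapping here are illustrative, not prescribed by the example.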

## Highlights: `llama_index` and `chainlit`

`llama_index` is the key library that facilitates embedding and querying data in LocalAI. It ties the pieces together, wrapping the `WeaviateVectorStore`, the `LocalAI` LLM, and the `ServiceContext` into a single querying pipeline.

`chainlit` provides the interactive interface where users query the data and see the results in real time. It integrates with `llama_index` to run each query and stream the results back to the user.

In this example, `llama_index` is used to set up the `VectorStoreIndex` and `QueryEngine`, and `chainlit` is used to handle the user interactions with `LocalAI` and display the results.
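To make that flow concrete, below is a condensed sketch of what `main.py` does end to end, with the defaults from `config.yaml` inlined. The endpoints, index name, and example question are assumptions about your deployment; adjust them to match your services.

```python
import weaviate
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import LocalAI
from llama_index.vector_stores import WeaviateVectorStore

# Assumed endpoints, copied from config.yaml; point these at your own services.
llm = LocalAI(temperature=0, model_name="gpt-3.5-turbo",
              api_base="http://local-ai.default", api_key="stub")
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Wrap an existing, already-populated Weaviate index as a llama_index vector store.
client = weaviate.Client("http://weviate.local")
vector_store = WeaviateVectorStore(weaviate_client=client, index_name="AIChroma")

service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_vector_store(vector_store, service_context=service_context)

# Hybrid retrieval over the single best match, matching config.yaml.
query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid", similarity_top_k=1, alpha=0.0
)
print(query_engine.query("What does the indexed data cover?"))  # hypothetical question
```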

16 changes: 16 additions & 0 deletions examples/chainlit/config.yaml
@@ -0,0 +1,16 @@
localAI:
  temperature: 0
  modelName: gpt-3.5-turbo
  apiBase: http://local-ai.default
  apiKey: stub
  streaming: True
weviate:
  url: http://weviate.local
  index: AIChroma
query:
  mode: hybrid
  topK: 1
  alpha: 0.0
  chunkSize: 1024
embedding:
  model: BAAI/bge-small-en-v1.5
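Note that `main.py` reads this section via `cfg['weviate']`, so the `weviate` spelling must stay in sync between the two files if you rename the key.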
82 changes: 82 additions & 0 deletions examples/chainlit/main.py
@@ -0,0 +1,82 @@
import os

import weaviate
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import WeaviateVectorStore

from llama_index.callbacks.base import CallbackManager
from llama_index import (
    LLMPredictor,
    ServiceContext,
    VectorStoreIndex,
)
import chainlit as cl

from llama_index.llms import LocalAI
from llama_index.embeddings import HuggingFaceEmbedding
import yaml

# Load the configuration file
with open("config.yaml", "r") as ymlfile:
    cfg = yaml.safe_load(ymlfile)

# Get the values from the configuration file or set the default values
temperature = cfg['localAI'].get('temperature', 0)
model_name = cfg['localAI'].get('modelName', "gpt-3.5-turbo")
api_base = cfg['localAI'].get('apiBase', "http://local-ai.default")
api_key = cfg['localAI'].get('apiKey', "stub")
streaming = cfg['localAI'].get('streaming', True)
weaviate_url = cfg['weviate'].get('url', "http://weviate.default")
index_name = cfg['weviate'].get('index', "AIChroma")
query_mode = cfg['query'].get('mode', "hybrid")
topK = cfg['query'].get('topK', 1)
alpha = cfg['query'].get('alpha', 0.0)
embed_model_name = cfg['embedding'].get('model', "BAAI/bge-small-en-v1.5")
chunk_size = cfg['query'].get('chunkSize', 1024)


embed_model = HuggingFaceEmbedding(model_name=embed_model_name)


llm = LocalAI(
    temperature=temperature,
    model_name=model_name,
    api_base=api_base,
    api_key=api_key,
    streaming=streaming,
)
# Route every request through the chat completions endpoint
llm.globally_use_chat_completions = True
client = weaviate.Client(weaviate_url)
vector_store = WeaviateVectorStore(weaviate_client=client, index_name=index_name)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

@cl.on_chat_start
async def factory():
    llm_predictor = LLMPredictor(llm=llm)

    service_context = ServiceContext.from_defaults(
        embed_model=embed_model,
        callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]),
        llm_predictor=llm_predictor,
        chunk_size=chunk_size,
    )

    index = VectorStoreIndex.from_vector_store(
        vector_store,
        storage_context=storage_context,
        service_context=service_context,
    )

    query_engine = index.as_query_engine(
        vector_store_query_mode=query_mode,
        similarity_top_k=topK,
        alpha=alpha,
        streaming=True,
    )

    cl.user_session.set("query_engine", query_engine)


@cl.on_message
async def main(message: cl.Message):
    query_engine = cl.user_session.get("query_engine")
    response = await cl.make_async(query_engine.query)(message.content)

    response_message = cl.Message(content="")

    # Stream tokens to the UI as they arrive
    for token in response.response_gen:
        await response_message.stream_token(token=token)

    if response.response_txt:
        response_message.content = response.response_txt

    await response_message.send()
7 changes: 7 additions & 0 deletions examples/chainlit/requirements.txt
@@ -0,0 +1,7 @@
llama_hub==0.0.41
llama_index==0.8.55
Requests==2.31.0
weaviate_client==3.25.1
transformers
torch
chainlit