update website

tensorlakeai · Aug 14, 2024 · e46f33a
1 parent 6ccd19d
commit e46f33a
Show file tree

Hide file tree

Showing 5 changed files with 35 additions and 30 deletions.
diff --git a/docs/docs/getting-started-basic.mdx b/docs/docs/getting-started-basic.mdx
@@ -23,6 +23,10 @@ Windows: Use WSL, but we don't actively test.
 **Python Versions**: 3.9-3.11
 </Info>
 
+<Info>
+The code for this guide is available [on GitHub](https://github.com/tensorlakeai/indexify/tree/main/examples/getting_started/website/basic).
+</Info>
+
 ## Prerequisites
 
 Before we begin, make sure you have:
@@ -321,7 +325,7 @@ from indexify import IndexifyClient
 from llama_cpp import Llama
 
 client = IndexifyClient()
-llm = Llama.from_pretrained(repo_id='microsoft/Phi-3-mini-4k-instruct-gguf', filename='*q4.gguf', verbose=False)
+llm = Llama.from_pretrained(repo_id='microsoft/Phi-3-mini-4k-instruct-gguf', filename='*q4.gguf', verbose=False, n_ctx=2048)
 
 # Get entities
 ingested_content_list = client.list_content("wiki_extraction_pipeline")

diff --git a/examples/getting_started/website/basic/graph.yaml b/examples/getting_started/website/basic/graph.yaml
@@ -1,14 +1,15 @@
-name: "wiki_extraction_pipeline" 
+name: "wiki_extraction_pipeline"
 extraction_policies:
-  - extractor: "tensorlake/openai"
+  - extractor: "tensorlake/llama_cpp"
     name: "entity-extractor" 
     input_params:
-      system_prompt: "Extract entities from text, and return the output in JSON format." 
+      system_prompt: "Extract entities from text, and return the output in JSON format."
   - extractor: "tensorlake/chunk-extractor"
-    name: "chunker" 
+    name: "chunker"
     input_params:
-      chunk_size: 1000 
+      chunk_size: 1000
       overlap: 100
   - extractor: "tensorlake/minilm-l6"
     name: "wikiembedding"
-    content_source: "chunker"
+    content_source: "chunker"
+
diff --git a/examples/getting_started/website/basic/ingest.py b/examples/getting_started/website/basic/ingest.py
@@ -11,4 +11,5 @@ def load_data(player):
 
 if __name__ == "__main__":
     load_data("Kevin Durant")
-    load_data("Stephen Curry")
+    load_data("Stephen Curry")
+
diff --git a/examples/getting_started/website/basic/query.py b/examples/getting_started/website/basic/query.py
@@ -1,38 +1,35 @@
 from indexify import IndexifyClient
+from llama_cpp import Llama
 
 client = IndexifyClient()
+llm = Llama.from_pretrained(repo_id='microsoft/Phi-3-mini-4k-instruct-gguf', filename='*q4.gguf',  verbose=False, n_ctx=2048)
 
-ingested_content_list = client.list_content("wiki_extraction_pipeline") 
+# Get entities
+ingested_content_list = client.list_content("wiki_extraction_pipeline")
 content_id = ingested_content_list[0].id
 entities = client.get_extracted_content(
     content_id, 
     "wiki_extraction_pipeline", 
     "entity-extractor")
 
-
+# Get chunks
 chunks = client.get_extracted_content(
     content_id, 
     "wiki_extraction_pipeline", 
-    "chunker") #(1)!
-
-from openai import OpenAI
-
-client_openai = OpenAI()
+    "chunker")
 
 def query_database(question: str, index: str, top_k=3):
-    retrieved_results = client.search_index(name=index, query=question, top_k=top_k) 
+    retrieved_results = client.search_index(name=index, query=question, top_k=top_k)
     context = "\n-".join([item["text"] for item in retrieved_results])
-    response = client_openai.chat.completions.create(
+
+    response = llm.create_chat_completion(
         messages=[
-            {
-                "role": "user",
-                "content": f"Answer the question, based on the context.\n question: {question} \n context: {context}",
-            },
-        ],
-        model="gpt-3.5-turbo",
+            {"role": "system", "content": "You are a helpful assistant that answers questions based on the given context."},
+            {"role": "user", "content": f"Answer the following question based on the context provided:\nQuestion: {question}\nContext: {context}"}
+        ]
     )
-    return response.choices[0].message.content
-
+
+    return response["choices"][0]["message"]["content"]
 
 if __name__ == "__main__":
     index_name = "wiki_extraction_pipeline.wikiembedding.embedding"
@@ -44,4 +41,5 @@ def query_database(question: str, index: str, top_k=3):
             "wiki_extraction_pipeline.wikiembedding.embedding",
             4,
         )
-    )
+    )
+
diff --git a/examples/getting_started/website/basic/setup.py b/examples/getting_started/website/basic/setup.py
@@ -1,10 +1,11 @@
 from indexify import IndexifyClient, ExtractionGraph
 
-client = IndexifyClient() 
+client = IndexifyClient()
 
 def create_extraction_graph():
-    extraction_graph = ExtractionGraph.from_yaml_file("graph.yaml") 
-    client.create_extraction_graph(extraction_graph) 
+    extraction_graph = ExtractionGraph.from_yaml_file("graph.yaml")
+    client.create_extraction_graph(extraction_graph)
 
 if __name__ == "__main__":
-    create_extraction_graph()
+    create_extraction_graph()
+