Skip to content

Commit

Permalink
update website
Browse files Browse the repository at this point in the history
  • Loading branch information
diptanu committed Aug 14, 2024
1 parent 6ccd19d commit e46f33a
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 30 deletions.
6 changes: 5 additions & 1 deletion docs/docs/getting-started-basic.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ Windows: Use WSL, but we don't actively test it.
**Python Versions**: 3.9-3.11
</Info>

<Info>
The code for this guide can be found [here](https://github.com/tensorlakeai/indexify/tree/main/examples/getting_started/website/basic).
</Info>

## Prerequisites

Before we begin, make sure you have:
Expand Down Expand Up @@ -321,7 +325,7 @@ from indexify import IndexifyClient
from llama_cpp import Llama
client = IndexifyClient()
llm = Llama.from_pretrained(repo_id='microsoft/Phi-3-mini-4k-instruct-gguf', filename='*q4.gguf', verbose=False)
llm = Llama.from_pretrained(repo_id='microsoft/Phi-3-mini-4k-instruct-gguf', filename='*q4.gguf', verbose=False, n_ctx=2048)
# Get entities
ingested_content_list = client.list_content("wiki_extraction_pipeline")
Expand Down
13 changes: 7 additions & 6 deletions examples/getting_started/website/basic/graph.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
name: "wiki_extraction_pipeline"
name: "wiki_extraction_pipeline"
extraction_policies:
- extractor: "tensorlake/openai"
- extractor: "tensorlake/llama_cpp"
name: "entity-extractor"
input_params:
system_prompt: "Extract entities from text, and return the output in JSON format."
system_prompt: "Extract entities from text, and return the output in JSON format."
- extractor: "tensorlake/chunk-extractor"
name: "chunker"
name: "chunker"
input_params:
chunk_size: 1000
chunk_size: 1000
overlap: 100
- extractor: "tensorlake/minilm-l6"
name: "wikiembedding"
content_source: "chunker"
content_source: "chunker"

3 changes: 2 additions & 1 deletion examples/getting_started/website/basic/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ def load_data(player):

if __name__ == "__main__":
load_data("Kevin Durant")
load_data("Stephen Curry")
load_data("Stephen Curry")

34 changes: 16 additions & 18 deletions examples/getting_started/website/basic/query.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,35 @@
from indexify import IndexifyClient
from llama_cpp import Llama

client = IndexifyClient()
llm = Llama.from_pretrained(repo_id='microsoft/Phi-3-mini-4k-instruct-gguf', filename='*q4.gguf', verbose=False, n_ctx=2048)

ingested_content_list = client.list_content("wiki_extraction_pipeline")
# Get entities
ingested_content_list = client.list_content("wiki_extraction_pipeline")
content_id = ingested_content_list[0].id
entities = client.get_extracted_content(
content_id,
"wiki_extraction_pipeline",
"entity-extractor")


# Get chunks
chunks = client.get_extracted_content(
content_id,
"wiki_extraction_pipeline",
"chunker") #(1)!

from openai import OpenAI

client_openai = OpenAI()
"chunker")

def query_database(question: str, index: str, top_k=3):
retrieved_results = client.search_index(name=index, query=question, top_k=top_k)
retrieved_results = client.search_index(name=index, query=question, top_k=top_k)
context = "\n-".join([item["text"] for item in retrieved_results])
response = client_openai.chat.completions.create(

response = llm.create_chat_completion(
messages=[
{
"role": "user",
"content": f"Answer the question, based on the context.\n question: {question} \n context: {context}",
},
],
model="gpt-3.5-turbo",
{"role": "system", "content": "You are a helpful assistant that answers questions based on the given context."},
{"role": "user", "content": f"Answer the following question based on the context provided:\nQuestion: {question}\nContext: {context}"}
]
)
return response.choices[0].message.content


return response["choices"][0]["message"]["content"]

if __name__ == "__main__":
index_name = "wiki_extraction_pipeline.wikiembedding.embedding"
Expand All @@ -44,4 +41,5 @@ def query_database(question: str, index: str, top_k=3):
"wiki_extraction_pipeline.wikiembedding.embedding",
4,
)
)
)

9 changes: 5 additions & 4 deletions examples/getting_started/website/basic/setup.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from indexify import IndexifyClient, ExtractionGraph

client = IndexifyClient()
client = IndexifyClient()

def create_extraction_graph():
extraction_graph = ExtractionGraph.from_yaml_file("graph.yaml")
client.create_extraction_graph(extraction_graph)
extraction_graph = ExtractionGraph.from_yaml_file("graph.yaml")
client.create_extraction_graph(extraction_graph)

if __name__ == "__main__":
create_extraction_graph()
create_extraction_graph()

0 comments on commit e46f33a

Please sign in to comment.