Help with Raptor implementation #15472
Replies: 7 comments 6 replies
-
To modify your class to use RAPTOR, update it as follows.

Updated Constructor Code

from llama_index.packs.raptor import RaptorPack
from llama_index.packs.raptor.base import SummaryModule
from llama_index.core import Document, MockEmbedding
from llama_index.core.llms import OpenAI
from llama_index.storage.docstore.redis import RedisDocumentStore
from llama_index.storage.vectorstore.redis import RedisVectorStore
from llama_index.storage.context import StorageContext
from llama_index.packs.raptor.base import RaptorRetriever
class YourClass:
    """Retrieval helper that wires Redis-backed storage to a RAPTOR pack.

    The constructor builds a Redis document store, vector store and index
    store, an OpenAI LLM, a ``RaptorPack`` in ``"collapsed"`` mode, a node
    parser, and the recency / rerank postprocessors applied to query results.
    """

    def __init__(
        self,
        config,
        namespace,
        similarity_top_k,
        recency,
        similarity_top_k_reranker,
        verbose=False,
    ):
        """Build the storage layer, RAPTOR pack, retrievers and postprocessors.

        Args:
            config: Object read with ``.get``; the keys used here are
                ``REDIS_VECTOR_STORE``, ``LLM_ANSWERING_MODEL``,
                ``OPENAI_API_KEY`` and ``RAG_RERANK_MODEL``.
            namespace: Used as the Redis namespace, the vector index name and
                the value matched by the ``namespace`` metadata filter.
            similarity_top_k: Number of results requested from the retrievers.
            recency: ``top_k`` passed to the recency postprocessor.
            similarity_top_k_reranker: ``top_n`` passed to the reranker.
            verbose: Forwarded to the auto-merging retriever.
        """
        # Local import: ``return_nodes`` needs a parser, and importing it here
        # keeps this fix independent of the file's top-level import block.
        from llama_index.core.node_parser import HierarchicalNodeParser

        self.config = config
        self.namespace = namespace
        self.similarity_top_k = similarity_top_k

        # --- Redis-backed storage -------------------------------------
        self.redis_client = redis.from_url(self.config.get("REDIS_VECTOR_STORE"))
        self.docstore = RedisDocumentStore.from_redis_client(
            redis_client=self.redis_client, namespace=self.namespace
        )
        self.vector_store = RedisVectorStore(
            index_name=self.namespace,
            redis_url=self.config.get("REDIS_VECTOR_STORE"),
            metadata_fields=["namespace"],
        )
        self.storage_context = StorageContext.from_defaults(
            docstore=self.docstore,
            index_store=RedisIndexStore.from_redis_client(
                redis_client=self.redis_client, namespace=self.namespace
            ),
            vector_store=self.vector_store,
        )

        # --- LLM and RAPTOR pack --------------------------------------
        self.llm = OpenAI(
            model=self.config.get("LLM_ANSWERING_MODEL"),
            api_key=self.config.get("OPENAI_API_KEY"),
        )
        summary_prompt = "As a professional summarizer, create a concise and comprehensive summary of the provided text, be it an article, post, conversation, or passage with as much detail as possible."
        self.summary_module = SummaryModule(
            llm=self.llm, summary_prompt=summary_prompt, num_workers=16
        )
        # NOTE(review): MockEmbedding is presumably a placeholder; confirm the
        # real embedding model before relying on retrieval quality.
        self.raptor_pack = RaptorPack(
            documents=[],
            embed_model=MockEmbedding(embed_dim=1536),
            llm=self.llm,
            vector_store=self.vector_store,
            similarity_top_k=self.similarity_top_k,
            mode="collapsed",
            summary_module=self.summary_module,
        )

        # Parser used by ``return_nodes``; previously never initialised, so
        # that method failed with AttributeError.
        self.node_parser = HierarchicalNodeParser.from_defaults()

        # --- Base index: reuse the stored one, otherwise start empty ---
        try:
            self.base_index = load_index_from_storage(self.storage_context)
            print("[INFO] Index found at storage")
        except ValueError:
            print("[INFO] No index found at storage")
            self.base_index = VectorStoreIndex(
                nodes=[],
                storage_context=self.storage_context,
                store_nodes_override=True,
            )

        # Restrict base retrieval to nodes tagged with this namespace.
        self.base_retriever = self.base_index.as_retriever(
            similarity_top_k=self.similarity_top_k,
            filters=MetadataFilters(
                filters=[ExactMatchFilter(key="namespace", value=self.namespace)]
            ),
        )
        self.retriever = AutoMergingRetriever(
            self.base_retriever, self.storage_context, verbose=verbose
        )

        # --- Postprocessing and synthesis -----------------------------
        self.recency_postprocessor = FixedRecencyPostprocessor(
            top_k=recency, date_key="creation_date"
        )
        self.postprocessor = SentenceTransformerRerank(
            model=self.config.get("RAG_RERANK_MODEL"),
            top_n=similarity_top_k_reranker,
        )
        self.response_synthesizer = get_response_synthesizer(
            response_mode=ResponseMode.COMPACT, llm=self.llm
        )

    # Updated Helper Functions

    def create_document(self, text, filename):
        """Create one ``Document`` per page, tagged with file metadata.

        Args:
            text: Mapping of page index to page text (iterated via ``.items()``).
            filename: Stored under the ``filename`` metadata key of every page.

        Returns:
            List of documents, one per entry of ``text``, in iteration order.
        """
        # Computed once so every page of the same file carries the same date.
        current_date = datetime.now().strftime("%Y-%m-%d")
        documents = []
        for idx, page in text.items():
            document = Document(text=page)
            document.metadata = {
                "filename": filename,
                "page_number": idx,
                "creation_date": current_date,
                "last_accessed_date": current_date,
                "last_modified_date": current_date,
                "namespace": self.namespace,
            }
            documents.append(document)
        return documents

    def return_nodes(self, docs):
        """Parse ``docs`` into nodes with the instance's node parser."""
        return self.node_parser.get_nodes_from_documents(docs)

    def add_nodes_to_doc_store(self, all_nodes):
        """Add ``all_nodes`` to the document store.

        Raises:
            Exception: If the store rejects the nodes; the original error is
                attached as ``__cause__``.
        """
        try:
            # The store takes the nodes positionally; there is no ``nodes=``
            # keyword, so passing one raised TypeError.
            self.docstore.add_documents(all_nodes)
        except Exception as e:
            print(f"An error occurred when adding documents to doc store: {e}")
            traceback.print_exc()
            raise Exception(
                f"An error occurred when adding documents to doc store: {e}"
            ) from e

    def process_streamlit_fetch_query_results(self, query="", verbose=False):
        """Retrieve nodes for ``query`` via RAPTOR, then apply recency and rerank.

        Args:
            query: Query string passed to the RAPTOR pack and postprocessors.
            verbose: Accepted for interface compatibility; not used here.

        Returns:
            The nodes returned by the reranking postprocessor.

        Raises:
            Exception: If retrieval or postprocessing fails; the original
                error is attached as ``__cause__``.
        """
        try:
            query_bundle = QueryBundle(query_str=query)
            nodes = self.raptor_pack.run(query, mode="collapsed")
            recency_nodes = self.recency_postprocessor.postprocess_nodes(
                nodes, query_bundle=query_bundle
            )
            rerank_nodes = self.postprocessor.postprocess_nodes(
                nodes=recency_nodes, query_bundle=query_bundle
            )
            return rerank_nodes
        except Exception as e:
            traceback.print_exc()
            raise Exception(f"An error occurred retrieving: {e}") from e


# Explanation
This setup should allow you to use RAPTOR in your class.
Beta Was this translation helpful? Give feedback.
-
@dosu use QueryFusionRetriever to merge RAPTOR + BM25 in the generated code.
Beta Was this translation helpful? Give feedback.
-
@dosu you forgot the HierarchicalNodeParser. Should it be used for adding the nodes?
Beta Was this translation helpful? Give feedback.
-
@dosu raise Exception(f"An error occurred retrieving: {e}") |
Beta Was this translation helpful? Give feedback.
-
@dosu in the code below, define my metadata filter properly:
self.raptor_pack = RaptorPack(
documents=[],
embed_model=Settings.embed_model,
llm=self.llm,
vector_store=self.vector_store,
similarity_top_k=self.similarity_top_k,
mode="collapsed",
summary_module=self.summary_module,
)
self.bm25_retriever = BM25Retriever.from_defaults(
docstore=self.docstore, similarity_top_k=self.similarity_top_k
)
self.fusion_retriever = QueryFusionRetriever(
[self.raptor_pack.retriever, self.bm25_retriever],
similarity_top_k=self.similarity_top_k,
num_queries=1, # set this to 1 to disable query generation
mode="reciprocal_rerank",
use_async=True,
verbose=verbose,
)
self.node_parser = HierarchicalNodeParser.from_defaults()
try:
self.base_index = load_index_from_storage(self.storage_context)
print("[INFO] Index found at storage")
except ValueError as e:
print("[INFO] No index found at storage")
self.base_index = VectorStoreIndex(
nodes=[],
storage_context=self.storage_context,
store_nodes_override=True,
)
self.base_retriever = self.base_index.as_retriever(
similarity_top_k=self.similarity_top_k,
filters=MetadataFilters(
filters=[
ExactMatchFilter(key="namespace", value=self.namespace)
]
),
)
self.retriever = AutoMergingRetriever(
self.base_retriever, self.storage_context, verbose=verbose
)
self.recency_postprocessor = FixedRecencyPostprocessor(
top_k=recency, date_key="creation_date"
)
self.postprocessor = SentenceTransformerRerank(
model=self.config.get("RAG_RERANK_MODEL"),
top_n=similarity_top_k_reranker,
)
self.response_synthesizer = get_response_synthesizer(
response_mode=ResponseMode.COMPACT, llm=self.llm
)
The code fragment here does not take into account my metadata filter:
self.fusion_retriever = QueryFusionRetriever(
[self.raptor_pack.retriever, self.bm25_retriever],
similarity_top_k=self.similarity_top_k,
num_queries=1, # set this to 1 to disable query generation
mode="reciprocal_rerank",
use_async=True,
verbose=verbose,
)
The filter should be something like:
filters=MetadataFilters(
filters=[
ExactMatchFilter(key="namespace", value=self.namespace)
]
),
This is being used in the function as:
def process_streamlit_fetch_query_results(self, query="", verbose=False):
try:
query_bundle = QueryBundle(query_str=query)
retrived_nodes = self.fusion_retriever.retrieve(query_bundle)
recency_nodes = self.recency_postprocessor.postprocess_nodes(
retrived_nodes, query_bundle=query_bundle
)
rerank_nodes = self.postprocessor.postprocess_nodes(
nodes=recency_nodes, query_bundle=query_bundle
)
px.active_session().url
return rerank_nodes
except Exception as e:
traceback.print_exc()
raise Exception(f"An error occurred retrieving: {e}") |
Beta Was this translation helpful? Give feedback.
-
@dosu TypeError: QueryFusionRetriever.__init__() got an unexpected keyword argument 'filters'
Beta Was this translation helpful? Give feedback.
-
@dosu this does not work. Can we do it here in the function?
def process_streamlit_fetch_query_results(self, query="", verbose=False):
try:
query_bundle = QueryBundle(query_str=query)
retrived_nodes = self.fusion_retriever.retrieve(query_bundle)
recency_nodes = self.recency_postprocessor.postprocess_nodes(
retrived_nodes, query_bundle=query_bundle
)
rerank_nodes = self.postprocessor.postprocess_nodes(
nodes=recency_nodes, query_bundle=query_bundle
)
px.active_session().url
return rerank_nodes
except Exception as e:
traceback.print_exc()
raise Exception(f"An error occurred retrieving: {e}") |
Beta Was this translation helpful? Give feedback.
-
I have the following initialisation for my class:
constructor code:
I also have the following helper functions:
How can I modify it to use RAPTOR?
Beta Was this translation helpful? Give feedback.
All reactions