-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: introduce pipeline arch for gathering sources
- Loading branch information
1 parent
3bf9ccf
commit c94236b
Showing
4 changed files
with
93 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from typing import Any, Callable | ||
|
||
class Artifact:
    """Wrap a value so processing steps can be chained with ``>>``.

    ``Artifact(x) >> f >> g`` calls ``f(x)``, wraps the result in a new
    ``Artifact``, then calls ``g`` on that result, and so on.
    """

    def __init__(self, data: Any):
        """Store ``data`` as the payload handed to the next step."""
        self._data = data

    @property
    def data(self) -> Any:
        """Return the wrapped payload (read-only view of ``_data``)."""
        return self._data

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self._data!r})"

    def __rshift__(self, other: Callable[[Any], Any]) -> "Artifact":
        """Apply ``other`` to the payload and wrap the result in a new Artifact.

        Returns ``NotImplemented`` when ``other`` is not callable, so Python
        falls back to the right operand's ``__rrshift__`` or raises its
        standard ``TypeError`` for unsupported operands.
        """
        if not callable(other):
            return NotImplemented
        return Artifact(other(self._data))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
""" | ||
The goal is to gather many potentially relevant sources for a topic. | ||
These sources are filtered and sorted at a later stage. | ||
""" | ||
from experiments.topic_source_curation.generate_questions import get_urls_for_slug, generate_questions_from_url_list | ||
from app.util.pipeline import Artifact | ||
|
||
def gather_sources(topic_slug):
    """Run ``topic_slug`` through the source-gathering pipeline.

    The slug is wrapped in an ``Artifact`` and passed through
    ``get_urls_for_slug`` and then ``generate_questions_from_url_list``.

    Args:
        topic_slug: Value handed to ``get_urls_for_slug`` as the first
            pipeline input.

    Returns:
        The payload produced by the last pipeline step. It is also printed.
    """
    qs = Artifact(topic_slug) >> get_urls_for_slug >> generate_questions_from_url_list
    # Artifact keeps its payload in ``_data``.
    result = qs._data
    print(result)
    return result
|
||
|
||
if __name__ == '__main__':
    # Manual run of the pipeline with a sample topic slug.
    gather_sources('dogs')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,42 @@ | ||
from langchain.vectorstores.neo4j_vector import Neo4jVector | ||
from langchain.embeddings.openai import OpenAIEmbeddings | ||
model_api = { | ||
'embedding_model': 'text-embedding-ada-002', | ||
} | ||
db = { | ||
'db_url': 'bolt://localhost:7689', | ||
'db_username': 'neo4j', | ||
'db_password': 'password', | ||
} | ||
neo4j_vector = Neo4jVector.from_existing_index( | ||
OpenAIEmbeddings(model=model_api['embedding_model']), | ||
index_name="index", | ||
url=db['db_url'], | ||
username=db['db_username'], | ||
password=db['db_password'], | ||
|
||
class SourceQuerier:
    """Similarity search over an existing Neo4j vector index."""

    # Embedding model name passed to ``OpenAIEmbeddings``.
    model_api = {
        'embedding_model': 'text-embedding-ada-002',
    }
    # Connection settings passed to ``Neo4jVector.from_existing_index``.
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration instead of source.
    db = {
        'db_url': 'bolt://localhost:7689',
        'db_username': 'neo4j',
        'db_password': 'password',
    }

    def __init__(self):
        """Create the vector-store handle and keep it on the instance."""
        self.neo4j_vector = self._get_neo4j_vector()

    @classmethod
    def _get_neo4j_vector(cls):
        """Build a ``Neo4jVector`` for the existing index named ``"index"``.

        Uses the class-level ``model_api`` and ``db`` settings.
        """
        return Neo4jVector.from_existing_index(
            OpenAIEmbeddings(model=cls.model_api['embedding_model']),
            index_name="index",
            url=cls.db['db_url'],
            username=cls.db['db_username'],
            password=cls.db['db_password'],
        )

    def query_sources(self, query, top_k, score_threshold):
        """Return the vector store's matches for ``query``.

        The query is lower-cased before it is handed to
        ``similarity_search_with_relevance_scores``.

        Args:
            query: Text to search for.
            top_k: Passed positionally as the store's second argument.
            score_threshold: Forwarded as the ``score_threshold`` keyword.

        Returns:
            Whatever the vector store returns for the search.
        """
        return self.neo4j_vector.similarity_search_with_relevance_scores(
            query.lower(), top_k, score_threshold=score_threshold
        )
|
||
|
||
if __name__ == '__main__':
    # Manual check: print the source and score of every match, then the
    # number of matches.
    query = 'Why are dogs portrayed mostly negatively in the Bible?'
    top_k = 10000
    querier = SourceQuerier()
    docs = querier.query_sources(query, top_k, 0.9)
    for doc in docs:
        print(doc[0].metadata['source'])
        print(doc[1])
    print(len(docs))