From 07d5017ba17fff81fa5bcc22a5a1d10fb15a3ce8 Mon Sep 17 00:00:00 2001 From: bnassivet Date: Sat, 3 Jun 2023 11:11:04 -0400 Subject: [PATCH 1/5] feat: _similarity_search_with_relevance_scores for qdrant vector store --- langchain/vectorstores/qdrant.py | 24 +++++- .../vectorstores/test_qdrant.py | 77 ++++++++++++++++++- 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/langchain/vectorstores/qdrant.py b/langchain/vectorstores/qdrant.py index 5af38b9694d6c..c60e2fd59caf5 100644 --- a/langchain/vectorstores/qdrant.py +++ b/langchain/vectorstores/qdrant.py @@ -213,7 +213,7 @@ def similarity_search( return list(map(itemgetter(0), results)) def similarity_search_with_score( - self, query: str, k: int = 4, filter: Optional[MetadataFilter] = None + self, query: str, k: int = 4, filter: Optional[MetadataFilter] = None, **kwargs ) -> List[Tuple[Document, float]]: """Return docs most similar to query. @@ -243,6 +243,28 @@ def similarity_search_with_score( for result in results ] + def _similarity_search_with_relevance_scores( + self, + query: str, + k: int = 4, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Return docs and relevance scores in the range [0, 1]. + + 0 is dissimilar, 1 is most similar. + + Args: + query: input text + k: Number of Documents to return. Defaults to 4. + **kwargs: kwargs to be passed to similarity search. Should include: + score_threshold: Optional, a floating point value between 0 to 1 to + filter the resulting set of retrieved docs + + Returns: + List of Tuples of (doc, similarity_score) + """ + return self.similarity_search_with_score(query, k, **kwargs) + def max_marginal_relevance_search( self, query: str, diff --git a/tests/integration_tests/vectorstores/test_qdrant.py b/tests/integration_tests/vectorstores/test_qdrant.py index 8362951c6c8b4..b0c4e8abb78f9 100644 --- a/tests/integration_tests/vectorstores/test_qdrant.py +++ b/tests/integration_tests/vectorstores/test_qdrant.py @@ -74,7 +74,6 @@ def test_qdrant_with_metadatas( output = docsearch.similarity_search("foo", k=1) assert output == [Document(page_content="foo", metadata={"page": 0})] - def test_qdrant_similarity_search_filters() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] @@ -99,6 +98,82 @@ def test_qdrant_similarity_search_filters() -> None: ) ] +def test_qdrant_similarity_search_with_relevance_score_no_threshold() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [ + {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}} + for i in range(len(texts)) + ] + docsearch = Qdrant.from_texts( + texts, + FakeEmbeddings(), + metadatas=metadatas, + location=":memory:", + ) + + output = docsearch.similarity_search_with_relevance_scores( + "foo", k=3, score_threshold=None + ) + assert len(output) == 3 + for i in range(len(output)): + assert round(output[i][1], 2) >= 0 + assert round(output[i][1], 2) <= 1 + +def test_qdrant_similarity_search_with_relevance_score_with_threshold() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [ + {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}} + for i in range(len(texts)) + ] + docsearch = Qdrant.from_texts( + texts, + FakeEmbeddings(), + metadatas=metadatas, + location=":memory:", + ) + + score_threshold = 0.98 + kwargs={"score_threshold" : score_threshold} + output = docsearch.similarity_search_with_relevance_scores( + "foo", k=3, **kwargs + ) + print(output) + assert len(output) == 1 + assert all([score >= score_threshold for _, score in output]) + +def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter() -> None: + """Test end to end construction and search.""" + texts = ["foo", "bar", "baz"] + metadatas = [ + {"page": i, "metadata": {"page": i + 1, "pages": [i + 2, -1]}} + for i in range(len(texts)) + ] + docsearch = Qdrant.from_texts( + texts, + FakeEmbeddings(), + metadatas=metadatas, + location=":memory:", + ) + score_threshold = 0.99 # for almost exact match + # test negative filter condition + negative_filter={"page": 1, "metadata": {"page": 2, "pages": [3]}} + kwargs={"filter": negative_filter, "score_threshold" : score_threshold} + output = docsearch.similarity_search_with_relevance_scores( + "foo", k=3, **kwargs + ) + print(output) + assert len(output) == 0 + # test positive filter condition + positive_filter={"page": 0, "metadata": {"page": 1, "pages": [2]}} + kwargs={"filter": positive_filter, "score_threshold" : score_threshold} + output = docsearch.similarity_search_with_relevance_scores( + "foo", k=3, **kwargs + ) + print(output) + assert len(output) == 1 + assert all([score >= score_threshold for _, score in output]) @pytest.mark.parametrize( ["content_payload_key", "metadata_payload_key"], From 7dfc5347fddc3ac1a4db5b2ba6597f23be5f7e02 Mon Sep 17 00:00:00 2001 From: bnassivet Date: Mon, 5 Jun 2023 22:25:38 -0400 Subject: [PATCH 2/5] merge resolution --- tests/integration_tests/vectorstores/test_qdrant.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration_tests/vectorstores/test_qdrant.py b/tests/integration_tests/vectorstores/test_qdrant.py index 461a2e374da40..b04aa27802abc 100644 --- a/tests/integration_tests/vectorstores/test_qdrant.py +++ b/tests/integration_tests/vectorstores/test_qdrant.py @@ -143,7 +143,6 @@ def test_qdrant_similarity_search_with_relevance_score_no_threshold() -> None: metadatas=metadatas, location=":memory:", ) - output = docsearch.similarity_search_with_relevance_scores( "foo", k=3, score_threshold=None ) From 7ccd9756e3d0852c6233d91189871e529c4bd07e Mon Sep 17 00:00:00 2001 From: bnassivet Date: Mon, 5 Jun 2023 22:28:09 -0400 Subject: [PATCH 3/5] removed print instructions --- tests/integration_tests/vectorstores/test_qdrant.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integration_tests/vectorstores/test_qdrant.py b/tests/integration_tests/vectorstores/test_qdrant.py index b04aa27802abc..1082ab917df52 100644 --- a/tests/integration_tests/vectorstores/test_qdrant.py +++ b/tests/integration_tests/vectorstores/test_qdrant.py @@ -170,7 +170,6 @@ def test_qdrant_similarity_search_with_relevance_score_with_threshold() -> None: output = docsearch.similarity_search_with_relevance_scores( "foo", k=3, **kwargs ) - print(output) assert len(output) == 1 assert all([score >= score_threshold for _, score in output]) @@ -194,7 +193,6 @@ def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter output = docsearch.similarity_search_with_relevance_scores( "foo", k=3, **kwargs ) - print(output) assert len(output) == 0 # test positive filter condition positive_filter={"page": 0, "metadata": {"page": 1, "pages": [2]}} @@ -202,7 +200,6 @@ def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter output = docsearch.similarity_search_with_relevance_scores( "foo", k=3, **kwargs ) - print(output) assert len(output) == 1 assert all([score >= score_threshold for _, score in output]) From 553c3dfe75997bdafe2e6a12563c2a978a26ce80 Mon Sep 17 00:00:00 2001 From: bnassivet Date: Mon, 5 Jun 2023 22:46:21 -0400 Subject: [PATCH 4/5] changed signature for linting --- langchain/vectorstores/qdrant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/vectorstores/qdrant.py b/langchain/vectorstores/qdrant.py index 22b49b7df5611..18d1be11be8aa 100644 --- a/langchain/vectorstores/qdrant.py +++ b/langchain/vectorstores/qdrant.py @@ -182,7 +182,7 @@ def similarity_search( return list(map(itemgetter(0), results)) def similarity_search_with_score( - self, query: str, k: int = 4, filter: Optional[MetadataFilter] = None, **kwargs + self, query: str, k: int = 4, filter: Optional[MetadataFilter] = None, **kwargs: Any ) -> List[Tuple[Document, float]]: """Return docs most similar to query. From 4abb183b648a89a10fe4ac784e1c1bc70fa82139 Mon Sep 17 00:00:00 2001 From: bnassivet Date: Tue, 6 Jun 2023 12:54:17 -0400 Subject: [PATCH 5/5] style: reformatted using black --- langchain/vectorstores/qdrant.py | 6 +++- .../vectorstores/test_qdrant.py | 32 +++++++++---------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/langchain/vectorstores/qdrant.py b/langchain/vectorstores/qdrant.py index 18d1be11be8aa..9bc99809a3593 100644 --- a/langchain/vectorstores/qdrant.py +++ b/langchain/vectorstores/qdrant.py @@ -182,7 +182,11 @@ def similarity_search( return list(map(itemgetter(0), results)) def similarity_search_with_score( - self, query: str, k: int = 4, filter: Optional[MetadataFilter] = None, **kwargs: Any + self, + query: str, + k: int = 4, + filter: Optional[MetadataFilter] = None, + **kwargs: Any, ) -> List[Tuple[Document, float]]: """Return docs most similar to query. diff --git a/tests/integration_tests/vectorstores/test_qdrant.py b/tests/integration_tests/vectorstores/test_qdrant.py index 1082ab917df52..aec77cd0a4b8f 100644 --- a/tests/integration_tests/vectorstores/test_qdrant.py +++ b/tests/integration_tests/vectorstores/test_qdrant.py @@ -130,6 +130,7 @@ def test_qdrant_similarity_search_filters(batch_size: int) -> None: ) ] + def test_qdrant_similarity_search_with_relevance_score_no_threshold() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] @@ -151,6 +152,7 @@ def test_qdrant_similarity_search_with_relevance_score_no_threshold() -> None: assert round(output[i][1], 2) >= 0 assert round(output[i][1], 2) <= 1 + def test_qdrant_similarity_search_with_relevance_score_with_threshold() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] @@ -166,14 +168,15 @@ def test_qdrant_similarity_search_with_relevance_score_with_threshold() -> None: ) score_threshold = 0.98 - kwargs={"score_threshold" : score_threshold} - output = docsearch.similarity_search_with_relevance_scores( - "foo", k=3, **kwargs - ) + kwargs = {"score_threshold": score_threshold} + output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs) assert len(output) == 1 assert all([score >= score_threshold for _, score in output]) -def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter() -> None: + +def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter() -> ( + None +): """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] metadatas = [ @@ -186,23 +189,20 @@ def test_qdrant_similarity_search_with_relevance_score_with_threshold_and_filter metadatas=metadatas, location=":memory:", ) - score_threshold = 0.99 # for almost exact match + score_threshold = 0.99 # for almost exact match # test negative filter condition - negative_filter={"page": 1, "metadata": {"page": 2, "pages": [3]}} - kwargs={"filter": negative_filter, "score_threshold" : score_threshold} - output = docsearch.similarity_search_with_relevance_scores( - "foo", k=3, **kwargs - ) + negative_filter = {"page": 1, "metadata": {"page": 2, "pages": [3]}} + kwargs = {"filter": negative_filter, "score_threshold": score_threshold} + output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs) assert len(output) == 0 # test positive filter condition - positive_filter={"page": 0, "metadata": {"page": 1, "pages": [2]}} - kwargs={"filter": positive_filter, "score_threshold" : score_threshold} - output = docsearch.similarity_search_with_relevance_scores( - "foo", k=3, **kwargs - ) + positive_filter = {"page": 0, "metadata": {"page": 1, "pages": [2]}} + kwargs = {"filter": positive_filter, "score_threshold": score_threshold} + output = docsearch.similarity_search_with_relevance_scores("foo", k=3, **kwargs) assert len(output) == 1 assert all([score >= score_threshold for _, score in output]) + def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"]