Skip to content

Commit

Permalink
Fix cosine normalization (#213)
Browse files Browse the repository at this point in the history
* Enable vectors comparison in tests (with an absolute tolerance of 1e-3)

* Normalize vectors if cosine distance is used

* fix: compare scroll results

---------

Co-authored-by: George Panchuk <george.panchuk@qdrant.tech>
  • Loading branch information
kacperlukawski and joein authored Jul 22, 2023
1 parent 6616e18 commit c7f221e
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 4 deletions.
6 changes: 6 additions & 0 deletions qdrant_client/local/local_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,9 @@ def _update_point(self, point: models.PointStruct) -> None:
for vector_name, named_vectors in self.vectors.items():
vector = vectors.get(vector_name)
if vector is not None:
params = self.get_vector_params(vector_name)
if params.distance == models.Distance.COSINE:
vector = np.array(vector) / np.linalg.norm(vector)
self.vectors[vector_name][idx] = vector
self.deleted_per_vector[vector_name][idx] = 0
else:
Expand Down Expand Up @@ -587,6 +590,9 @@ def _add_point(self, point: models.PointStruct) -> None:
)
else:
vector_np = np.array(vector)
params = self.get_vector_params(vector_name)
if params.distance == models.Distance.COSINE:
vector_np = vector_np / np.linalg.norm(vector_np)
named_vectors[idx] = vector_np
self.vectors[vector_name] = named_vectors
self.deleted_per_vector[vector_name] = np.append(
Expand Down
1 change: 0 additions & 1 deletion qdrant_client/qdrant_client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import warnings
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Union

from qdrant_client import grpc as grpc
Expand Down
16 changes: 13 additions & 3 deletions tests/congruence_tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from qdrant_client import QdrantClient
from qdrant_client.client_base import QdrantBase
from qdrant_client.conversions import common_types as types
from qdrant_client.http import models
from qdrant_client.http.models import VectorStruct
from qdrant_client.local.qdrant_local import QdrantLocal
Expand Down Expand Up @@ -84,7 +85,7 @@ def compare_collections(
compare_client_results(
client_1,
client_2,
lambda client: client.scroll(COLLECTION_NAME, limit=num_vectors * 2),
lambda client: client.scroll(COLLECTION_NAME, with_vectors=True, limit=num_vectors * 2),
)


Expand All @@ -96,11 +97,13 @@ def compare_vectors(vec1: Optional[VectorStruct], vec2: Optional[VectorStruct],

if isinstance(vec1, dict):
for key, value in vec1.items():
assert np.allclose(vec1[key], vec2[key]), (
assert np.allclose(vec1[key], vec2[key], atol=1.0e-3), (
f"res1[{i}].vectors[{key}] = {value}, " f"res2[{i}].vectors[{key}] = {vec2[key]}"
)
else:
assert np.allclose(vec1, vec2), f"res1[{i}].vectors = {vec1}, res2[{i}].vectors = {vec2}"
assert np.allclose(
vec1, vec2, atol=1.0e-3
), f"res1[{i}].vectors = {vec1}, res2[{i}].vectors = {vec2}"


def compare_scored_record(
Expand Down Expand Up @@ -149,6 +152,13 @@ def compare_client_results(
res1 = foo(client1, **kwargs)
res2 = foo(client2, **kwargs)

# scroll results are a (records, next offset) tuple: check that the offsets match, then fall through to compare the records
if isinstance(res1, tuple) and len(res1) == 2:
if isinstance(res1[0], list) and (res1[1] is None or isinstance(res1[1], types.PointId)):
res1, offset1 = res1
res2, offset2 = res2
assert offset1 == offset2, f"offset1 = {offset1}, offset2 = {offset2}"

if isinstance(res1, list):
compare_records(res1, res2)
elif isinstance(res1, models.GroupsResult):
Expand Down

0 comments on commit c7f221e

Please sign in to comment.