From 59c47c36e65855720bca8e08cb129f1e53a634e2 Mon Sep 17 00:00:00 2001 From: Jean-Marie Dalmasso Date: Fri, 3 Nov 2023 17:04:52 +0100 Subject: [PATCH 01/12] feat: add ability to delete by metadata only --- src/vecs/collection.py | 58 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/vecs/collection.py b/src/vecs/collection.py index 64c7f36..cc7dbd0 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -368,6 +368,64 @@ def delete(self, ids: Iterable[str]) -> List[str]: ids.extend(sess.execute(stmt).scalars() or []) return ids + def delete_by_metadata(self, metadata: Metadata) -> List[str]: + """ + Deletes vectors from the collection by matching metadata. + + Args: + metadata (Metadata): A dictionary of metadata key-value pairs to match. + + Returns: + List[str]: A list of the identifiers of the deleted vectors. + """ + del_ids = [] + with self.client.Session() as sess: + with sess.begin(): + # Build the filter for the metadata + meta_filter = build_filters(self.table.c.metadata, metadata) + + # Perform the delete operation + stmt = ( + delete(self.table) + .where(meta_filter) + .returning(self.table.c.id) + ) + result = sess.execute(stmt) + del_ids = [row[0] for row in result.fetchall()] + return del_ids + + def delete_vectors(self, ids: Optional[Iterable[str]] = None, metadata: Optional[Metadata] = None) -> List[str]: + if ids is None and metadata is None: + raise VectorDeletionError("Either ids or metadata must be provided.") + + del_ids = [] + + with self.client.Session() as sess: + with sess.begin(): + if ids is not None: + if isinstance(ids, str): + raise ArgError("ids must be a list of strings") + for id_chunk in flu(ids).chunk(12): + stmt = ( + delete(self.table) + .where(self.table.c.id.in_(id_chunk)) + .returning(self.table.c.id) + ) + del_ids.extend(sess.execute(stmt).scalars() or []) + + if metadata is not None: + # Assuming build_filters is a previously defined function + meta_filter = 
build_filters(self.table.c.metadata, metadata) + stmt = ( + delete(self.table) + .where(meta_filter) + .returning(self.table.c.id) + ) + result = sess.execute(stmt) + del_ids.extend([row[0] for row in result.fetchall()]) + + return del_ids + def __getitem__(self, items): """ Fetches a vector from the collection by its identifier. From f6cb650f01878453a8b0d67c7daa7109ff82c217 Mon Sep 17 00:00:00 2001 From: Jean-Marie Dalmasso Date: Fri, 3 Nov 2023 17:11:13 +0100 Subject: [PATCH 02/12] chore: add doc --- src/vecs/collection.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/vecs/collection.py b/src/vecs/collection.py index cc7dbd0..c017c23 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -395,6 +395,16 @@ def delete_by_metadata(self, metadata: Metadata) -> List[str]: return del_ids def delete_vectors(self, ids: Optional[Iterable[str]] = None, metadata: Optional[Metadata] = None) -> List[str]: + """ + Deletes vectors from the collection by matching metadata. + + Args: + ids (Iterable[str]): An iterable of vector identifiers. + metadata (Metadata): A dictionary of metadata key-value pairs to match. + + Returns: + List[str]: A list of the identifiers of the deleted vectors. 
+ """ if ids is None and metadata is None: raise VectorDeletionError("Either ids or metadata must be provided.") From 0b2c4d0afe3f313764d4919543d61e66b1d9ff3c Mon Sep 17 00:00:00 2001 From: Jean-Marie Dalmasso Date: Fri, 3 Nov 2023 17:13:48 +0100 Subject: [PATCH 03/12] chore: remove unnecessary comments --- src/vecs/collection.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/vecs/collection.py b/src/vecs/collection.py index c017c23..2802389 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -381,10 +381,8 @@ def delete_by_metadata(self, metadata: Metadata) -> List[str]: del_ids = [] with self.client.Session() as sess: with sess.begin(): - # Build the filter for the metadata meta_filter = build_filters(self.table.c.metadata, metadata) - # Perform the delete operation stmt = ( delete(self.table) .where(meta_filter) @@ -424,7 +422,6 @@ def delete_vectors(self, ids: Optional[Iterable[str]] = None, metadata: Optional del_ids.extend(sess.execute(stmt).scalars() or []) if metadata is not None: - # Assuming build_filters is a previously defined function meta_filter = build_filters(self.table.c.metadata, metadata) stmt = ( delete(self.table) From 2050a4d86d7b6e6bb6b1c420ffde13d992b6e7aa Mon Sep 17 00:00:00 2001 From: Jean-Marie Dalmasso Date: Sat, 4 Nov 2023 10:09:36 +0100 Subject: [PATCH 04/12] chore: add docs and clean up method --- docs/api.md | 4 ++- src/vecs/collection.py | 64 ++++-------------------------------------- 2 files changed, 9 insertions(+), 59 deletions(-) diff --git a/docs/api.md b/docs/api.md index 45aedb8..4ef3350 100644 --- a/docs/api.md +++ b/docs/api.md @@ -62,10 +62,12 @@ docs.upsert( ## Deleting vectors -Deleting records removes them from the collection. To delete records, specify a list of `ids` to the `delete` method. The ids of the sucessfully deleted records are returned from the method. Note that attempting to delete non-existent records does not raise an error. +Deleting records removes them from the collection. 
To delete records, specify a list of `ids` or metadata filters to the `delete` method. The ids of the successfully deleted records are returned from the method. Note that attempting to delete non-existent records does not raise an error. ```python docs.delete(ids=["vec0", "vec1"]) +# or delete with filters +docs.delete(filters={"year": {"$eq": 2012}}) ``` ## Create an index diff --git a/src/vecs/collection.py b/src/vecs/collection.py index 2802389..8e870ee 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -340,70 +340,18 @@ def fetch(self, ids: Iterable[str]) -> List[Record]: records.extend(chunk_records) return records - def delete(self, ids: Iterable[str]) -> List[str]: + def delete(self, ids: Optional[Iterable[str]] = None, filters: Optional[Metadata] = None) -> List[str]: """ - Deletes vectors from the collection by their identifiers. + Deletes vectors from the collection by matching filters or ids. Args: ids (Iterable[str]): An iterable of vector identifiers. + filters (Metadata): A dictionary of metadata key-value pairs to match. Returns: List[str]: A list of the identifiers of the deleted vectors. """ - if isinstance(ids, str): - raise ArgError("ids must be a list of strings") - - chunk_size = 12 - - del_ids = list(ids) - ids = [] - with self.client.Session() as sess: - with sess.begin(): - for id_chunk in flu(del_ids).chunk(chunk_size): - stmt = ( - delete(self.table) - .where(self.table.c.id.in_(id_chunk)) - .returning(self.table.c.id) - ) - ids.extend(sess.execute(stmt).scalars() or []) - return ids - - def delete_by_metadata(self, metadata: Metadata) -> List[str]: - """ - Deletes vectors from the collection by matching metadata. - - Args: - metadata (Metadata): A dictionary of metadata key-value pairs to match. - - Returns: - List[str]: A list of the identifiers of the deleted vectors. 
- """ - del_ids = [] - with self.client.Session() as sess: - with sess.begin(): - meta_filter = build_filters(self.table.c.metadata, metadata) - - stmt = ( - delete(self.table) - .where(meta_filter) - .returning(self.table.c.id) - ) - result = sess.execute(stmt) - del_ids = [row[0] for row in result.fetchall()] - return del_ids - - def delete_vectors(self, ids: Optional[Iterable[str]] = None, metadata: Optional[Metadata] = None) -> List[str]: - """ - Deletes vectors from the collection by matching metadata. - - Args: - ids (Iterable[str]): An iterable of vector identifiers. - metadata (Metadata): A dictionary of metadata key-value pairs to match. - - Returns: - List[str]: A list of the identifiers of the deleted vectors. - """ - if ids is None and metadata is None: + if ids is None and filters is None: raise VectorDeletionError("Either ids or metadata must be provided.") del_ids = [] @@ -421,8 +369,8 @@ def delete_vectors(self, ids: Optional[Iterable[str]] = None, metadata: Optional ) del_ids.extend(sess.execute(stmt).scalars() or []) - if metadata is not None: - meta_filter = build_filters(self.table.c.metadata, metadata) + if filters is not None: + meta_filter = build_filters(self.table.c.metadata, filters) stmt = ( delete(self.table) .where(meta_filter) From a048db4adab4e8d22df3a2fd78967ac55966e0cb Mon Sep 17 00:00:00 2001 From: Jean-Marie Dalmasso Date: Thu, 9 Nov 2023 17:35:54 +0100 Subject: [PATCH 05/12] fix: PR comments --- src/tests/test_collection.py | 19 +++++++++++ src/vecs/collection.py | 66 ++++++++++++++++++++++-------------- 2 files changed, 59 insertions(+), 26 deletions(-) diff --git a/src/tests/test_collection.py b/src/tests/test_collection.py index 8112151..af9d2ab 100644 --- a/src/tests/test_collection.py +++ b/src/tests/test_collection.py @@ -101,14 +101,33 @@ def test_delete(client: vecs.Client) -> None: # insert works movies.upsert(records) + # delete by IDs. 
delete_ids = ["vec0", "vec15", "vec99"] movies.delete(ids=delete_ids) assert len(movies) == n_records - len(delete_ids) + # insert works + movies.upsert(records) + + # delete with filters + genre_to_delete = "action" + deleted_ids_by_genre = movies.delete( + filters={"genre": {"$eq": genre_to_delete}}) + assert len(deleted_ids_by_genre) > 0 + # bad input with pytest.raises(vecs.exc.ArgError): movies.delete(ids="should_be_a_list") + # bad input: neither ids nor filters provided. + with pytest.raises(vecs.exc.ArgError): + movies.delete() + + # bad input: should only provide either ids or filters, not both + with pytest.raises(vecs.exc.ArgError): + movies.delete(ids=["vec0"], filters={ + "genre": {"$eq": genre_to_delete}}) + def test_repr(client: vecs.Client) -> None: movies = client.get_or_create_collection(name="movies", dimension=99) diff --git a/src/vecs/collection.py b/src/vecs/collection.py index 8e870ee..d752f8a 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -154,7 +154,8 @@ def __init__( ] ) if len(reported_dimensions) == 0: - raise ArgError("One of dimension or adapter must provide a dimension") + raise ArgError( + "One of dimension or adapter must provide a dimension") elif len(reported_dimensions) > 1: raise MismatchedDimension( "Dimensions reported by adapter, dimension, and collection do not match" @@ -335,7 +336,8 @@ def fetch(self, ids: Iterable[str]) -> List[Record]: with self.client.Session() as sess: with sess.begin(): for id_chunk in flu(ids).chunk(chunk_size): - stmt = select(self.table).where(self.table.c.id.in_(id_chunk)) + stmt = select(self.table).where( + self.table.c.id.in_(id_chunk)) chunk_records = sess.execute(stmt) records.extend(chunk_records) return records @@ -352,32 +354,37 @@ def delete(self, ids: Optional[Iterable[str]] = None, filters: Optional[Metadata List[str]: A list of the identifiers of the deleted vectors. 
""" if ids is None and filters is None: - raise VectorDeletionError("Either ids or metadata must be provided.") + raise ArgError("Either ids or filters must be provided.") + + if ids is not None and filters is not None: + raise ArgError( + "Either ids or filters must be provided, not both.") + + if isinstance(ids, str): + raise ArgError("ids must be a list of strings") + ids = ids or [] + filters = filters or {} del_ids = [] with self.client.Session() as sess: with sess.begin(): - if ids is not None: - if isinstance(ids, str): - raise ArgError("ids must be a list of strings") - for id_chunk in flu(ids).chunk(12): - stmt = ( - delete(self.table) - .where(self.table.c.id.in_(id_chunk)) - .returning(self.table.c.id) - ) - del_ids.extend(sess.execute(stmt).scalars() or []) - - if filters is not None: - meta_filter = build_filters(self.table.c.metadata, filters) + for id_chunk in flu(ids).chunk(12): stmt = ( delete(self.table) - .where(meta_filter) + .where(self.table.c.id.in_(id_chunk)) .returning(self.table.c.id) ) - result = sess.execute(stmt) - del_ids.extend([row[0] for row in result.fetchall()]) + del_ids.extend(sess.execute(stmt).scalars() or []) + + meta_filter = build_filters(self.table.c.metadata, filters) + stmt = ( + delete(self.table) + .where(meta_filter) + .returning(self.table.c.id) + ) + result = sess.execute(stmt) + del_ids.extend([row[0] for row in result.fetchall()]) return del_ids @@ -473,7 +480,8 @@ def query( ] if len(adapted_query) != 1: - raise ArgError("Failed to produce exactly one query vector from input") + raise ArgError( + "Failed to produce exactly one query vector from input") _, vec, _ = adapted_query[0] @@ -494,7 +502,8 @@ def query( stmt = select(*cols) if filters: - stmt = stmt.filter(build_filters(self.table.c.metadata, filters)) # type: ignore + stmt = stmt.filter(build_filters( + self.table.c.metadata, filters)) # type: ignore stmt = stmt.order_by(distance_clause) stmt = stmt.limit(limit) @@ -503,7 +512,8 @@ def query( with 
sess.begin(): # index ignored if greater than n_lists sess.execute( - text("set local ivfflat.probes = :probes").bindparams(probes=probes) + text("set local ivfflat.probes = :probes").bindparams( + probes=probes) ) if self.client._supports_hnsw(): sess.execute( @@ -693,10 +703,12 @@ def create_index( sess.execute(text(f'drop index vecs."{self.index}";')) self._index = None else: - raise ArgError("replace is set to False but an index exists") + raise ArgError( + "replace is set to False but an index exists") if method == IndexMethod.ivfflat: - n_records: int = sess.execute(func.count(self.table.c.id)).scalar() # type: ignore + n_records: int = sess.execute(func.count( + self.table.c.id)).scalar() # type: ignore n_lists = ( int(max(n_records / 1000, 30)) @@ -785,7 +797,8 @@ def build_filters(json_col: Column, filters: Dict): if operator == "$in": if not isinstance(clause, list): - raise FilterError("argument to $in filter must be a list") + raise FilterError( + "argument to $in filter must be a list") for elem in clause: if not isinstance(elem, (int, str, float)): @@ -795,7 +808,8 @@ def build_filters(json_col: Column, filters: Dict): # cast the array of scalars to a postgres array of jsonb so we can # directly compare json types in the query - contains_value = [cast(elem, postgresql.JSONB) for elem in clause] + contains_value = [cast(elem, postgresql.JSONB) + for elem in clause] return json_col.op("->")(key).in_(contains_value) matches_value = cast(clause, postgresql.JSONB) From c35f5d9e922d782d605f1f5a5f11933c49c38dc2 Mon Sep 17 00:00:00 2001 From: Oliver Rice Date: Tue, 14 Nov 2023 16:09:15 -0600 Subject: [PATCH 06/12] apply pre-commit hooks to reduce diff size --- src/tests/test_collection.py | 6 ++---- src/vecs/collection.py | 39 ++++++++++++++---------------------- 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/src/tests/test_collection.py b/src/tests/test_collection.py index a806d18..7d1c761 100644 --- a/src/tests/test_collection.py +++ 
b/src/tests/test_collection.py @@ -111,8 +111,7 @@ def test_delete(client: vecs.Client) -> None: # delete with filters genre_to_delete = "action" - deleted_ids_by_genre = movies.delete( - filters={"genre": {"$eq": genre_to_delete}}) + deleted_ids_by_genre = movies.delete(filters={"genre": {"$eq": genre_to_delete}}) assert len(deleted_ids_by_genre) > 0 # bad input @@ -125,8 +124,7 @@ def test_delete(client: vecs.Client) -> None: # bad input: should only provide either ids or filters, not both with pytest.raises(vecs.exc.ArgError): - movies.delete(ids=["vec0"], filters={ - "genre": {"$eq": genre_to_delete}}) + movies.delete(ids=["vec0"], filters={"genre": {"$eq": genre_to_delete}}) def test_repr(client: vecs.Client) -> None: diff --git a/src/vecs/collection.py b/src/vecs/collection.py index 4e3717d..879ac35 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -189,8 +189,7 @@ def __init__( ] ) if len(reported_dimensions) == 0: - raise ArgError( - "One of dimension or adapter must provide a dimension") + raise ArgError("One of dimension or adapter must provide a dimension") elif len(reported_dimensions) > 1: raise MismatchedDimension( "Dimensions reported by adapter, dimension, and collection do not match" @@ -371,13 +370,14 @@ def fetch(self, ids: Iterable[str]) -> List[Record]: with self.client.Session() as sess: with sess.begin(): for id_chunk in flu(ids).chunk(chunk_size): - stmt = select(self.table).where( - self.table.c.id.in_(id_chunk)) + stmt = select(self.table).where(self.table.c.id.in_(id_chunk)) chunk_records = sess.execute(stmt) records.extend(chunk_records) return records - def delete(self, ids: Optional[Iterable[str]] = None, filters: Optional[Metadata] = None) -> List[str]: + def delete( + self, ids: Optional[Iterable[str]] = None, filters: Optional[Metadata] = None + ) -> List[str]: """ Deletes vectors from the collection by matching filters or ids. 
@@ -392,8 +392,7 @@ def delete(self, ids: Optional[Iterable[str]] = None, filters: Optional[Metadata raise ArgError("Either ids or filters must be provided.") if ids is not None and filters is not None: - raise ArgError( - "Either ids or filters must be provided, not both.") + raise ArgError("Either ids or filters must be provided, not both.") if isinstance(ids, str): raise ArgError("ids must be a list of strings") @@ -413,11 +412,7 @@ def delete(self, ids: Optional[Iterable[str]] = None, filters: Optional[Metadata del_ids.extend(sess.execute(stmt).scalars() or []) meta_filter = build_filters(self.table.c.metadata, filters) - stmt = ( - delete(self.table) - .where(meta_filter) - .returning(self.table.c.id) - ) + stmt = delete(self.table).where(meta_filter).returning(self.table.c.id) result = sess.execute(stmt) del_ids.extend([row[0] for row in result.fetchall()]) @@ -515,8 +510,7 @@ def query( ] if len(adapted_query) != 1: - raise ArgError( - "Failed to produce exactly one query vector from input") + raise ArgError("Failed to produce exactly one query vector from input") _, vec, _ = adapted_query[0] @@ -537,8 +531,9 @@ def query( stmt = select(*cols) if filters: - stmt = stmt.filter(build_filters( - self.table.c.metadata, filters)) # type: ignore + stmt = stmt.filter( + build_filters(self.table.c.metadata, filters) + ) # type: ignore stmt = stmt.order_by(distance_clause) stmt = stmt.limit(limit) @@ -547,8 +542,7 @@ def query( with sess.begin(): # index ignored if greater than n_lists sess.execute( - text("set local ivfflat.probes = :probes").bindparams( - probes=probes) + text("set local ivfflat.probes = :probes").bindparams(probes=probes) ) if self.client._supports_hnsw(): sess.execute( @@ -762,8 +756,7 @@ def create_index( sess.execute(text(f'drop index vecs."{self.index}";')) self._index = None else: - raise ArgError( - "replace is set to False but an index exists") + raise ArgError("replace is set to False but an index exists") if method == IndexMethod.ivfflat: 
if not index_arguments: @@ -872,8 +865,7 @@ def build_filters(json_col: Column, filters: Dict): if operator == "$in": if not isinstance(clause, list): - raise FilterError( - "argument to $in filter must be a list") + raise FilterError("argument to $in filter must be a list") for elem in clause: if not isinstance(elem, (int, str, float)): @@ -883,8 +875,7 @@ def build_filters(json_col: Column, filters: Dict): # cast the array of scalars to a postgres array of jsonb so we can # directly compare json types in the query - contains_value = [cast(elem, postgresql.JSONB) - for elem in clause] + contains_value = [cast(elem, postgresql.JSONB) for elem in clause] return json_col.op("->")(key).in_(contains_value) matches_value = cast(clause, postgresql.JSONB) From 6af1324ff7bddca2696138b00c9d7effde7f892c Mon Sep 17 00:00:00 2001 From: Oliver Rice Date: Tue, 14 Nov 2023 16:16:58 -0600 Subject: [PATCH 07/12] remove extra sql roundtrip on Collection.delete --- src/vecs/collection.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/vecs/collection.py b/src/vecs/collection.py index 879ac35..a22e3f0 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -403,18 +403,22 @@ def delete( with self.client.Session() as sess: with sess.begin(): - for id_chunk in flu(ids).chunk(12): + if ids: + for id_chunk in flu(ids).chunk(12): + stmt = ( + delete(self.table) + .where(self.table.c.id.in_(id_chunk)) + .returning(self.table.c.id) + ) + del_ids.extend(sess.execute(stmt).scalars() or []) + + if filters: + meta_filter = build_filters(self.table.c.metadata, filters) stmt = ( - delete(self.table) - .where(self.table.c.id.in_(id_chunk)) - .returning(self.table.c.id) + delete(self.table).where(meta_filter).returning(self.table.c.id) # type: ignore ) - del_ids.extend(sess.execute(stmt).scalars() or []) - - meta_filter = build_filters(self.table.c.metadata, filters) - stmt = delete(self.table).where(meta_filter).returning(self.table.c.id) 
- result = sess.execute(stmt) - del_ids.extend([row[0] for row in result.fetchall()]) + result = sess.execute(stmt) + del_ids.extend([row[0] for row in result.fetchall()]) return del_ids From 9d88aec485eed97a6c9bd813ce11680b4b911b59 Mon Sep 17 00:00:00 2001 From: Oliver Rice Date: Tue, 14 Nov 2023 16:17:44 -0600 Subject: [PATCH 08/12] bugfix: empty filter match attempts index access on None --- src/vecs/collection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vecs/collection.py b/src/vecs/collection.py index a22e3f0..1c2129a 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -417,8 +417,8 @@ def delete( stmt = ( delete(self.table).where(meta_filter).returning(self.table.c.id) # type: ignore ) - result = sess.execute(stmt) - del_ids.extend([row[0] for row in result.fetchall()]) + result = sess.execute(stmt).scalars() + del_ids.extend([row for row in result.fetchall()]) return del_ids From 9b64db4888bdc5f032180186749f8b7a95bf91cb Mon Sep 17 00:00:00 2001 From: Oliver Rice Date: Tue, 14 Nov 2023 16:18:29 -0600 Subject: [PATCH 09/12] remove unused list comprehension --- src/vecs/collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vecs/collection.py b/src/vecs/collection.py index 1c2129a..faf09f8 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -418,7 +418,7 @@ def delete( delete(self.table).where(meta_filter).returning(self.table.c.id) # type: ignore ) result = sess.execute(stmt).scalars() - del_ids.extend([row for row in result.fetchall()]) + del_ids.extend(result.fetchall()) return del_ids From 02d4a10d7a81e1d5dcab52702bbf7460b948be7a Mon Sep 17 00:00:00 2001 From: Oliver Rice Date: Tue, 14 Nov 2023 16:19:03 -0600 Subject: [PATCH 10/12] minor docs rephrasing --- docs/api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api.md b/docs/api.md index 7526f10..6e1764a 100644 --- a/docs/api.md +++ b/docs/api.md @@ -66,7 +66,7 @@ Deleting records 
removes them from the collection. To delete records, specify a ```python docs.delete(ids=["vec0", "vec1"]) -# or delete with filters +# or delete by a metadata filter docs.delete(filters={"year": {"$eq": 2012}}) ``` From b8efc7b42c608a0ae3cbf662a78a3121c9d734d0 Mon Sep 17 00:00:00 2001 From: Oliver Rice Date: Tue, 14 Nov 2023 16:26:34 -0600 Subject: [PATCH 11/12] remove random genre in test so exact delete count is deterministic --- src/tests/test_collection.py | 10 +++++++--- src/vecs/collection.py | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/tests/test_collection.py b/src/tests/test_collection.py index 7d1c761..e7c4b38 100644 --- a/src/tests/test_collection.py +++ b/src/tests/test_collection.py @@ -1,3 +1,4 @@ +import itertools import random import numpy as np @@ -91,11 +92,14 @@ def test_delete(client: vecs.Client) -> None: f"vec{ix}", vec, { - "genre": random.choice(["action", "rom-com", "drama"]), + "genre": genre, "year": int(50 * random.random()) + 1970, }, ) - for ix, vec in enumerate(np.random.random((n_records, dim))) + for (ix, vec), genre in zip( + enumerate(np.random.random((n_records, dim))), + itertools.cycle(["action", "rom-com", "drama"]), + ) ] # insert works @@ -112,7 +116,7 @@ def test_delete(client: vecs.Client) -> None: # delete with filters genre_to_delete = "action" deleted_ids_by_genre = movies.delete(filters={"genre": {"$eq": genre_to_delete}}) - assert len(deleted_ids_by_genre) > 0 + assert len(deleted_ids_by_genre) == 34 # bad input with pytest.raises(vecs.exc.ArgError): diff --git a/src/vecs/collection.py b/src/vecs/collection.py index faf09f8..f772862 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -536,8 +536,8 @@ def query( stmt = select(*cols) if filters: stmt = stmt.filter( - build_filters(self.table.c.metadata, filters) - ) # type: ignore + build_filters(self.table.c.metadata, filters) # type: ignore + ) stmt = stmt.order_by(distance_clause) stmt = stmt.limit(limit) From 
7f1833323ccf9cfc91b61d8e2452d45becca2eb3 Mon Sep 17 00:00:00 2001 From: Oliver Rice Date: Tue, 14 Nov 2023 16:29:10 -0600 Subject: [PATCH 12/12] tag args as optional in docstring --- src/vecs/collection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vecs/collection.py b/src/vecs/collection.py index f772862..7ff7884 100644 --- a/src/vecs/collection.py +++ b/src/vecs/collection.py @@ -382,8 +382,8 @@ def delete( Deletes vectors from the collection by matching filters or ids. Args: - ids (Iterable[str]): An iterable of vector identifiers. - filters (Metadata): A dictionary of metadata key-value pairs to match. + ids (Iterable[str], optional): An iterable of vector identifiers. Defaults to None. + filters (Optional[Dict], optional): Metadata filters selecting the vectors to delete. Defaults to None. Returns: List[str]: A list of the identifiers of the deleted vectors.