diff --git a/redis/commands/search/commands.py b/redis/commands/search/commands.py index 2df2b5a754..764ad4c2b8 100644 --- a/redis/commands/search/commands.py +++ b/redis/commands/search/commands.py @@ -9,6 +9,8 @@ from ._util import to_string from .aggregation import AggregateRequest, AggregateResult, Cursor from .document import Document +from .field import Field +from .indexDefinition import IndexDefinition from .query import Query from .result import Result from .suggestion import SuggestionParser @@ -151,44 +153,43 @@ def batch_indexer(self, chunk_size=100): def create_index( self, - fields, - no_term_offsets=False, - no_field_flags=False, - stopwords=None, - definition=None, + fields: List[Field], + no_term_offsets: bool = False, + no_field_flags: bool = False, + stopwords: Optional[List[str]] = None, + definition: Optional[IndexDefinition] = None, max_text_fields=False, temporary=None, - no_highlight=False, - no_term_frequencies=False, - skip_initial_scan=False, + no_highlight: bool = False, + no_term_frequencies: bool = False, + skip_initial_scan: bool = False, ): """ - Create the search index. The index must not already exist. - - ### Parameters: - - - **fields**: a list of TextField or NumericField objects - - **no_term_offsets**: If true, we will not save term offsets in - the index - - **no_field_flags**: If true, we will not save field flags that - allow searching in specific fields - - **stopwords**: If not None, we create the index with this custom - stopword list. The list can be empty - - **max_text_fields**: If true, we will encode indexes as if there - were more than 32 text fields which allows you to add additional - fields (beyond 32). - - **temporary**: Create a lightweight temporary index which will - expire after the specified period of inactivity (in seconds). The - internal idle timer is reset whenever the index is searched or added to. - - **no_highlight**: If true, disabling highlighting support. - Also implied by no_term_offsets. 
- - **no_term_frequencies**: If true, we avoid saving the term frequencies - in the index. - - **skip_initial_scan**: If true, we do not scan and index. - - For more information see `FT.CREATE `_. - """ # noqa + Creates the search index. The index must not already exist. + + For more information, see https://redis.io/commands/ft.create/ + + Args: + fields: A list of Field objects. + no_term_offsets: If true, term offsets will not be saved in the index. + no_field_flags: If true, field flags that allow searching in specific fields + will not be saved. + stopwords: If provided, the index will be created with this custom stopword + list. The list can be empty. + definition: If provided, the index will be created with this custom index + definition. + max_text_fields: If true, indexes will be encoded as if there were more than + 32 text fields, allowing for additional fields beyond 32. + temporary: Creates a lightweight temporary index which will expire after the + specified period of inactivity (in seconds). The internal idle timer is + reset whenever the index is searched or added to. + no_highlight: If true, disables highlighting support. Also implied by + `no_term_offsets`. + no_term_frequencies: If true, term frequencies will not be saved in the + index. + skip_initial_scan: If true, the initial scan and indexing will be skipped. + """ args = [CREATE_CMD, self.index_name] if definition is not None: args += definition.args diff --git a/redis/commands/search/field.py b/redis/commands/search/field.py index f316ed9f14..8af7777f19 100644 --- a/redis/commands/search/field.py +++ b/redis/commands/search/field.py @@ -4,6 +4,10 @@ class Field: + """ + A class representing a field in a document. 
+ """ + NUMERIC = "NUMERIC" TEXT = "TEXT" WEIGHT = "WEIGHT" @@ -14,6 +18,8 @@ class Field: NOINDEX = "NOINDEX" AS = "AS" GEOSHAPE = "GEOSHAPE" + INDEX_MISSING = "INDEXMISSING" + INDEX_EMPTY = "INDEXEMPTY" def __init__( self, @@ -21,8 +27,24 @@ def __init__( args: List[str] = None, sortable: bool = False, no_index: bool = False, + index_missing: bool = False, + index_empty: bool = False, as_name: str = None, ): + """ + Create a new field object. + + Args: + name: The name of the field. + args: Optional list of string arguments for the field. An empty list is used when omitted. + sortable: If `True`, the field will be sortable. + no_index: If `True`, the field will not be indexed. + index_missing: If `True`, it will be possible to search for documents that + have this field missing. + index_empty: If `True`, it will be possible to search for documents that + have this field empty. + as_name: If provided, this alias will be used for the field. + """ if args is None: args = [] self.name = name @@ -34,6 +56,10 @@ def __init__( self.args_suffix.append(Field.SORTABLE) if no_index: self.args_suffix.append(Field.NOINDEX) + if index_missing: + self.args_suffix.append(Field.INDEX_MISSING) + if index_empty: + self.args_suffix.append(Field.INDEX_EMPTY) if no_index and not sortable: raise ValueError("Non-Sortable non-Indexable fields are ignored") diff --git a/tests/test_search.py b/tests/test_search.py index 7d335f6cec..cceb12a547 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -2105,7 +2105,7 @@ def test_geo_params(client): params_dict = {"lat": "34.95126", "lon": "29.69465", "radius": 1000, "units": "km"} q = Query("@g:[$lon $lat $radius $units]").dialect(2) res = client.ft().search(q, query_params=params_dict) - _assert_geosearch_result(client, res, ["doc1", "doc2", "doc3"]) + _assert_search_result(client, res, ["doc1", "doc2", "doc3"]) @pytest.mark.redismod @@ -2122,13 +2122,13 @@ def test_geoshapes_query_intersects_and_disjoint(client): Query("@g:[intersects $shape]").dialect(3), query_params={"shape": "POLYGON((15 15, 75 15, 50 70, 20 40, 
15 15))"}, ) - _assert_geosearch_result(client, intersection, ["doc_point2", "doc_polygon1"]) + _assert_search_result(client, intersection, ["doc_point2", "doc_polygon1"]) disjunction = client.ft().search( Query("@g:[disjoint $shape]").dialect(3), query_params={"shape": "POLYGON((15 15, 75 15, 50 70, 20 40, 15 15))"}, ) - _assert_geosearch_result(client, disjunction, ["doc_point1", "doc_polygon2"]) + _assert_search_result(client, disjunction, ["doc_point1", "doc_polygon2"]) @pytest.mark.redismod @@ -2146,19 +2146,19 @@ def test_geoshapes_query_contains_and_within(client): Query("@g:[contains $shape]").dialect(3), query_params={"shape": "POINT(25 25)"}, ) - _assert_geosearch_result(client, contains_a, ["doc_polygon1"]) + _assert_search_result(client, contains_a, ["doc_polygon1"]) contains_b = client.ft().search( Query("@g:[contains $shape]").dialect(3), query_params={"shape": "POLYGON((24 24, 24 26, 25 25, 24 24))"}, ) - _assert_geosearch_result(client, contains_b, ["doc_polygon1"]) + _assert_search_result(client, contains_b, ["doc_polygon1"]) within = client.ft().search( Query("@g:[within $shape]").dialect(3), query_params={"shape": "POLYGON((15 15, 75 15, 50 70, 20 40, 15 15))"}, ) - _assert_geosearch_result(client, within, ["doc_point2", "doc_polygon1"]) + _assert_search_result(client, within, ["doc_point2", "doc_polygon1"]) @pytest.mark.redismod @@ -2322,19 +2322,153 @@ def test_geoshape(client: redis.Redis): q2 = Query("@geom:[CONTAINS $poly]").dialect(3) qp2 = {"poly": "POLYGON((2 2, 2 50, 50 50, 50 2, 2 2))"} result = client.ft().search(q1, query_params=qp1) - _assert_geosearch_result(client, result, ["small"]) + _assert_search_result(client, result, ["small"]) result = client.ft().search(q2, query_params=qp2) - _assert_geosearch_result(client, result, ["small", "large"]) + _assert_search_result(client, result, ["small", "large"]) -def _assert_geosearch_result(client, result, expected_doc_ids): +@pytest.mark.redismod +def test_search_missing_fields(client): + 
definition = IndexDefinition(prefix=["property:"], index_type=IndexType.HASH) + + fields = [ + TextField("title", sortable=True), + TagField("features", index_missing=True), + TextField("description", index_missing=True), + ] + + client.ft().create_index(fields, definition=definition) + + # All fields present + client.hset( + "property:1", + mapping={ + "title": "Luxury Villa in Malibu", + "features": "pool,sea view,modern", + "description": "A stunning modern villa overlooking the Pacific Ocean.", + }, + ) + + # Missing features + client.hset( + "property:2", + mapping={ + "title": "Downtown Flat", + "description": "Modern flat in central Paris with easy access to metro.", + }, + ) + + # Missing description + client.hset( + "property:3", + mapping={ + "title": "Beachfront Bungalow", + "features": "beachfront,sun deck", + }, + ) + + with pytest.raises(redis.exceptions.ResponseError) as e: + client.ft().search( + Query("ismissing(@title)").dialect(5).return_field("id").no_content() + ) + assert "to be defined with 'INDEXMISSING'" in e.value.args[0] + + res = client.ft().search( + Query("ismissing(@features)").dialect(5).return_field("id").no_content() + ) + _assert_search_result(client, res, ["property:2"]) + + res = client.ft().search( + Query("-ismissing(@features)").dialect(5).return_field("id").no_content() + ) + _assert_search_result(client, res, ["property:1", "property:3"]) + + res = client.ft().search( + Query("ismissing(@description)").dialect(5).return_field("id").no_content() + ) + _assert_search_result(client, res, ["property:3"]) + + res = client.ft().search( + Query("-ismissing(@description)").dialect(5).return_field("id").no_content() + ) + _assert_search_result(client, res, ["property:1", "property:2"]) + + +@pytest.mark.redismod +def test_search_empty_fields(client): + definition = IndexDefinition(prefix=["property:"], index_type=IndexType.HASH) + + fields = [ + TextField("title", sortable=True), + TagField("features", index_empty=True), + 
TextField("description", index_empty=True), + ] + + client.ft().create_index(fields, definition=definition) + + # All fields present + client.hset( + "property:1", + mapping={ + "title": "Luxury Villa in Malibu", + "features": "pool,sea view,modern", + "description": "A stunning modern villa overlooking the Pacific Ocean.", + }, + ) + + # Empty features + client.hset( + "property:2", + mapping={ + "title": "Downtown Flat", + "features": "", + "description": "Modern flat in central Paris with easy access to metro.", + }, + ) + + # Empty description + client.hset( + "property:3", + mapping={ + "title": "Beachfront Bungalow", + "features": "beachfront,sun deck", + "description": "", + }, + ) + + with pytest.raises(redis.exceptions.ResponseError) as e: + client.ft().search( + Query("@title:''").dialect(5).return_field("id").no_content() + ) + assert "to be defined with `INDEXEMPTY`" in e.value.args[0] + + res = client.ft().search( + Query("@features:{ }").dialect(5).return_field("id").no_content() + ) + _assert_search_result(client, res, ["property:2"]) + + res = client.ft().search( + Query("-@features:{ }").dialect(5).return_field("id").no_content() + ) + _assert_search_result(client, res, ["property:1", "property:3"]) + + res = client.ft().search( + Query("@description:''").dialect(5).return_field("id").no_content() + ) + _assert_search_result(client, res, ["property:3"]) + + res = client.ft().search( + Query("-@description:''").dialect(5).return_field("id").no_content() + ) + _assert_search_result(client, res, ["property:1", "property:2"]) + + +def _assert_search_result(client, result, expected_doc_ids): """ Make sure the result of a geo search is as expected, taking into account the RESP version being used. 
""" if is_resp2_connection(client): assert set([doc.id for doc in result.docs]) == set(expected_doc_ids) - assert result.total == len(expected_doc_ids) else: assert set([doc["id"] for doc in result["results"]]) == set(expected_doc_ids) - assert result["total_results"] == len(expected_doc_ids)