From 587a6b90f57250203f2480934e433a019c0a0659 Mon Sep 17 00:00:00 2001 From: Mahesh Rajamani <99678631+maheshrajamani@users.noreply.github.com> Date: Mon, 8 Jan 2024 11:24:40 -0500 Subject: [PATCH] Changes for indexing option - create collection and cache (#771) --- .../command/impl/CreateCollectionCommand.java | 68 ++++++++- .../sgv2/jsonapi/exception/ErrorCode.java | 2 + .../executor/CollectionSettings.java | 114 +++++++++------ .../cqldriver/executor/NamespaceCache.java | 4 +- .../model/impl/CreateCollectionOperation.java | 29 ++-- .../model/impl/FindCollectionsOperation.java | 2 +- .../impl/CreateCollectionCommandResolver.java | 94 ++++++++----- .../ObjectMapperConfigurationTest.java | 66 +++++++++ .../v1/CreateCollectionIntegrationTest.java | 130 +++++++++++++++++ .../executor/NamespaceCacheTest.java | 131 ++++++++++++++++++ .../CreateCollectionCommandResolverTest.java | 81 ++++++++++- 11 files changed, 632 insertions(+), 89 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/CreateCollectionCommand.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/CreateCollectionCommand.java index c4317b6f45..be381bad01 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/CreateCollectionCommand.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/impl/CreateCollectionCommand.java @@ -5,7 +5,12 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import io.stargate.sgv2.jsonapi.api.model.command.NamespaceCommand; +import io.stargate.sgv2.jsonapi.exception.ErrorCode; +import io.stargate.sgv2.jsonapi.exception.JsonApiException; import jakarta.validation.constraints.*; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import javax.annotation.Nullable; import org.eclipse.microprofile.openapi.annotations.enums.SchemaType; import org.eclipse.microprofile.openapi.annotations.media.Schema; @@ -40,7 +45,15 @@ public record Options( description = "Embedding api configuration to support `$vectorize`", type = SchemaType.OBJECT, implementation = VectorSearchConfig.class) - VectorizeConfig vectorize) { + VectorizeConfig vectorize, + @JsonInclude(JsonInclude.Include.NON_NULL) + @Nullable + @Schema( + description = + "Optional indexing configuration to provide allow/deny list of fields for indexing", + type = SchemaType.OBJECT, + implementation = IndexingConfig.class) + IndexingConfig indexing) { public record VectorSearchConfig( @Positive(message = "dimension should be greater than `0`") @@ -70,6 +83,59 @@ public VectorSearchConfig(Integer dimension, String metric) { } } + public record IndexingConfig( + @JsonInclude(JsonInclude.Include.NON_NULL) + @Schema( + description = "List of allowed indexing fields", + type = SchemaType.ARRAY, + implementation = String.class) + @Nullable + List allow, + @JsonInclude(JsonInclude.Include.NON_NULL) + @Schema( + description = "List of denied indexing fields", + type = SchemaType.ARRAY, + implementation = String.class) + @Nullable + List deny) { + + public void validate() { + if (allow() != null && deny() != null) { + throw new JsonApiException( + ErrorCode.INVALID_INDEXING_DEFINITION, + ErrorCode.INVALID_INDEXING_DEFINITION.getMessage() + + " - `allow` and `deny` cannot be used together"); + } + + if (allow() == null && deny() == null) { + throw new JsonApiException( + ErrorCode.INVALID_INDEXING_DEFINITION, + ErrorCode.INVALID_INDEXING_DEFINITION.getMessage() + + " - `allow` or `deny` should be provided"); + } + + if (allow() != null) { + Set dedupe = new HashSet<>(allow()); + if (dedupe.size() != allow().size()) { + throw new JsonApiException( + ErrorCode.INVALID_INDEXING_DEFINITION, + ErrorCode.INVALID_INDEXING_DEFINITION.getMessage() + + " - `allow` cannot contain duplicates"); + } + } + + if (deny() != null) { + Set dedupe = new HashSet<>(deny()); + if (dedupe.size() != deny().size()) { + throw new JsonApiException( + ErrorCode.INVALID_INDEXING_DEFINITION, + ErrorCode.INVALID_INDEXING_DEFINITION.getMessage() + + " - `deny` cannot contain duplicates"); + } + } + } + } + public record VectorizeConfig( @NotNull @Schema( diff --git a/src/main/java/io/stargate/sgv2/jsonapi/exception/ErrorCode.java b/src/main/java/io/stargate/sgv2/jsonapi/exception/ErrorCode.java index bf42299f5c..afca17a9dd 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/exception/ErrorCode.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/exception/ErrorCode.java @@ -27,6 +27,8 @@ public enum ErrorCode { INVALID_REQUST("Request not supported by the data store"), + INVALID_INDEXING_DEFINITION("Invalid indexing definition"), + NAMESPACE_DOES_NOT_EXIST("The provided namespace does not exist."), SHRED_BAD_DOCUMENT_TYPE("Bad document type to shred"), diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettings.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettings.java index 75c0fa2a7c..5ce3af26b5 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettings.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettings.java @@ -13,8 +13,10 @@ import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; +import java.util.HashSet; import java.util.Map; import java.util.Optional; +import java.util.Set; /** * Refactored as seperate class that represent a collection property. @@ -32,7 +34,22 @@ public record CollectionSettings( int vectorSize, SimilarityFunction similarityFunction, String vectorizeServiceName, - String modelName) { + String modelName, + IndexingConfig indexingConfig) { + + public record IndexingConfig(Set allowed, Set denied) { + public static IndexingConfig fromJson(JsonNode jsonNode) { + Set allowed = new HashSet<>(); + Set denied = new HashSet<>(); + if (jsonNode.has("allow")) { + jsonNode.get("allow").forEach(node -> allowed.add(node.asText())); + } + if (jsonNode.has("deny")) { + jsonNode.get("deny").forEach(node -> denied.add(node.asText())); + } + return new IndexingConfig(allowed, denied); + } + } /** * The similarity function used for the vector index. This is only applicable if the vector index @@ -86,21 +103,18 @@ public static CollectionSettings getCollectionSettings( function = CollectionSettings.SimilarityFunction.fromString(functionName); } final String comment = (String) table.getOptions().get(CqlIdentifier.fromInternal("comment")); - if (comment != null && !comment.isBlank()) { - return createCollectionSettingsFromJson( - collectionName, vectorEnabled, vectorSize, function, comment, objectMapper); - } else { - return new CollectionSettings( - collectionName, vectorEnabled, vectorSize, function, null, null); - } + return createCollectionSettings( + collectionName, vectorEnabled, vectorSize, function, comment, objectMapper); } else { // if not vector collection - return new CollectionSettings( + // handling comment so get the indexing config from comment + final String comment = (String) table.getOptions().get(CqlIdentifier.fromInternal("comment")); + return createCollectionSettings( collectionName, vectorEnabled, 0, CollectionSettings.SimilarityFunction.UNDEFINED, - null, - null); + comment, + objectMapper); } } @@ -109,46 +123,62 @@ public static CollectionSettings getCollectionSettings( boolean vectorEnabled, int vectorSize, SimilarityFunction similarityFunction, - String vectorize, + String comment, ObjectMapper objectMapper) { - // parse vectorize to get vectorizeServiceName and modelName - if (vectorize != null && !vectorize.isBlank()) { - return createCollectionSettingsFromJson( - collectionName, vectorEnabled, vectorSize, similarityFunction, vectorize, objectMapper); - } else { - return new CollectionSettings( - collectionName, vectorEnabled, vectorSize, similarityFunction, null, null); - } + return createCollectionSettings( + collectionName, vectorEnabled, vectorSize, similarityFunction, comment, objectMapper); } - private static CollectionSettings createCollectionSettingsFromJson( + private static CollectionSettings createCollectionSettings( String collectionName, boolean vectorEnabled, int vectorSize, SimilarityFunction function, - String vectorize, + String comment, ObjectMapper objectMapper) { - try { - JsonNode vectorizeConfig = objectMapper.readTree(vectorize); - String vectorizeServiceName = vectorizeConfig.path("service").textValue(); - JsonNode optionsNode = vectorizeConfig.path("options"); - String modelName = optionsNode.path("modelName").textValue(); - if (vectorizeServiceName != null - && !vectorizeServiceName.isEmpty() - && modelName != null - && !modelName.isEmpty()) { - return new CollectionSettings( - collectionName, vectorEnabled, vectorSize, function, vectorizeServiceName, modelName); - } else { - // This should never happen, VectorizeConfig check null, unless it fails - throw new JsonApiException( - VECTORIZECONFIG_CHECK_FAIL, - "%s, please check 'vectorize' configuration." - .formatted(VECTORIZECONFIG_CHECK_FAIL.getMessage())); + + if (comment == null || comment.isBlank()) { + return new CollectionSettings( + collectionName, vectorEnabled, vectorSize, function, null, null, null); + } else { + String vectorizeServiceName = null; + String modelName = null; + JsonNode commentConfig; + try { + commentConfig = objectMapper.readTree(comment); + } catch (JsonProcessingException e) { + // This should never happen, already check if vectorize is a valid JSON + throw new RuntimeException("Invalid json string, please check 'options' configuration.", e); + } + JsonNode vectorizeConfig = commentConfig.path("vectorize"); + if (!vectorizeConfig.isMissingNode()) { + vectorizeServiceName = vectorizeConfig.path("service").textValue(); + JsonNode optionsNode = vectorizeConfig.path("options"); + modelName = optionsNode.path("modelName").textValue(); + if (!(vectorizeServiceName != null + && !vectorizeServiceName.isEmpty() + && modelName != null + && !modelName.isEmpty())) { + // This should never happen, VectorizeConfig check null, unless it fails + throw new JsonApiException( + VECTORIZECONFIG_CHECK_FAIL, + "%s, please check 'vectorize' configuration." + .formatted(VECTORIZECONFIG_CHECK_FAIL.getMessage())); + } + } + IndexingConfig indexingConfig = null; + JsonNode indexing = commentConfig.path("indexing"); + if (!indexing.isMissingNode()) { + indexingConfig = IndexingConfig.fromJson(indexing); } - } catch (JsonProcessingException e) { - // This should never happen, already check if vectorize is a valid JSON - throw new RuntimeException("Invalid json string, please check 'vectorize' configuration.", e); + return new CollectionSettings( + collectionName, + vectorEnabled, + vectorSize, + function, + vectorizeServiceName, + modelName, + indexingConfig); } } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/NamespaceCache.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/NamespaceCache.java index 1e94eeb00a..a88898a132 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/NamespaceCache.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/NamespaceCache.java @@ -73,7 +73,9 @@ protected Uni getCollectionProperties(String collectionName) && (sre.getStatus().getCode() == io.grpc.Status.Code.NOT_FOUND || sre.getStatus().getCode() == io.grpc.Status.Code.INVALID_ARGUMENT))) { return Uni.createFrom() - .item(new CollectionSettings(collectionName, false, 0, null, null, null)); + .item( + new CollectionSettings( + collectionName, false, 0, null, null, null, null)); } return Uni.createFrom().failure(error); } else { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/CreateCollectionOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/CreateCollectionOperation.java index fbdc688a41..0ba4683d95 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/CreateCollectionOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/CreateCollectionOperation.java @@ -34,7 +34,7 @@ public record CreateCollectionOperation( boolean vectorSearch, int vectorSize, String vectorFunction, - String vectorize) + String comment) implements Operation { // shared matcher instance used to tell Collections from Tables private static final JsonapiTableMatcher COLLECTION_MATCHER = new JsonapiTableMatcher(); @@ -47,7 +47,7 @@ public static CreateCollectionOperation withVectorSearch( String name, int vectorSize, String vectorFunction, - String vectorize) { + String comment) { return new CreateCollectionOperation( commandContext, dbLimitsConfig, @@ -57,7 +57,7 @@ public static CreateCollectionOperation withVectorSearch( true, vectorSize, vectorFunction, - vectorize); + comment); } public static CreateCollectionOperation withoutVectorSearch( @@ -65,9 +65,18 @@ public static CreateCollectionOperation withoutVectorSearch( DatabaseLimitsConfig dbLimitsConfig, ObjectMapper objectMapper, CQLSessionCache cqlSessionCache, - String name) { + String name, + String comment) { return new CreateCollectionOperation( - commandContext, dbLimitsConfig, objectMapper, cqlSessionCache, name, false, 0, null, null); + commandContext, + dbLimitsConfig, + objectMapper, + cqlSessionCache, + name, + false, + 0, + null, + comment); } @Override @@ -101,7 +110,7 @@ public Uni> execute(QueryExecutor queryExecutor) { vectorSearch, vectorSize, CollectionSettings.SimilarityFunction.fromString(vectorFunction), - vectorize, + comment, objectMapper); // if table exists and user want to create a vector collection with the same name if (vectorSearch) { @@ -244,8 +253,8 @@ protected SimpleStatement getCreateTable(String keyspace, String table) { + vectorSize + ">, " + " PRIMARY KEY (key))"; - if (vectorize != null) { - createTableWithVector = createTableWithVector + " WITH comment = '" + vectorize + "'"; + if (comment != null) { + createTableWithVector = createTableWithVector + " WITH comment = '" + comment + "'"; } return SimpleStatement.newInstance(String.format(createTableWithVector, keyspace, table)); } else { @@ -263,7 +272,9 @@ protected SimpleStatement getCreateTable(String keyspace, String table) { + " query_timestamp_values map, " + " query_null_values set, " + " PRIMARY KEY (key))"; - + if (comment != null) { + createTable = createTable + " WITH comment = '" + comment + "'"; + } return SimpleStatement.newInstance(String.format(createTable, keyspace, table)); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/FindCollectionsOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/FindCollectionsOperation.java index 4cd64d4b67..cca91d9d3f 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/FindCollectionsOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/FindCollectionsOperation.java @@ -115,7 +115,7 @@ public CommandResult get() { collectionProperty.similarityFunction().name().toLowerCase()); options = new CreateCollectionCommand.Options( - vectorSearchConfig, vectorizeConfig); + vectorSearchConfig, vectorizeConfig, null); } // CreateCollectionCommand object is created for convenience to generate json // response. The code is not creating a collection here. diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CreateCollectionCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CreateCollectionCommandResolver.java index 5488dd6a9e..2a8a2a348c 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CreateCollectionCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CreateCollectionCommandResolver.java @@ -1,7 +1,7 @@ package io.stargate.sgv2.jsonapi.service.resolver.model.impl; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; import io.stargate.sgv2.api.common.config.DataStoreConfig; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.impl.CreateCollectionCommand; @@ -51,44 +51,74 @@ public Class getCommandClass() { @Override public Operation resolveCommand(CommandContext ctx, CreateCollectionCommand command) { - if (command.options() != null && command.options().vector() != null) { - if (!dataStoreConfig.vectorSearchEnabled()) { - throw new JsonApiException( - ErrorCode.VECTOR_SEARCH_NOT_AVAILABLE, - ErrorCode.VECTOR_SEARCH_NOT_AVAILABLE.getMessage()); + + if (command.options() != null) { + boolean vectorize = false; + boolean indexing = false; + int vectorSize = 0; + String function = null; + + // handling indexing options + if (command.options().indexing() != null) { + // validation of configuration + command.options().indexing().validate(); + indexing = true; + // No need to process if both are null or empty + } - final int vectorSize = command.options().vector().dimension(); - if (vectorSize > documentLimitsConfig.maxVectorEmbeddingLength()) { - throw new JsonApiException( - ErrorCode.VECTOR_SEARCH_FIELD_TOO_BIG, - String.format( - "%s: %d (max %d)", - ErrorCode.VECTOR_SEARCH_FIELD_TOO_BIG.getMessage(), - vectorSize, - documentLimitsConfig.maxVectorEmbeddingLength())); + + // handling vector and vectorize options + if (command.options().vector() != null) { + if (!dataStoreConfig.vectorSearchEnabled()) { + throw new JsonApiException( + ErrorCode.VECTOR_SEARCH_NOT_AVAILABLE, + ErrorCode.VECTOR_SEARCH_NOT_AVAILABLE.getMessage()); + } + function = command.options().vector().metric(); + vectorSize = command.options().vector().dimension(); + if (vectorSize > documentLimitsConfig.maxVectorEmbeddingLength()) { + throw new JsonApiException( + ErrorCode.VECTOR_SEARCH_FIELD_TOO_BIG, + String.format( + "%s: %d (max %d)", + ErrorCode.VECTOR_SEARCH_FIELD_TOO_BIG.getMessage(), + vectorSize, + documentLimitsConfig.maxVectorEmbeddingLength())); + } + if (command.options().vectorize() != null) { + vectorize = true; + } } - String vectorize = null; - if (command.options().vectorize() != null) { - try { - vectorize = objectMapper.writeValueAsString(command.options().vectorize()); - } catch (JsonProcessingException e) { - // This should never happen because the object is extracted from json request - throw new RuntimeException(e); + + String comment = null; + if (indexing || vectorize) { + final ObjectNode objectNode = objectMapper.createObjectNode(); + if (indexing) { + objectNode.putPOJO("indexing", command.options().indexing()); } + if (vectorize) { + objectNode.putPOJO("vectorize", command.options().vectorize()); + } + comment = objectNode.toString(); } - return CreateCollectionOperation.withVectorSearch( - ctx, - dbLimitsConfig, - objectMapper, - cqlSessionCache, - command.name(), - vectorSize, - command.options().vector().metric(), - vectorize); + if (command.options().vector() != null) { + return CreateCollectionOperation.withVectorSearch( + ctx, + dbLimitsConfig, + objectMapper, + cqlSessionCache, + command.name(), + vectorSize, + function, + comment); + } else { + return CreateCollectionOperation.withoutVectorSearch( + ctx, dbLimitsConfig, objectMapper, cqlSessionCache, command.name(), comment); + } } else { return CreateCollectionOperation.withoutVectorSearch( - ctx, dbLimitsConfig, objectMapper, cqlSessionCache, command.name()); + ctx, dbLimitsConfig, objectMapper, cqlSessionCache, command.name(), null); } } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/configuration/ObjectMapperConfigurationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/configuration/ObjectMapperConfigurationTest.java index 3b31a4075a..4010fa5371 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/configuration/ObjectMapperConfigurationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/configuration/ObjectMapperConfigurationTest.java @@ -462,6 +462,72 @@ public void happyPathVectorizeSearch() throws Exception { }); } + @Test + public void happyPathIndexingAllow() throws Exception { + String json = + """ + { + "createCollection": { + "name": "some_name", + "options": { + "indexing": { + "allow": ["field1", "field2"] + } + } + } + } + """; + + Command result = objectMapper.readValue(json, Command.class); + + assertThat(result) + .isInstanceOfSatisfying( + CreateCollectionCommand.class, + createCollection -> { + String name = createCollection.name(); + assertThat(name).isNotNull(); + assertThat(createCollection.options()).isNotNull(); + assertThat(createCollection.options().indexing()).isNotNull(); + assertThat(createCollection.options().indexing().deny()).isNull(); + assertThat(createCollection.options().indexing().allow()).hasSize(2); + assertThat(createCollection.options().indexing().allow()) + .contains("field1", "field2"); + }); + } + + @Test + public void happyPathIndexingDeny() throws Exception { + String json = + """ + { + "createCollection": { + "name": "some_name", + "options": { + "indexing": { + "deny": ["field1", "field2"] + } + } + } + } + """; + + Command result = objectMapper.readValue(json, Command.class); + + assertThat(result) + .isInstanceOfSatisfying( + CreateCollectionCommand.class, + createCollection -> { + String name = createCollection.name(); + assertThat(name).isNotNull(); + assertThat(createCollection.options()).isNotNull(); + assertThat(createCollection.options().indexing()).isNotNull(); + assertThat(createCollection.options().indexing().allow()).isNull(); + assertThat(createCollection.options().indexing().deny()).hasSize(2); + assertThat(createCollection.options().indexing().deny()) + .contains("field1", "field2"); + }); + } + @Test public void happyPathVectorSearchDefaultFunction() throws Exception { String json = diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CreateCollectionIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CreateCollectionIntegrationTest.java index 7777423c29..880e93c740 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CreateCollectionIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/CreateCollectionIntegrationTest.java @@ -83,6 +83,79 @@ class CreateCollection { } """; + String createCollectionWithAllowIndexing = + """ + { + "createCollection": { + "name": "simple_collection_allow_indexing", + "options" : { + "vector" : { + "size" : 5, + "function" : "cosine" + }, + "indexing" : { + "allow" : ["field1", "field2", "address.city", "_id"] + } + } + } + } + """; + + String createCollectionWithDenyIndexing = + """ + { + "createCollection": { + "name": "simple_collection_deny_indexing", + "options" : { + "vector" : { + "size" : 5, + "function" : "cosine" + }, + "indexing" : { + "deny" : ["field1", "field2", "address.city", "_id"] + } + } + } + } + """; + + String createCollectionWithIndexingError = + """ + { + "createCollection": { + "name": "simple_collection_error1", + "options" : { + "vector" : { + "size" : 5, + "function" : "cosine" + }, + "indexing" : { + "allow" : ["field1", "field1"] + } + } + } + } + """; + + String createCollectionWithIndexingError2 = + """ + { + "createCollection": { + "name": "simple_collection_error2", + "options" : { + "vector" : { + "size" : 5, + "function" : "cosine" + }, + "indexing" : { + "allow" : ["field1", "field2"], + "deny" : ["field1", "field2"] + } + } + } + } + """; + @Test public void happyPath() { String json = @@ -255,6 +328,63 @@ public void duplicateVectorCollectionNameWithDiffSetting() { "The provided collection name 'simple_collection' already exists with a different vector setting.")) .body("errors[0].errorCode", is("INVALID_COLLECTION_NAME")) .body("errors[0].exceptionClass", is("JsonApiException")); + + // create vector collection with indexing allow option + given() + .header(HttpConstants.AUTHENTICATION_TOKEN_HEADER_NAME, getAuthToken()) + .contentType(ContentType.JSON) + .body(createCollectionWithAllowIndexing) + .when() + .post(NamespaceResource.BASE_PATH, namespaceName) + .then() + .statusCode(200) + .body("status.ok", is(1)); + + // create vector collection with indexing deny option + given() + .header(HttpConstants.AUTHENTICATION_TOKEN_HEADER_NAME, getAuthToken()) + .contentType(ContentType.JSON) + .body(createCollectionWithDenyIndexing) + .when() + .post(NamespaceResource.BASE_PATH, namespaceName) + .then() + .statusCode(200) + .body("status.ok", is(1)); + + // create vector collection with error indexing option + given() + .header(HttpConstants.AUTHENTICATION_TOKEN_HEADER_NAME, getAuthToken()) + .contentType(ContentType.JSON) + .body(createCollectionWithIndexingError) + .when() + .post(NamespaceResource.BASE_PATH, namespaceName) + .then() + .statusCode(200) + .body("status", is(nullValue())) + .body("data", is(nullValue())) + .body( + "errors[0].message", + is("Invalid indexing definition - `allow` cannot contain duplicates")) + .body("errors[0].errorCode", is("INVALID_INDEXING_DEFINITION")) + .body("errors[0].exceptionClass", is("JsonApiException")); + + // create vector collection with error indexing option + given() + .header(HttpConstants.AUTHENTICATION_TOKEN_HEADER_NAME, getAuthToken()) + .contentType(ContentType.JSON) + .body(createCollectionWithIndexingError2) + .when() + .post(NamespaceResource.BASE_PATH, namespaceName) + .then() + .statusCode(200) + .body("status", is(nullValue())) + .body("data", is(nullValue())) + .body( + "errors[0].message", + is("Invalid indexing definition - `allow` and `deny` cannot be used together")) + .body("errors[0].errorCode", is("INVALID_INDEXING_DEFINITION")) + .body("errors[0].exceptionClass", is("JsonApiException")); + // delete the collection given() .header(HttpConstants.AUTHENTICATION_TOKEN_HEADER_NAME, getAuthToken()) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/NamespaceCacheTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/NamespaceCacheTest.java index 11a1737fd2..8392f939b5 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/NamespaceCacheTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/NamespaceCacheTest.java @@ -165,6 +165,137 @@ public void checkValidJsonApiTable() { }); } + @Test + public void checkValidJsonApiTableWithIndexing() { + QueryExecutor queryExecutor = mock(QueryExecutor.class); + when(queryExecutor.getSchema(any(), any())) + .then( + i -> { + List partitionColumn = + Lists.newArrayList( + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("key"), + DataTypes.tupleOf(DataTypes.TINYINT, DataTypes.TEXT), + false)); + Map columns = new HashMap<>(); + columns.put( + CqlIdentifier.fromInternal("tx_id"), + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("tx_id"), + DataTypes.TIMEUUID, + false)); + columns.put( + CqlIdentifier.fromInternal("doc_json"), + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("doc_json"), + DataTypes.TEXT, + false)); + columns.put( + CqlIdentifier.fromInternal("exist_keys"), + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("exist_keys"), + DataTypes.setOf(DataTypes.TEXT), + false)); + columns.put( + CqlIdentifier.fromInternal("array_size"), + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("array_size"), + DataTypes.mapOf(DataTypes.TEXT, DataTypes.INT), + false)); + columns.put( + CqlIdentifier.fromInternal("array_contains"), + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("array_contains"), + DataTypes.setOf(DataTypes.TEXT), + false)); + columns.put( + CqlIdentifier.fromInternal("query_bool_values"), + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("query_bool_values"), + DataTypes.mapOf(DataTypes.TEXT, DataTypes.TINYINT), + false)); + columns.put( + CqlIdentifier.fromInternal("query_dbl_values"), + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("query_dbl_values"), + DataTypes.mapOf(DataTypes.TEXT, DataTypes.DECIMAL), + false)); + columns.put( + CqlIdentifier.fromInternal("query_text_values"), + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("query_text_values"), + DataTypes.mapOf(DataTypes.TEXT, DataTypes.TEXT), + false)); + columns.put( + CqlIdentifier.fromInternal("query_timestamp_values"), + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("query_timestamp_values"), + DataTypes.mapOf(DataTypes.TEXT, DataTypes.TIMESTAMP), + false)); + columns.put( + CqlIdentifier.fromInternal("query_null_values"), + new DefaultColumnMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + CqlIdentifier.fromInternal("query_null_values"), + DataTypes.setOf(DataTypes.TEXT), + false)); + + return Uni.createFrom() + .item( + Optional.of( + new DefaultTableMetadata( + CqlIdentifier.fromInternal("ks"), + CqlIdentifier.fromInternal("table"), + UUID.randomUUID(), + false, + false, + partitionColumn, + new HashMap<>(), + columns, + Map.of( + CqlIdentifier.fromInternal("comment"), + "{\"indexing\":{\"deny\":[\"comment\"]}}"), + new HashMap<>()))); + }); + NamespaceCache namespaceCache = new NamespaceCache("ks", queryExecutor, objectMapper); + CollectionSettings collectionSettings = + namespaceCache + .getCollectionProperties("table") + .subscribe() + .withSubscriber(UniAssertSubscriber.create()) + .awaitItem() + .getItem(); + + assertThat(collectionSettings) + .satisfies( + s -> { + assertThat(s.vectorEnabled()).isFalse(); + assertThat(s.collectionName()).isEqualTo("table"); + assertThat(s.indexingConfig().denied()).containsExactly("comment"); + }); + } + @Test public void checkInvalidJsonApiTable() { QueryExecutor queryExecutor = mock(QueryExecutor.class); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CreateCollectionCommandResolverTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CreateCollectionCommandResolverTest.java index 11372ebed4..ba089e957e 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CreateCollectionCommandResolverTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CreateCollectionCommandResolverTest.java @@ -1,7 +1,7 @@ package io.stargate.sgv2.jsonapi.service.resolver.model.impl; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.*; +import static org.assertj.core.api.Assertions.catchThrowable; import com.fasterxml.jackson.databind.ObjectMapper; import io.quarkus.test.Mock; @@ -10,6 +10,8 @@ import io.stargate.sgv2.common.testprofiles.NoGlobalResourcesTestProfile; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.impl.CreateCollectionCommand; +import io.stargate.sgv2.jsonapi.exception.ErrorCode; +import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.operation.model.Operation; import io.stargate.sgv2.jsonapi.service.operation.model.impl.CreateCollectionOperation; import jakarta.inject.Inject; @@ -121,9 +123,82 @@ public void happyPathVectorizeSearch() throws Exception { assertThat(op.vectorSearch()).isEqualTo(true); assertThat(op.vectorSize()).isEqualTo(4); assertThat(op.vectorFunction()).isEqualTo("cosine"); - assertThat(op.vectorize()) + assertThat(op.comment()) .isEqualTo( - "{\"service\":\"openai\",\"options\":{\"modelName\":\"text-embedding-ada-002\"}}"); + "{\"vectorize\":{\"service\":\"openai\",\"options\":{\"modelName\":\"text-embedding-ada-002\"}}}"); + }); + } + + @Test + public void happyPathIndexing() throws Exception { + String json = + """ + { + "createCollection": { + "name" : "my_collection", + "options": { + "vector": { + "dimension": 4, + "metric": "cosine" + }, + "indexing": { + "deny" : ["comment"] + } + } + } + } + """; + + CreateCollectionCommand command = objectMapper.readValue(json, CreateCollectionCommand.class); + Operation result = resolver.resolveCommand(commandContext, command); + + assertThat(result) + .isInstanceOfSatisfying( + CreateCollectionOperation.class, + op -> { + assertThat(op.name()).isEqualTo("my_collection"); + assertThat(op.commandContext()).isEqualTo(commandContext); + assertThat(op.vectorSearch()).isEqualTo(true); + assertThat(op.vectorSize()).isEqualTo(4); + assertThat(op.vectorFunction()).isEqualTo("cosine"); + assertThat(op.comment()).isEqualTo("{\"indexing\":{\"deny\":[\"comment\"]}}"); + }); + } + + @Test + public void indexingOptionsError() throws Exception { + String json = + """ + { + "createCollection": { + "name" : "my_collection", + "options": { + "vector": { + "dimension": 4, + "metric": "cosine" + }, + "indexing": { + "deny" : ["comment"], + "allow" : ["data"] + } + } + } + } + """; + + CreateCollectionCommand command = objectMapper.readValue(json, CreateCollectionCommand.class); + Throwable throwable = catchThrowable(() -> resolver.resolveCommand(commandContext, command)); + + assertThat(throwable) + .isInstanceOf(JsonApiException.class) + .satisfies( + e -> { + JsonApiException exception = (JsonApiException) e; + assertThat(exception.getMessage()) + .isEqualTo( + "Invalid indexing definition - `allow` and `deny` cannot be used together"); + assertThat(exception.getErrorCode()) + .isEqualTo(ErrorCode.INVALID_INDEXING_DEFINITION); }); }