Skip to content

Commit

Permalink
Changes for indexing option - create collection and cache (#771)
Browse files Browse the repository at this point in the history
  • Loading branch information
maheshrajamani authored Jan 8, 2024
1 parent 17a563d commit 587a6b9
Show file tree
Hide file tree
Showing 11 changed files with 632 additions and 89 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import io.stargate.sgv2.jsonapi.api.model.command.NamespaceCommand;
import io.stargate.sgv2.jsonapi.exception.ErrorCode;
import io.stargate.sgv2.jsonapi.exception.JsonApiException;
import jakarta.validation.constraints.*;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
import org.eclipse.microprofile.openapi.annotations.enums.SchemaType;
import org.eclipse.microprofile.openapi.annotations.media.Schema;
Expand Down Expand Up @@ -40,7 +45,15 @@ public record Options(
description = "Embedding api configuration to support `$vectorize`",
type = SchemaType.OBJECT,
implementation = VectorSearchConfig.class)
VectorizeConfig vectorize) {
VectorizeConfig vectorize,
@JsonInclude(JsonInclude.Include.NON_NULL)
@Nullable
@Schema(
description =
"Optional indexing configuration to provide allow/deny list of fields for indexing",
type = SchemaType.OBJECT,
implementation = IndexingConfig.class)
IndexingConfig indexing) {

public record VectorSearchConfig(
@Positive(message = "dimension should be greater than `0`")
Expand Down Expand Up @@ -70,6 +83,59 @@ public VectorSearchConfig(Integer dimension, String metric) {
}
}

/**
 * Optional indexing configuration carrying an allow list or a deny list of field names.
 *
 * <p>Both components are nullable at the binding level; {@link #validate()} enforces that exactly
 * one of them is supplied and that the supplied list has no repeated entries.
 *
 * @param allow list of allowed indexing fields, or {@code null} when not supplied
 * @param deny list of denied indexing fields, or {@code null} when not supplied
 */
public record IndexingConfig(
    @JsonInclude(JsonInclude.Include.NON_NULL)
        @Schema(
            description = "List of allowed indexing fields",
            type = SchemaType.ARRAY,
            implementation = String.class)
        @Nullable
        List<String> allow,
    @JsonInclude(JsonInclude.Include.NON_NULL)
        @Schema(
            description = "List of denied indexing fields",
            type = SchemaType.ARRAY,
            implementation = String.class)
        @Nullable
        List<String> deny) {

  /**
   * Checks that this configuration is well formed.
   *
   * <p>The checks run in this order: both lists present, neither list present, duplicates in
   * {@code allow}, duplicates in {@code deny}.
   *
   * @throws JsonApiException with {@code ErrorCode.INVALID_INDEXING_DEFINITION} when any check
   *     fails
   */
  public void validate() {
    final boolean allowGiven = allow() != null;
    final boolean denyGiven = deny() != null;

    if (allowGiven && denyGiven) {
      throw invalidDefinition(" - `allow` and `deny` cannot be used together");
    }
    if (!allowGiven && !denyGiven) {
      throw invalidDefinition(" - `allow` or `deny` should be provided");
    }

    // Past this point exactly one list is non-null; the helper skips the absent one.
    rejectDuplicates(allow(), " - `allow` cannot contain duplicates");
    rejectDuplicates(deny(), " - `deny` cannot contain duplicates");
  }

  /** Throws if {@code fields} is non-null and contains the same entry more than once. */
  private static void rejectDuplicates(List<String> fields, String detail) {
    if (fields == null) {
      return;
    }
    final Set<String> unique = new HashSet<>(fields);
    if (unique.size() != fields.size()) {
      throw invalidDefinition(detail);
    }
  }

  /** Builds the exception used for every validation failure, appending {@code detail}. */
  private static JsonApiException invalidDefinition(String detail) {
    return new JsonApiException(
        ErrorCode.INVALID_INDEXING_DEFINITION,
        ErrorCode.INVALID_INDEXING_DEFINITION.getMessage() + detail);
  }
}

public record VectorizeConfig(
@NotNull
@Schema(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ public enum ErrorCode {

INVALID_REQUST("Request not supported by the data store"),

INVALID_INDEXING_DEFINITION("Invalid indexing definition"),

NAMESPACE_DOES_NOT_EXIST("The provided namespace does not exist."),

SHRED_BAD_DOCUMENT_TYPE("Bad document type to shred"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants;
import io.stargate.sgv2.jsonapi.exception.ErrorCode;
import io.stargate.sgv2.jsonapi.exception.JsonApiException;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

/**
* Refactored as a separate class that represents a collection property.
Expand All @@ -32,7 +34,22 @@ public record CollectionSettings(
int vectorSize,
SimilarityFunction similarityFunction,
String vectorizeServiceName,
String modelName) {
String modelName,
IndexingConfig indexingConfig) {

/**
 * Indexing settings of a collection, held as a set of allowed and a set of denied field names.
 *
 * @param allowed field names read from the {@code "allow"} entry; empty when the entry is absent
 * @param denied field names read from the {@code "deny"} entry; empty when the entry is absent
 */
public record IndexingConfig(Set<String> allowed, Set<String> denied) {

  /**
   * Creates an {@code IndexingConfig} from a JSON node by reading its {@code "allow"} and {@code
   * "deny"} entries.
   *
   * @param jsonNode node to read the two entries from
   * @return a config whose sets hold the text value of every element found under each entry
   */
  public static IndexingConfig fromJson(JsonNode jsonNode) {
    return new IndexingConfig(textValuesOf(jsonNode, "allow"), textValuesOf(jsonNode, "deny"));
  }

  /** Collects {@code asText()} of each child under {@code fieldName}; empty set if not present. */
  private static Set<String> textValuesOf(JsonNode parent, String fieldName) {
    final Set<String> values = new HashSet<>();
    if (parent.has(fieldName)) {
      for (JsonNode element : parent.get(fieldName)) {
        values.add(element.asText());
      }
    }
    return values;
  }
}

/**
* The similarity function used for the vector index. This is only applicable if the vector index
Expand Down Expand Up @@ -86,21 +103,18 @@ public static CollectionSettings getCollectionSettings(
function = CollectionSettings.SimilarityFunction.fromString(functionName);
}
final String comment = (String) table.getOptions().get(CqlIdentifier.fromInternal("comment"));
if (comment != null && !comment.isBlank()) {
return createCollectionSettingsFromJson(
collectionName, vectorEnabled, vectorSize, function, comment, objectMapper);
} else {
return new CollectionSettings(
collectionName, vectorEnabled, vectorSize, function, null, null);
}
return createCollectionSettings(
collectionName, vectorEnabled, vectorSize, function, comment, objectMapper);
} else { // if not vector collection
return new CollectionSettings(
// handling comment so get the indexing config from comment
final String comment = (String) table.getOptions().get(CqlIdentifier.fromInternal("comment"));
return createCollectionSettings(
collectionName,
vectorEnabled,
0,
CollectionSettings.SimilarityFunction.UNDEFINED,
null,
null);
comment,
objectMapper);
}
}

Expand All @@ -109,46 +123,62 @@ public static CollectionSettings getCollectionSettings(
boolean vectorEnabled,
int vectorSize,
SimilarityFunction similarityFunction,
String vectorize,
String comment,
ObjectMapper objectMapper) {
// parse vectorize to get vectorizeServiceName and modelName
if (vectorize != null && !vectorize.isBlank()) {
return createCollectionSettingsFromJson(
collectionName, vectorEnabled, vectorSize, similarityFunction, vectorize, objectMapper);
} else {
return new CollectionSettings(
collectionName, vectorEnabled, vectorSize, similarityFunction, null, null);
}
return createCollectionSettings(
collectionName, vectorEnabled, vectorSize, similarityFunction, comment, objectMapper);
}

private static CollectionSettings createCollectionSettingsFromJson(
private static CollectionSettings createCollectionSettings(
String collectionName,
boolean vectorEnabled,
int vectorSize,
SimilarityFunction function,
String vectorize,
String comment,
ObjectMapper objectMapper) {
try {
JsonNode vectorizeConfig = objectMapper.readTree(vectorize);
String vectorizeServiceName = vectorizeConfig.path("service").textValue();
JsonNode optionsNode = vectorizeConfig.path("options");
String modelName = optionsNode.path("modelName").textValue();
if (vectorizeServiceName != null
&& !vectorizeServiceName.isEmpty()
&& modelName != null
&& !modelName.isEmpty()) {
return new CollectionSettings(
collectionName, vectorEnabled, vectorSize, function, vectorizeServiceName, modelName);
} else {
// This should never happen, VectorizeConfig check null, unless it fails
throw new JsonApiException(
VECTORIZECONFIG_CHECK_FAIL,
"%s, please check 'vectorize' configuration."
.formatted(VECTORIZECONFIG_CHECK_FAIL.getMessage()));

if (comment == null || comment.isBlank()) {
return new CollectionSettings(
collectionName, vectorEnabled, vectorSize, function, null, null, null);
} else {
String vectorizeServiceName = null;
String modelName = null;
JsonNode commentConfig;
try {
commentConfig = objectMapper.readTree(comment);
} catch (JsonProcessingException e) {
// This should never happen; the table comment is expected to already be valid JSON
throw new RuntimeException("Invalid json string, please check 'options' configuration.", e);
}
JsonNode vectorizeConfig = commentConfig.path("vectorize");
if (!vectorizeConfig.isMissingNode()) {
vectorizeServiceName = vectorizeConfig.path("service").textValue();
JsonNode optionsNode = vectorizeConfig.path("options");
modelName = optionsNode.path("modelName").textValue();
if (!(vectorizeServiceName != null
&& !vectorizeServiceName.isEmpty()
&& modelName != null
&& !modelName.isEmpty())) {
// This should never happen, VectorizeConfig check null, unless it fails
throw new JsonApiException(
VECTORIZECONFIG_CHECK_FAIL,
"%s, please check 'vectorize' configuration."
.formatted(VECTORIZECONFIG_CHECK_FAIL.getMessage()));
}
}
IndexingConfig indexingConfig = null;
JsonNode indexing = commentConfig.path("indexing");
if (!indexing.isMissingNode()) {
indexingConfig = IndexingConfig.fromJson(indexing);
}
} catch (JsonProcessingException e) {
// This should never happen, already check if vectorize is a valid JSON
throw new RuntimeException("Invalid json string, please check 'vectorize' configuration.", e);
return new CollectionSettings(
collectionName,
vectorEnabled,
vectorSize,
function,
vectorizeServiceName,
modelName,
indexingConfig);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ protected Uni<CollectionSettings> getCollectionProperties(String collectionName)
&& (sre.getStatus().getCode() == io.grpc.Status.Code.NOT_FOUND
|| sre.getStatus().getCode() == io.grpc.Status.Code.INVALID_ARGUMENT))) {
return Uni.createFrom()
.item(new CollectionSettings(collectionName, false, 0, null, null, null));
.item(
new CollectionSettings(
collectionName, false, 0, null, null, null, null));
}
return Uni.createFrom().failure(error);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public record CreateCollectionOperation(
boolean vectorSearch,
int vectorSize,
String vectorFunction,
String vectorize)
String comment)
implements Operation {
// shared matcher instance used to tell Collections from Tables
private static final JsonapiTableMatcher COLLECTION_MATCHER = new JsonapiTableMatcher();
Expand All @@ -47,7 +47,7 @@ public static CreateCollectionOperation withVectorSearch(
String name,
int vectorSize,
String vectorFunction,
String vectorize) {
String comment) {
return new CreateCollectionOperation(
commandContext,
dbLimitsConfig,
Expand All @@ -57,17 +57,26 @@ public static CreateCollectionOperation withVectorSearch(
true,
vectorSize,
vectorFunction,
vectorize);
comment);
}

public static CreateCollectionOperation withoutVectorSearch(
CommandContext commandContext,
DatabaseLimitsConfig dbLimitsConfig,
ObjectMapper objectMapper,
CQLSessionCache cqlSessionCache,
String name) {
String name,
String comment) {
return new CreateCollectionOperation(
commandContext, dbLimitsConfig, objectMapper, cqlSessionCache, name, false, 0, null, null);
commandContext,
dbLimitsConfig,
objectMapper,
cqlSessionCache,
name,
false,
0,
null,
comment);
}

@Override
Expand Down Expand Up @@ -101,7 +110,7 @@ public Uni<Supplier<CommandResult>> execute(QueryExecutor queryExecutor) {
vectorSearch,
vectorSize,
CollectionSettings.SimilarityFunction.fromString(vectorFunction),
vectorize,
comment,
objectMapper);
// if table exists and user want to create a vector collection with the same name
if (vectorSearch) {
Expand Down Expand Up @@ -244,8 +253,8 @@ protected SimpleStatement getCreateTable(String keyspace, String table) {
+ vectorSize
+ ">, "
+ " PRIMARY KEY (key))";
if (vectorize != null) {
createTableWithVector = createTableWithVector + " WITH comment = '" + vectorize + "'";
if (comment != null) {
createTableWithVector = createTableWithVector + " WITH comment = '" + comment + "'";
}
return SimpleStatement.newInstance(String.format(createTableWithVector, keyspace, table));
} else {
Expand All @@ -263,7 +272,9 @@ protected SimpleStatement getCreateTable(String keyspace, String table) {
+ " query_timestamp_values map<text, timestamp>, "
+ " query_null_values set<text>, "
+ " PRIMARY KEY (key))";

if (comment != null) {
createTable = createTable + " WITH comment = '" + comment + "'";
}
return SimpleStatement.newInstance(String.format(createTable, keyspace, table));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public CommandResult get() {
collectionProperty.similarityFunction().name().toLowerCase());
options =
new CreateCollectionCommand.Options(
vectorSearchConfig, vectorizeConfig);
vectorSearchConfig, vectorizeConfig, null);
}
// CreateCollectionCommand object is created for convenience to generate json
// response. The code is not creating a collection here.
Expand Down
Loading

0 comments on commit 587a6b9

Please sign in to comment.