Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes for indexing option - create collection and cache #771

Merged
merged 6 commits into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import io.stargate.sgv2.jsonapi.api.model.command.NamespaceCommand;
import io.stargate.sgv2.jsonapi.exception.ErrorCode;
import io.stargate.sgv2.jsonapi.exception.JsonApiException;
import jakarta.validation.constraints.*;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
import org.eclipse.microprofile.openapi.annotations.enums.SchemaType;
import org.eclipse.microprofile.openapi.annotations.media.Schema;
Expand Down Expand Up @@ -40,7 +45,15 @@ public record Options(
description = "Embedding api configuration to support `$vectorize`",
type = SchemaType.OBJECT,
implementation = VectorSearchConfig.class)
VectorizeConfig vectorize) {
VectorizeConfig vectorize,
@JsonInclude(JsonInclude.Include.NON_NULL)
@Nullable
@Schema(
description =
"Optional indexing configuration to provide allow/deny list of fields for indexing",
type = SchemaType.OBJECT,
implementation = IndexingConfig.class)
IndexingConfig indexing) {

public record VectorSearchConfig(
@Positive(message = "dimension should be greater than `0`")
Expand Down Expand Up @@ -70,6 +83,59 @@ public VectorSearchConfig(Integer dimension, String metric) {
}
}

public record IndexingConfig(
tatu-at-datastax marked this conversation as resolved.
Show resolved Hide resolved
maheshrajamani marked this conversation as resolved.
Show resolved Hide resolved
@JsonInclude(JsonInclude.Include.NON_NULL)
@Schema(
description = "List of allowed indexing fields",
type = SchemaType.ARRAY,
implementation = String.class)
@Nullable
List<String> allow,
@JsonInclude(JsonInclude.Include.NON_NULL)
@Schema(
description = "List of denied indexing fields",
type = SchemaType.ARRAY,
implementation = String.class)
@Nullable
List<String> deny) {

public void validate() {
if (allow() != null && deny() != null) {
throw new JsonApiException(
ErrorCode.INVALID_INDEXING_USAGE,
ErrorCode.INVALID_INDEXING_USAGE.getMessage()
+ " - allow and deny cannot be used together");
maheshrajamani marked this conversation as resolved.
Show resolved Hide resolved
}

if (allow() == null && deny() == null) {
throw new JsonApiException(
ErrorCode.INVALID_INDEXING_USAGE,
ErrorCode.INVALID_INDEXING_USAGE.getMessage()
+ " - allow or deny should be provided");
maheshrajamani marked this conversation as resolved.
Show resolved Hide resolved
}

if (allow() != null) {
Set<String> dedupe = new HashSet<>(allow());
if (dedupe.size() != allow().size()) {
throw new JsonApiException(
ErrorCode.INVALID_INDEXING_USAGE,
ErrorCode.INVALID_INDEXING_USAGE.getMessage()
+ " - allow cannot contain duplicates");
}
}

if (deny() != null) {
Set<String> dedupe = new HashSet<>(deny());
if (dedupe.size() != deny().size()) {
throw new JsonApiException(
ErrorCode.INVALID_INDEXING_USAGE,
ErrorCode.INVALID_INDEXING_USAGE.getMessage()
+ " - deny cannot contain duplicates");
}
}
}
}

public record VectorizeConfig(
@NotNull
@Schema(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ public enum ErrorCode {

INVALID_REQUST("Request not supported by the data store"),

INVALID_INDEXING_USAGE("Invalid indexing usage"),
maheshrajamani marked this conversation as resolved.
Show resolved Hide resolved

NAMESPACE_DOES_NOT_EXIST("The provided namespace does not exist."),

SHRED_BAD_DOCUMENT_TYPE("Bad document type to shred"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants;
import io.stargate.sgv2.jsonapi.exception.ErrorCode;
import io.stargate.sgv2.jsonapi.exception.JsonApiException;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

/**
* Refactored as a separate class that represents a collection property.
Expand All @@ -32,7 +34,22 @@ public record CollectionSettings(
int vectorSize,
SimilarityFunction similarityFunction,
String vectorizeServiceName,
String modelName) {
String modelName,
IndexingConfig indexingConfig) {

/**
 * Indexing settings of a collection, held as two sets of field names.
 *
 * @param allowed text values read from the {@code "allow"} entry of the indexing JSON
 * @param denied text values read from the {@code "deny"} entry of the indexing JSON
 */
public record IndexingConfig(Set<String> allowed, Set<String> denied) {

  /**
   * Builds an {@code IndexingConfig} from the given JSON node.
   *
   * @param jsonNode node that may contain {@code "allow"} and/or {@code "deny"} entries
   * @return a config whose sets hold the text of each child element; a set is empty when its
   *     entry is absent
   */
  public static IndexingConfig fromJson(JsonNode jsonNode) {
    final Set<String> allowedNames = textValuesOf(jsonNode, "allow");
    final Set<String> deniedNames = textValuesOf(jsonNode, "deny");
    return new IndexingConfig(allowedNames, deniedNames);
  }

  /** Collects the text of every child of {@code parent.get(fieldName)}, if that field exists. */
  private static Set<String> textValuesOf(JsonNode parent, String fieldName) {
    final Set<String> values = new HashSet<>();
    if (parent.has(fieldName)) {
      for (JsonNode element : parent.get(fieldName)) {
        values.add(element.asText());
      }
    }
    return values;
  }
}

/**
* The similarity function used for the vector index. This is only applicable if the vector index
Expand Down Expand Up @@ -86,21 +103,18 @@ public static CollectionSettings getCollectionSettings(
function = CollectionSettings.SimilarityFunction.fromString(functionName);
}
final String comment = (String) table.getOptions().get(CqlIdentifier.fromInternal("comment"));
if (comment != null && !comment.isBlank()) {
return createCollectionSettingsFromJson(
collectionName, vectorEnabled, vectorSize, function, comment, objectMapper);
} else {
return new CollectionSettings(
collectionName, vectorEnabled, vectorSize, function, null, null);
}
return createCollectionSettings(
collectionName, vectorEnabled, vectorSize, function, comment, objectMapper);
} else { // if not vector collection
return new CollectionSettings(
// handling comment so get the indexing config from comment
final String comment = (String) table.getOptions().get(CqlIdentifier.fromInternal("comment"));
return createCollectionSettings(
collectionName,
vectorEnabled,
0,
CollectionSettings.SimilarityFunction.UNDEFINED,
null,
null);
comment,
objectMapper);
tatu-at-datastax marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand All @@ -109,46 +123,63 @@ public static CollectionSettings getCollectionSettings(
boolean vectorEnabled,
int vectorSize,
SimilarityFunction similarityFunction,
String vectorize,
String comment,
ObjectMapper objectMapper) {
// parse vectorize to get vectorizeServiceName and modelName
if (vectorize != null && !vectorize.isBlank()) {
return createCollectionSettingsFromJson(
collectionName, vectorEnabled, vectorSize, similarityFunction, vectorize, objectMapper);
} else {
return new CollectionSettings(
collectionName, vectorEnabled, vectorSize, similarityFunction, null, null);
}
return createCollectionSettings(
collectionName, vectorEnabled, vectorSize, similarityFunction, comment, objectMapper);
}

private static CollectionSettings createCollectionSettingsFromJson(
private static CollectionSettings createCollectionSettings(
String collectionName,
boolean vectorEnabled,
int vectorSize,
SimilarityFunction function,
String vectorize,
String comment,
ObjectMapper objectMapper) {
try {
JsonNode vectorizeConfig = objectMapper.readTree(vectorize);
String vectorizeServiceName = vectorizeConfig.path("service").textValue();
JsonNode optionsNode = vectorizeConfig.path("options");
String modelName = optionsNode.path("modelName").textValue();
if (vectorizeServiceName != null
&& !vectorizeServiceName.isEmpty()
&& modelName != null
&& !modelName.isEmpty()) {

if (comment == null || comment.isBlank()) {
return new CollectionSettings(
collectionName, vectorEnabled, vectorSize, function, null, null, null);
} else {
try {
String vectorizeServiceName = null;
String modelName = null;

JsonNode commentConfig = objectMapper.readTree(comment);
maheshrajamani marked this conversation as resolved.
Show resolved Hide resolved
JsonNode vectorizeConfig = commentConfig.path("vectorize");
if (!vectorizeConfig.isMissingNode()) {
vectorizeServiceName = vectorizeConfig.path("service").textValue();
JsonNode optionsNode = vectorizeConfig.path("options");
modelName = optionsNode.path("modelName").textValue();
if (!(vectorizeServiceName != null
&& !vectorizeServiceName.isEmpty()
&& modelName != null
&& !modelName.isEmpty())) {
// This should never happen, VectorizeConfig check null, unless it fails
throw new JsonApiException(
VECTORIZECONFIG_CHECK_FAIL,
"%s, please check 'vectorize' configuration."
.formatted(VECTORIZECONFIG_CHECK_FAIL.getMessage()));
}
}
IndexingConfig indexingConfig = null;
JsonNode indexing = commentConfig.path("indexing");
if (!indexing.isMissingNode()) {
indexingConfig = IndexingConfig.fromJson(indexing);
}
return new CollectionSettings(
collectionName, vectorEnabled, vectorSize, function, vectorizeServiceName, modelName);
} else {
// This should never happen, VectorizeConfig check null, unless it fails
throw new JsonApiException(
VECTORIZECONFIG_CHECK_FAIL,
"%s, please check 'vectorize' configuration."
.formatted(VECTORIZECONFIG_CHECK_FAIL.getMessage()));
collectionName,
vectorEnabled,
vectorSize,
function,
vectorizeServiceName,
modelName,
indexingConfig);
} catch (JsonProcessingException e) {
// This should never happen, already check if vectorize is a valid JSON
throw new RuntimeException(
"Invalid json string, please check 'vectorize' configuration.", e);
}
} catch (JsonProcessingException e) {
// This should never happen, already check if vectorize is a valid JSON
throw new RuntimeException("Invalid json string, please check 'vectorize' configuration.", e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ protected Uni<CollectionSettings> getCollectionProperties(String collectionName)
&& (sre.getStatus().getCode() == io.grpc.Status.Code.NOT_FOUND
|| sre.getStatus().getCode() == io.grpc.Status.Code.INVALID_ARGUMENT))) {
return Uni.createFrom()
.item(new CollectionSettings(collectionName, false, 0, null, null, null));
.item(
new CollectionSettings(
collectionName, false, 0, null, null, null, null));
}
return Uni.createFrom().failure(error);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public record CreateCollectionOperation(
boolean vectorSearch,
int vectorSize,
String vectorFunction,
String vectorize)
String comment)
implements Operation {
// shared matcher instance used to tell Collections from Tables
private static final JsonapiTableMatcher COLLECTION_MATCHER = new JsonapiTableMatcher();
Expand All @@ -47,7 +47,7 @@ public static CreateCollectionOperation withVectorSearch(
String name,
int vectorSize,
String vectorFunction,
String vectorize) {
String comment) {
return new CreateCollectionOperation(
commandContext,
dbLimitsConfig,
Expand All @@ -57,17 +57,26 @@ public static CreateCollectionOperation withVectorSearch(
true,
vectorSize,
vectorFunction,
vectorize);
comment);
}

public static CreateCollectionOperation withoutVectorSearch(
CommandContext commandContext,
DatabaseLimitsConfig dbLimitsConfig,
ObjectMapper objectMapper,
CQLSessionCache cqlSessionCache,
String name) {
String name,
String comment) {
return new CreateCollectionOperation(
commandContext, dbLimitsConfig, objectMapper, cqlSessionCache, name, false, 0, null, null);
commandContext,
dbLimitsConfig,
objectMapper,
cqlSessionCache,
name,
false,
0,
null,
comment);
}

@Override
Expand Down Expand Up @@ -101,7 +110,7 @@ public Uni<Supplier<CommandResult>> execute(QueryExecutor queryExecutor) {
vectorSearch,
vectorSize,
CollectionSettings.SimilarityFunction.fromString(vectorFunction),
vectorize,
comment,
objectMapper);
// if table exists and user want to create a vector collection with the same name
if (vectorSearch) {
Expand Down Expand Up @@ -244,8 +253,8 @@ protected SimpleStatement getCreateTable(String keyspace, String table) {
+ vectorSize
+ ">, "
+ " PRIMARY KEY (key))";
if (vectorize != null) {
createTableWithVector = createTableWithVector + " WITH comment = '" + vectorize + "'";
if (comment != null) {
createTableWithVector = createTableWithVector + " WITH comment = '" + comment + "'";
}
return SimpleStatement.newInstance(String.format(createTableWithVector, keyspace, table));
} else {
Expand All @@ -263,7 +272,9 @@ protected SimpleStatement getCreateTable(String keyspace, String table) {
+ " query_timestamp_values map<text, timestamp>, "
+ " query_null_values set<text>, "
+ " PRIMARY KEY (key))";

if (comment != null) {
createTable = createTable + " WITH comment = '" + comment + "'";
}
return SimpleStatement.newInstance(String.format(createTable, keyspace, table));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public CommandResult get() {
collectionProperty.similarityFunction().name().toLowerCase());
options =
new CreateCollectionCommand.Options(
vectorSearchConfig, vectorizeConfig);
vectorSearchConfig, vectorizeConfig, null);
}
// CreateCollectionCommand object is created for convenience to generate json
// response. The code is not creating a collection here.
Expand Down
Loading