Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create index command implementation #1526

Merged
merged 21 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
d306b5b
Initial commit
maheshrajamani Oct 9, 2024
c747444
Working create index
maheshrajamani Oct 10, 2024
acfd2eb
Added IfNotExists support
maheshrajamani Oct 10, 2024
97ac2d9
Fix for ifNotExist
maheshrajamani Oct 10, 2024
40f4d97
Added comments and support for default similarity
maheshrajamani Oct 10, 2024
8060ced
Added comments
maheshrajamani Oct 10, 2024
358cb4a
Renamed all addIndex to createIndex
maheshrajamani Oct 10, 2024
7169932
Fix for IT failures
maheshrajamani Oct 10, 2024
f09de74
Merge branch 'main' into create-index
maheshrajamani Oct 10, 2024
9db3d1b
Fix for IT failures
maheshrajamani Oct 10, 2024
efde024
Fix for IT failures
maheshrajamani Oct 10, 2024
ef4fc93
Moved ifNotExist flag to command level options
maheshrajamani Oct 11, 2024
fd1b5b2
Merge branch 'main' into create-index
maheshrajamani Oct 11, 2024
0d61aa6
Changed columnName and IndexName to CqlIdentifier in CreateIndexAttempt.
maheshrajamani Oct 11, 2024
19e2301
Merge branch 'create-index' of github.com:stargate/data-api into crea…
maheshrajamani Oct 11, 2024
2d8b83a
Refactored to move supported sources to a constants file.
maheshrajamani Oct 11, 2024
07d2fb3
Changes based on review comments
maheshrajamani Oct 11, 2024
d5fed28
Changes based on review comments
maheshrajamani Oct 11, 2024
b10ca9e
Changed var to CqlIdentifier
maheshrajamani Oct 11, 2024
f8da4a9
Use CqlIdentifier to get column information from metadata
maheshrajamani Oct 11, 2024
37be411
Merge branch 'main' into create-index
maheshrajamani Oct 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
@JsonSubTypes.Type(value = InsertManyCommand.class),
@JsonSubTypes.Type(value = UpdateManyCommand.class),
@JsonSubTypes.Type(value = UpdateOneCommand.class),
@JsonSubTypes.Type(value = AddIndexCommand.class),
// There is only the collection resource, which is also used for API tables
@JsonSubTypes.Type(value = CreateIndexCommand.class),
@JsonSubTypes.Type(value = DropIndexCommand.class),
})
public interface CollectionCommand extends Command {}
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ public interface Command {

/** Enum of API command names. Each value is the name the user supplies in the command JSON body. */
enum CommandName {
ADD_INDEX("addIndex"),
COUNT_DOCUMENTS("countDocuments"),
CREATE_COLLECTION("createCollection"),
CREATE_NAMESPACE("createNamespace"),
CREATE_INDEX("createIndex"),
CREATE_KEYSPACE("createKeyspace"),
CREATE_NAMESPACE("createNamespace"),
CREATE_TABLE("createTable"),
DELETE_COLLECTION("deleteCollection"),
DELETE_MANY("deleteMany"),
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package io.stargate.sgv2.jsonapi.api.model.command.impl;

import com.fasterxml.jackson.annotation.JsonTypeName;
import io.stargate.sgv2.jsonapi.api.model.command.CollectionCommand;
import io.stargate.sgv2.jsonapi.service.schema.SimilarityFunction;
import jakarta.annotation.Nullable;
import jakarta.validation.constraints.NotNull;
import jakarta.validation.constraints.Pattern;
import jakarta.validation.constraints.Size;
import org.eclipse.microprofile.openapi.annotations.enums.SchemaType;
import org.eclipse.microprofile.openapi.annotations.media.Schema;

// TODO, hide table feature detail before it goes public
@Schema(description = "Command that creates an index for a column in a table.")
@JsonTypeName("createIndex")
public record CreateIndexCommand(
@NotNull
@Size(min = 1, max = 48)
@Pattern(regexp = "[a-zA-Z][a-zA-Z0-9_]*")
@Schema(description = "Name of the column to create the index on")
String name,
@NotNull
@Schema(
description = "Definition for created index for a column.",
type = SchemaType.OBJECT)
Definition definition,
@Nullable @Schema(description = "Creating index command option.", type = SchemaType.OBJECT)
Options options)
implements CollectionCommand {
public record Definition(
@NotNull
@Size(min = 1, max = 48)
@Pattern(regexp = "[a-zA-Z][a-zA-Z0-9_]*")
@Schema(description = "Name of the column for which index to be created.")
String column,
@Nullable @Schema(description = "Different indexing options.", type = SchemaType.OBJECT)
Options options) {
// This is index definition options for text and vector column types.
public record Options(
@Nullable
@Schema(
description = "Ignore case in matching string values.",
defaultValue = "true",
type = SchemaType.BOOLEAN,
implementation = Boolean.class)
Boolean caseSensitive,
@Nullable
@Schema(
description = "When set to true, perform Unicode normalization on indexed strings.",
defaultValue = "false",
type = SchemaType.BOOLEAN,
implementation = Boolean.class)
Boolean normalize,
@Nullable
@Schema(
description =
"When set to true, index will converts alphabetic, numeric, and symbolic characters to the ascii equivalent, if one exists.",
defaultValue = "false",
type = SchemaType.BOOLEAN,
implementation = Boolean.class)
Boolean ascii,
@Nullable
@Pattern(
regexp = "(dot_product|cosine|euclidean)",
message = "function name can only be 'dot_product', 'cosine' or 'euclidean'")
@Schema(
description =
"Similarity function algorithm that needs to be used for vector search",
defaultValue = "cosine",
type = SchemaType.STRING,
implementation = String.class)
SimilarityFunction metric,
@Nullable
@Size(min = 1, max = 48)
@Pattern(regexp = "[a-zA-Z][a-zA-Z0-9_]*")
@Schema(description = "Model name used to generate the embeddings.")
String sourceModel) {}
}

// This is index command option irrespective of column definition.
public record Options(
tatu-at-datastax marked this conversation as resolved.
Show resolved Hide resolved
@Schema(
description = "Flag to ignore if index already exists",
defaultValue = "false",
type = SchemaType.BOOLEAN,
implementation = Boolean.class)
Boolean ifNotExists) {}

/** {@inheritDoc} */
@Override
public CommandName commandName() {
return CommandName.CREATE_INDEX;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,10 @@ public CollectionResource(MeteredCommandProcessor meteredCommandProcessor) {
InsertOneCommand.class,
InsertManyCommand.class,
UpdateManyCommand.class,
UpdateOneCommand.class
UpdateOneCommand.class,
// TODO, hide table feature detail before it goes public,
// https://github.com/stargate/data-api/pull/1360
// AddIndexCommand.class,
// CreateIndexCommand.class,
// DropIndexCommand.class
}),
examples = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package io.stargate.sgv2.jsonapi.config.constants;

import io.smallrye.config.ConfigMapping;
import java.util.Set;

@ConfigMapping(prefix = "stargate.jsonapi.vector")
public interface VectorConstant {

  /** Source models supported for a vector index in Cassandra. */
  Set<String> SUPPORTED_SOURCES =
      Set.of(
          "ada002",
          "openai_v3_small",
          "openai_v3_large",
          "bert",
          "gecko",
          "nv_qa_4",
          "cohere_v3");
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ public enum Code implements ErrorCode<SchemaException> {
COLUMN_TYPE_INCORRECT,
COLUMN_TYPE_UNSUPPORTED,
INVALID_CONFIGURATION,
INVALID_INDEX_DEFINITION,
INVALID_VECTORIZE_CONFIGURATION,
LIST_TYPE_INCORRECT_DEFINITION,
MAP_TYPE_INCORRECT_DEFINITION,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package io.stargate.sgv2.jsonapi.service.cqldriver.override;

import com.datastax.oss.driver.api.core.CqlIdentifier;
import com.datastax.oss.driver.internal.querybuilder.CqlHelper;
import com.datastax.oss.driver.internal.querybuilder.schema.DefaultCreateIndex;
import com.datastax.oss.driver.internal.querybuilder.schema.OptionsUtils;
import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableMap;
import com.datastax.oss.driver.shaded.guava.common.collect.UnmodifiableIterator;
import java.util.Map;

/**
* An extension of the {@link DefaultCreateIndex} class, This is needed because the column name
* appended to the builder needs to use `asCql(true)` to keep the quotes.
Copy link
Contributor

@tatu-at-datastax tatu-at-datastax Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really true? Can the caller not pass properly constructed CqlIdentifier to ensure this?
And if not, is this a bug?

EDIT: base class does have things as CqlIdentifier so it looks like a bug to report. And if so, we are doing override in the meantime so as not to be blocked.
If so, makes sense.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes., reported this to drivers team.

*/
public class ExtendedCreateIndex extends DefaultCreateIndex {

public ExtendedCreateIndex(DefaultCreateIndex defaultCreateIndex) {

super(
defaultCreateIndex.getIndex(),
defaultCreateIndex.isIfNotExists(),
defaultCreateIndex.getKeyspace(),
defaultCreateIndex.getTable(),
defaultCreateIndex.getColumnToIndexType(),
defaultCreateIndex.getUsingClass(),
// This is fine as the internal options object is ImmutableMap
(ImmutableMap<String, Object>) defaultCreateIndex.getOptions());
}

@Override
public String asCql() {
StringBuilder builder = new StringBuilder("CREATE ");
if (this.getUsingClass() != null) {
builder.append("CUSTOM ");
}

builder.append("INDEX");
if (this.isIfNotExists()) {
builder.append(" IF NOT EXISTS");
}

if (this.getIndex() != null) {
builder.append(' ').append(this.getIndex().asCql(true));
}

if (this.getTable() == null) {
return builder.toString();
} else {
builder.append(" ON ");
CqlHelper.qualify(this.getKeyspace(), this.getTable(), builder);
if (this.getColumnToIndexType().isEmpty()) {
return builder.toString();
} else {
builder.append(" (");
boolean firstColumn = true;
UnmodifiableIterator var3 = this.getColumnToIndexType().entrySet().iterator();

while (var3.hasNext()) {
Map.Entry<CqlIdentifier, String> entry = (Map.Entry) var3.next();
if (firstColumn) {
firstColumn = false;
} else {
builder.append(",");
}

if (((String) entry.getValue()).equals("__NO_INDEX_TYPE")) {
builder.append(entry.getKey().asCql(true));
tatu-at-datastax marked this conversation as resolved.
Show resolved Hide resolved
} else {
builder
.append((String) entry.getValue())
.append("(")
.append(entry.getKey().asCql(true))
.append(")");
}
}

builder.append(")");
if (this.getUsingClass() != null) {
builder.append(" USING '").append(this.getUsingClass()).append('\'');
}

if (!this.getOptions().isEmpty()) {
builder.append(OptionsUtils.buildOptions(this.getOptions(), true));
}

return builder.toString();
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package io.stargate.sgv2.jsonapi.service.cqldriver.override;

import com.datastax.oss.driver.api.core.type.DataType;
import com.datastax.oss.driver.internal.core.type.DefaultVectorType;

/**
* Extended vector type to support vector size This is needed because java drivers
* DataTypes.vectorOf() method has a bug
*/
public class ExtendedVectorType extends DefaultVectorType {
tatu-at-datastax marked this conversation as resolved.
Show resolved Hide resolved
public ExtendedVectorType(DataType subtype, int vectorSize) {
super(subtype, vectorSize);
}

@Override
public String asCql(boolean includeFrozen, boolean pretty) {
return "VECTOR<" + getElementType().asCql(includeFrozen, pretty) + "," + getDimensions() + ">";
}
}

This file was deleted.

Loading