Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create collection mixed casing fix #1117

Merged
merged 8 commits into from
May 28, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public enum ErrorCode {
TOO_MANY_COLLECTIONS("Too many collections"),

TOO_MANY_INDEXES("Too many indexes"),
INDEXES_CREATION_FAILED("Index creation failed, check schema"),

UNSUPPORTED_FILTER_DATA_TYPE("Unsupported filter data type"),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ private Uni<Supplier<CommandResult>> executeCollectionCreation(
res -> {
if (res.wasApplied()) {
final List<SimpleStatement> indexStatements =
getIndexStatements(commandContext.namespace(), name);
getIndexStatements(commandContext.namespace(), name, collectionExisted);
Multi<AsyncResultSet> indexResultMulti;
/*
CI will override ddlDelayMillis to 0 using `-Dstargate.jsonapi.operations.database-config.ddl-delay-millis=0`
Expand Down Expand Up @@ -222,10 +222,11 @@ private Uni<Supplier<CommandResult>> executeCollectionCreation(
error ->
// InvalidQueryException(DB index limit violation)
error instanceof InvalidQueryException
&& error
.getMessage()
.matches(
".*Cannot have more than \\d+ indexes, failed to create index on table.*"))
&& (error
.getMessage()
.matches(
".*Cannot have more than \\d+ indexes, failed to create index on table.*")
|| error.getMessage().matches("Index .* already exists")))
.recoverWithUni(
error -> {
// if index creation violates DB index limit and collection not existed before,
Expand All @@ -235,12 +236,21 @@ private Uni<Supplier<CommandResult>> executeCollectionCreation(
}
// if index creation violates DB index limit and collection existed before,
// will not drop the collection
return Uni.createFrom()
.item(
() ->
ErrorCode.TOO_MANY_INDEXES.toApiException(
"collection \"%s\" creation failed due to index creation failing; need %d indexes to create the collection;",
name, dbLimitsConfig.indexesNeededPerCollection()));
if (error.getMessage().matches("Index .* already exists")) {
return Uni.createFrom()
.item(
() ->
ErrorCode.INDEXES_CREATION_FAILED.toApiException(
maheshrajamani marked this conversation as resolved.
Show resolved Hide resolved
"collection \"%s\" creation failed due to index creation failing; check schema",
name));
} else {
return Uni.createFrom()
.item(
() ->
ErrorCode.TOO_MANY_INDEXES.toApiException(
"collection \"%s\" creation failed due to index creation failing; need %d indexes to create the collection;",
name, dbLimitsConfig.indexesNeededPerCollection()));
}
});
}

Expand Down Expand Up @@ -400,48 +410,60 @@ public SimpleStatement getCreateTable(String keyspace, String table) {
}
}

public List<SimpleStatement> getIndexStatements(String keyspace, String table) {
public List<SimpleStatement> getIndexStatements(
String keyspace, String table, boolean collectionExisted) {
List<SimpleStatement> statements = new ArrayList<>(10);
String apprender =
collectionExisted ? "CREATE CUSTOM INDEX IF NOT EXISTS" : "CREATE CUSTOM INDEX";
if (!indexingDenyAll()) {
String existKeys =
"CREATE CUSTOM INDEX IF NOT EXISTS %s_exists_keys ON \"%s\".\"%s\" (exist_keys) USING 'StorageAttachedIndex'";
apprender
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment to explain why we are using quoted identifiers, comments like this make it less likely another person will change it later.

I would also consider a helper function or lambda to quote the identifier rather than have lots of \" in a string which can easily become unbalanced.

Comment also for the create table statement.

Example "Using CQL quoted identifiers to preserve the case of the identifiers in the CQL schema, and enforce case sensitive comparisons. This is expected behaviour for the Data API."

+ " \"%s_exists_keys\" ON \"%s\".\"%s\" (exist_keys) USING 'StorageAttachedIndex'";

statements.add(SimpleStatement.newInstance(String.format(existKeys, table, keyspace, table)));

String arraySize =
"CREATE CUSTOM INDEX IF NOT EXISTS %s_array_size ON \"%s\".\"%s\" (entries(array_size)) USING 'StorageAttachedIndex'";
apprender
+ " \"%s_array_size\" ON \"%s\".\"%s\" (entries(array_size)) USING 'StorageAttachedIndex'";
statements.add(SimpleStatement.newInstance(String.format(arraySize, table, keyspace, table)));

String arrayContains =
"CREATE CUSTOM INDEX IF NOT EXISTS %s_array_contains ON \"%s\".\"%s\" (array_contains) USING 'StorageAttachedIndex'";
apprender
+ " \"%s_array_contains\" ON \"%s\".\"%s\" (array_contains) USING 'StorageAttachedIndex'";
statements.add(
SimpleStatement.newInstance(String.format(arrayContains, table, keyspace, table)));

String boolQuery =
"CREATE CUSTOM INDEX IF NOT EXISTS %s_query_bool_values ON \"%s\".\"%s\" (entries(query_bool_values)) USING 'StorageAttachedIndex'";
apprender
+ " \"%s_query_bool_values\" ON \"%s\".\"%s\" (entries(query_bool_values)) USING 'StorageAttachedIndex'";
statements.add(SimpleStatement.newInstance(String.format(boolQuery, table, keyspace, table)));

String dblQuery =
"CREATE CUSTOM INDEX IF NOT EXISTS %s_query_dbl_values ON \"%s\".\"%s\" (entries(query_dbl_values)) USING 'StorageAttachedIndex'";
apprender
+ " \"%s_query_dbl_values\" ON \"%s\".\"%s\" (entries(query_dbl_values)) USING 'StorageAttachedIndex'";
statements.add(SimpleStatement.newInstance(String.format(dblQuery, table, keyspace, table)));

String textQuery =
"CREATE CUSTOM INDEX IF NOT EXISTS %s_query_text_values ON \"%s\".\"%s\" (entries(query_text_values)) USING 'StorageAttachedIndex'";
apprender
+ " \"%s_query_text_values\" ON \"%s\".\"%s\" (entries(query_text_values)) USING 'StorageAttachedIndex'";
statements.add(SimpleStatement.newInstance(String.format(textQuery, table, keyspace, table)));

String timestampQuery =
"CREATE CUSTOM INDEX IF NOT EXISTS %s_query_timestamp_values ON \"%s\".\"%s\" (entries(query_timestamp_values)) USING 'StorageAttachedIndex'";
apprender
+ " \"%s_query_timestamp_values\" ON \"%s\".\"%s\" (entries(query_timestamp_values)) USING 'StorageAttachedIndex'";
statements.add(
SimpleStatement.newInstance(String.format(timestampQuery, table, keyspace, table)));

String nullQuery =
"CREATE CUSTOM INDEX IF NOT EXISTS %s_query_null_values ON \"%s\".\"%s\" (query_null_values) USING 'StorageAttachedIndex'";
apprender
+ " \"%s_query_null_values\" ON \"%s\".\"%s\" (query_null_values) USING 'StorageAttachedIndex'";
statements.add(SimpleStatement.newInstance(String.format(nullQuery, table, keyspace, table)));
}

if (vectorSearch) {
String vectorSearch =
"CREATE CUSTOM INDEX IF NOT EXISTS %s_query_vector_value ON \"%s\".\"%s\" (query_vector_value) USING 'StorageAttachedIndex' WITH OPTIONS = { 'similarity_function': '"
apprender
+ " \"%s_query_vector_value\" ON \"%s\".\"%s\" (query_vector_value) USING 'StorageAttachedIndex' WITH OPTIONS = { 'similarity_function': '"
+ vectorFunction()
+ "'}";
statements.add(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ private FileWriterParams buildFileWriterParams() {
.getQuery();
List<String> indexCQLs =
createCollectionOperation
.getIndexStatements(this.namespace, this.createCollection.name())
.getIndexStatements(this.namespace, this.createCollection.name(), false)
.stream()
.map(SimpleStatement::getQuery)
.toList();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class CreateCollection {
}
}
""";

String createVectorCollection =
"""
{
Expand Down Expand Up @@ -93,6 +94,50 @@ public void happyPath() {
deleteCollection(collectionName);
}

@Test
public void caseSensitive() {
String json =
"""
{
"createCollection": {
"name": "%s"
}
}
"""
.formatted("testcollection");

given()
.headers(getHeaders())
.contentType(ContentType.JSON)
.body(json)
.when()
.post(NamespaceResource.BASE_PATH, namespaceName)
.then()
.statusCode(200)
.body("status.ok", is(1));

json =
"""
{
"createCollection": {
"name": "%s"
}
}
"""
.formatted("testCollection");

given()
.headers(getHeaders())
.contentType(ContentType.JSON)
.body(json)
.when()
.post(NamespaceResource.BASE_PATH, namespaceName)
.then()
.statusCode(200)
.body("status.ok", is(1));
deleteCollection("testCollection");
}

@Test
public void duplicateNonVectorCollectionName() {
// create a non vector collection
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
Expand All @@ -12,8 +13,16 @@
import com.datastax.oss.driver.api.core.cql.ColumnDefinitions;
import com.datastax.oss.driver.api.core.cql.Row;
import com.datastax.oss.driver.api.core.metadata.Metadata;
import com.datastax.oss.driver.api.core.metadata.Node;
import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata;
import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata;
import com.datastax.oss.driver.api.core.servererrors.InvalidQueryException;
import com.datastax.oss.driver.api.core.type.DataType;
import com.datastax.oss.driver.internal.core.metadata.schema.DefaultColumnMetadata;
import com.datastax.oss.driver.internal.core.metadata.schema.DefaultKeyspaceMetadata;
import com.datastax.oss.driver.internal.core.type.DefaultTupleType;
import com.datastax.oss.driver.internal.core.type.PrimitiveType;
import com.datastax.oss.protocol.internal.ProtocolConstants;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.quarkus.test.junit.QuarkusTest;
import io.quarkus.test.junit.TestProfile;
Expand All @@ -28,6 +37,7 @@
import io.stargate.sgv2.jsonapi.service.testutil.MockAsyncResultSet;
import io.stargate.sgv2.jsonapi.service.testutil.MockRow;
import jakarta.inject.Inject;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -289,5 +299,106 @@ public void denyAllCollectionVector() {
// 1 create Table + 1 vector index
assertThat(schemaCounter.get()).isEqualTo(2);
}

@Test
public void indexAlreadyDropTable() {
List<Row> resultRows =
Arrays.asList(new MockRow(RESULT_COLUMNS, 0, Arrays.asList(byteBufferFrom(true))));

AsyncResultSet results = new MockAsyncResultSet(RESULT_COLUMNS, resultRows, null);
final AtomicInteger schemaCounter = new AtomicInteger();
final AtomicInteger dropCounter = new AtomicInteger();
QueryExecutor queryExecutor = mock(QueryExecutor.class);
when(queryExecutor.executeCreateSchemaChange(
eq(dataApiRequestInfo),
argThat(
simpleStatement ->
simpleStatement.getQuery().startsWith("CREATE TABLE IF NOT EXISTS"))))
.then(
invocation -> {
schemaCounter.incrementAndGet();
return Uni.createFrom().item(results);
});

when(queryExecutor.executeCreateSchemaChange(
eq(dataApiRequestInfo),
argThat(
simpleStatement -> simpleStatement.getQuery().startsWith("CREATE CUSTOM INDEX"))))
.then(
invocation -> {
schemaCounter.incrementAndGet();
throw new InvalidQueryException(mock(Node.class), "Index xxxxx already exists");
});

when(queryExecutor.executeDropSchemaChange(
eq(dataApiRequestInfo),
argThat(
simpleStatement ->
simpleStatement.getQuery().startsWith("DROP TABLE IF EXISTS"))))
.then(
invocation -> {
dropCounter.incrementAndGet();
return Uni.createFrom().item(results);
});
CQLSessionCache sessionCache = mock(CQLSessionCache.class);
CqlSession session = mock(CqlSession.class);
when(sessionCache.getSession(dataApiRequestInfo)).thenReturn(session);
Metadata metadata = mock(Metadata.class);
when(session.getMetadata()).thenReturn(metadata);
Map<CqlIdentifier, KeyspaceMetadata> allKeyspaces = new HashMap<>();
DefaultKeyspaceMetadata keyspaceMetadata =
new DefaultKeyspaceMetadata(
CqlIdentifier.fromInternal(KEYSPACE_NAME),
false,
false,
new HashMap<>(),
new HashMap<>(),
new HashMap<>(),
new HashMap<>(),
new HashMap<>(),
new HashMap<>());
allKeyspaces.put(CqlIdentifier.fromInternal(KEYSPACE_NAME), keyspaceMetadata);
when(metadata.getKeyspaces()).thenReturn(allKeyspaces);

CreateCollectionOperation operation =
CreateCollectionOperation.withoutVectorSearch(
COMMAND_CONTEXT,
databaseLimitsConfig,
objectMapper,
sessionCache,
COLLECTION_NAME,
"",
10,
true,
false);

Supplier<CommandResult> execute =
operation
.execute(dataApiRequestInfo, queryExecutor)
.subscribe()
.withSubscriber(UniAssertSubscriber.create())
.awaitItem()
.getItem();
// 1 create Table + 1 index failure
assertThat(schemaCounter.get()).isEqualTo(2);
// 1 drop table
assertThat(dropCounter.get()).isEqualTo(1);
}

private List<ColumnMetadata> createCorrectPartitionColumn() {
List<DataType> tuple =
Arrays.asList(
new PrimitiveType(ProtocolConstants.DataType.TINYINT),
new PrimitiveType(ProtocolConstants.DataType.VARCHAR));
List<ColumnMetadata> partitionKey = new ArrayList<>();
partitionKey.add(
new DefaultColumnMetadata(
CqlIdentifier.fromInternal("keyspace"),
CqlIdentifier.fromInternal("collection"),
CqlIdentifier.fromInternal("key"),
new DefaultTupleType(tuple),
false));
return partitionKey;
}
}
}