Skip to content

Commit

Permalink
Tables: Initial Table commands and operations
Browse files Browse the repository at this point in the history
Refactoring for tables, this adds the first read code for
reading from a table and makes the schema cache handle Table objects.

Using the janky FeatureFlags class.

Builds and unit tests pass with the -Poffline profile.

This is a squash of multiple commits from the ajm/tables branch, cherry picked
into the ajm/tables-chunk-2 branch which is build on the ajm/tables-chunk-1
branch.

The commits squashed here were:

commit 045fe36
Author: Tatu Saloranta <tatu.saloranta@datastax.com>
Date:   Mon Jul 8 15:42:00 2024 -0700

    Fix other 3 offline test failures too

commit 791f2b2
Author: Tatu Saloranta <tatu.saloranta@datastax.com>
Date:   Mon Jul 8 15:39:11 2024 -0700

    Fix 2 offline unit tests

commit 284c6a8
Author: Tatu Saloranta <tatu.saloranta@datastax.com>
Date:   Mon Jul 8 15:30:31 2024 -0700

    mvn fmt:format

commit e50947d
Author: Aaron Morton <aaron.morton@datastax.com>
Date:   Tue Jul 9 10:18:58 2024 +1200

    Refactor to run with the offline profile

    and fixed all failing non integration tests

-> Merge commit, not cherry picked <-
commit ca476da
Merge: a2dcaf2 138b372
Author: Tatu Saloranta <tatu.saloranta@datastax.com>
Date:   Mon Jul 8 14:06:15 2024 -0700

    Merge branch 'main' into ajm/tables

-> came from merge above, not cherry picked <-
commit 138b372
Author: Tatu Saloranta <tatu.saloranta@datastax.com>
Date:   Mon Jul 8 13:29:16 2024 -0700

    Add second link to Azure OpenAI docs

-> Merge commit, not cherry picked <-
commit a2dcaf2
Merge: 5350744 96b2839
Author: Tatu Saloranta <tatu.saloranta@datastax.com>
Date:   Mon Jul 8 12:18:00 2024 -0700

    Merge branch 'main' into ajm/tables

commit 5350744
Author: Tatu Saloranta <tatu.saloranta@datastax.com>
Date:   Mon Jul 8 12:14:11 2024 -0700

    fmt:format for offline classes too

-> came from merge above, not cheery picked  <-
commit 96b2839
Author: Tatu Saloranta <tatu.saloranta@datastax.com>
Date:   Mon Jul 8 11:31:52 2024 -0700

    Fixes #1238: convert internal-only `ErrorCode`s into `ErrorCode.SERVER_INTERNAL_ERROR` (#1239)

commit 8f0ea75
Author: Aaron Morton <aaron.morton@datastax.com>
Date:   Mon Jul 8 18:59:29 2024 +1200

    Working to accept a command targeted at a table !

    Will failed in the CommandResolver because
    resolveTableCommand is not implemented in subclasses.

commit 607f0d8
Author: Aaron Morton <aaron.morton@datastax.com>
Date:   Mon Jul 8 13:32:09 2024 +1200

    Expand Schema Cache to handle tables

    added flag in FeatureFlags to enable getting tables in
    the Namespace cache

    unit tests passing other than session cache tests

commit 1296c55
Author: Aaron Morton <aaron.morton@datastax.com>
Date:   Mon Jul 8 11:32:29 2024 +1200

    Refactor to add collection and table operations

    Existing operations are not CollectionOperations
    Added new TableOperations
  • Loading branch information
amorton authored and Yuqi-Du committed Jul 19, 2024
1 parent b9f52f1 commit be9b951
Show file tree
Hide file tree
Showing 150 changed files with 1,074 additions and 708 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,54 @@ public record CommandContext<T extends SchemaObject>(
public static final CommandContext<TableSchemaObject> EMPTY_TABLE =
new CommandContext<>(TableSchemaObject.MISSING, null, "testCommand", null);

public static CommandContext<CollectionSchemaObject> collectionCommandContext(
/**
* Factory method to create a new instance of {@link CommandContext} based on the schema object we
* are working with.
*
* <p>This one handles the super class of {@link SchemaObject}
*
* @param schemaObject
* @param embeddingProvider
* @param commandName
* @param jsonProcessingMetricsReporter
* @return
*/
@SuppressWarnings("unchecked")
public static <T extends SchemaObject> CommandContext<T> forSchemaObject(
T schemaObject,
EmbeddingProvider embeddingProvider,
String commandName,
JsonProcessingMetricsReporter jsonProcessingMetricsReporter) {

// TODO: upgrade to use the modern switch statements
// TODO: how to remove the unchecked cast ? Had to use unchecked cast to get back to the
// CommandContext<T>
if (schemaObject instanceof CollectionSchemaObject cso) {
return (CommandContext<T>)
forSchemaObject(cso, embeddingProvider, commandName, jsonProcessingMetricsReporter);
}
if (schemaObject instanceof TableSchemaObject tso) {
return (CommandContext<T>)
forSchemaObject(tso, embeddingProvider, commandName, jsonProcessingMetricsReporter);
}
if (schemaObject instanceof KeyspaceSchemaObject kso) {
return (CommandContext<T>)
forSchemaObject(kso, embeddingProvider, commandName, jsonProcessingMetricsReporter);
}
throw new IllegalArgumentException("Unknown schema object type: " + schemaObject.getClass());
}

/**
* Factory method to create a new instance of {@link CommandContext} based on the schema object we
* are working with
*
* @param schemaObject
* @param embeddingProvider
* @param commandName
* @param jsonProcessingMetricsReporter
* @return
*/
public static CommandContext<CollectionSchemaObject> forSchemaObject(
CollectionSchemaObject schemaObject,
EmbeddingProvider embeddingProvider,
String commandName,
Expand All @@ -40,7 +87,17 @@ public static CommandContext<CollectionSchemaObject> collectionCommandContext(
schemaObject, embeddingProvider, commandName, jsonProcessingMetricsReporter);
}

public static CommandContext<TableSchemaObject> tableCommandContext(
/**
* Factory method to create a new instance of {@link CommandContext} based on the schema object we
* are working with
*
* @param schemaObject
* @param embeddingProvider
* @param commandName
* @param jsonProcessingMetricsReporter
* @return
*/
public static CommandContext<TableSchemaObject> forSchemaObject(
TableSchemaObject schemaObject,
EmbeddingProvider embeddingProvider,
String commandName,
Expand All @@ -49,6 +106,25 @@ public static CommandContext<TableSchemaObject> tableCommandContext(
schemaObject, embeddingProvider, commandName, jsonProcessingMetricsReporter);
}

/**
* Factory method to create a new instance of {@link CommandContext} based on the schema object we
* are working with
*
* @param schemaObject
* @param embeddingProvider
* @param commandName
* @param jsonProcessingMetricsReporter
* @return
*/
public static CommandContext<KeyspaceSchemaObject> forSchemaObject(
KeyspaceSchemaObject schemaObject,
EmbeddingProvider embeddingProvider,
String commandName,
JsonProcessingMetricsReporter jsonProcessingMetricsReporter) {
return new CommandContext<>(
schemaObject, embeddingProvider, commandName, jsonProcessingMetricsReporter);
}

@SuppressWarnings("unchecked")
public CommandContext<CollectionSchemaObject> asCollectionContext() {
Preconditions.checkArgument(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package io.stargate.sgv2.jsonapi.api.model.command.clause.filter;

import io.stargate.sgv2.jsonapi.exception.ErrorCode;
import io.stargate.sgv2.jsonapi.service.operation.model.impl.filters.DBFilterBase;
import io.stargate.sgv2.jsonapi.service.operation.model.filters.DBFilterBase;
import io.stargate.sgv2.jsonapi.service.shredding.model.DocumentId;
import jakarta.validation.Valid;
import jakarta.validation.constraints.NotBlank;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ public Uni<RestResponse<CommandResult>> postCommand(
@Size(min = 1, max = 48)
String collection) {
return schemaCache
.getCollectionSettings(
.getSchemaObject(
dataApiRequestInfo, dataApiRequestInfo.getTenantId(), namespace, collection)
.onItemOrFailure()
.transformToUni(
Expand All @@ -178,7 +178,7 @@ else if (error instanceof JsonApiException jsonApiException) {
return Uni.createFrom().item(new ThrowableCommandResultSupplier(error));
} else {
// TODO No need for the else clause here, simplify
// TODO: this is where we know if it's a table or a collection
// TODO: refactor this code to be cleaner so it assigns on one line
EmbeddingProvider embeddingProvider = null;
final VectorConfig.VectorizeConfig vectorizeConfig =
schemaObject.vectorConfig().vectorizeConfig();
Expand All @@ -195,17 +195,13 @@ else if (error instanceof JsonApiException jsonApiException) {
command.getClass().getSimpleName());
}

// TODO: when the schema cache returns table and collections we switch here to
// create
// the correct command context
CommandContext<?> commandContext =
CommandContext.collectionCommandContext(
var commandContext =
CommandContext.forSchemaObject(
schemaObject,
embeddingProvider,
command.getClass().getSimpleName(),
jsonProcessingMetricsReporter);

// call processor
return meteredCommandProcessor.processCommand(
dataApiRequestInfo, commandContext, command);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package io.stargate.sgv2.jsonapi.service;

/** Hack / temp class to have run time checking for if Tables are supported */
public final class FeatureFlags {

public static final boolean TABLES_SUPPORTED = Boolean.getBoolean("stargate.tables.supported");
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import io.stargate.sgv2.jsonapi.service.cql.ColumnUtils;
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSchemaObject;
import io.stargate.sgv2.jsonapi.service.cqldriver.serializer.CQLBindValues;
import io.stargate.sgv2.jsonapi.service.operation.model.impl.builder.BuiltCondition;
import io.stargate.sgv2.jsonapi.service.operation.model.builder.BuiltCondition;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package io.stargate.sgv2.jsonapi.service.cqldriver.executor;

import com.google.common.base.Preconditions;
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.Tags;

/**
* This class is used to track the usage of indexes in a query. It is used to generate metrics for
* different commands by index column usage
*/
public class CollectionIndexUsage implements IndexUsage {

public boolean primaryKeyTag,
existKeysIndexTag,
arraySizeIndexTag,
arrayContainsTag,
booleanIndexTag,
numberIndexTag,
textIndexTag,
timestampIndexTag,
nullIndexTag,
vectorIndexTag;

/**
* This method is used to generate the tags for the index usage
*
* @return
*/
@Override
public Tags getTags() {
return Tags.of(
Tag.of("key", String.valueOf(primaryKeyTag)),
Tag.of("exist_keys", String.valueOf(existKeysIndexTag)),
Tag.of("array_size", String.valueOf(arraySizeIndexTag)),
Tag.of("array_contains", String.valueOf(arrayContainsTag)),
Tag.of("query_bool_values", String.valueOf(booleanIndexTag)),
Tag.of("query_dbl_values", String.valueOf(numberIndexTag)),
Tag.of("query_text_values", String.valueOf(textIndexTag)),
Tag.of("query_timestamp_values", String.valueOf(timestampIndexTag)),
Tag.of("query_null_values", String.valueOf(nullIndexTag)),
Tag.of("query_vector_value", String.valueOf(vectorIndexTag)));
}

/**
* This method is used to merge the index usage of two different types for filters used in a query
*
* @param indexUsage
*/
public void merge(IndexUsage indexUsage) {
Preconditions.checkArgument(
indexUsage instanceof CollectionIndexUsage, "Cannot merge different types of index usage");
var other = (CollectionIndexUsage) indexUsage;

this.arrayContainsTag |= other.arrayContainsTag;
this.primaryKeyTag |= other.primaryKeyTag;
this.existKeysIndexTag |= other.existKeysIndexTag;
this.arraySizeIndexTag |= other.arraySizeIndexTag;
this.booleanIndexTag |= other.booleanIndexTag;
this.numberIndexTag |= other.numberIndexTag;
this.textIndexTag |= other.textIndexTag;
this.timestampIndexTag |= other.timestampIndexTag;
this.nullIndexTag |= other.nullIndexTag;
this.vectorIndexTag |= other.vectorIndexTag;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,23 @@ public VectorConfig vectorConfig() {
return vectorConfig;
}

@Override
public IndexUsage newIndexUsage() {
return new CollectionIndexUsage();
}

/**
* Helper to avoid cast from the interface method because there are times we need to set
* properties on this immediately
*
* <p>Used in resolvers so they can set the vector tag for an ANN sort
*
* @return
*/
public CollectionIndexUsage newCollectionIndexUsage() {
return new CollectionIndexUsage();
}

public record IdConfig(IdType idType) {
public static IdConfig defaultIdConfig() {
return new IdConfig(IdType.UNDEFINED);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package io.stargate.sgv2.jsonapi.service.cqldriver.executor;

import io.micrometer.core.instrument.Tags;

public interface IndexUsage {

/** Default implementation when we are not tracking index usage for the schema object */
IndexUsage NO_OP =
new IndexUsage() {
@Override
public Tags getTags() {
return Tags.empty();
}

@Override
public void merge(IndexUsage indexUsage) {
// NO-OP
}
};

/**
* This method is used to generate the tags for the index usage
*
* @return
*/
Tags getTags();

/**
* This method is used to merge the index usage of two different types for filters used in a query
*
* @param indexUsage
*/
void merge(IndexUsage indexUsage);
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,9 @@ public KeyspaceSchemaObject(SchemaObjectName name) {
public VectorConfig vectorConfig() {
return VectorConfig.notEnabledVectorConfig();
}

@Override
public IndexUsage newIndexUsage() {
return IndexUsage.NO_OP;
}
}
Loading

0 comments on commit be9b951

Please sign in to comment.