Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

multiple tables issue fix [draft] #1631

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
756f615
Use WarningException, expand CommandResultBuilder
amorton Oct 9, 2024
7cc74b5
Merge branch 'main' into ajm/warnings-full-error
amorton Oct 9, 2024
2aeb995
Merge branch 'main' into ajm/warnings-full-error
amorton Oct 9, 2024
b9cdb7f
fixes from merge
amorton Oct 9, 2024
d3a46b8
fix failing unit tests
amorton Oct 10, 2024
ad15492
fmt fix
amorton Oct 10, 2024
88c502c
fix InsertIntegrationTest$InsertManyFails.orderedFailOnBadKeyReturnDo…
amorton Oct 14, 2024
4db1c01
integration test fixes
amorton Oct 15, 2024
cd4969d
fixes for DropKeyspaceIntegrationTest
amorton Oct 15, 2024
831921d
integration test fix
amorton Oct 15, 2024
d70ec4e
BIG FIX - created wrong type of result builder
amorton Oct 15, 2024
2fdcb0b
Merge branch 'main' into ajm/warnings-full-error
amorton Oct 15, 2024
284ae2d
compile fix from merge
amorton Oct 15, 2024
4590ece
BUG FIXES and test improvements
amorton Oct 16, 2024
715b57c
Merge branch 'main' into ajm/warnings-full-error
amorton Oct 16, 2024
9c0a878
Merge branch 'main' into ajm/warnings-full-error
amorton Oct 17, 2024
b707bee
Merge branch 'main' into ajm/ann-sort-tables
amorton Oct 22, 2024
d78ad82
WIP - not compiling.
amorton Oct 22, 2024
ca68d2c
Merge branch 'main' into ajm/ann-sort-tables
amorton Oct 30, 2024
9a714f4
Working vector sort with no tests yet
amorton Oct 30, 2024
879d16d
Added integration tests
amorton Oct 31, 2024
5f5d809
fmt fix
amorton Oct 31, 2024
da2a9ee
Merge branch 'main' into ajm/ann-sort-tables
amorton Oct 31, 2024
ee1181d
error typo, filter on complex type
Yuqi-Du Oct 31, 2024
38192ef
Merge remote-tracking branch 'origin/main' into yuqi/multiple-tables-…
Yuqi-Du Oct 31, 2024
00ff57a
Merge remote-tracking branch 'origin/ajm/ann-sort-tables' into yuqi/m…
Yuqi-Du Oct 31, 2024
683cf51
CANNOT_SORT_TABLE_DELETE_UPDATE_COMMAND
Yuqi-Du Oct 31, 2024
824ee65
CANNOT_SORT_TABLE_DELETE_COMMAND
Yuqi-Du Nov 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,24 @@
""")
public record SortClause(@Valid List<SortExpression> sortExpressions) implements SchemaValidatable {

public boolean isEmpty() {
return sortExpressions == null || sortExpressions.isEmpty();
}

public List<SortExpression> tableVectorSorts() {
return sortExpressions == null
? List.of()
: sortExpressions.stream().filter(SortExpression::isTableVectorSort).toList();
}

public List<SortExpression> nonTableVectorSorts() {
return sortExpressions == null
? List.of()
: sortExpressions.stream()
.filter(sortExpression -> !sortExpression.isTableVectorSort())
.toList();
}

public boolean hasVsearchClause() {
return sortExpressions != null
&& !sortExpressions.isEmpty()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package io.stargate.sgv2.jsonapi.api.model.command.clause.sort;

import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants;
import static io.stargate.sgv2.jsonapi.config.constants.DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD;
import static io.stargate.sgv2.jsonapi.config.constants.DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD;

import jakarta.validation.constraints.NotBlank;
import java.util.Objects;
import javax.annotation.Nullable;

public record SortExpression(
Expand All @@ -15,16 +18,36 @@ public record SortExpression(
@Nullable float[] vector,
@Nullable String vectorize) {

// TODO: either remove the static factories or make this a class, as a record the ctor is public

public static SortExpression sort(String path, boolean ascending) {
return new SortExpression(path, ascending, null, null);
}

public static SortExpression vsearch(float[] vector) {
return new SortExpression(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, false, vector, null);
return new SortExpression(VECTOR_EMBEDDING_FIELD, false, vector, null);
}

public static SortExpression vectorizeSearch(String vectorize) {
return new SortExpression(
DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, false, null, vectorize);
return new SortExpression(VECTOR_EMBEDDING_TEXT_FIELD, false, null, vectorize);
}

/**
* Create a sort that is used when sorting a table column by a vector.
*
* <p>aaron -30-oct, this is a bit of a hack, but it is a quick way to support sorting by a vector
* for tables
*/
public static SortExpression tableVectorSort(String path, float[] vector) {
return new SortExpression(path, false, vector, null);
}

public boolean isTableVectorSort() {
return !pathIs$VectorNames() && vector != null;
}

private boolean pathIs$VectorNames() {
return (Objects.equals(path, VECTOR_EMBEDDING_FIELD)
|| Objects.equals(path, VECTOR_EMBEDDING_TEXT_FIELD));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,34 +47,28 @@ public SortClause deserialize(JsonParser parser, DeserializationContext ctxt) th
Iterator<Map.Entry<String, JsonNode>> fieldIter = sortNode.fields();
int totalFields = sortNode.size();
List<SortExpression> sortExpressions = new ArrayList<>(sortNode.size());

while (fieldIter.hasNext()) {
Map.Entry<String, JsonNode> inner = fieldIter.next();
String path = inner.getKey().trim();

if (DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD.equals(path)) {
// Vector search can't be used with other sort clause
if (totalFields > 1) {
throw ErrorCodeV1.VECTOR_SEARCH_USAGE_ERROR.toApiException();
}
if (!inner.getValue().isArray()) {
throw ErrorCodeV1.SHRED_BAD_VECTOR_VALUE.toApiException();
} else {
ArrayNode arrayNode = (ArrayNode) inner.getValue();
float[] arrayVals = new float[arrayNode.size()];
if (arrayNode.size() == 0) {
throw ErrorCodeV1.SHRED_BAD_VECTOR_SIZE.toApiException();
}
for (int i = 0; i < arrayNode.size(); i++) {
JsonNode element = arrayNode.get(i);
if (!element.isNumber()) {
throw ErrorCodeV1.SHRED_BAD_VECTOR_VALUE.toApiException();
}
arrayVals[i] = element.floatValue();
}
SortExpression exp = SortExpression.vsearch(arrayVals);
sortExpressions.clear();
sortExpressions.add(exp);
break;
}

SortExpression exp =
SortExpression.vsearch(arrayNodeToVector((ArrayNode) inner.getValue()));
sortExpressions.clear();
sortExpressions.add(exp);
// TODO: aaron 17-oct-2024 - this break seems unneeded as above it checks if there is only 1
// field, leaving for now
break;

} else if (DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD.equals(path)) {
// Vector search can't be used with other sort clause
if (totalFields > 1) {
Expand All @@ -83,14 +77,25 @@ public SortClause deserialize(JsonParser parser, DeserializationContext ctxt) th
if (!inner.getValue().isTextual()) {
throw ErrorCodeV1.SHRED_BAD_VECTORIZE_VALUE.toApiException();
}

String vectorizeData = inner.getValue().textValue();
if (vectorizeData.isBlank()) {
throw ErrorCodeV1.SHRED_BAD_VECTORIZE_VALUE.toApiException();
}
SortExpression exp = SortExpression.vectorizeSearch(vectorizeData);
sortExpressions.clear();
sortExpressions.add(exp);
// TODO: aaron 17-oct-2024 - this break seems unneeded as above it checks if there is only 1
// field, leaving for now
break;
} else if (inner.getValue().isArray()) {
// TODO: HACK: quick support for tables, if the value is an array we will assume the column
// is a vector then need to check on table pathway that the sort is correct.
// NOTE: does not check if there are more than one sort expression, the
// TableSortClauseResolver will take care of that so we can get proper ApiExceptions
// this is also why we do not break the look here
sortExpressions.add(
SortExpression.tableVectorSort(path, arrayNodeToVector((ArrayNode) inner.getValue())));
} else {
if (path.isBlank()) {
throw ErrorCodeV1.INVALID_SORT_CLAUSE_PATH.toApiException(
Expand All @@ -116,4 +121,25 @@ public SortClause deserialize(JsonParser parser, DeserializationContext ctxt) th
}
return new SortClause(sortExpressions);
}

/**
* TODO: this almost duplicates code in WriteableShreddedDocument.shredVector() but that does not
* check the array elements, we MUST stop duplicating code like this
*/
private static float[] arrayNodeToVector(ArrayNode arrayNode) {

float[] arrayVals = new float[arrayNode.size()];
if (arrayNode.isEmpty()) {
throw ErrorCodeV1.SHRED_BAD_VECTOR_SIZE.toApiException();
}

for (int i = 0; i < arrayNode.size(); i++) {
JsonNode element = arrayNode.get(i);
if (!element.isNumber()) {
throw ErrorCodeV1.SHRED_BAD_VECTOR_VALUE.toApiException();
}
arrayVals[i] = element.floatValue();
}
return arrayVals;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public enum Code implements ErrorCode<FilterException> {
UNSUPPORTED_COLUMN_TYPES,
COMPARISON_FILTER_AGAINST_DURATION,
FILTER_REQUIRED_FOR_UPDATE_DELETE,
FILTER_ON_COMPLEX_COLUMNS,
NON_PRIMARY_KEY_FILTER_FOR_UPDATE_DELETE,
INCOMPLETE_PRIMARY_KEY_FILTER,
FULL_PRIMARY_KEY_REQUIRED_FOR_UPDATE_DELETE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ public enum Scope implements ErrorScope {
DOCUMENT,
/** See {@link FilterException} */
FILTER,
/** See {@link UpdateException} */
UPDATE,
/** See {@link SchemaException} */
SCHEMA,
/** See {@link SortException} */
SORT,
/** See {@link UpdateException} */
UPDATE,
/** See {@link WarningException} */
WARNING;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package io.stargate.sgv2.jsonapi.exception;

/**
* Errors relating to the sort clause, this includes sorting with ANN
*
* <p>See {@link APIException} for steps to add a new code.
*/
public class SortException extends RequestException {

public static final Scope SCOPE = Scope.SORT;

public SortException(ErrorInstance errorInstance) {
super(errorInstance);
}

public enum Code implements ErrorCode<SortException> {
CANNOT_MIX_VECTOR_AND_NON_VECTOR_SORT,
CANNOT_SORT_TABLE_UPDATE_COMMAND,
CANNOT_SORT_TABLE_DELETE_COMMAND,
CANNOT_SORT_UNKNOWN_COLUMNS,
CANNOT_VECTOR_SORT_NON_INDEXED_VECTOR_COLUMNS,
CANNOT_VECTOR_SORT_NON_VECTOR_COLUMNS,
MORE_THAN_ONE_VECTOR_SORT,
;

private final ErrorTemplate<SortException> template;

Code() {
template = ErrorTemplate.load(SortException.class, FAMILY, SCOPE, name());
}

@Override
public ErrorTemplate<SortException> template() {
return template;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CommandQueryExecutor;
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CqlPagingState;
import io.stargate.sgv2.jsonapi.service.cqldriver.executor.TableSchemaObject;
import io.stargate.sgv2.jsonapi.service.operation.query.CQLOption;
import io.stargate.sgv2.jsonapi.service.operation.query.CqlOptions;
import io.stargate.sgv2.jsonapi.service.operation.query.SelectCQLClause;
import io.stargate.sgv2.jsonapi.service.operation.query.WhereCQLClause;
import io.stargate.sgv2.jsonapi.service.operation.query.*;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
Expand All @@ -42,6 +39,7 @@ public class ReadAttempt<SchemaT extends TableSchemaObject>

private final SelectCQLClause selectCQLClause;
private final WhereCQLClause<Select> whereCQLClause;
private final OrderByCqlClause orderByCqlClause;
private final CqlOptions<Select> cqlOptions;
private final CqlPagingState pagingState;
// DocumentSourceSupplier is an old idea that needs refactoring at some point, is a supplier of a
Expand All @@ -57,6 +55,7 @@ public ReadAttempt(
SchemaT schemaObject,
SelectCQLClause selectCQLClause,
WhereCQLClause<Select> whereCQLClause,
OrderByCqlClause orderByCqlClause,
CqlOptions<Select> cqlOptions,
CqlPagingState pagingState,
DocumentSourceSupplier documentSourceSupplier) {
Expand All @@ -65,6 +64,7 @@ public ReadAttempt(
// nullable because the subclass may want to implement methods to build the statement itself
this.selectCQLClause = selectCQLClause;
this.whereCQLClause = whereCQLClause;
this.orderByCqlClause = orderByCqlClause;
this.cqlOptions = cqlOptions;
this.pagingState = pagingState;
this.documentSourceSupplier = documentSourceSupplier;
Expand Down Expand Up @@ -153,12 +153,15 @@ protected SimpleStatement buildReadStatement() {
protected Select applySelect(SelectFrom selectFrom, List<Object> positionalValues) {
Objects.requireNonNull(selectCQLClause, "selectFrom must not be null");
Objects.requireNonNull(whereCQLClause, "whereCQLClause must not be null");
Objects.requireNonNull(orderByCqlClause, "orderByCqlClause must not be null");

// Add the columns we want to select
Select select = selectCQLClause.apply(selectFrom);

// Add the where clause
select = whereCQLClause.apply(select, positionalValues);
// and finally order by
select = orderByCqlClause.apply(select);

return select;
}

Expand Down Expand Up @@ -191,7 +194,7 @@ public Optional<ColumnsDescContainer> schemaDescription() {
"Unsupported columns in the result set: %s"
.formatted(errFmtApiColumnDef(readApiColumns.filterByUnsupported())));
}
return Optional.of(readApiColumns.toColumnsDef());
return Optional.of(readApiColumns.toColumnsDesc());
}

// This is a simple container for the result set so we can set one variable in the onSuccess
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package io.stargate.sgv2.jsonapi.service.operation.query;

import com.datastax.oss.driver.api.querybuilder.select.Select;
import java.util.function.Function;

/**
* Interface for a class that can add OrderBY to a CQL query built using the Java Driver Query
* Builder. This includes ANN sorting.
*
* <p>This is the ORDER BY part below:
*
* <pre>
* SELECT
* column1, column2
* FROM
* MyTable
* WHERE
* columnName = B70DE1D0-9908-4AE3-BE34-5573E5B09F14;
* ORDER BY column1 ASC, column2 DESC;
* </pre>
*/
public interface OrderByCqlClause extends Function<Select, Select>, CQLClause {

// No Op implementation just returns the select, use this when no order by is needed
OrderByCqlClause NO_OP = select -> select;

/**
* If true, this means that the query will need to be sorted in memory after the query is
* executed.
*
* @return true if the query needs to be sorted in memory
*/
default boolean inMemorySortNeeded() {
return true;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package io.stargate.sgv2.jsonapi.service.operation.tables;

import com.datastax.oss.driver.api.core.data.CqlVector;
import com.datastax.oss.driver.api.querybuilder.select.Select;
import io.stargate.sgv2.jsonapi.service.operation.query.OrderByCqlClause;
import io.stargate.sgv2.jsonapi.service.schema.tables.ApiColumnDef;
import io.stargate.sgv2.jsonapi.service.schema.tables.ApiTypeName;
import java.util.Objects;

/**
* A CQL clause that adds an ORDER BY clause to a SELECT statement to ANN sort.
*
* <p>Note: Only supports sorting on vector columns a single column, if there is a secondary sort
* that would be in memory sorting.
*/
public class TableANNOrderByCQlClause implements OrderByCqlClause {

private final ApiColumnDef apiColumnDef;
private final CqlVector<Float> vector;

public TableANNOrderByCQlClause(ApiColumnDef apiColumnDef, CqlVector<Float> vector) {
this.apiColumnDef = Objects.requireNonNull(apiColumnDef, "apiColumnDef must not be null");
this.vector = Objects.requireNonNull(vector, "vector must not be null");
;

// sanity check
if (apiColumnDef.type().typeName() != ApiTypeName.VECTOR) {
throw new IllegalArgumentException(
"ApiColumnDef must be a vector type, got: %s".formatted(apiColumnDef));
}
}

@Override
public Select apply(Select select) {
return select.orderByAnnOf(apiColumnDef.name(), vector);
}

@Override
public boolean inMemorySortNeeded() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,6 @@ public Optional<ColumnsDescContainer> schemaDescription() {
"Unsupported columns primary key: %s" + apiColumns.filterByUnsupported());
}

return Optional.of(apiColumns.toColumnsDef());
return Optional.of(apiColumns.toColumnsDesc());
}
}
Loading