From 4581baa933999f8b86668e106ab5467921ea6f48 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Fri, 14 Jun 2024 09:51:21 -0700 Subject: [PATCH 01/18] init --- .../embedding/DataVectorizerService.java | 43 +++---- .../model/impl/ReadAndUpdateOperation.java | 28 +++- .../FindOneAndReplaceCommandResolver.java | 9 +- .../impl/FindOneAndUpdateCommandResolver.java | 10 +- .../model/impl/UpdateManyCommandResolver.java | 9 +- .../model/impl/UpdateOneCommandResolver.java | 9 +- .../service/updater/DocumentUpdater.java | 67 +++++++++- .../v1/VectorizeSearchIntegrationTest.java | 120 ++++++++++++++++-- .../impl/ReadAndUpdateOperationRetryTest.java | 8 ++ .../impl/ReadAndUpdateOperationTest.java | 14 ++ ...erialConsistencyOverrideOperationTest.java | 5 +- .../CommandResolverWithVectorizerTest.java | 65 +++++++--- .../FindOneAndUpdateCommandResolverTest.java | 12 +- .../impl/UpdateManyCommandResolverTest.java | 10 +- .../impl/UpdateOneCommandResolverTest.java | 12 +- .../service/updater/DocumentUpdaterTest.java | 67 ++++++---- 16 files changed, 376 insertions(+), 112 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java index a8c13591fd..01b579f84c 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java @@ -7,8 +7,6 @@ import io.stargate.sgv2.jsonapi.api.model.command.Command; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.Sortable; -import io.stargate.sgv2.jsonapi.api.model.command.Updatable; -import io.stargate.sgv2.jsonapi.api.model.command.impl.FindOneAndReplaceCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.InsertManyCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.InsertOneCommand; import 
io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; @@ -51,6 +49,17 @@ public DataVectorizerService( */ public Uni vectorize( DataApiRequestInfo dataApiRequestInfo, CommandContext commandContext, Command command) { + final DataVectorizer dataVectorizer = + constructDataVectorizer(dataApiRequestInfo, commandContext); + return vectorizeSortClause(dataVectorizer, commandContext, command) + .onItem() + .transformToUni(flag -> vectorizeDocument(dataVectorizer, commandContext, command)) + .onItem() + .transform(flag -> command); + } + + public DataVectorizer constructDataVectorizer( + DataApiRequestInfo dataApiRequestInfo, CommandContext commandContext) { EmbeddingProvider embeddingProvider = Optional.ofNullable(commandContext.embeddingProvider()) .map( @@ -60,21 +69,13 @@ public Uni vectorize( jsonApiMetricsConfig, dataApiRequestInfo, provider, - command.getClass().getSimpleName())) + commandContext.commandName())) .orElse(null); - final DataVectorizer dataVectorizer = - new DataVectorizer( - embeddingProvider, - objectMapper.getNodeFactory(), - dataApiRequestInfo.getEmbeddingApiKey(), - commandContext.collectionSettings()); - return vectorizeSortClause(dataVectorizer, commandContext, command) - .onItem() - .transformToUni(flag -> vectorizeUpdateClause(dataVectorizer, commandContext, command)) - .onItem() - .transformToUni(flag -> vectorizeDocument(dataVectorizer, commandContext, command)) - .onItem() - .transform(flag -> command); + return new DataVectorizer( + embeddingProvider, + objectMapper.getNodeFactory(), + dataApiRequestInfo.getEmbeddingApiKey(), + commandContext.collectionSettings()); } private Uni vectorizeSortClause( @@ -85,22 +86,12 @@ private Uni vectorizeSortClause( return Uni.createFrom().item(true); } - private Uni vectorizeUpdateClause( - DataVectorizer dataVectorizer, CommandContext commandContext, Command command) { - if (command instanceof Updatable updatable) { - return dataVectorizer.vectorizeUpdateClause(updatable.updateClause()); - } - 
return Uni.createFrom().item(true); - } - private Uni vectorizeDocument( DataVectorizer dataVectorizer, CommandContext commandContext, Command command) { if (command instanceof InsertOneCommand insertOneCommand) { return dataVectorizer.vectorize(List.of(insertOneCommand.document())); } else if (command instanceof InsertManyCommand insertManyCommand) { return dataVectorizer.vectorize(insertManyCommand.documents()); - } else if (command instanceof FindOneAndReplaceCommand findOneAndReplaceCommand) { - return dataVectorizer.vectorize(List.of(findOneAndReplaceCommand.replacementDocument())); } return Uni.createFrom().item(true); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java index c366cd4ff9..83e4ab60ad 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java @@ -10,6 +10,8 @@ import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor; import io.stargate.sgv2.jsonapi.service.cqldriver.serializer.CQLBindValues; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.operation.model.ModifyOperation; import io.stargate.sgv2.jsonapi.service.operation.model.ReadOperation; import io.stargate.sgv2.jsonapi.service.projection.DocumentProjector; @@ -23,7 +25,8 @@ import java.util.function.Supplier; /** - * This operation method is used for 3 commands findOneAndUpdate, updateOne and updateMany + * This operation method is used for 4 commands findOneAndUpdate, findOneAndReplace, updateOne and + * updateMany * * @param commandContext * @param findOperation @@ -40,6 +43,7 @@ public 
record ReadAndUpdateOperation( CommandContext commandContext, FindOperation findOperation, DocumentUpdater documentUpdater, + DataVectorizerService dataVectorizerService, boolean returnDocumentInResponse, boolean returnUpdatedDocument, boolean upsert, @@ -68,6 +72,26 @@ public Uni> execute( findResponse -> { pageStateReference.set(findResponse.pageState()); final List docs = findResponse.docs(); + // vectorize the updateClause or ReplacementDocument as needed + final DataVectorizer dataVectorizer = + dataVectorizerService.constructDataVectorizer(dataApiRequestInfo, commandContext); + // 1. UpdateCommand(updateOne, findOneAndUpdate, updateMany): + if (documentUpdater.updateType() == DocumentUpdater.UpdateType.UPDATE) { + // if there are documents found, vectorize the updateOperation + if (docs.size() != 0) { + documentUpdater.vectorizeUpdateClause(dataVectorizer); + // if there is no document found, but upsert mode, vectorize the updateOperation + } else if (upsert() && matchedCount.get() == 0) { + documentUpdater.vectorizeUpdateClause(dataVectorizer); + } + // 2.replaceCommand(findOneAndReplace) + } else if (documentUpdater.updateType() == DocumentUpdater.UpdateType.REPLACE) { + // if there is a document found, vectorize it first in documentUpdater + if (docs.size() != 0) { + documentUpdater.vectorizeTheReplacementDocument(dataVectorizer); + } + } + if (upsert() && docs.size() == 0 && matchedCount.get() == 0) { return Multi.createFrom().item(findOperation().getNewDocument()); } else { @@ -153,12 +177,10 @@ private Uni processUpdate( // upsert if we have no transaction if before boolean upsert = readDocument.txnId() == null; JsonNode originalDocument = upsert ? 
null : readDocument.document(); - // apply document updates // if no changes return null item DocumentUpdater.DocumentUpdaterResponse documentUpdaterResponse = documentUpdater().apply(readDocument.document().deepCopy(), upsert); - // In case no change to document and not an upsert document, short circuit and return if (!documentUpdaterResponse.modified() && !upsert) { // If no change return the original document Issue #390 diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolver.java index 1ffed469e7..32d3e6a697 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolver.java @@ -6,6 +6,7 @@ import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortClause; import io.stargate.sgv2.jsonapi.api.model.command.impl.FindOneAndReplaceCommand; import io.stargate.sgv2.jsonapi.config.OperationsConfig; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.operation.model.Operation; import io.stargate.sgv2.jsonapi.service.operation.model.ReadType; import io.stargate.sgv2.jsonapi.service.operation.model.impl.FindOperation; @@ -27,14 +28,19 @@ public class FindOneAndReplaceCommandResolver extends FilterableResolver updateOperations, + // buildOperations will be executed when apply to update + UpdateClause updateClause, ObjectNode replaceDocument, JsonNode replaceDocumentId, UpdateType updateType) { @@ -23,7 +35,7 @@ public record DocumentUpdater( * @return */ public static DocumentUpdater construct(UpdateClause updateDef) { - return new DocumentUpdater(updateDef.buildOperations(), null, null, UpdateType.UPDATE); + return new DocumentUpdater(updateDef, null, null, UpdateType.UPDATE); } /** @@ 
-62,6 +74,7 @@ public DocumentUpdaterResponse apply(JsonNode readDocument, boolean docInserted) */ private boolean update(ObjectNode docToUpdate, boolean docInserted) { boolean modified = false; + List updateOperations = updateClause.buildOperations(); for (UpdateOperation updateOperation : updateOperations) { if (updateOperation.shouldApplyIf(docInserted)) { modified |= updateOperation.updateDocument(docToUpdate); @@ -70,6 +83,29 @@ private boolean update(ObjectNode docToUpdate, boolean docInserted) { return modified; } + /** + * Vectorizes the UpdateClause as needed, i.e. only when documents are found or an upsert + * happens. Used by updateOne, findOneAndUpdate and updateMany. + * + * @param dataVectorizer + */ + public void vectorizeUpdateClause(DataVectorizer dataVectorizer) { + try { + dataVectorizer + .vectorizeUpdateClause(updateClause) + .runSubscriptionOn(Infrastructure.getDefaultWorkerPool()) + .subscribeAsCompletionStage() + .get(); + } catch (Exception e) { + if (e instanceof ExecutionException exception) { + if (exception.getCause() instanceof JsonApiException jsonApiException) { + throw jsonApiException; + } + } + throw new RuntimeException(e); + } + } + /** * Will be used for findOneAndReplace * @@ -105,9 +141,28 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { return true; } + /** + * Vectorizes the replacementDocument as needed, i.e. only when a document is found. Used by + * findOneAndReplace. + * + * @param dataVectorizer + */ + public void vectorizeTheReplacementDocument(DataVectorizer dataVectorizer) { + // TODO: check if $vectorize must be at first level + try { + dataVectorizer + .vectorize(List.of(replaceDocument)) + .runSubscriptionOn(Infrastructure.getDefaultWorkerPool()) + .subscribeAsCompletionStage() + .get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + public record DocumentUpdaterResponse(JsonNode document, boolean modified) {} - private enum UpdateType { + public enum UpdateType { UPDATE, REPLACE } diff
--git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/VectorizeSearchIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/VectorizeSearchIntegrationTest.java index 7d6ca15524..d5440f4a25 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/VectorizeSearchIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/VectorizeSearchIntegrationTest.java @@ -842,7 +842,7 @@ public void setOnInsertOperation() { class VectorSearchExtendedCommands { @Test @Order(1) - public void findOneAndUpdate() { + public void findOneAndUpdate_sortClause() { insertVectorDocuments(); String json = """ @@ -872,7 +872,61 @@ public void findOneAndUpdate() { @Test @Order(2) - public void updateOne() { + public void findOneAndUpdate_updateClause() { + insertVectorDocuments(); + String json = + """ + { + "findOneAndUpdate": { + "sort" : {"$vectorize" : "A deep learning display that controls your mood"}, + "update" : {"$set" : {"status" : "active","$vectorize":"An AI quilt to help you sleep forever"}}, + "options" : {"returnDocument" : "after"} + } + } + """; + + given() + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body(json) + .when() + .post(CollectionResource.BASE_PATH, namespaceName, collectionName) + .then() + .statusCode(200) + .body("data.document._id", is("3")) + .body("data.document.status", is("active")) + .body("status.matchedCount", is(1)) + .body("status.modifiedCount", is(1)) + .body("errors", is(nullValue())); + + json = + """ + { + "findOne": { + "filter" : {"_id" : "3"}, + "projection":{ + "$vector":true, "$vectorize":true + } + } + } + """; + given() + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body(json) + .when() + .post(CollectionResource.BASE_PATH, namespaceName, collectionName) + .then() + .statusCode(200) + .body("data.document._id", is("3")) + .body("data.document.$vectorize", is("An AI quilt to help you sleep forever")) + .body("data.document.$vector", contains(0.45f, 0.09f, 0.01f, 0.2f, 0.11f)) + 
.body("data.document.status", is("active")); + } + + @Test + @Order(3) + public void updateOne_sortClause() { insertVectorDocuments(); String json = """ @@ -916,7 +970,57 @@ public void updateOne() { } @Test - @Order(3) + @Order(4) + public void updateOne_updateClause() { + insertVectorDocuments(); + String json = + """ + { + "updateOne": { + "update" : {"$set" : {"new_col": "new_val", "$vectorize":"ChatGPT upgraded"}}, + "sort" : {"$vectorize" : "ChatGPT integrated sneakers that talk to you"} + } + } + """; + given() + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body(json) + .when() + .post(CollectionResource.BASE_PATH, namespaceName, collectionName) + .then() + .statusCode(200) + .body("status.matchedCount", is(1)) + .body("status.modifiedCount", is(1)) + .body("status.moreData", is(nullValue())) + .body("errors", is(nullValue())); + json = + """ + { + "findOne": { + "filter" : {"_id" : "1"}, + "projection":{ + "$vector":true, "$vectorize":true + } + } + } + """; + given() + .headers(getHeaders()) + .contentType(ContentType.JSON) + .body(json) + .when() + .post(CollectionResource.BASE_PATH, namespaceName, collectionName) + .then() + .statusCode(200) + .body("data.document._id", is("1")) + .body("data.document.$vectorize", is("ChatGPT upgraded")) + .body("data.document.$vector", contains(0.1f, 0.16f, 0.31f, 0.22f, 0.15f)) + .body("data.document.new_col", is("new_val")); + } + + @Test + @Order(5) public void findOneAndReplace() { insertVectorDocuments(); String json = @@ -924,7 +1028,7 @@ public void findOneAndReplace() { { "findOneAndReplace": { "projection": { "$vector": 1 }, - "sort" : {"$vectorize" : "ChatGPT integrated sneakers that talk to you"}, + "sort" : {"$vectorize" : "ChatGPT upgraded"}, "replacement" : {"_id" : "1", "username": "user1", "status" : false, "description" : "Updating new data", "$vectorize" : "Updating new data"}, "options" : {"returnDocument" : "after"} } @@ -949,7 +1053,7 @@ public void findOneAndReplace() { } @Test - 
@Order(4) + @Order(6) public void findOneAndReplaceWithoutVector() { insertVectorDocuments(); String json = @@ -980,7 +1084,7 @@ public void findOneAndReplaceWithoutVector() { } @Test - @Order(6) + @Order(7) public void findOneAndDelete() { insertVectorDocuments(); String json = @@ -1009,7 +1113,7 @@ public void findOneAndDelete() { } @Test - @Order(7) + @Order(8) public void deleteOne() { insertVectorDocuments(); String json = @@ -1058,7 +1162,7 @@ public void deleteOne() { } @Test - @Order(8) + @Order(9) public void createDropDifferentVectorDimension() { String json = """ diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperationRetryTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperationRetryTest.java index fa3a7841d0..7e2afd1c95 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperationRetryTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperationRetryTest.java @@ -23,6 +23,7 @@ import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor; import io.stargate.sgv2.jsonapi.service.cqldriver.serializer.CQLBindValues; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.operation.model.ReadType; import io.stargate.sgv2.jsonapi.service.projection.DocumentProjector; import io.stargate.sgv2.jsonapi.service.shredding.Shredder; @@ -55,6 +56,8 @@ public class ReadAndUpdateOperationRetryTest extends OperationTestBase { @Inject Shredder shredder; @Inject ObjectMapper objectMapper; + @Inject DataVectorizerService dataVectorizerService; + private final ColumnDefinitions KEY_TXID_JSON_COLUMNS = buildColumnDefs( OperationTestBase.TestColumn.keyColumn(), @@ -233,6 +236,7 @@ public void findOneAndUpdateWithRetry() throws Exception { 
COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -377,6 +381,7 @@ public void findAndUpdateWithRetryFailure() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -529,6 +534,7 @@ public void findAndUpdateWithRetryFailureWithUpsert() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, true, @@ -716,6 +722,7 @@ public void findAndUpdateWithRetryPartialFailure() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -927,6 +934,7 @@ public void findOneAndUpdateWithRetryMultipleFailure() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperationTest.java index 2be178bac0..806498fbd0 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperationTest.java @@ -28,6 +28,7 @@ import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSettings; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor; import io.stargate.sgv2.jsonapi.service.cqldriver.serializer.CQLBindValues; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.operation.model.ReadType; import io.stargate.sgv2.jsonapi.service.projection.DocumentProjector; import io.stargate.sgv2.jsonapi.service.shredding.Shredder; @@ -60,6 +61,7 @@ public class ReadAndUpdateOperationTest extends OperationTestBase { @Inject Shredder shredder; @Inject ObjectMapper objectMapper; + @Inject DataVectorizerService 
dataVectorizerService; private static String UPDATE = "UPDATE \"%s\".\"%s\" " @@ -255,6 +257,7 @@ public void happyPath() throws Exception { COMMAND_VECTOR_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -342,6 +345,7 @@ public void noChange() throws Exception { commandContext, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -641,6 +645,7 @@ public void happyPathWithSort() throws Exception { commandContext, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -871,6 +876,7 @@ public void happyPathReplace() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -972,6 +978,7 @@ public void happyPathReplaceUpsert() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, true, @@ -1141,6 +1148,7 @@ public void happyPathReplaceWithSort() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -1304,6 +1312,7 @@ public void happyPathWithSortDescending() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -1402,6 +1411,7 @@ public void withUpsert() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, true, @@ -1479,6 +1489,7 @@ public void noData() { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -1629,6 +1640,7 @@ public void happyPath() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, @@ -1728,6 +1740,7 @@ public void withUpsert() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, true, @@ -1807,6 +1820,7 @@ public void noData() { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, 
diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/SerialConsistencyOverrideOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/SerialConsistencyOverrideOperationTest.java index 40d26a0572..3f1fdb54b9 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/SerialConsistencyOverrideOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/SerialConsistencyOverrideOperationTest.java @@ -27,12 +27,12 @@ import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor; import io.stargate.sgv2.jsonapi.service.cqldriver.serializer.CQLBindValues; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.operation.model.ReadType; import io.stargate.sgv2.jsonapi.service.projection.DocumentProjector; import io.stargate.sgv2.jsonapi.service.shredding.Shredder; import io.stargate.sgv2.jsonapi.service.shredding.model.DocumentId; import io.stargate.sgv2.jsonapi.service.shredding.model.WritableShreddedDocument; -// import io.stargate.sgv2.jsonapi.service.testutil.DocumentUpdaterUtils; import io.stargate.sgv2.jsonapi.service.testutil.DocumentUpdaterUtils; import io.stargate.sgv2.jsonapi.service.testutil.MockAsyncResultSet; import io.stargate.sgv2.jsonapi.service.testutil.MockRow; @@ -60,6 +60,8 @@ public class SerialConsistencyOverrideOperationTest extends OperationTestBase { @Inject ObjectMapper objectMapper; @Inject Shredder shredder; + @Inject DataVectorizerService dataVectorizerService; + public static class SerialConsistencyOverrideProfile implements QuarkusTestProfile { @Override public boolean disableGlobalTestResources() { @@ -346,6 +348,7 @@ public void readAndUpdate() throws Exception { COMMAND_CONTEXT, findOperation, documentUpdater, + dataVectorizerService, true, false, false, diff --git 
a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java index 4f4a0e2810..90074a2c88 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java @@ -26,7 +26,6 @@ import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSettings; -import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.embedding.operation.TestEmbeddingProvider; import io.stargate.sgv2.jsonapi.service.operation.model.Operation; @@ -43,7 +42,6 @@ import io.stargate.sgv2.jsonapi.service.updater.DocumentUpdater; import io.stargate.sgv2.jsonapi.testresource.NoGlobalResourcesTestProfile; import jakarta.inject.Inject; -import java.util.Optional; import org.apache.commons.lang3.RandomStringUtils; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -291,7 +289,7 @@ public void updateOne() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -436,6 +434,9 @@ public void findOneAndReplace() throws Exception { FindOneAndReplaceCommand command = objectMapper.readValue(json, FindOneAndReplaceCommand.class); + // vectorizedCommand only has its sortClause and inserted documents vectorized + // Vectorizing the replaceDocument is postponed to the operation level, + // specifically to the DocumentUpdater in ReadAndUpdateOperation final
FindOneAndReplaceCommand vectorizedCommand = (FindOneAndReplaceCommand) dataVectorizerService @@ -450,7 +451,9 @@ public void findOneAndReplace() throws Exception { Operation operation = findOneAndReplaceCommandResolver.resolveCommand( TestEmbeddingProvider.commandContextWithVectorize, vectorizedCommand); - String expected = + String expectedBeforeVectorize = + "{\"col1\":\"val1\",\"col2\":\"val2\",\"$vectorize\":\"test data\"}"; + String expectedAfterVectorize = "{\"col1\":\"val1\",\"col2\":\"val2\",\"$vectorize\":\"test data\",\"$vector\":[0.25,0.25,0.25]}"; assertThat(operation) .isInstanceOfSatisfying( @@ -476,9 +479,32 @@ public void findOneAndReplace() throws Exception { } catch (JsonProcessingException e) { e.printStackTrace(); } - assertThat(replacer.replaceDocument().toString()).isEqualTo(expected); + assertThat(replacer.replaceDocument().toString()) + .isEqualTo(expectedBeforeVectorize); assertThat(replacer.replaceDocumentId()).isNull(); }); + // vectorize the replacementDocument + op.documentUpdater() + .vectorizeTheReplacementDocument( + dataVectorizerService.constructDataVectorizer( + dataApiRequestInfo, TestEmbeddingProvider.commandContextWithVectorize)); + assertThat(op.documentUpdater()) + .isInstanceOfSatisfying( + DocumentUpdater.class, + replacer -> { + try { + ObjectNode replacement = + (ObjectNode) + objectMapper.readTree( + "{\"col1\" : \"val1\", \"col2\" : \"val2\"}"); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + assertThat(replacer.replaceDocument().toString()) + .isEqualTo(expectedAfterVectorize); + assertThat(replacer.replaceDocumentId()).isNull(); + }); + assertThat(op.findOperation()) .isInstanceOfSatisfying( FindOperation.class, @@ -493,6 +519,7 @@ public void findOneAndReplace() throws Exception { assertThat(find.pageSize()).isEqualTo(1); assertThat(find.limit()).isEqualTo(1); assertThat(find.pageState()).isNull(); + assertThat(find.readType()).isEqualTo(ReadType.DOCUMENT); assertThat( find.logicalExpression() 
@@ -522,6 +549,9 @@ public void findOneAndUpdate() throws Exception { """; FindOneAndUpdateCommand command = objectMapper.readValue(json, FindOneAndUpdateCommand.class); + // vectorizedCommand only has its sortClause and inserted documents vectorized + // Vectorizing the updateClause is postponed to the operation level, + // specifically to the DocumentUpdater in ReadAndUpdateOperation final FindOneAndUpdateCommand vectorizedCommand = (FindOneAndUpdateCommand) dataVectorizerService @@ -536,20 +566,6 @@ public void findOneAndUpdate() throws Exception { Operation operation = findOneAndUpdateCommandResolver.resolveCommand( TestEmbeddingProvider.commandContextWithVectorize, vectorizedCommand); - UpdateClause updateClause = - DocumentUpdaterUtils.updateClause( - UpdateOperator.SET, objectMapper.createObjectNode().put("$vectorize", "test data")); - - new DataVectorizer( - TestEmbeddingProvider.commandContextWithVectorize.embeddingProvider(), - objectMapper.getNodeFactory(), - Optional.empty(), - TestEmbeddingProvider.commandContextWithVectorize.collectionSettings()) - .vectorizeUpdateClause(updateClause) - .subscribe() - .withSubscriber(UniAssertSubscriber.create()) - .awaitItem() - .getItem(); assertThat(operation) .isInstanceOfSatisfying( ReadAndUpdateOperation.class, @@ -562,12 +578,19 @@ public void findOneAndUpdate() throws Exception { assertThat(op.shredder()).isEqualTo(shredder); assertThat(op.updateLimit()).isEqualTo(1); assertThat(op.retryLimit()).isEqualTo(operationsConfig.lwt().retries()); + // vectorize the updateClause + op.documentUpdater() + .vectorizeUpdateClause( + dataVectorizerService.constructDataVectorizer( + dataApiRequestInfo, TestEmbeddingProvider.commandContextWithVectorize)); assertThat(op.documentUpdater()) .isInstanceOfSatisfying( DocumentUpdater.class, updater -> { - assertThat(updater.updateOperations()) - .isEqualTo(updateClause.buildOperations()); + // there will be two set actions, one for $vectorize and one for $vector + assertThat(
updater.updateClause().buildOperations().get(0).actions().size()) + .isEqualTo(2); }); assertThat(op.findOperation()) .isInstanceOfSatisfying( diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndUpdateCommandResolverTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndUpdateCommandResolverTest.java index 92f7c7d004..f275544569 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndUpdateCommandResolverTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndUpdateCommandResolverTest.java @@ -74,7 +74,7 @@ public void idFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -139,7 +139,7 @@ public void filterConditionSort() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -207,7 +207,7 @@ public void filterConditionVectorSearch() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -274,7 +274,7 @@ public void idFilterConditionWithOptions() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -340,7 +340,7 @@ public 
void filterConditionWithOptionsSort() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -412,7 +412,7 @@ public void dynamicFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java index fbbfa8b128..a571a1c793 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java @@ -76,7 +76,7 @@ public void idFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -138,7 +138,7 @@ public void noFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -191,7 +191,7 @@ public void dynamicFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + 
assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -260,7 +260,7 @@ public void dynamicFilterConditionSetVectorize() throws Exception { .isInstanceOfSatisfying( DocumentUpdater.class, updater -> { - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -325,7 +325,7 @@ public void withUpsert() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateOneCommandResolverTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateOneCommandResolverTest.java index 6e653cf5cd..2b7a253d00 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateOneCommandResolverTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateOneCommandResolverTest.java @@ -73,7 +73,7 @@ public void idFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -136,7 +136,7 @@ public void noFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -190,7 +190,7 @@ public void dynamicFilterCondition() throws Exception { UpdateOperator.SET, 
objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -255,7 +255,7 @@ public void dynamicFilterConditionWithSort() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -325,7 +325,7 @@ public void dynamicFilterConditionWithVectorSearch() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -391,7 +391,7 @@ public void withUpsert() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateOperations()) + assertThat(updater.updateClause().buildOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java index f5a8463456..77d80e268b 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java @@ -268,7 +268,9 @@ public void unsupportedUpdateOperator() throws Exception { Throwable t = catchThrowable( () -> { - DocumentUpdater.construct(objectMapper.readValue(updateClause, UpdateClause.class)); + DocumentUpdater.construct(objectMapper.readValue(updateClause, UpdateClause.class)) + .updateClause() + .buildOperations(); }); assertThat(t) .isNotNull() @@ 
-284,9 +286,11 @@ public void invalidSetDocId() throws Exception { catchThrowable( () -> { DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.SET, - objectMapper.getNodeFactory().objectNode().put("_id", "xyz"))); + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, + objectMapper.getNodeFactory().objectNode().put("_id", "xyz"))) + .updateClause() + .buildOperations(); }); assertThat(t) .isNotNull() @@ -302,9 +306,11 @@ public void invalidUnsetDocId() throws Exception { catchThrowable( () -> { DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.UNSET, - objectMapper.getNodeFactory().objectNode().put("_id", "xyz"))); + DocumentUpdaterUtils.updateClause( + UpdateOperator.UNSET, + objectMapper.getNodeFactory().objectNode().put("_id", "xyz"))) + .updateClause() + .buildOperations(); }); assertThat(t) .isNotNull() @@ -320,11 +326,13 @@ public void invalidSetAndUnsetSameField() throws Exception { catchThrowable( () -> { DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.SET, - (ObjectNode) objectMapper.readTree("{\"setField\":3, \"common\":true}"), - UpdateOperator.UNSET, - (ObjectNode) objectMapper.readTree("{\"unsetField\":1, \"common\":1}"))); + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, + (ObjectNode) objectMapper.readTree("{\"setField\":3, \"common\":true}"), + UpdateOperator.UNSET, + (ObjectNode) objectMapper.readTree("{\"unsetField\":1, \"common\":1}"))) + .updateClause() + .buildOperations(); }); assertThat(t) .isInstanceOf(JsonApiException.class) @@ -338,11 +346,14 @@ public void invalidMulAndIncSameFieldNested() { catchThrowable( () -> DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.INC, - (ObjectNode) objectMapper.readTree("{\"root.x\":-7, \"root.inc\":-3}"), - UpdateOperator.MUL, - (ObjectNode) objectMapper.readTree("{\"root.mul\":3, \"root.x\":2}")))); + DocumentUpdaterUtils.updateClause( + UpdateOperator.INC, + 
(ObjectNode) + objectMapper.readTree("{\"root.x\":-7, \"root.inc\":-3}"), + UpdateOperator.MUL, + (ObjectNode) objectMapper.readTree("{\"root.mul\":3, \"root.x\":2}"))) + .updateClause() + .buildOperations()); assertThat(t) .isInstanceOf(JsonApiException.class) .hasFieldOrPropertyWithValue("errorCode", ErrorCode.UNSUPPORTED_UPDATE_OPERATION_PARAM) @@ -355,9 +366,11 @@ public void invalidSetOnParentPath() { catchThrowable( () -> DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.SET, - (ObjectNode) objectMapper.readTree("{\"root.1\":-7, \"root\":[ ]}")))); + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, + (ObjectNode) objectMapper.readTree("{\"root.1\":-7, \"root\":[ ]}"))) + .updateClause() + .buildOperations()); assertThat(t) .isInstanceOf(JsonApiException.class) .hasFieldOrPropertyWithValue("errorCode", ErrorCode.UNSUPPORTED_UPDATE_OPERATION_PARAM) @@ -371,11 +384,11 @@ public void invalidSetOnParentPathWithDollar() { catchThrowable( () -> DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.SET, - (ObjectNode) - objectMapper.readTree( - """ + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, + (ObjectNode) + objectMapper.readTree( + """ { "root" : 7, "x" : 3, @@ -383,7 +396,9 @@ public void invalidSetOnParentPathWithDollar() { "y" : 5, "root.a" : 3 } - """)))); + """))) + .updateClause() + .buildOperations()); assertThat(t) .isInstanceOf(JsonApiException.class) .hasFieldOrPropertyWithValue("errorCode", ErrorCode.UNSUPPORTED_UPDATE_OPERATION_PARAM) From 77415add26345f8db7bc6c88bdaf28fc664c2f42 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Fri, 14 Jun 2024 10:02:35 -0700 Subject: [PATCH 02/18] fix --- .../sgv2/jsonapi/service/updater/DocumentUpdater.java | 5 +++++ .../model/impl/CommandResolverWithVectorizerTest.java | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java 
b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java index 2fe20d40e1..7a40b451a5 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java @@ -156,6 +156,11 @@ public void vectorizeTheReplacementDocument(DataVectorizer dataVectorizer) { .subscribeAsCompletionStage() .get(); } catch (Exception e) { + if (e instanceof ExecutionException exception) { + if (exception.getCause() instanceof JsonApiException jsonApiException) { + throw jsonApiException; + } + } throw new RuntimeException(e); } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java index 90074a2c88..531796de5b 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java @@ -434,7 +434,7 @@ public void findOneAndReplace() throws Exception { FindOneAndReplaceCommand command = objectMapper.readValue(json, FindOneAndReplaceCommand.class); - // vectorizedCommand only have sortClause and document vectorize + // command -> vectorizedCommand, only vectorize sortClause and document vectorize // Postpone vectorize replaceDocument at operation level // DocumentUpdator in ReadAndUpdateOperation specifically final FindOneAndReplaceCommand vectorizedCommand = @@ -549,7 +549,7 @@ public void findOneAndUpdate() throws Exception { """; FindOneAndUpdateCommand command = objectMapper.readValue(json, FindOneAndUpdateCommand.class); - // vectorizedCommand only have sortClause and document vectorize + // command -> vectorizedCommand, only vectorize sortClause and document vectorize // Postpone vectorize updateClause at operation level // DocumentUpdator 
in ReadAndUpdateOperation specifically final FindOneAndUpdateCommand vectorizedCommand = From 8a99d28157b0e825660a911a754d2b884f0fc9e0 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Mon, 17 Jun 2024 09:26:12 -0700 Subject: [PATCH 03/18] fix --- .../sgv2/jsonapi/service/updater/DocumentUpdater.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java index 7a40b451a5..238c5acbc2 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java @@ -35,6 +35,10 @@ public record DocumentUpdater( * @return */ public static DocumentUpdater construct(UpdateClause updateDef) { + // try to build operations but do not save the result + // this is for validating the UpdateClause, for example, updator path conflict + // error out before update operation's execution + updateDef.buildOperations(); return new DocumentUpdater(updateDef, null, null, UpdateType.UPDATE); } @@ -148,7 +152,6 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { * @param dataVectorizer */ public void vectorizeTheReplacementDocument(DataVectorizer dataVectorizer) { - // TODO: check if $vectorize must be at first level try { dataVectorizer .vectorize(List.of(replaceDocument)) From 7d27b2a3de0cd8df6c73b63a96eee51c0ce31ff1 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Mon, 8 Jul 2024 11:21:20 -0700 Subject: [PATCH 04/18] added logic to check if there is $vectorize text diff --- .../model/impl/ReadAndUpdateOperation.java | 27 +++-- .../service/updater/DocumentUpdater.java | 78 +++++++++----- .../service/updater/DocumentUpdaterTest.java | 101 ++++++++++++++++++ 3 files changed, 168 insertions(+), 38 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java 
b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java index 83e4ab60ad..6909db2b98 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java @@ -68,30 +68,39 @@ public Uni> execute( .getDocuments(dataApiRequestInfo, queryExecutor, findOperation().pageState(), null); return docsToUpdate .onItem() - .transformToMulti( + .transformToUni( findResponse -> { pageStateReference.set(findResponse.pageState()); final List docs = findResponse.docs(); + // vectorize the updateClause or ReplacementDocument as needed + Uni vectorization = Uni.createFrom().item(false); final DataVectorizer dataVectorizer = dataVectorizerService.constructDataVectorizer(dataApiRequestInfo, commandContext); + // 1. UpdateCommand(updateOne, findOneAndUpdate, updateMany): if (documentUpdater.updateType() == DocumentUpdater.UpdateType.UPDATE) { - // if there are documents found, vectorize the updateOperation - if (docs.size() != 0) { - documentUpdater.vectorizeUpdateClause(dataVectorizer); + // if there are documents found, and there is $vectorize text diff + if (docs.size() != 0 && documentUpdater.hasVectorizeDiff(docs)) { + vectorization = documentUpdater.vectorizeUpdateClause(dataVectorizer); // if there is no document found, but upsert mode, vectorize the updateOperation } else if (upsert() && matchedCount.get() == 0) { - documentUpdater.vectorizeUpdateClause(dataVectorizer); + vectorization = documentUpdater.vectorizeUpdateClause(dataVectorizer); } // 2.replaceCommand(findOneAndReplace) } else if (documentUpdater.updateType() == DocumentUpdater.UpdateType.REPLACE) { - // if there is a document found, vectorize it first in documentUpdater - if (docs.size() != 0) { - documentUpdater.vectorizeTheReplacementDocument(dataVectorizer); + // if there is a document found and there is $vectorize text diff + if 
(docs.size() != 0 && documentUpdater.hasVectorizeDiff(docs)) { + vectorization = documentUpdater.vectorizeTheReplacementDocument(dataVectorizer); } } - + return vectorization + .onItem() + .transformToUni(vectorized -> Uni.createFrom().item(docs)); + }) + .onItem() + .transformToMulti( + docs -> { if (upsert() && docs.size() == 0 && matchedCount.get() == 0) { return Multi.createFrom().item(findOperation().getNewDocument()); } else { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java index 238c5acbc2..f5eeb3f119 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java @@ -2,15 +2,15 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import io.smallrye.mutiny.infrastructure.Infrastructure; +import io.smallrye.mutiny.Uni; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.*; import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; +import io.stargate.sgv2.jsonapi.service.operation.model.impl.ReadDocument; import io.stargate.sgv2.jsonapi.util.JsonUtil; import java.util.List; -import java.util.concurrent.ExecutionException; /** * Updates the document read from the database with the updates came as part of the request. 
@@ -93,21 +93,8 @@ private boolean update(ObjectNode docToUpdate, boolean docInserted) { * * @param dataVectorizer */ - public void vectorizeUpdateClause(DataVectorizer dataVectorizer) { - try { - dataVectorizer - .vectorizeUpdateClause(updateClause) - .runSubscriptionOn(Infrastructure.getDefaultWorkerPool()) - .subscribeAsCompletionStage() - .get(); - } catch (Exception e) { - if (e instanceof ExecutionException exception) { - if (exception.getCause() instanceof JsonApiException jsonApiException) { - throw jsonApiException; - } - } - throw new RuntimeException(e); - } + public Uni vectorizeUpdateClause(DataVectorizer dataVectorizer) { + return dataVectorizer.vectorizeUpdateClause(updateClause); } /** @@ -151,21 +138,54 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { * * @param dataVectorizer */ - public void vectorizeTheReplacementDocument(DataVectorizer dataVectorizer) { - try { - dataVectorizer - .vectorize(List.of(replaceDocument)) - .runSubscriptionOn(Infrastructure.getDefaultWorkerPool()) - .subscribeAsCompletionStage() - .get(); - } catch (Exception e) { - if (e instanceof ExecutionException exception) { - if (exception.getCause() instanceof JsonApiException jsonApiException) { - throw jsonApiException; + public Uni vectorizeTheReplacementDocument(DataVectorizer dataVectorizer) { + return dataVectorizer.vectorize(List.of(replaceDocument)); + } + + /** + * Check if there is any $vectorize diff If there are docs found to update or doc to replace, then + * this is a necessary condition to proceed vectorization + * + * @param foundDocs + */ + public boolean hasVectorizeDiff(List foundDocs) { + String vectorizeTextUpdate = null; + if (updateType().equals(DocumentUpdater.UpdateType.UPDATE)) { + // extract $vectorize if updateClause set operator has it + final ObjectNode setNode = updateClause.updateOperationDefs().get(UpdateOperator.SET); + if (setNode != null) { + final JsonNode updateClauseVectorizeTextJsonNode = + 
setNode.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); + if (updateClauseVectorizeTextJsonNode != null) { + vectorizeTextUpdate = updateClauseVectorizeTextJsonNode.asText(); + } + } + } else if (updateType().equals(DocumentUpdater.UpdateType.REPLACE)) { + // extract $vectorize if replaceDocument has it + final JsonNode replaceDocumentVectorizeTextJsonNode = + replaceDocument.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); + if (replaceDocumentVectorizeTextJsonNode != null) { + vectorizeTextUpdate = replaceDocumentVectorizeTextJsonNode.asText(); + } + } + + // if there is no $vectorize to update or replace, then no diff. + if (vectorizeTextUpdate == null) { + return false; + } + + // iterate foundDocs, see if there is any diff for $vectorize + for (ReadDocument foundDoc : foundDocs) { + final JsonNode foundDocVectorizeTextJsonNode = + foundDoc.document().get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); + if (foundDocVectorizeTextJsonNode != null) { + if (!foundDocVectorizeTextJsonNode.asText().equals(vectorizeTextUpdate)) { + // There is a diff + return true; } } - throw new RuntimeException(e); } + return false; } public record DocumentUpdaterResponse(JsonNode document, boolean modified) {} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java index 77d80e268b..3f54cfa6e2 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java @@ -10,11 +10,16 @@ import io.quarkus.test.junit.TestProfile; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateClause; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import 
io.stargate.sgv2.jsonapi.exception.JsonApiException; +import io.stargate.sgv2.jsonapi.service.operation.model.impl.ReadDocument; +import io.stargate.sgv2.jsonapi.service.shredding.model.DocumentId; import io.stargate.sgv2.jsonapi.service.testutil.DocumentUpdaterUtils; import io.stargate.sgv2.jsonapi.testresource.NoGlobalResourcesTestProfile; import jakarta.inject.Inject; +import java.util.List; +import java.util.UUID; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -534,4 +539,100 @@ public void replaceEmpty() throws Exception { }); } } + + @Nested + class VectorizeUpdateTest { + + @Test + public void updateOne_onlyVectorizeWithDiff_noDiff() throws Exception { + String updateVectorizeData = + """ + {"$vectorize" : "Beijing City"} + """; + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); + + final ReadDocument readDocument = + ReadDocument.from( + DocumentId.fromString("key1"), + UUID.randomUUID(), + objectMapper + .createObjectNode() + .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Beijing City")); + + assertThat(documentUpdater.hasVectorizeDiff(List.of(readDocument))).isFalse(); + } + + @Test + public void updateOne_onlyVectorizeWithDiff() throws Exception { + String updateVectorizeData = + """ + {"$vectorize" : "Beijing City"} + """; + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); + + final ReadDocument readDocument = + ReadDocument.from( + DocumentId.fromString("key1"), + UUID.randomUUID(), + objectMapper + .createObjectNode() + .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Shanghai City")); + + assertThat(documentUpdater.hasVectorizeDiff(List.of(readDocument))).isTrue(); + } + + @Test + public void updateMany_onlyVectorizeWithDiff() throws 
Exception { + String updateVectorizeData = + """ + {"$vectorize" : "Beijing City"} + """; + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); + + final ReadDocument readDocument1 = + ReadDocument.from( + DocumentId.fromString("key1"), + UUID.randomUUID(), + objectMapper + .createObjectNode() + .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Shanghai City")); + final ReadDocument readDocument2 = + ReadDocument.from( + DocumentId.fromString("key2"), + UUID.randomUUID(), + objectMapper + .createObjectNode() + .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Beijing City")); + + assertThat(documentUpdater.hasVectorizeDiff(List.of(readDocument1, readDocument2))).isTrue(); + } + + @Test + public void findOneAndReplace_onlyVectorizeWithDiff() throws Exception { + ObjectNode replaceNode = + objectMapper + .createObjectNode() + .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Beijing City"); + DocumentUpdater documentUpdater = DocumentUpdater.construct(replaceNode); + + final ReadDocument readDocument = + ReadDocument.from( + DocumentId.fromString("key1"), + UUID.randomUUID(), + objectMapper + .createObjectNode() + .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Shanghai City")); + + assertThat(documentUpdater.hasVectorizeDiff(List.of(readDocument))).isTrue(); + } + } } From e9612000cccc7c28b20f5d379a2a736eee52d1ef Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Mon, 8 Jul 2024 12:18:16 -0700 Subject: [PATCH 05/18] delete two vectorize update unit tests --- .../CommandResolverWithVectorizerTest.java | 206 ------------------ 1 file changed, 206 deletions(-) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java index 
afdbeea617..828dacc1e3 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java @@ -2,9 +2,7 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ObjectNode; import io.quarkus.test.junit.QuarkusTest; import io.quarkus.test.junit.TestProfile; import io.quarkus.test.junit.mockito.InjectMock; @@ -15,8 +13,6 @@ import io.stargate.sgv2.jsonapi.api.model.command.impl.DeleteOneCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.FindCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.FindOneAndDeleteCommand; -import io.stargate.sgv2.jsonapi.api.model.command.impl.FindOneAndReplaceCommand; -import io.stargate.sgv2.jsonapi.api.model.command.impl.FindOneAndUpdateCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.FindOneCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.InsertManyCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.InsertOneCommand; @@ -419,208 +415,6 @@ public void findOneAndDelete() throws Exception { }); } - @Test - public void findOneAndReplace() throws Exception { - String json = - """ - { - "findOneAndReplace": { - "filter" : {"status" : "active"}, - "sort" : {"$vectorize" : "test data"}, - "replacement" : {"col1" : "val1", "col2" : "val2", "$vectorize" : "test data"} - } - } - """; - - FindOneAndReplaceCommand command = - objectMapper.readValue(json, FindOneAndReplaceCommand.class); - // command -> vectorizedCommand, only vectorize sortClause and document vectorize - // Postpone vectorize replaceDocument at operation level - // DocumentUpdator in ReadAndUpdateOperation specifically - final FindOneAndReplaceCommand vectorizedCommand = - (FindOneAndReplaceCommand) 
- dataVectorizerService - .vectorize( - dataApiRequestInfo, - TestEmbeddingProvider.commandContextWithVectorize, - command) - .subscribe() - .withSubscriber(UniAssertSubscriber.create()) - .awaitItem() - .getItem(); - Operation operation = - findOneAndReplaceCommandResolver.resolveCommand( - TestEmbeddingProvider.commandContextWithVectorize, vectorizedCommand); - String expectedBeforeVectorize = - "{\"col1\":\"val1\",\"col2\":\"val2\",\"$vectorize\":\"test data\"}"; - String expectedAfterVectorize = - "{\"col1\":\"val1\",\"col2\":\"val2\",\"$vectorize\":\"test data\",\"$vector\":[0.25,0.25,0.25]}"; - assertThat(operation) - .isInstanceOfSatisfying( - ReadAndUpdateOperation.class, - op -> { - assertThat(op.commandContext()) - .isEqualTo(TestEmbeddingProvider.commandContextWithVectorize); - assertThat(op.returnDocumentInResponse()).isTrue(); - assertThat(op.returnUpdatedDocument()).isFalse(); - assertThat(op.upsert()).isFalse(); - assertThat(op.shredder()).isEqualTo(shredder); - assertThat(op.updateLimit()).isEqualTo(1); - assertThat(op.retryLimit()).isEqualTo(operationsConfig.lwt().retries()); - assertThat(op.documentUpdater()) - .isInstanceOfSatisfying( - DocumentUpdater.class, - replacer -> { - try { - ObjectNode replacement = - (ObjectNode) - objectMapper.readTree( - "{\"col1\" : \"val1\", \"col2\" : \"val2\"}"); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - assertThat(replacer.replaceDocument().toString()) - .isEqualTo(expectedBeforeVectorize); - assertThat(replacer.replaceDocumentId()).isNull(); - }); - // vectorize the replacementDocument - op.documentUpdater() - .vectorizeTheReplacementDocument( - dataVectorizerService.constructDataVectorizer( - dataApiRequestInfo, TestEmbeddingProvider.commandContextWithVectorize)); - assertThat(op.documentUpdater()) - .isInstanceOfSatisfying( - DocumentUpdater.class, - replacer -> { - try { - ObjectNode replacement = - (ObjectNode) - objectMapper.readTree( - "{\"col1\" : \"val1\", \"col2\" : 
\"val2\"}"); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - assertThat(replacer.replaceDocument().toString()) - .isEqualTo(expectedAfterVectorize); - assertThat(replacer.replaceDocumentId()).isNull(); - }); - - assertThat(op.findOperation()) - .isInstanceOfSatisfying( - FindOperation.class, - find -> { - DBFilterBase.TextFilter filter = - new DBFilterBase.TextFilter( - "status", DBFilterBase.MapFilterBase.Operator.EQ, "active"); - - assertThat(find.objectMapper()).isEqualTo(objectMapper); - assertThat(find.commandContext()) - .isEqualTo(TestEmbeddingProvider.commandContextWithVectorize); - assertThat(find.pageSize()).isEqualTo(1); - assertThat(find.limit()).isEqualTo(1); - assertThat(find.pageState()).isNull(); - - assertThat(find.readType()).isEqualTo(ReadType.DOCUMENT); - assertThat( - find.logicalExpression() - .comparisonExpressions - .get(0) - .getDbFilters() - .get(0)) - .isEqualTo(filter); - assertThat(find.vector()).isNotNull(); - assertThat(find.vector()).containsExactly(0.25f, 0.25f, 0.25f); - assertThat(find.singleResponse()).isTrue(); - }); - }); - } - - @Test - public void findOneAndUpdate() throws Exception { - String json = - """ - { - "findOneAndUpdate": { - "filter" : {"status" : "active"}, - "sort" : {"$vector" : [0.11, 0.22, 0.33, 0.44]}, - "update" : {"$set" : {"$vectorize" : "test data"}} - } - } - """; - - FindOneAndUpdateCommand command = objectMapper.readValue(json, FindOneAndUpdateCommand.class); - // command -> vectorizedCommand, only vectorize sortClause and document vectorize - // Postpone vectorize updateClause at operation level - // DocumentUpdator in ReadAndUpdateOperation specifically - final FindOneAndUpdateCommand vectorizedCommand = - (FindOneAndUpdateCommand) - dataVectorizerService - .vectorize( - dataApiRequestInfo, - TestEmbeddingProvider.commandContextWithVectorize, - command) - .subscribe() - .withSubscriber(UniAssertSubscriber.create()) - .awaitItem() - .getItem(); - Operation operation = - 
findOneAndUpdateCommandResolver.resolveCommand( - TestEmbeddingProvider.commandContextWithVectorize, vectorizedCommand); - assertThat(operation) - .isInstanceOfSatisfying( - ReadAndUpdateOperation.class, - op -> { - assertThat(op.commandContext()) - .isEqualTo(TestEmbeddingProvider.commandContextWithVectorize); - assertThat(op.returnDocumentInResponse()).isTrue(); - assertThat(op.returnUpdatedDocument()).isFalse(); - assertThat(op.upsert()).isFalse(); - assertThat(op.shredder()).isEqualTo(shredder); - assertThat(op.updateLimit()).isEqualTo(1); - assertThat(op.retryLimit()).isEqualTo(operationsConfig.lwt().retries()); - // vectorize the updateClause - op.documentUpdater() - .vectorizeUpdateClause( - dataVectorizerService.constructDataVectorizer( - dataApiRequestInfo, TestEmbeddingProvider.commandContextWithVectorize)); - assertThat(op.documentUpdater()) - .isInstanceOfSatisfying( - DocumentUpdater.class, - updater -> { - // there will be two set options, one for $vectorize and one for $vector - assertThat( - updater.updateClause().buildOperations().get(0).actions().size()) - .isEqualTo(2); - }); - assertThat(op.findOperation()) - .isInstanceOfSatisfying( - FindOperation.class, - find -> { - DBFilterBase.TextFilter filter = - new DBFilterBase.TextFilter( - "status", DBFilterBase.MapFilterBase.Operator.EQ, "active"); - - assertThat(find.objectMapper()).isEqualTo(objectMapper); - assertThat(find.commandContext()) - .isEqualTo(TestEmbeddingProvider.commandContextWithVectorize); - assertThat(find.pageSize()).isEqualTo(1); - assertThat(find.limit()).isEqualTo(1); - assertThat(find.pageState()).isNull(); - assertThat(find.readType()).isEqualTo(ReadType.DOCUMENT); - assertThat( - find.logicalExpression() - .comparisonExpressions - .get(0) - .getDbFilters() - .get(0)) - .isEqualTo(filter); - assertThat(find.vector()).isNotNull(); - assertThat(find.vector()).containsExactly(0.11f, 0.22f, 0.33f, 0.44f); - assertThat(find.singleResponse()).isTrue(); - }); - }); - } - @Test 
public void findOne() throws Exception { String json = From 5073fc20165fa037a5850824179d9bb5594722e9 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Wed, 10 Jul 2024 10:41:49 -0700 Subject: [PATCH 06/18] refactor --- .../command/clause/update/SetOperation.java | 37 ++ .../command/clause/update/UnsetOperation.java | 4 + .../UpdateClauseDeserializer.java | 21 +- .../service/embedding/DataVectorizer.java | 107 +----- .../embedding/DataVectorizerService.java | 4 +- .../model/impl/ReadAndUpdateOperation.java | 78 ++--- .../FindOneAndReplaceCommandResolver.java | 9 + .../service/updater/DocumentUpdater.java | 140 ++++---- .../UpdateClauseDeserializerTest.java | 28 ++ .../operation/DataVectorizerTest.java | 256 ++------------ .../CommandResolverWithVectorizerTest.java | 2 +- .../FindOneAndReplaceCommandResolverTest.java | 22 ++ .../FindOneAndUpdateCommandResolverTest.java | 12 +- .../impl/UpdateManyCommandResolverTest.java | 18 +- .../impl/UpdateOneCommandResolverTest.java | 12 +- .../service/updater/DocumentUpdaterTest.java | 323 ++++++++++++------ 16 files changed, 498 insertions(+), 575 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java index eeefdfc72a..a21423d490 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java @@ -2,7 +2,9 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import io.smallrye.mutiny.Uni; import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; import io.stargate.sgv2.jsonapi.util.JsonUtil; import io.stargate.sgv2.jsonapi.util.PathMatch; import io.stargate.sgv2.jsonapi.util.PathMatchLocator; @@ -78,6 +80,13 @@ public boolean 
updateDocument(ObjectNode doc) { Set setPaths = new HashSet<>(); actions.stream().forEach(action -> setPaths.add(action.locator().path())); for (Action action : actions) { + + if (DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD.equals(action.locator().path())) { + // won't update $vectorize in this method + // will vectorize on demand and update $vectorize in updateVectorize method below + continue; + } + PathMatch target = action.locator().findOrCreate(doc); JsonNode newValue = action.value(); JsonNode oldValue = target.valueNode(); @@ -96,6 +105,34 @@ public boolean updateDocument(ObjectNode doc) { return modified; } + /** + * This updateVectorize method will vectorize as demand and update the $vectorize 1. check if + * there is diff for $vectorize and proceed 2. vectorize updated $vectorize to get the new vector + * 3. update $vector and $vectorize + * + * @param doc Document to update + * @param dataVectorizer dataVectorizer + * @return Uni modified + */ + public Uni updateVectorize(JsonNode doc, DataVectorizer dataVectorizer) { + for (Action action : actions) { + if (DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD.equals(action.locator().path())) { + PathMatch target = action.locator().findOrCreate(doc); + JsonNode newValue = action.value(); + JsonNode oldValue = target.valueNode(); + + if ((oldValue == null) || !JsonUtil.equalsOrdered(oldValue, newValue)) { + // replace the oldValue with newValue first + ((ObjectNode) doc).put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, newValue); + // vectorize the newValue, update $vectorize, $vector + return dataVectorizer.vectorize(List.of(doc), true); + } + } + } + // no diff for $vectorize, so nothing is modified in this method + return Uni.createFrom().item(false); + } + // Needed because some unit tests check for equality @Override public boolean equals(Object o) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java 
b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java index c344b811f0..47f12e2dcd 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java @@ -40,6 +40,10 @@ public boolean updateDocument(ObjectNode doc) { if (modified && unsetPaths.contains(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) { doc.remove(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); } + // $vectorize field is unset, remove $vector field value + if (modified && unsetPaths.contains(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { + doc.remove(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); + } return modified; } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializer.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializer.java index d6c8e949d2..69a4fd5bbc 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializer.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializer.java @@ -7,12 +7,11 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateClause; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; import java.io.IOException; -import java.util.EnumMap; -import java.util.Iterator; -import java.util.Map; +import java.util.*; /** {@link StdDeserializer} for the {@link UpdateClause}. 
*/ public class UpdateClauseDeserializer extends StdDeserializer { @@ -55,6 +54,22 @@ public UpdateClause deserialize( } updateDefs.put(oper, (ObjectNode) operationArg); } + validateUpdateDefs(updateDefs); return new UpdateClause(updateDefs); } + + public void validateUpdateDefs(EnumMap updateDefs) { + // check1: can not unset $vectorize and $vector at the same time + List checkUpdateOperationNodes = new ArrayList<>(); + checkUpdateOperationNodes.add(updateDefs.get(UpdateOperator.UNSET)); + checkUpdateOperationNodes.add(updateDefs.get(UpdateOperator.SET)); + for (ObjectNode checkUpdateOperationNode : checkUpdateOperationNodes) { + if (checkUpdateOperationNode != null + && checkUpdateOperationNode.has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { + if (checkUpdateOperationNode.has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) { + throw new JsonApiException(ErrorCode.INVALID_USAGE_OF_VECTORIZE); + } + } + } + } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java index bb7fe6e9ab..746ac9bc5b 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java @@ -9,8 +9,6 @@ import io.smallrye.mutiny.Uni; import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortClause; import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortExpression; -import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateClause; -import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; @@ -54,11 +52,17 @@ public DataVectorizer( } /** - * Vectorize the '$vectorize' fields in the document + * Vectorize the '$vectorize' fields in the 
document. This method is used by commands: insertOne, + * insertMany (detail in DataVectorizerService vectorizeDocument method) updateOne, updateMany, + * findOneAndUpdate, findOneAndReplace (detail in SetOperation updateVectorize method) + * + *

With isUpdateCommand flag set as true, this method allows to vectorize JsonNode with both + * $vector and $vectorize * * @param documents - Documents to be vectorized + * @param isUpdateCommand - is called from isUpdateCommand or not */ - public Uni vectorize(List documents) { + public Uni vectorize(List documents, boolean isUpdateCommand) { try { int vectorDataPosition = 0; List vectorizeTexts = new ArrayList<>(); @@ -66,7 +70,8 @@ public Uni vectorize(List documents) { for (int position = 0; position < documents.size(); position++) { JsonNode document = documents.get(position); if (document.has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { - if (document.has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) { + // Do not allow using $vector and $vectorize together for insertion commands + if (document.has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD) && !isUpdateCommand) { throw new JsonApiException( ErrorCode.INVALID_USAGE_OF_VECTORIZE, ErrorCode.INVALID_USAGE_OF_VECTORIZE.getMessage() @@ -204,96 +209,4 @@ public Uni vectorize(SortClause sortClause) { return Uni.createFrom().failure(e); } } - - /** - * Vectorize the '$vectorize' fields in the update clause - * - * @param updateClause - Update clause to be vectorized - */ - public Uni vectorizeUpdateClause(UpdateClause updateClause) { - try { - if (updateClause == null) return Uni.createFrom().item(true); - final ObjectNode setNode = updateClause.updateOperationDefs().get(UpdateOperator.SET); - final ObjectNode setOnInsertNode = - updateClause.updateOperationDefs().get(UpdateOperator.SET_ON_INSERT); - return updateVectorize(setNode) - .onItem() - .transformToUni( - vectorized -> { - return updateVectorize(setOnInsertNode); - }) - .onItem() - .transform( - v -> { - final ObjectNode unsetNode = - updateClause.updateOperationDefs().get(UpdateOperator.UNSET); - if (unsetNode != null - && unsetNode.has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { - if 
(unsetNode.has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) { - throw new JsonApiException(ErrorCode.INVALID_USAGE_OF_VECTORIZE); - } - unsetNode.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); - } - return true; - }); - } catch (JsonApiException e) { - return Uni.createFrom().failure(e); - } - } - - private Uni updateVectorize(ObjectNode node) { - if (node == null) return Uni.createFrom().item(true); - if (node.has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { - if (node.has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) { - throw new JsonApiException(ErrorCode.INVALID_USAGE_OF_VECTORIZE); - } - final JsonNode jsonNode = node.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); - if (jsonNode.isNull()) { - node.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); - } else if (jsonNode.isTextual()) { - final String text = jsonNode.asText(); - if (embeddingProvider == null) { - throw ErrorCode.EMBEDDING_SERVICE_NOT_CONFIGURED.toApiException( - collectionSettings.collectionName()); - } - if (text.isBlank()) { - node.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); - } else { - final Uni> vectors = - embeddingProvider - .vectorize( - 1, - List.of(text), - embeddingApiKey, - EmbeddingProvider.EmbeddingRequestType.INDEX) - .map(res -> res.embeddings()); - return vectors - .onItem() - .transform( - vectorData -> { - float[] vector = vectorData.get(0); - // check if vector have the expected size - if (vector.length != collectionSettings.vectorConfig().vectorSize()) { - throw EMBEDDING_PROVIDER_UNEXPECTED_RESPONSE.toApiException( - "Embedding provider '%s' did not return expected embedding length. Expect: '%d'. 
Actual: '%d'", - collectionSettings.vectorConfig().vectorizeConfig().provider(), - collectionSettings.vectorConfig().vectorSize(), - vector.length); - } - final ArrayNode arrayNode = nodeFactory.arrayNode(vector.length); - for (float listValue : vector) { - arrayNode.add(nodeFactory.numberNode(listValue)); - } - node.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, arrayNode); - return true; - }); - } - - } else { - throw new JsonApiException( - ErrorCode.SHRED_BAD_VECTORIZE_VALUE, ErrorCode.SHRED_BAD_VECTORIZE_VALUE.getMessage()); - } - } - return Uni.createFrom().item(true); - } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java index 4e153be92a..baebe060da 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java @@ -85,9 +85,9 @@ private Uni vectorizeSortClause( private Uni vectorizeDocument( DataVectorizer dataVectorizer, CommandContext commandContext, Command command) { if (command instanceof InsertOneCommand insertOneCommand) { - return dataVectorizer.vectorize(List.of(insertOneCommand.document())); + return dataVectorizer.vectorize(List.of(insertOneCommand.document()), false); } else if (command instanceof InsertManyCommand insertManyCommand) { - return dataVectorizer.vectorize(insertManyCommand.documents()); + return dataVectorizer.vectorize(insertManyCommand.documents(), false); } return Uni.createFrom().item(true); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java index 6909db2b98..2a2603fae6 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java +++ 
b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java @@ -68,39 +68,10 @@ public Uni> execute( .getDocuments(dataApiRequestInfo, queryExecutor, findOperation().pageState(), null); return docsToUpdate .onItem() - .transformToUni( + .transformToMulti( findResponse -> { pageStateReference.set(findResponse.pageState()); final List docs = findResponse.docs(); - - // vectorize the updateClause or ReplacementDocument as needed - Uni vectorization = Uni.createFrom().item(false); - final DataVectorizer dataVectorizer = - dataVectorizerService.constructDataVectorizer(dataApiRequestInfo, commandContext); - - // 1. UpdateCommand(updateOne, findOneAndUpdate, updateMany): - if (documentUpdater.updateType() == DocumentUpdater.UpdateType.UPDATE) { - // if there are documents found, and there is $vectorize text diff - if (docs.size() != 0 && documentUpdater.hasVectorizeDiff(docs)) { - vectorization = documentUpdater.vectorizeUpdateClause(dataVectorizer); - // if there is no document found, but upsert mode, vectorize the updateOperation - } else if (upsert() && matchedCount.get() == 0) { - vectorization = documentUpdater.vectorizeUpdateClause(dataVectorizer); - } - // 2.replaceCommand(findOneAndReplace) - } else if (documentUpdater.updateType() == DocumentUpdater.UpdateType.REPLACE) { - // if there is a document found and there is $vectorize text diff - if (docs.size() != 0 && documentUpdater.hasVectorizeDiff(docs)) { - vectorization = documentUpdater.vectorizeTheReplacementDocument(dataVectorizer); - } - } - return vectorization - .onItem() - .transformToUni(vectorized -> Uni.createFrom().item(docs)); - }) - .onItem() - .transformToMulti( - docs -> { if (upsert() && docs.size() == 0 && matchedCount.get() == 0) { return Multi.createFrom().item(findOperation().getNewDocument()); } else { @@ -158,6 +129,7 @@ public Uni> execute( .jsonProcessingMetricsReporter() .reportJsonWrittenDocsMetrics( commandContext().commandName(), 
modifiedCount.get()); + return new UpdateOperationPage( matchedCount.get(), modifiedCount.get(), @@ -169,27 +141,47 @@ public Uni> execute( private Uni processUpdate( DataApiRequestInfo dataApiRequestInfo, - ReadDocument document, + ReadDocument readDocument, QueryExecutor queryExecutor, AtomicInteger modifiedCount) { return Uni.createFrom() - .item(document) - - // perform update operation and save only if data is modified. + .item(readDocument) .flatMap( - readDocument -> { - // if there is no document return null item + document -> { + // if there is no document: return null item if (readDocument == null) { return Uni.createFrom().nullItem(); } - // upsert if we have no transaction if before boolean upsert = readDocument.txnId() == null; - JsonNode originalDocument = upsert ? null : readDocument.document(); - // apply document updates - // if no changes return null item - DocumentUpdater.DocumentUpdaterResponse documentUpdaterResponse = + // apply document updates: if no changes return null item + // First update, will not vectorize + DocumentUpdater.DocumentUpdaterResponse firstDocumentUpdaterResponse = documentUpdater().apply(readDocument.document().deepCopy(), upsert); + // Second update, will vectorize on demand and update $vectorize and $vector + // accordingly + final DataVectorizer dataVectorizer = + dataVectorizerService.constructDataVectorizer(dataApiRequestInfo, commandContext); + return documentUpdater() + .applyUpdateVectorize( + firstDocumentUpdaterResponse.document(), upsert, dataVectorizer) + .onItem() + .transformToUni( + secondDocumentUpdaterResponse -> { + // Need to combine two modified result here + return Uni.createFrom() + .item( + new DocumentUpdater.DocumentUpdaterResponse( + secondDocumentUpdaterResponse.document(), + firstDocumentUpdaterResponse.modified() + | secondDocumentUpdaterResponse.modified())); + }); + }) + // perform update operation and save only if data is modified. 
+ .flatMap( + documentUpdaterResponse -> { + boolean upsert = readDocument.txnId() == null; + JsonNode originalDocument = upsert ? null : readDocument.document(); // In case no change to document and not an upsert document, short circuit and return if (!documentUpdaterResponse.modified() && !upsert) { // If no change return the original document Issue #390 @@ -220,7 +212,9 @@ private Uni processUpdate( .transform( v -> { // if not insert increment modified count - if (!upsert) modifiedCount.incrementAndGet(); + if (!upsert) { + modifiedCount.incrementAndGet(); + } // resolve doc to return JsonNode documentToReturn = null; diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolver.java index d1fe341bb3..37a29d9610 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolver.java @@ -9,6 +9,9 @@ import io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; import io.stargate.sgv2.jsonapi.api.v1.metrics.JsonApiMetricsConfig; import io.stargate.sgv2.jsonapi.config.OperationsConfig; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; +import io.stargate.sgv2.jsonapi.exception.ErrorCode; +import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.operation.model.Operation; import io.stargate.sgv2.jsonapi.service.operation.model.ReadType; @@ -62,6 +65,12 @@ public Class getCommandClass() { @Override public Operation resolveCommand(CommandContext commandContext, FindOneAndReplaceCommand command) { + // Add $vector and $vectorize replacement validation here + if 
(command.replacementDocument().has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD) + && command.replacementDocument().has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) { + throw new JsonApiException(ErrorCode.INVALID_USAGE_OF_VECTORIZE); + } + // FindOperation findOperation = getFindOperation(commandContext, command); final DocumentProjector documentProjector = command.buildProjector(); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java index f5eeb3f119..f19cd0620c 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java @@ -8,23 +8,12 @@ import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; -import io.stargate.sgv2.jsonapi.service.operation.model.impl.ReadDocument; import io.stargate.sgv2.jsonapi.util.JsonUtil; import java.util.List; -/** - * Updates the document read from the database with the updates came as part of the request. - * DocumentUpdater construct postpone from commandResolver level to operation level, since we want - * the vectorize as needed, only vectorize when there are documents found or upsert - * - * @param updateClause - vectorize it as needed before building update operations - * @param replaceDocument - replaceDocument to replace the one read from DB - * @param replaceDocumentId - documentId from replaceDocument - * @param updateType - UPDATE/REPLACE - */ +/** Updates the document read from the database with the updates came as part of the request. 
*/ public record DocumentUpdater( - // buildOperations will be executed when apply to update - UpdateClause updateClause, + List updateOperations, ObjectNode replaceDocument, JsonNode replaceDocumentId, UpdateType updateType) { @@ -35,11 +24,7 @@ public record DocumentUpdater( * @return */ public static DocumentUpdater construct(UpdateClause updateDef) { - // try to build operations but do not save the result - // this is for validating the UpdateClause, for example, updator path conflict - // error out before update operation's execution - updateDef.buildOperations(); - return new DocumentUpdater(updateDef, null, null, UpdateType.UPDATE); + return new DocumentUpdater(updateDef.buildOperations(), null, null, UpdateType.UPDATE); } /** @@ -54,6 +39,10 @@ public static DocumentUpdater construct(ObjectNode replaceDocument) { } /** + * This method is the entrance for first level update or replace. first level means it won't + * vectorize and update $vectorize so the updatedDocument returned in DocumentUpdaterResponse will + * leave $vectorize unchanged + * * @param readDocument Document to update * @param docInserted True if document was just created (inserted); false if updating existing * document @@ -70,7 +59,8 @@ public DocumentUpdaterResponse apply(JsonNode readDocument, boolean docInserted) } /** - * Will be used for update commands + * Will be used for update commands. 
This method won't update $vectorize (detail in + * applyUpdateVectorize method) * * @param docToUpdate * @param docInserted @@ -78,7 +68,6 @@ public DocumentUpdaterResponse apply(JsonNode readDocument, boolean docInserted) */ private boolean update(ObjectNode docToUpdate, boolean docInserted) { boolean modified = false; - List updateOperations = updateClause.buildOperations(); for (UpdateOperation updateOperation : updateOperations) { if (updateOperation.shouldApplyIf(docInserted)) { modified |= updateOperation.updateDocument(docToUpdate); @@ -88,17 +77,8 @@ private boolean update(ObjectNode docToUpdate, boolean docInserted) { } /** - * vectorize UpdateClause as needed, only when documents are found or upsert will be used by - * updateOne, findOneAndUpdate, updateMany - * - * @param dataVectorizer - */ - public Uni vectorizeUpdateClause(DataVectorizer dataVectorizer) { - return dataVectorizer.vectorizeUpdateClause(updateClause); - } - - /** - * Will be used for findOneAndReplace + * Will be used for findOneAndReplace. This method will replace $vectorize, but won't re-vectorize + * and replace $vector(detail in applyUpdateVectorize method) * * @param docToUpdate * @param docInserted @@ -115,8 +95,19 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { throw new JsonApiException(ErrorCode.DOCUMENT_REPLACE_DIFFERENT_DOCID); } } + + // If replaceDocument has $vectorize as null value, also set $vector as null here. 
+ // This is because we need to do a comparison for compareDoc and replaceDocument later + JsonNode vectorizeNode = + replaceDocument.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); + if (vectorizeNode != null && vectorizeNode.isNull()) { + ((ObjectNode) replaceDocument) + .put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + } + // In case there is no difference between document return modified as false, so db update // doesn't happen + if (JsonUtil.equalsOrdered(compareDoc, replaceDocument())) { return false; } @@ -128,64 +119,59 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { docToUpdate.set(DocumentConstants.Fields.DOC_ID, replaceDocumentId); } docToUpdate.setAll(replaceDocument()); + // // restore the original $vectorize + // docToUpdate.put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, + // vectorizeNode.asText()); // return modified flag as true return true; } /** - * vectorize replacementDocument as needed, only when document is found will be used by - * findOneAndReplace - * - * @param dataVectorizer - */ - public Uni vectorizeTheReplacementDocument(DataVectorizer dataVectorizer) { - return dataVectorizer.vectorize(List.of(replaceDocument)); - } - - /** - * Check if there is any $vectorize diff If there are docs found to update or doc to replace, then - * this is a necessary condition to proceed vectorization + * This method is the entrance for second level update or replace. This level will vectorize on + * demand and change $vectorize and $vector accordingly. 
* - * @param foundDocs + * @param readDocument Document to update(This document may has been updated once, detail in first + * level update) + * @param docInserted True if document was just created (inserted); false if updating existing + * document */ - public boolean hasVectorizeDiff(List foundDocs) { - String vectorizeTextUpdate = null; - if (updateType().equals(DocumentUpdater.UpdateType.UPDATE)) { - // extract $vectorize if updateClause set operator has it - final ObjectNode setNode = updateClause.updateOperationDefs().get(UpdateOperator.SET); - if (setNode != null) { - final JsonNode updateClauseVectorizeTextJsonNode = - setNode.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); - if (updateClauseVectorizeTextJsonNode != null) { - vectorizeTextUpdate = updateClauseVectorizeTextJsonNode.asText(); + public Uni applyUpdateVectorize( + JsonNode readDocument, boolean docInserted, DataVectorizer dataVectorizer) { + if (UpdateType.UPDATE == updateType) { + for (UpdateOperation updateOperation : updateOperations) { + if (updateOperation.shouldApplyIf(docInserted) + && updateOperation instanceof SetOperation setOperation) { + // filtering out the setOperation + // try to vectorize on demand and change $vectorize and $vector accordingly. + return setOperation + .updateVectorize(readDocument, dataVectorizer) + .onItem() + .transformToUni( + modified -> { + return Uni.createFrom() + .item(new DocumentUpdaterResponse(readDocument, modified)); + }); } } - } else if (updateType().equals(DocumentUpdater.UpdateType.REPLACE)) { - // extract $vectorize if replaceDocument has it - final JsonNode replaceDocumentVectorizeTextJsonNode = - replaceDocument.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); - if (replaceDocumentVectorizeTextJsonNode != null) { - vectorizeTextUpdate = replaceDocumentVectorizeTextJsonNode.asText(); - } - } - - // if there is no $vectorize to update or replace, then no diff. 
- if (vectorizeTextUpdate == null) { - return false; } - - // iterate foundDocs, see if there is any diff for $vectorize - for (ReadDocument foundDoc : foundDocs) { - final JsonNode foundDocVectorizeTextJsonNode = - foundDoc.document().get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); - if (foundDocVectorizeTextJsonNode != null) { - if (!foundDocVectorizeTextJsonNode.asText().equals(vectorizeTextUpdate)) { - // There is a diff - return true; - } + if (UpdateType.REPLACE == updateType) { + // Only need to vectorize when: + // replaceDocument has $vectorize(not null), this is consistent with previous behaviour + // This means even if $vectorize has no diff between readDoc and replacementDoc, we still + // re-vectorize + if (!replaceDocument.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD).isNull()) { + return dataVectorizer + // replacement also considered as update, set isUpdateCommand flag as true + .vectorize(List.of(readDocument), true) + .onItem() + .transformToUni( + modified -> { + return Uni.createFrom().item(new DocumentUpdaterResponse(readDocument, modified)); + }); } } - return false; + // there is no setOperation, so won't modify anything + return Uni.createFrom().item(new DocumentUpdaterResponse(readDocument, false)); } public record DocumentUpdaterResponse(JsonNode document, boolean modified) {} diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializerTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializerTest.java index 71ff64406f..7c8725e5c8 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializerTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializerTest.java @@ -1,6 +1,7 @@ package io.stargate.sgv2.jsonapi.api.model.command.deserializers; import static org.assertj.core.api.Assertions.assertThat; +import static 
org.assertj.core.api.Assertions.catchException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -9,6 +10,7 @@ import io.stargate.sgv2.jsonapi.api.model.command.clause.update.SetOperation; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateClause; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperation; +import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.testresource.NoGlobalResourcesTestProfile; import jakarta.inject.Inject; import org.junit.jupiter.api.Nested; @@ -148,5 +150,31 @@ public void mustHandleDate() throws Exception { UpdateClause updateClause = objectMapper.readValue(json, UpdateClause.class); assertThat(updateClause.buildOperations()).hasSize(1).contains(operation); } + + @Test + public void invalid_set_vectorize_vector() { + String json = + """ + {"$set" : {"$vectorize": "$vectorize string", "$vector" : [0.1,0.2] }} + """; + + Exception e = catchException(() -> objectMapper.readValue(json, UpdateClause.class)); + assertThat(e) + .isInstanceOf(JsonApiException.class) + .hasMessageContaining("$vectorize` and `$vector` can't be used together"); + } + + @Test + public void invalid_unset_vectorize_vector() { + String json = + """ + {"$unset" : {"$vectorize": "$vectorize string", "$vector" : [0.1,0.2] }} + """; + + Exception e = catchException(() -> objectMapper.readValue(json, UpdateClause.class)); + assertThat(e) + .isInstanceOf(JsonApiException.class) + .hasMessageContaining("$vectorize` and `$vector` can't be used together"); + } } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java index b22b6d1a20..19c350e087 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java +++ 
b/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java @@ -12,9 +12,6 @@ import io.smallrye.mutiny.helpers.test.UniAssertSubscriber; import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortClause; import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortExpression; -import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateClause; -import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; -import io.stargate.sgv2.jsonapi.api.model.command.impl.FindOneAndUpdateCommand; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSettings; @@ -47,7 +44,7 @@ public void testTextValues() { new DataVectorizer( testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); try { - dataVectorizer.vectorize(documents).subscribe().asCompletionStage().get(); + dataVectorizer.vectorize(documents, false).subscribe().asCompletionStage().get(); } catch (Exception e) { throw new RuntimeException(e); } @@ -74,7 +71,7 @@ public void testEmptyValues() { new DataVectorizer( testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); try { - dataVectorizer.vectorize(documents).subscribe().asCompletionStage().get(); + dataVectorizer.vectorize(documents, false).subscribe().asCompletionStage().get(); } catch (Exception e) { throw new RuntimeException(e); } @@ -106,7 +103,7 @@ public void testNonTextValues() { try { Throwable failure = dataVectorizer - .vectorize(documents) + .vectorize(documents, false) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitFailure() @@ -133,7 +130,7 @@ public void testNullValues() { new DataVectorizer( testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); try { - dataVectorizer.vectorize(documents).subscribe().asCompletionStage().get(); + 
dataVectorizer.vectorize(documents, false).subscribe().asCompletionStage().get(); } catch (Exception e) { throw new RuntimeException(e); } @@ -158,7 +155,7 @@ public void testWithBothVectorFieldValues() { try { Throwable failure = dataVectorizer - .vectorize(documents) + .vectorize(documents, false) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitFailure() @@ -174,6 +171,33 @@ public void testWithBothVectorFieldValues() { } } + @Test + public void testWithBothVectorFieldValuesFromUpdate() { + List documents = new ArrayList<>(); + + final ObjectNode document = objectMapper.createObjectNode().put("$vectorize", "test data"); + final ArrayNode arrayNode = document.putArray("$vector"); + arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); + arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); + documents.add(document); + DataVectorizer dataVectorizer = + new DataVectorizer( + testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + // dataVectorizer will accept usage for $vectorize and $vector at the same time + // vectorize the $vectorize, and update both $vectorize and $vector + try { + dataVectorizer.vectorize(documents, true).subscribe().asCompletionStage().get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + for (JsonNode doc : documents) { + assertThat(doc.has("$vectorize")).isTrue(); + assertThat(doc.has("$vector")).isTrue(); + assertThat(doc.get("$vector").isArray()).isTrue(); + assertThat(doc.get("$vector").size()).isEqualTo(3); + } + } + @Test public void testWithUnmatchedVectorsNumber() { TestEmbeddingProvider testProvider = @@ -201,7 +225,7 @@ public Uni vectorize( Throwable failure = dataVectorizer - .vectorize(documents) + .vectorize(documents, false) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitFailure() @@ -239,7 +263,7 @@ public void testWithUnmatchedVectorSize() { Throwable failure = dataVectorizer - .vectorize(documents) + .vectorize(documents, 
false) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitFailure() @@ -276,216 +300,4 @@ public void sortClauseValues() { assertThat(sortClause.sortExpressions().get(0).vector().length).isEqualTo(3); } } - - @Nested - public class UpdateClauseValues { - @Test - public void updateClauseSetValues() throws Exception { - String json = - """ - { - "findOneAndUpdate": { - "filter" : {"_id" : "id"}, - "update" : {"$set" : {"$vectorize" : "New York"}} - } - } - """; - FindOneAndUpdateCommand command = objectMapper.readValue(json, FindOneAndUpdateCommand.class); - UpdateClause updateClause = command.updateClause(); - DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); - try { - dataVectorizer.vectorizeUpdateClause(updateClause).subscribe().asCompletionStage().get(); - } catch (Exception e) { - throw new RuntimeException(e); - } - final ObjectNode setNode = updateClause.updateOperationDefs().get(UpdateOperator.SET); - assertThat(setNode.has("$vectorize")).isTrue(); - assertThat(setNode.has("$vector")).isTrue(); - assertThat(setNode.get("$vector").isArray()).isTrue(); - assertThat(setNode.get("$vector").size()).isEqualTo(3); - } - - @Test - public void updateClauseSetBlankValues() throws Exception { - String json = - """ - { - "findOneAndUpdate": { - "filter" : {"_id" : "id"}, - "update" : {"$set" : {"$vectorize" : " "}} - } - } - """; - FindOneAndUpdateCommand command = objectMapper.readValue(json, FindOneAndUpdateCommand.class); - UpdateClause updateClause = command.updateClause(); - DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); - try { - dataVectorizer.vectorizeUpdateClause(updateClause).subscribe().asCompletionStage().get(); - } catch (Exception e) { - throw new RuntimeException(e); - } - final ObjectNode setNode = updateClause.updateOperationDefs().get(UpdateOperator.SET); - 
assertThat(setNode.has("$vectorize")).isTrue(); - assertThat(setNode.has("$vector")).isTrue(); - assertThat(setNode.get("$vector").isNull()).isTrue(); - } - - @Test - public void updateClauseSetBothValues() throws Exception { - String json = - """ - { - "findOneAndUpdate": { - "filter" : {"_id" : "id"}, - "update" : {"$set" : {"$vectorize" : "New York", "$vector" : [0.11, 0.11]}} - } - } - """; - FindOneAndUpdateCommand command = objectMapper.readValue(json, FindOneAndUpdateCommand.class); - UpdateClause updateClause = command.updateClause(); - DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); - Throwable t = - dataVectorizer - .vectorizeUpdateClause(updateClause) - .subscribe() - .withSubscriber(UniAssertSubscriber.create()) - .awaitFailure() - .getFailure(); - assertThat(t) - .isNotNull() - .isInstanceOf(JsonApiException.class) - .withFailMessage("`$vectorize` and `$vector` can't be used together.") - .hasFieldOrPropertyWithValue("errorCode", ErrorCode.INVALID_USAGE_OF_VECTORIZE) - .hasMessage(ErrorCode.INVALID_USAGE_OF_VECTORIZE.getMessage()); - } - - @Test - public void updateClauseSetOnInsertValues() throws Exception { - String json = - """ - { - "findOneAndUpdate": { - "filter" : {"_id" : "id"}, - "update" : {"$setOnInsert" : {"$vectorize" : "New York"}} - } - } - """; - FindOneAndUpdateCommand command = objectMapper.readValue(json, FindOneAndUpdateCommand.class); - UpdateClause updateClause = command.updateClause(); - DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.of("test"), collectionSettings); - try { - dataVectorizer.vectorizeUpdateClause(updateClause).subscribe().asCompletionStage().get(); - } catch (Exception e) { - throw new RuntimeException(e); - } - final ObjectNode setNode = - updateClause.updateOperationDefs().get(UpdateOperator.SET_ON_INSERT); - assertThat(setNode.has("$vectorize")).isTrue(); - 
assertThat(setNode.has("$vector")).isTrue(); - assertThat(setNode.get("$vector").isArray()).isTrue(); - assertThat(setNode.get("$vector").size()).isEqualTo(3); - } - - @Test - public void updateClauseSetOnInsertBothValues() throws Exception { - String json = - """ - { - "findOneAndUpdate": { - "filter" : {"_id" : "id"}, - "update" : {"$setOnInsert" : {"$vectorize" : "New York", "$vector" : [0.11, 0.11]}} - } - } - """; - FindOneAndUpdateCommand command = objectMapper.readValue(json, FindOneAndUpdateCommand.class); - UpdateClause updateClause = command.updateClause(); - DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); - Throwable t = - dataVectorizer - .vectorizeUpdateClause(updateClause) - .subscribe() - .withSubscriber(UniAssertSubscriber.create()) - .awaitFailure() - .getFailure(); - assertThat(t) - .isNotNull() - .isInstanceOf(JsonApiException.class) - .withFailMessage("`$vectorize` and `$vector` can't be used together.") - .hasFieldOrPropertyWithValue("errorCode", ErrorCode.INVALID_USAGE_OF_VECTORIZE) - .hasMessage(ErrorCode.INVALID_USAGE_OF_VECTORIZE.getMessage()); - } - - @Test - public void updateClauseUnsetValues() throws Exception { - String json = - """ - { - "findOneAndUpdate": { - "filter" : {"_id" : "id"}, - "update" : {"$unset" : {"$vectorize" : null}} - } - } - """; - FindOneAndUpdateCommand command = objectMapper.readValue(json, FindOneAndUpdateCommand.class); - UpdateClause updateClause = command.updateClause(); - DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); - try { - dataVectorizer.vectorizeUpdateClause(updateClause).subscribe().asCompletionStage().get(); - } catch (Exception e) { - throw new RuntimeException(e); - } - final ObjectNode unsetNode = updateClause.updateOperationDefs().get(UpdateOperator.UNSET); - assertThat(unsetNode.has("$vectorize")).isTrue(); - 
assertThat(unsetNode.has("$vector")).isTrue(); - assertThat(unsetNode.get("$vector").isNull()).isTrue(); - } - - @Test - public void updateClauseUnsetBothValues() throws Exception { - String json = - """ - { - "findOneAndUpdate": { - "filter" : {"_id" : "id"}, - "update" : {"$unset" : {"$vectorize" : null, "$vector" : null}} - } - } - """; - FindOneAndUpdateCommand command = objectMapper.readValue(json, FindOneAndUpdateCommand.class); - UpdateClause updateClause = command.updateClause(); - DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); - try { - Throwable t = - dataVectorizer - .vectorizeUpdateClause(updateClause) - .subscribe() - .withSubscriber(UniAssertSubscriber.create()) - .awaitFailure() - .getFailure(); - - assertThat(t) - .isNotNull() - .isInstanceOf(JsonApiException.class) - .withFailMessage("`$vectorize` and `$vector` can't be used together.") - .hasFieldOrPropertyWithValue("errorCode", ErrorCode.INVALID_USAGE_OF_VECTORIZE) - .hasMessage(ErrorCode.INVALID_USAGE_OF_VECTORIZE.getMessage()); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java index 828dacc1e3..0308d1c6ef 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java @@ -285,7 +285,7 @@ public void updateOne() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) diff --git 
a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolverTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolverTest.java index 7c62036158..accf991fd2 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolverTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndReplaceCommandResolverTest.java @@ -1,6 +1,7 @@ package io.stargate.sgv2.jsonapi.service.resolver.model.impl; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.catchException; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; @@ -12,6 +13,7 @@ import io.stargate.sgv2.jsonapi.api.model.command.impl.FindOneAndReplaceCommand; import io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; import io.stargate.sgv2.jsonapi.config.OperationsConfig; +import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.operation.model.Operation; import io.stargate.sgv2.jsonapi.service.operation.model.ReadType; import io.stargate.sgv2.jsonapi.service.operation.model.impl.DBFilterBase; @@ -41,6 +43,26 @@ class Resolve { CommandContext commandContext = CommandContext.empty(); + @Test + public void invalidVectorizeUsage() throws Exception { + + String json = + """ + { + "findOneAndReplace": { + "filter" : {"_id" : "id"}, + "replacement" : {"$vectorize" : "vectorize text", "$vector" : [0.1,0.2]} + } + } + """; + FindOneAndReplaceCommand command = + objectMapper.readValue(json, FindOneAndReplaceCommand.class); + Exception e = catchException(() -> resolver.resolveCommand(commandContext, command)); + assertThat(e) + .isInstanceOf(JsonApiException.class) + .hasMessageContaining("$vectorize` and `$vector` can't be used together"); + } + @Test public void idFilterCondition() throws Exception { 
String json = diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndUpdateCommandResolverTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndUpdateCommandResolverTest.java index 6685ad30dd..b7c094b8e5 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndUpdateCommandResolverTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/FindOneAndUpdateCommandResolverTest.java @@ -77,7 +77,7 @@ public void idFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -142,7 +142,7 @@ public void filterConditionSort() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -210,7 +210,7 @@ public void filterConditionVectorSearch() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -277,7 +277,7 @@ public void idFilterConditionWithOptions() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -343,7 +343,7 @@ public void filterConditionWithOptionsSort() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New 
York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -415,7 +415,7 @@ public void dynamicFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java index 65ea59acde..e062e34524 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java @@ -12,7 +12,6 @@ import io.stargate.sgv2.jsonapi.api.model.command.impl.UpdateManyCommand; import io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; import io.stargate.sgv2.jsonapi.config.OperationsConfig; -import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; import io.stargate.sgv2.jsonapi.service.embedding.operation.TestEmbeddingProvider; import io.stargate.sgv2.jsonapi.service.operation.model.Operation; import io.stargate.sgv2.jsonapi.service.operation.model.ReadType; @@ -25,7 +24,6 @@ import io.stargate.sgv2.jsonapi.service.updater.DocumentUpdater; import io.stargate.sgv2.jsonapi.testresource.NoGlobalResourcesTestProfile; import jakarta.inject.Inject; -import java.util.Optional; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -79,7 +77,7 @@ public void idFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + 
assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -141,7 +139,7 @@ public void noFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -194,7 +192,7 @@ public void dynamicFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -241,12 +239,6 @@ public void dynamicFilterConditionSetVectorize() throws Exception { UpdateClause updateClause = DocumentUpdaterUtils.updateClause( UpdateOperator.SET, objectMapper.createObjectNode().put("$vectorize", "test data")); - new DataVectorizer( - TestEmbeddingProvider.commandContextWithVectorize.embeddingProvider(), - objectMapper.getNodeFactory(), - Optional.empty(), - TestEmbeddingProvider.commandContextWithVectorize.collectionSettings()) - .vectorizeUpdateClause(updateClause); assertThat(operation) .isInstanceOfSatisfying( ReadAndUpdateOperation.class, @@ -263,7 +255,7 @@ public void dynamicFilterConditionSetVectorize() throws Exception { .isInstanceOfSatisfying( DocumentUpdater.class, updater -> { - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -328,7 +320,7 @@ public void withUpsert() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) 
diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateOneCommandResolverTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateOneCommandResolverTest.java index 6480eade20..30aed077c4 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateOneCommandResolverTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateOneCommandResolverTest.java @@ -76,7 +76,7 @@ public void idFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -139,7 +139,7 @@ public void noFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -193,7 +193,7 @@ public void dynamicFilterCondition() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -258,7 +258,7 @@ public void dynamicFilterConditionWithSort() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -328,7 +328,7 @@ public void dynamicFilterConditionWithVectorSearch() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - 
assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) @@ -394,7 +394,7 @@ public void withUpsert() throws Exception { UpdateOperator.SET, objectMapper.createObjectNode().put("location", "New York")); - assertThat(updater.updateClause().buildOperations()) + assertThat(updater.updateOperations()) .isEqualTo(updateClause.buildOperations()); }); assertThat(op.findOperation()) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java index 1ffe3b54d3..bd534d701f 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java @@ -8,18 +8,19 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import io.quarkus.test.junit.QuarkusTest; import io.quarkus.test.junit.TestProfile; +import io.smallrye.mutiny.helpers.test.UniAssertSubscriber; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateClause; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; -import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; -import io.stargate.sgv2.jsonapi.service.operation.model.impl.ReadDocument; -import io.stargate.sgv2.jsonapi.service.shredding.model.DocumentId; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSettings; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; +import io.stargate.sgv2.jsonapi.service.embedding.operation.EmbeddingProvider; +import io.stargate.sgv2.jsonapi.service.embedding.operation.TestEmbeddingProvider; import io.stargate.sgv2.jsonapi.service.testutil.DocumentUpdaterUtils; import 
io.stargate.sgv2.jsonapi.testresource.NoGlobalResourcesTestProfile; import jakarta.inject.Inject; -import java.util.List; -import java.util.UUID; +import java.util.Optional; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -274,9 +275,7 @@ public void unsupportedUpdateOperator() throws Exception { Throwable t = catchThrowable( () -> { - DocumentUpdater.construct(objectMapper.readValue(updateClause, UpdateClause.class)) - .updateClause() - .buildOperations(); + DocumentUpdater.construct(objectMapper.readValue(updateClause, UpdateClause.class)); }); assertThat(t) .isNotNull() @@ -293,11 +292,9 @@ public void invalidSetDocId() throws Exception { catchThrowable( () -> { DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.SET, - objectMapper.getNodeFactory().objectNode().put("_id", "xyz"))) - .updateClause() - .buildOperations(); + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, + objectMapper.getNodeFactory().objectNode().put("_id", "xyz"))); }); assertThat(t) .isNotNull() @@ -313,11 +310,9 @@ public void invalidUnsetDocId() throws Exception { catchThrowable( () -> { DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.UNSET, - objectMapper.getNodeFactory().objectNode().put("_id", "xyz"))) - .updateClause() - .buildOperations(); + DocumentUpdaterUtils.updateClause( + UpdateOperator.UNSET, + objectMapper.getNodeFactory().objectNode().put("_id", "xyz"))); }); assertThat(t) .isNotNull() @@ -333,13 +328,11 @@ public void invalidSetAndUnsetSameField() throws Exception { catchThrowable( () -> { DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.SET, - (ObjectNode) objectMapper.readTree("{\"setField\":3, \"common\":true}"), - UpdateOperator.UNSET, - (ObjectNode) objectMapper.readTree("{\"unsetField\":1, \"common\":1}"))) - .updateClause() - .buildOperations(); + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, + (ObjectNode) 
objectMapper.readTree("{\"setField\":3, \"common\":true}"), + UpdateOperator.UNSET, + (ObjectNode) objectMapper.readTree("{\"unsetField\":1, \"common\":1}"))); }); assertThat(t) .isInstanceOf(JsonApiException.class) @@ -353,14 +346,11 @@ public void invalidMulAndIncSameFieldNested() { catchThrowable( () -> DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.INC, - (ObjectNode) - objectMapper.readTree("{\"root.x\":-7, \"root.inc\":-3}"), - UpdateOperator.MUL, - (ObjectNode) objectMapper.readTree("{\"root.mul\":3, \"root.x\":2}"))) - .updateClause() - .buildOperations()); + DocumentUpdaterUtils.updateClause( + UpdateOperator.INC, + (ObjectNode) objectMapper.readTree("{\"root.x\":-7, \"root.inc\":-3}"), + UpdateOperator.MUL, + (ObjectNode) objectMapper.readTree("{\"root.mul\":3, \"root.x\":2}")))); assertThat(t) .isInstanceOf(JsonApiException.class) .hasFieldOrPropertyWithValue("errorCode", ErrorCode.UNSUPPORTED_UPDATE_OPERATION_PARAM) @@ -373,11 +363,9 @@ public void invalidSetOnParentPath() { catchThrowable( () -> DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.SET, - (ObjectNode) objectMapper.readTree("{\"root.1\":-7, \"root\":[ ]}"))) - .updateClause() - .buildOperations()); + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, + (ObjectNode) objectMapper.readTree("{\"root.1\":-7, \"root\":[ ]}")))); assertThat(t) .isInstanceOf(JsonApiException.class) .hasFieldOrPropertyWithValue("errorCode", ErrorCode.UNSUPPORTED_UPDATE_OPERATION_PARAM) @@ -391,11 +379,11 @@ public void invalidSetOnParentPathWithDollar() { catchThrowable( () -> DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.SET, - (ObjectNode) - objectMapper.readTree( - """ + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, + (ObjectNode) + objectMapper.readTree( + """ { "root" : 7, "x" : 3, @@ -403,9 +391,7 @@ public void invalidSetOnParentPathWithDollar() { "y" : 5, "root.a" : 3 } - """))) - 
.updateClause() - .buildOperations()); + """)))); assertThat(t) .isInstanceOf(JsonApiException.class) .hasFieldOrPropertyWithValue("errorCode", ErrorCode.UNSUPPORTED_UPDATE_OPERATION_PARAM) @@ -545,96 +531,221 @@ public void replaceEmpty() throws Exception { @Nested class VectorizeUpdateTest { + private final EmbeddingProvider testService = new TestEmbeddingProvider(); + private final CollectionSettings collectionSettings = + TestEmbeddingProvider.commandContextWithVectorize.collectionSettings(); + @Test - public void updateOne_onlyVectorizeWithDiff_noDiff() throws Exception { + public void two_levels_update() throws Exception { + // First level update will skip $vectorize for setOperation + // vectorization will be done in second level String updateVectorizeData = """ - {"$vectorize" : "Beijing City"} - """; + {"$vectorize" : "Beijing is a big city", "location" : "Beijing City"} + """; DocumentUpdater documentUpdater = DocumentUpdater.construct( DocumentUpdaterUtils.updateClause( UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); - final ReadDocument readDocument = - ReadDocument.from( - DocumentId.fromString("key1"), - UUID.randomUUID(), - objectMapper - .createObjectNode() - .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Beijing City")); + String expected_level_1 = + """ + { + "_id": "1", + "location": "Beijing City" + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON); // location as London + JsonNode expectedData1 = objectMapper.readTree(expected_level_1); + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + assertThat(firstResponse) + .isNotNull() + .satisfies( + firstResponseNode -> { + assertThat(firstResponseNode.document()).isEqualTo(expectedData1); + assertThat(firstResponseNode.modified()).isEqualTo(true); // modified location + }); - assertThat(documentUpdater.hasVectorizeDiff(List.of(readDocument))).isFalse(); + // Second level update will 
vectorize in setOperation + DataVectorizer dataVectorizer = + new DataVectorizer( + testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + final DocumentUpdater.DocumentUpdaterResponse secondResponse = + documentUpdater + .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) + .subscribe() + .withSubscriber(UniAssertSubscriber.create()) + .awaitItem() + .getItem(); + + String expected_level_2 = + """ + { + "_id":"1", + "location": "Beijing City", + "$vectorize" : "Beijing is a big city", + "$vector": [0.25,0.25,0.25] + } + """; + JsonNode expectedData2 = objectMapper.readTree(expected_level_2); + assertThat(secondResponse) + .isNotNull() + .satisfies( + secondResponseNode -> { + assertThat(secondResponseNode.document()) + .usingRecursiveComparison() + .ignoringFields("order") + .isEqualTo(expectedData2); + assertThat(secondResponseNode.modified()) + .isEqualTo(true); // modified $vectorize and $vector + }); } @Test - public void updateOne_onlyVectorizeWithDiff() throws Exception { + public void not_modified_for_first_update() throws Exception { + // First level update will skip $vectorize for setOperation + // vectorization will be done in second level String updateVectorizeData = """ - {"$vectorize" : "Beijing City"} - """; + {"$vectorize" : "Beijing is a big city"} + """; DocumentUpdater documentUpdater = DocumentUpdater.construct( DocumentUpdaterUtils.updateClause( UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); - final ReadDocument readDocument = - ReadDocument.from( - DocumentId.fromString("key1"), - UUID.randomUUID(), - objectMapper - .createObjectNode() - .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Shanghai City")); + String expected_level_1 = + """ + { + "_id": "1", + "location": "London" + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON); // location as London + JsonNode expectedData1 = objectMapper.readTree(expected_level_1); + 
DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + assertThat(firstResponse) + .isNotNull() + .satisfies( + firstResponseNode -> { + assertThat(firstResponseNode.document()).isEqualTo(expectedData1); + assertThat(firstResponseNode.modified()) + .isEqualTo(false); // location is not modified + }); - assertThat(documentUpdater.hasVectorizeDiff(List.of(readDocument))).isTrue(); + // Second level update will vectorize in setOperation + DataVectorizer dataVectorizer = + new DataVectorizer( + testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + final DocumentUpdater.DocumentUpdaterResponse secondResponse = + documentUpdater + .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) + .subscribe() + .withSubscriber(UniAssertSubscriber.create()) + .awaitItem() + .getItem(); + + String expected_level_2 = + """ + { + "_id":"1", + "location": "London", + "$vectorize" : "Beijing is a big city", + "$vector": [0.25,0.25,0.25] + } + """; + JsonNode expectedData2 = objectMapper.readTree(expected_level_2); + assertThat(secondResponse) + .isNotNull() + .satisfies( + secondResponseNode -> { + assertThat(secondResponseNode.document()) + .usingRecursiveComparison() + .ignoringFields("order") + .isEqualTo(expectedData2); + assertThat(secondResponseNode.modified()) + .isEqualTo(true); // modified $vectorize and $vector + }); } @Test - public void updateMany_onlyVectorizeWithDiff() throws Exception { + public void update_vector_at_first_level() throws Exception { + + // final ObjectNode document = objectMapper.createObjectNode().put("$vectorize", + // "test data"); + // final ArrayNode arrayNode = document.putArray("$vector"); + // arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); + // arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); + // documents.add(document); String updateVectorizeData = """ - {"$vectorize" : "Beijing City"} - """; + {"$vectorize" : "Beijing is a 
big city", "$vector" : [0.2,0.4,0.5]} + """; DocumentUpdater documentUpdater = DocumentUpdater.construct( DocumentUpdaterUtils.updateClause( UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); + String expected_level_1 = + """ + { + "_id": "1", + "location": "London", + "$vector": [0.2,0.4,0.5] + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON); // location as London + JsonNode expectedData1 = objectMapper.readTree(expected_level_1); + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + assertThat(firstResponse) + .isNotNull() + .satisfies( + firstResponseNode -> { + assertThat(firstResponseNode.document()) + .usingRecursiveComparison() + .ignoringFields("order") + .isEqualTo(expectedData1); + assertThat(firstResponseNode.modified()).isEqualTo(true); // vector is modified + }); - final ReadDocument readDocument1 = - ReadDocument.from( - DocumentId.fromString("key1"), - UUID.randomUUID(), - objectMapper - .createObjectNode() - .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Shanghai City")); - final ReadDocument readDocument2 = - ReadDocument.from( - DocumentId.fromString("key2"), - UUID.randomUUID(), - objectMapper - .createObjectNode() - .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Beijing City")); - - assertThat(documentUpdater.hasVectorizeDiff(List.of(readDocument1, readDocument2))).isTrue(); - } - - @Test - public void findOneAndReplace_onlyVectorizeWithDiff() throws Exception { - ObjectNode replaceNode = - objectMapper - .createObjectNode() - .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Beijing City"); - DocumentUpdater documentUpdater = DocumentUpdater.construct(replaceNode); - - final ReadDocument readDocument = - ReadDocument.from( - DocumentId.fromString("key1"), - UUID.randomUUID(), - objectMapper - .createObjectNode() - .put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, "Shanghai City")); - - 
assertThat(documentUpdater.hasVectorizeDiff(List.of(readDocument))).isTrue(); + // Second level update will vectorize in setOperation + DataVectorizer dataVectorizer = + new DataVectorizer( + testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + final DocumentUpdater.DocumentUpdaterResponse secondResponse = + documentUpdater + .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) + .subscribe() + .withSubscriber(UniAssertSubscriber.create()) + .awaitItem() + .getItem(); + + String expected_level_2 = + """ + { + "_id":"1", + "location": "London", + "$vectorize" : "Beijing is a big city", + "$vector": [0.25,0.25,0.25] + } + """; + JsonNode expectedData2 = objectMapper.readTree(expected_level_2); + assertThat(secondResponse) + .isNotNull() + .satisfies( + secondResponseNode -> { + assertThat(secondResponseNode.document()) + .usingRecursiveComparison() + .ignoringFields("order") + .isEqualTo(expectedData2); + assertThat(secondResponseNode.modified()) + .isEqualTo(true); // modified $vectorize and $vector + }); } } } From b1b5ad7ff0aa935c7b21fa14cf58894c872d76c8 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Wed, 10 Jul 2024 13:59:31 -0700 Subject: [PATCH 07/18] fix --- .../command/clause/update/SetOperation.java | 3 +- .../deserializers/SortClauseDeserializer.java | 2 +- .../service/embedding/DataVectorizer.java | 78 ++++++++++-- .../embedding/DataVectorizerService.java | 4 +- .../model/impl/ReadAndUpdateOperation.java | 2 +- .../service/updater/DocumentUpdater.java | 27 ++-- .../operation/DataVectorizerTest.java | 115 ++++++++++++------ .../service/updater/DocumentUpdaterTest.java | 7 -- 8 files changed, 173 insertions(+), 65 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java index a21423d490..d425b941b2 100644 --- 
a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java @@ -121,11 +121,12 @@ public Uni updateVectorize(JsonNode doc, DataVectorizer dataVectorizer) JsonNode newValue = action.value(); JsonNode oldValue = target.valueNode(); + // if there is no oldValue or there is a diff if ((oldValue == null) || !JsonUtil.equalsOrdered(oldValue, newValue)) { // replace the oldValue with newValue first ((ObjectNode) doc).put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, newValue); // vectorize the newValue, update $vectorize, $vector - return dataVectorizer.vectorize(List.of(doc), true); + return dataVectorizer.vectorizeUpdateDocument(doc); } } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/SortClauseDeserializer.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/SortClauseDeserializer.java index ad656d7a53..8232fc1007 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/SortClauseDeserializer.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/SortClauseDeserializer.java @@ -69,7 +69,7 @@ public SortClause deserialize(JsonParser parser, DeserializationContext ctxt) } else { ArrayNode arrayNode = (ArrayNode) inner.getValue(); float[] arrayVals = new float[arrayNode.size()]; - if (arrayNode.size() == 0) { + if (arrayNode.vectorize(documents)) { throw new JsonApiException( ErrorCode.SHRED_BAD_VECTOR_SIZE, ErrorCode.SHRED_BAD_VECTOR_SIZE.getMessage()); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java index 746ac9bc5b..e8f27d491c 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java @@ 
-52,17 +52,11 @@ public DataVectorizer( } /** - * Vectorize the '$vectorize' fields in the document. This method is used by commands: insertOne, - * insertMany (detail in DataVectorizerService vectorizeDocument method) updateOne, updateMany, - * findOneAndUpdate, findOneAndReplace (detail in SetOperation updateVectorize method) - * - *

With isUpdateCommand flag set as true, this method allows to vectorize JsonNode with both - * $vector and $vectorize + * Vectorize the '$vectorize' fields in the document * * @param documents - Documents to be vectorized - * @param isUpdateCommand - is called from isUpdateCommand or not */ - public Uni vectorize(List documents, boolean isUpdateCommand) { + public Uni vectorize(List documents) { try { int vectorDataPosition = 0; List vectorizeTexts = new ArrayList<>(); @@ -70,8 +64,7 @@ public Uni vectorize(List documents, boolean isUpdateCommand) for (int position = 0; position < documents.size(); position++) { JsonNode document = documents.get(position); if (document.has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { - // Do not allow using $vector and $vectorize together for insertion commands - if (document.has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD) && !isUpdateCommand) { + if (document.has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) { throw new JsonApiException( ErrorCode.INVALID_USAGE_OF_VECTORIZE, ErrorCode.INVALID_USAGE_OF_VECTORIZE.getMessage() @@ -161,6 +154,71 @@ public Uni vectorize(List documents, boolean isUpdateCommand) } } + /** + * This method will be used by documentUpdater(updateOne, updateMany, findOneAndUpdate, + * findOneAndReplace) Since we need to vectorize on demand, so vectorization for updateCommands + * will postpone and move into ReadAndUpdateOperation. 
+ * + * @param document - Document to be vectorized + * @return Uni - have modified the document or not + */ + public Uni vectorizeUpdateDocument(JsonNode document) { + if (!document.has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { + return Uni.createFrom().item(false); + } + final JsonNode jsonNode = document.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); + // $vectorize as null value, also update $vector as null, modified + if (jsonNode.isNull()) { + ((ObjectNode) document).put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + return Uni.createFrom().item(true); + } + // $vectorize is not textual value + if (!jsonNode.isTextual()) { + throw ErrorCode.INVALID_VECTORIZE_VALUE_TYPE.toApiException(); + } + String vectorizeData = jsonNode.asText(); + // $vectorize is blank text value, set $vector as null value, modified + if (vectorizeData.isBlank()) { + ((ObjectNode) document).put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + return Uni.createFrom().item(true); + } + + // $vectorize is textual and not blank, going to vectorize it + if (embeddingProvider == null) { + throw ErrorCode.EMBEDDING_SERVICE_NOT_CONFIGURED.toApiException( + collectionSettings.collectionName()); + } + Uni> vectors = + embeddingProvider + .vectorize( + 1, + List.of(vectorizeData), + embeddingApiKey, + EmbeddingProvider.EmbeddingRequestType.INDEX) + .map(EmbeddingProvider.Response::embeddings); + return vectors + .onItem() + .transform( + vectorData -> { + float[] vector = vectorData.get(0); + // check if vector have the expected size + if (vector.length != collectionSettings.vectorConfig().vectorSize()) { + throw EMBEDDING_PROVIDER_UNEXPECTED_RESPONSE.toApiException( + "Embedding provider '%s' did not return expected embedding length. Expect: '%d'. 
Actual: '%d'", + collectionSettings.vectorConfig().vectorizeConfig().provider(), + collectionSettings.vectorConfig().vectorSize(), + vector.length); + } + final ArrayNode arrayNode = nodeFactory.arrayNode(vector.length); + for (float listValue : vector) { + arrayNode.add(nodeFactory.numberNode(listValue)); + } + ((ObjectNode) document) + .put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, arrayNode); + return true; + }); + } + /** * Vectorize the '$vectorize' fields in the sort clause * diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java index baebe060da..4e153be92a 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java @@ -85,9 +85,9 @@ private Uni vectorizeSortClause( private Uni vectorizeDocument( DataVectorizer dataVectorizer, CommandContext commandContext, Command command) { if (command instanceof InsertOneCommand insertOneCommand) { - return dataVectorizer.vectorize(List.of(insertOneCommand.document()), false); + return dataVectorizer.vectorize(List.of(insertOneCommand.document())); } else if (command instanceof InsertManyCommand insertManyCommand) { - return dataVectorizer.vectorize(insertManyCommand.documents(), false); + return dataVectorizer.vectorize(insertManyCommand.documents()); } return Uni.createFrom().item(true); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java index 2a2603fae6..9ad90d5f1d 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java @@ -72,7 +72,7 @@ public Uni> 
execute( findResponse -> { pageStateReference.set(findResponse.pageState()); final List docs = findResponse.docs(); - if (upsert() && docs.size() == 0 && matchedCount.get() == 0) { + if (upsert() && docs.isEmpty() && matchedCount.get() == 0) { return Multi.createFrom().item(findOperation().getNewDocument()); } else { matchedCount.addAndGet(docs.size()); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java index f19cd0620c..85b5bd90e3 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java @@ -77,8 +77,8 @@ private boolean update(ObjectNode docToUpdate, boolean docInserted) { } /** - * Will be used for findOneAndReplace. This method will replace $vectorize, but won't re-vectorize - * and replace $vector(detail in applyUpdateVectorize method) + * Will be used for findOneAndReplace. This is first level replace. This method will replace the + * document, but won't re-vectorize yet(detail in applyUpdateVectorize method) * * @param docToUpdate * @param docInserted @@ -96,11 +96,13 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { } } - // If replaceDocument has $vectorize as null value, also set $vector as null here. - // This is because we need to do a comparison for compareDoc and replaceDocument later + // If replaceDocument has $vectorize as null or blank text value, also set $vector as null value + // here. 
JsonNode vectorizeNode = replaceDocument.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); - if (vectorizeNode != null && vectorizeNode.isNull()) { + if (vectorizeNode != null + && (vectorizeNode.isNull() + || (vectorizeNode.isTextual() && vectorizeNode.asText().isBlank()))) { ((ObjectNode) replaceDocument) .put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); } @@ -156,13 +158,20 @@ public Uni applyUpdateVectorize( } if (UpdateType.REPLACE == updateType) { // Only need to vectorize when: - // replaceDocument has $vectorize(not null), this is consistent with previous behaviour + // replaceDocument has $vectorize(not null value, not blank text value), this is consistent + // with previous behaviour // This means even if $vectorize has no diff between readDoc and replacementDoc, we still // re-vectorize - if (!replaceDocument.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD).isNull()) { + final JsonNode replaceVectorizeNode = + replaceDocument.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); + if (replaceVectorizeNode != null) { + // if replace $vectorize is null value or blank text value, no need to vectorize + if (replaceVectorizeNode.isNull() + || (replaceVectorizeNode.isTextual() && replaceVectorizeNode.asText().isBlank())) { + return Uni.createFrom().item(new DocumentUpdaterResponse(readDocument, false)); + } return dataVectorizer - // replacement also considered as update, set isUpdateCommand flag as true - .vectorize(List.of(readDocument), true) + .vectorizeUpdateDocument(readDocument) .onItem() .transformToUni( modified -> { diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java index 19c350e087..4a146461dc 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java +++ 
b/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java @@ -1,6 +1,7 @@ package io.stargate.sgv2.jsonapi.service.embedding.operation; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.catchThrowable; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -44,7 +45,7 @@ public void testTextValues() { new DataVectorizer( testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); try { - dataVectorizer.vectorize(documents, false).subscribe().asCompletionStage().get(); + dataVectorizer.vectorize(documents).subscribe().asCompletionStage().get(); } catch (Exception e) { throw new RuntimeException(e); } @@ -71,7 +72,7 @@ public void testEmptyValues() { new DataVectorizer( testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); try { - dataVectorizer.vectorize(documents, false).subscribe().asCompletionStage().get(); + dataVectorizer.vectorize(documents).subscribe().asCompletionStage().get(); } catch (Exception e) { throw new RuntimeException(e); } @@ -103,7 +104,7 @@ public void testNonTextValues() { try { Throwable failure = dataVectorizer - .vectorize(documents, false) + .vectorize(documents) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitFailure() @@ -130,7 +131,7 @@ public void testNullValues() { new DataVectorizer( testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); try { - dataVectorizer.vectorize(documents, false).subscribe().asCompletionStage().get(); + dataVectorizer.vectorize(documents).subscribe().asCompletionStage().get(); } catch (Exception e) { throw new RuntimeException(e); } @@ -155,7 +156,7 @@ public void testWithBothVectorFieldValues() { try { Throwable failure = dataVectorizer - .vectorize(documents, false) + .vectorize(documents) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitFailure() @@ -171,33 
+172,6 @@ public void testWithBothVectorFieldValues() { } } - @Test - public void testWithBothVectorFieldValuesFromUpdate() { - List documents = new ArrayList<>(); - - final ObjectNode document = objectMapper.createObjectNode().put("$vectorize", "test data"); - final ArrayNode arrayNode = document.putArray("$vector"); - arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); - arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); - documents.add(document); - DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); - // dataVectorizer will accept usage for $vectorize and $vector at the same time - // vectorize the $vectorize, and update both $vectorize and $vector - try { - dataVectorizer.vectorize(documents, true).subscribe().asCompletionStage().get(); - } catch (Exception e) { - throw new RuntimeException(e); - } - for (JsonNode doc : documents) { - assertThat(doc.has("$vectorize")).isTrue(); - assertThat(doc.has("$vector")).isTrue(); - assertThat(doc.get("$vector").isArray()).isTrue(); - assertThat(doc.get("$vector").size()).isEqualTo(3); - } - } - @Test public void testWithUnmatchedVectorsNumber() { TestEmbeddingProvider testProvider = @@ -225,7 +199,7 @@ public Uni vectorize( Throwable failure = dataVectorizer - .vectorize(documents, false) + .vectorize(documents) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitFailure() @@ -263,7 +237,7 @@ public void testWithUnmatchedVectorSize() { Throwable failure = dataVectorizer - .vectorize(documents, false) + .vectorize(documents) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitFailure() @@ -300,4 +274,77 @@ public void sortClauseValues() { assertThat(sortClause.sortExpressions().get(0).vector().length).isEqualTo(3); } } + + @Nested + public class updateText { + @Test + public void vectorizeUpdate() { + final ObjectNode document = objectMapper.createObjectNode().put("$vectorize", "test 
data"); + final ArrayNode arrayNode = document.putArray("$vector"); + arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); + arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); + DataVectorizer dataVectorizer = + new DataVectorizer( + testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + try { + final Boolean modified = + dataVectorizer.vectorizeUpdateDocument(document).subscribe().asCompletionStage().get(); + // modified, since we need to re-vectorize, and update the $vector + assertThat(modified).isTrue(); + } catch (Exception e) { + throw new RuntimeException(e); + } + assertThat(document.has("$vectorize")).isTrue(); + assertThat(document.has("$vector")).isTrue(); + assertThat(document.get("$vector").isArray()).isTrue(); + assertThat(document.get("$vector").size()).isEqualTo(3); // vector updated + } + + @Test + public void vectorizeBlank() { + final ObjectNode document = objectMapper.createObjectNode().put("$vectorize", ""); + final ArrayNode arrayNode = document.putArray("$vector"); + arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); + arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); + DataVectorizer dataVectorizer = + new DataVectorizer( + testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + try { + final Boolean modified = + dataVectorizer.vectorizeUpdateDocument(document).subscribe().asCompletionStage().get(); + // modified, since we need to re-vectorize, and update the $vector + assertThat(modified).isTrue(); + } catch (Exception e) { + throw new RuntimeException(e); + } + assertThat(document.has("$vectorize")).isTrue(); + assertThat(document.has("$vector")).isTrue(); + assertThat(document.get("$vector").isNull()).isTrue(); // will set $vector as null value + } + + @Test + public void vectorizeUpdateFailureNonTextual() { + final ObjectNode document = objectMapper.createObjectNode().put("$vectorize", 123); + final ArrayNode arrayNode = 
document.putArray("$vector"); + arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); + arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); + DataVectorizer dataVectorizer = + new DataVectorizer( + testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + Throwable failure = + catchThrowable( + () -> { + dataVectorizer + .vectorizeUpdateDocument(document) + .subscribe() + .withSubscriber(UniAssertSubscriber.create()) + .awaitFailure() + .getFailure(); + }); + assertThat(failure) + .isInstanceOf(JsonApiException.class) + .hasFieldOrPropertyWithValue("errorCode", ErrorCode.INVALID_VECTORIZE_VALUE_TYPE) + .hasFieldOrPropertyWithValue("message", "$vectorize value needs to be text value"); + } + } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java index bd534d701f..e7bddbb19c 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java @@ -674,13 +674,6 @@ public void not_modified_for_first_update() throws Exception { @Test public void update_vector_at_first_level() throws Exception { - - // final ObjectNode document = objectMapper.createObjectNode().put("$vectorize", - // "test data"); - // final ArrayNode arrayNode = document.putArray("$vector"); - // arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); - // arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); - // documents.add(document); String updateVectorizeData = """ {"$vectorize" : "Beijing is a big city", "$vector" : [0.2,0.4,0.5]} From 62ce663257075cd828a97e2747c8d94fe745aab0 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Wed, 10 Jul 2024 14:05:18 -0700 Subject: [PATCH 08/18] fix --- .../api/model/command/deserializers/SortClauseDeserializer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/SortClauseDeserializer.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/SortClauseDeserializer.java index 8232fc1007..ad656d7a53 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/SortClauseDeserializer.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/SortClauseDeserializer.java @@ -69,7 +69,7 @@ public SortClause deserialize(JsonParser parser, DeserializationContext ctxt) } else { ArrayNode arrayNode = (ArrayNode) inner.getValue(); float[] arrayVals = new float[arrayNode.size()]; - if (arrayNode.vectorize(documents)) { + if (arrayNode.size() == 0) { throw new JsonApiException( ErrorCode.SHRED_BAD_VECTOR_SIZE, ErrorCode.SHRED_BAD_VECTOR_SIZE.getMessage()); } From 4f1d47c24edcec62215014d57b772e23b1f31614 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Wed, 10 Jul 2024 14:33:17 -0700 Subject: [PATCH 09/18] add unset test --- .../service/updater/DocumentUpdater.java | 9 +-- .../service/updater/DocumentUpdaterTest.java | 66 +++++++++++++++++++ 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java index 85b5bd90e3..1e6caa1d95 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java @@ -109,7 +109,6 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { // In case there is no difference between document return modified as false, so db update // doesn't happen - if (JsonUtil.equalsOrdered(compareDoc, replaceDocument())) { return false; } @@ -121,9 +120,6 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { docToUpdate.set(DocumentConstants.Fields.DOC_ID, replaceDocumentId); } 
docToUpdate.setAll(replaceDocument()); - // // restore the original $vectorize - // docToUpdate.put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, - // vectorizeNode.asText()); // return modified flag as true return true; } @@ -136,6 +132,7 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { * level update) * @param docInserted True if document was just created (inserted); false if updating existing * document + * @param dataVectorizer dataVectorizer */ public Uni applyUpdateVectorize( JsonNode readDocument, boolean docInserted, DataVectorizer dataVectorizer) { @@ -179,13 +176,13 @@ public Uni applyUpdateVectorize( }); } } - // there is no setOperation, so won't modify anything + // If there is no return from above, meaning nothing is modified at this second level update return Uni.createFrom().item(new DocumentUpdaterResponse(readDocument, false)); } public record DocumentUpdaterResponse(JsonNode document, boolean modified) {} - public enum UpdateType { + private enum UpdateType { UPDATE, REPLACE } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java index e7bddbb19c..c0a3efd72f 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java @@ -740,5 +740,71 @@ public void update_vector_at_first_level() throws Exception { .isEqualTo(true); // modified $vectorize and $vector }); } + + @Test + public void two_levels_update_unset() throws Exception { + String updateVectorizeData = + """ + {"$vectorize" : "Beijing is a big city", "location" : "London"} + """; + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + DocumentUpdaterUtils.updateClause( + UpdateOperator.UNSET, + (ObjectNode) + objectMapper.readTree( + updateVectorizeData))); // will unset $vectorize, $vector and location + + String 
expected_level_1 = + """ + { + "_id": "1" + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); + JsonNode expectedData1 = objectMapper.readTree(expected_level_1); + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + assertThat(firstResponse) + .isNotNull() + .satisfies( + firstResponseNode -> { + assertThat(firstResponseNode.document()).isEqualTo(expectedData1); + assertThat(firstResponseNode.modified()) + .isEqualTo(true); // modified $vectorize, $vector and location + }); + + // Second level update will try to vectorize, in this test case, will do nothing, since there + // is no setOperation + DataVectorizer dataVectorizer = + new DataVectorizer( + testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + final DocumentUpdater.DocumentUpdaterResponse secondResponse = + documentUpdater + .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) + .subscribe() + .withSubscriber(UniAssertSubscriber.create()) + .awaitItem() + .getItem(); + + String expected_level_2 = + """ + { + "_id":"1" + } + """; + JsonNode expectedData2 = objectMapper.readTree(expected_level_2); + assertThat(secondResponse) + .isNotNull() + .satisfies( + secondResponseNode -> { + assertThat(secondResponseNode.document()) + .usingRecursiveComparison() + .ignoringFields("order") + .isEqualTo(expectedData2); + assertThat(secondResponseNode.modified()).isEqualTo(false); // nothing is modified + }); + } } } From 930dc2ee8b1a9c616e423f61f8daedcb54e60dd5 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Wed, 10 Jul 2024 17:31:57 -0700 Subject: [PATCH 10/18] merge from main --- .../service/embedding/DataVectorizerService.java | 2 +- .../embedding/operation/DataVectorizerTest.java | 3 +-- .../service/updater/DocumentUpdaterTest.java | 13 ++++--------- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java 
b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java index 4e153be92a..79f2dbb15d 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java @@ -70,7 +70,7 @@ public DataVectorizer constructDataVectorizer( return new DataVectorizer( embeddingProvider, objectMapper.getNodeFactory(), - dataApiRequestInfo.getEmbeddingApiKey(), + dataApiRequestInfo.getAndValidateEmbeddingApiKey(), commandContext.collectionSettings()); } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java index 8fcc34e339..8a2ebcf6be 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java @@ -318,8 +318,7 @@ public void vectorizeUpdateFailureNonTextual() { arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); Throwable failure = catchThrowable( () -> { diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java index c0a3efd72f..3b5e5dfe52 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java @@ -20,7 +20,6 @@ import io.stargate.sgv2.jsonapi.service.testutil.DocumentUpdaterUtils; import 
io.stargate.sgv2.jsonapi.testresource.NoGlobalResourcesTestProfile; import jakarta.inject.Inject; -import java.util.Optional; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -570,8 +569,7 @@ public void two_levels_update() throws Exception { // Second level update will vectorize in setOperation DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = documentUpdater .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) @@ -639,8 +637,7 @@ public void not_modified_for_first_update() throws Exception { // Second level update will vectorize in setOperation DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = documentUpdater .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) @@ -708,8 +705,7 @@ public void update_vector_at_first_level() throws Exception { // Second level update will vectorize in setOperation DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = documentUpdater .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) @@ -778,8 +774,7 @@ public void two_levels_update_unset() throws Exception { // Second level update will try to vectorize, in this test case, will do nothing, since there // is no setOperation DataVectorizer dataVectorizer = - new DataVectorizer( - 
testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); + new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = documentUpdater .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) From a65880c1caacd0f4039d4e73237f8ea8f0ab2f59 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Wed, 10 Jul 2024 19:58:20 -0700 Subject: [PATCH 11/18] fix IT --- .../model/impl/ReadAndUpdateOperation.java | 114 ++++++++++-------- 1 file changed, 61 insertions(+), 53 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java index 9ad90d5f1d..def09fa455 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java @@ -141,19 +141,21 @@ public Uni> execute( private Uni processUpdate( DataApiRequestInfo dataApiRequestInfo, - ReadDocument readDocument, + ReadDocument document, QueryExecutor queryExecutor, AtomicInteger modifiedCount) { return Uni.createFrom() - .item(readDocument) + .item(document) .flatMap( - document -> { + readDocument -> { // if there is no document: return null item if (readDocument == null) { return Uni.createFrom().nullItem(); } // upsert if we have no transaction if before boolean upsert = readDocument.txnId() == null; + JsonNode originalDocument = upsert ? 
null : readDocument.document(); + // apply document updates: if no changes return null item // First update, will not vectorize DocumentUpdater.DocumentUpdaterResponse firstDocumentUpdaterResponse = @@ -175,60 +177,66 @@ private Uni processUpdate( secondDocumentUpdaterResponse.document(), firstDocumentUpdaterResponse.modified() | secondDocumentUpdaterResponse.modified())); - }); - }) - // perform update operation and save only if data is modified. - .flatMap( - documentUpdaterResponse -> { - boolean upsert = readDocument.txnId() == null; - JsonNode originalDocument = upsert ? null : readDocument.document(); - // In case no change to document and not an upsert document, short circuit and return - if (!documentUpdaterResponse.modified() && !upsert) { - // If no change return the original document Issue #390 - if (returnDocumentInResponse) { - resultProjection.applyProjection(originalDocument); - return Uni.createFrom() - .item(new UpdatedDocument(readDocument.id(), upsert, originalDocument, null)); - } else { - return Uni.createFrom().nullItem(); - } - } + }) + .onItem() + .transformToUni( + combinedUpdaterResponse -> { + // In case no change to document and not an upsert document, short circuit + // and return + if (!combinedUpdaterResponse.modified() && !upsert) { + // If no change return the original document Issue #390 + if (returnDocumentInResponse) { + resultProjection.applyProjection(originalDocument); + return Uni.createFrom() + .item( + new UpdatedDocument( + readDocument.id(), upsert, originalDocument, null)); + } else { + return Uni.createFrom().nullItem(); + } + } - final WritableShreddedDocument writableShreddedDocument = - shredder() - .shred( - commandContext(), - documentUpdaterResponse.document(), - readDocument.txnId()); + final WritableShreddedDocument writableShreddedDocument = + shredder() + .shred( + commandContext(), + combinedUpdaterResponse.document(), + readDocument.txnId()); - // Have to do this because shredder adds _id field to the 
document if it doesn't exist - JsonNode updatedDocument = writableShreddedDocument.docJsonNode(); - // update the document - return updatedDocument(dataApiRequestInfo, queryExecutor, writableShreddedDocument) + // Have to do this because shredder adds _id field to the document if it + // doesn't exist + JsonNode updatedDocument = writableShreddedDocument.docJsonNode(); + // update the document + return updatedDocument( + dataApiRequestInfo, queryExecutor, writableShreddedDocument) - // send result back depending on the input - .onItem() - .ifNotNull() - .transform( - v -> { - // if not insert increment modified count - if (!upsert) { - modifiedCount.incrementAndGet(); - } + // send result back depending on the input + .onItem() + .ifNotNull() + .transform( + v -> { + // if not insert increment modified count + if (!upsert) { + modifiedCount.incrementAndGet(); + } - // resolve doc to return - JsonNode documentToReturn = null; - if (returnDocumentInResponse) { - documentToReturn = - returnUpdatedDocument ? updatedDocument : originalDocument; - // Some operations (findOneAndUpdate) define projection to apply to - // result: - if (documentToReturn != null) { // null for some Operation tests - resultProjection.applyProjection(documentToReturn); - } - } - return new UpdatedDocument( - writableShreddedDocument.id(), upsert, documentToReturn, null); + // resolve doc to return + JsonNode documentToReturn = null; + if (returnDocumentInResponse) { + documentToReturn = + returnUpdatedDocument ? 
updatedDocument : originalDocument; + // operations (findOneAndUpdate) define projection to apply to + // result + if (documentToReturn != null) { // null for some Operation tests + resultProjection.applyProjection(documentToReturn); + } + } + return new UpdatedDocument( + writableShreddedDocument.id(), + upsert, + documentToReturn, + null); + }); }); }); } From 2e3e7f60248ee5399824a4d96d699e13d955807c Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Tue, 16 Jul 2024 10:19:19 -0700 Subject: [PATCH 12/18] refactor --- .../clause/update/AddToSetOperation.java | 4 +- .../clause/update/CurrentDateOperation.java | 4 +- .../update/EmbeddingUpdateOperation.java | 34 + .../command/clause/update/IncOperation.java | 4 +- .../clause/update/MinMaxOperation.java | 4 +- .../command/clause/update/MulOperation.java | 4 +- .../command/clause/update/PopOperation.java | 4 +- .../command/clause/update/PushOperation.java | 4 +- .../clause/update/RenameOperation.java | 4 +- .../command/clause/update/SetOperation.java | 65 +- .../command/clause/update/UnsetOperation.java | 4 +- .../clause/update/UpdateOperation.java | 5 +- .../service/embedding/DataVectorizer.java | 52 +- .../embedding/DataVectorizerService.java | 2 +- .../model/impl/ReadAndUpdateOperation.java | 49 +- .../service/updater/DocumentUpdater.java | 133 ++-- .../operation/DataVectorizerTest.java | 66 +- .../clause/update/AddToSetOperationTest.java | 28 +- .../update/CurrentDateOperationTest.java | 6 +- .../clause/update/IncOperationTest.java | 12 +- .../clause/update/MinMaxOperationTest.java | 20 +- .../clause/update/MulOperationTest.java | 16 +- .../clause/update/PopOperationTest.java | 20 +- .../clause/update/PushOperationTest.java | 38 +- .../clause/update/RenameOperationTest.java | 12 +- .../clause/update/SetOperationTest.java | 28 +- .../clause/update/UnsetOperationTest.java | 12 +- .../CommandResolverWithVectorizerTest.java | 2 - .../impl/UpdateManyCommandResolverTest.java | 2 - .../service/updater/DocumentUpdaterTest.java 
| 635 +++++++++++++++--- 30 files changed, 800 insertions(+), 473 deletions(-) create mode 100644 src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/AddToSetOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/AddToSetOperation.java index 54e2f9d3f8..b41d2717e5 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/AddToSetOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/AddToSetOperation.java @@ -103,7 +103,7 @@ private static boolean hasModifier(ObjectNode node) { } @Override - public boolean updateDocument(ObjectNode doc) { + public UpdateOperationResult updateDocument(ObjectNode doc) { boolean modified = false; for (Action action : actions) { PathMatch target = action.locator().findOrCreate(doc); @@ -136,7 +136,7 @@ public boolean updateDocument(ObjectNode doc) { } } - return modified; + return new UpdateOperationResult(modified, null); } private boolean addToSet(ArrayNode set, JsonNode elementToAdd) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/CurrentDateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/CurrentDateOperation.java index 233d315620..7cf1726fbd 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/CurrentDateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/CurrentDateOperation.java @@ -52,7 +52,7 @@ private static void verifyIsTrueOrDate(JsonNode value) { } @Override - public boolean updateDocument(ObjectNode doc) { + public UpdateOperationResult updateDocument(ObjectNode doc) { boolean modified = false; final long now = System.currentTimeMillis(); ObjectNode newValue = JsonUtil.createEJSonDate(doc, now); @@ -66,7 +66,7 @@ public boolean 
updateDocument(ObjectNode doc) { modified = true; } } - return modified; + return new UpdateOperationResult(modified, null); } record Action(PathMatchLocator locator) implements ActionWithLocator {} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java new file mode 100644 index 0000000000..edfb61bada --- /dev/null +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java @@ -0,0 +1,34 @@ +package io.stargate.sgv2.jsonapi.api.model.command.clause.update; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; + +public record EmbeddingUpdateOperation(String vectorizeContent) { + // UpdateOperation.UpdateOperationResult + // updateDocument(ObjectNode doc, float[] vector) + + /** + * // TODO 一定会换的,因为我们已经知道 vectorize 有 diff了 , 而且你已经拿到vector array了 + * + *

update the document with corresponding vector + * + * @param doc Document to update + * @param dataVectorizer dataVectorizer + * @return Uni modified + */ + public void updateDocument(JsonNode doc, float[] vector) { + // TODO can I do this instancitation? + ObjectMapper objectMapper = new ObjectMapper(); + JsonNodeFactory nodeFactory = objectMapper.getNodeFactory(); + final JsonNode vectorJsonNode = doc.get(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); + final ArrayNode arrayNode = nodeFactory.arrayNode(vector.length); + for (float listValue : vector) { + arrayNode.add(nodeFactory.numberNode(listValue)); + } + ((ObjectNode) doc).put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, arrayNode); + } +} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/IncOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/IncOperation.java index 53807a0d2c..fbc346ef5f 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/IncOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/IncOperation.java @@ -46,7 +46,7 @@ public static IncOperation construct(ObjectNode args) { } @Override - public boolean updateDocument(ObjectNode doc) { + public UpdateOperationResult updateDocument(ObjectNode doc) { // Almost always changes, except if adding zero; need to track boolean modified = false; for (Action action : actions) { @@ -77,7 +77,7 @@ public boolean updateDocument(ObjectNode doc) { } } - return modified; + return new UpdateOperationResult(modified, null); } private JsonNode addNumbers(ObjectNode doc, NumericNode nr1, NumericNode nr2) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MinMaxOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MinMaxOperation.java index 96d62aa89e..5963ad876a 100644 --- 
a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MinMaxOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MinMaxOperation.java @@ -46,7 +46,7 @@ private static MinMaxOperation construct(ObjectNode args, UpdateOperator oper, b } @Override - public boolean updateDocument(ObjectNode doc) { + public UpdateOperationResult updateDocument(ObjectNode doc) { // Almost always changes, except if adding zero; need to track boolean modified = false; for (Action action : actions) { @@ -66,7 +66,7 @@ public boolean updateDocument(ObjectNode doc) { } } - return modified; + return new UpdateOperationResult(modified, null); } private boolean shouldReplace(JsonNode oldValue, JsonNode newValue) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MulOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MulOperation.java index a387bd6e2e..d8fe754f4f 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MulOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MulOperation.java @@ -47,7 +47,7 @@ public static MulOperation construct(ObjectNode args) { } @Override - public boolean updateDocument(ObjectNode doc) { + public UpdateOperationResult updateDocument(ObjectNode doc) { boolean modified = false; for (Action action : actions) { final NumericNode multiplier = action.value; @@ -76,7 +76,7 @@ public boolean updateDocument(ObjectNode doc) { } } - return modified; + return new UpdateOperationResult(modified, null); } private JsonNode multiply(ObjectNode doc, JsonNode oldValue, JsonNode multiplierValue) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PopOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PopOperation.java index 8ada92797c..54106a13c0 100644 --- 
a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PopOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PopOperation.java @@ -57,7 +57,7 @@ public static PopOperation construct(ObjectNode args) { } @Override - public boolean updateDocument(ObjectNode doc) { + public UpdateOperationResult updateDocument(ObjectNode doc) { boolean changes = false; for (Action action : actions) { PathMatch target = action.locator().findIfExists(doc); @@ -89,7 +89,7 @@ public boolean updateDocument(ObjectNode doc) { + value.getNodeType()); } } - return changes; + return new UpdateOperationResult(changes, null); } /** Value class for per-field Pop operation definitions. */ diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PushOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PushOperation.java index 9b03bc1a81..a79ef757c9 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PushOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PushOperation.java @@ -125,7 +125,7 @@ private static boolean hasModifier(ObjectNode node) { } @Override - public boolean updateDocument(ObjectNode doc) { + public UpdateOperationResult updateDocument(ObjectNode doc) { for (Action action : actions) { final JsonNode toAdd = action.value; @@ -174,7 +174,7 @@ public boolean updateDocument(ObjectNode doc) { } // Every valid update operation modifies document so need just one: - return !actions.isEmpty(); + return new UpdateOperationResult(!actions.isEmpty(), null); } // Just needed for tests diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/RenameOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/RenameOperation.java index d60238b688..28355d12a1 100644 --- 
a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/RenameOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/RenameOperation.java @@ -44,7 +44,7 @@ public static RenameOperation construct(ObjectNode args) { } @Override - public boolean updateDocument(ObjectNode doc) { + public UpdateOperationResult updateDocument(ObjectNode doc) { boolean modified = false; for (Action action : actions) { PathMatch src = action.sourceLocator().findIfExists(doc); @@ -78,7 +78,7 @@ public boolean updateDocument(ObjectNode doc) { dst.replaceValue(value); } } - return modified; + return new UpdateOperationResult(modified, null); } // Unlike most operations, we have 2 locators (src, dest), use explicit names diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java index d425b941b2..4c5369f922 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java @@ -2,9 +2,8 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import io.smallrye.mutiny.Uni; import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; -import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; +import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.util.JsonUtil; import io.stargate.sgv2.jsonapi.util.PathMatch; import io.stargate.sgv2.jsonapi.util.PathMatchLocator; @@ -75,63 +74,47 @@ public boolean shouldApplyIf(boolean isInsert) { } @Override - public boolean updateDocument(ObjectNode doc) { + public UpdateOperationResult updateDocument(ObjectNode doc) { boolean modified = false; Set setPaths = new HashSet<>(); actions.stream().forEach(action -> setPaths.add(action.locator().path())); - for 
(Action action : actions) { - - if (DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD.equals(action.locator().path())) { - // won't update $vectorize in this method - // will vectorize on demand and update $vectorize in updateVectorize method below - continue; - } + EmbeddingUpdateOperation embeddingUpdateOperation = null; + for (Action action : actions) { PathMatch target = action.locator().findOrCreate(doc); JsonNode newValue = action.value(); JsonNode oldValue = target.valueNode(); // Modify if no old value OR new value differs, as per Mongo-equality rules if ((oldValue == null) || !JsonUtil.equalsOrdered(oldValue, newValue)) { + // replace old value with matched path new value target.replaceValue(newValue); - // $vector is updated and $vectorize is not updated, remove the $vectorize field in the - // document + + // $vector is updated and $vectorize is not updated, remove $vectorize in the document if (DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD.equals(action.locator().path()) && !setPaths.contains(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { doc.remove(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); } - modified = true; - } - } - return modified; - } - /** - * This updateVectorize method will vectorize as demand and update the $vectorize 1. check if - * there is diff for $vectorize and proceed 2. vectorize updated $vectorize to get the new vector - * 3. 
update $vector and $vectorize - * - * @param doc Document to update - * @param dataVectorizer dataVectorizer - * @return Uni modified - */ - public Uni updateVectorize(JsonNode doc, DataVectorizer dataVectorizer) { - for (Action action : actions) { - if (DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD.equals(action.locator().path())) { - PathMatch target = action.locator().findOrCreate(doc); - JsonNode newValue = action.value(); - JsonNode oldValue = target.valueNode(); - - // if there is no oldValue or there is a diff - if ((oldValue == null) || !JsonUtil.equalsOrdered(oldValue, newValue)) { - // replace the oldValue with newValue first - ((ObjectNode) doc).put(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD, newValue); - // vectorize the newValue, update $vectorize, $vector - return dataVectorizer.vectorizeUpdateDocument(doc); + // $vectorize + if (DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD.equals(action.locator().path())) { + if (newValue.isNull()) { + // if $vectorize is null value, update $vector as null + doc.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + } else if (!newValue.isTextual()) { + // if $vectorize is not textual value + throw ErrorCode.INVALID_VECTORIZE_VALUE_TYPE.toApiException(); + } else if (newValue.asText().isBlank()) { + // $vectorize is blank text value, set $vector as null value, no need to vectorize + doc.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + } else { + // if $vectorize is textual and not blank, create embeddingUpdateOperation + embeddingUpdateOperation = new EmbeddingUpdateOperation(newValue.asText()); + } } + modified = true; } } - // no diff for $vectorize, so nothing is modified in this method - return Uni.createFrom().item(false); + return new UpdateOperationResult(modified, embeddingUpdateOperation); } // Needed because some unit tests check for equality diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java 
b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java index 47f12e2dcd..efe74dccf8 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java @@ -28,7 +28,7 @@ public static UnsetOperation construct(ObjectNode args) { } @Override - public boolean updateDocument(ObjectNode doc) { + public UpdateOperationResult updateDocument(ObjectNode doc) { boolean modified = false; Set unsetPaths = new HashSet<>(); actions.stream().forEach(action -> unsetPaths.add(action.locator().path())); @@ -44,7 +44,7 @@ public boolean updateDocument(ObjectNode doc) { if (modified && unsetPaths.contains(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { doc.remove(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); } - return modified; + return new UpdateOperationResult(modified, null); } record Action(PathMatchLocator locator) implements ActionWithLocator {} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java index 97f80a63a7..b33c4a2b58 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java @@ -30,7 +30,7 @@ public List actions() { * @param doc Document to apply operation to * @return True if document was modified by operation; false if not. 
*/ - public abstract boolean updateDocument(ObjectNode doc); + public abstract UpdateOperationResult updateDocument(ObjectNode doc); /** * Method called to see if update operator should be applied for specific kind of update: @@ -110,4 +110,7 @@ public int compare(ActionWithLocator o1, ActionWithLocator o2) { return o1.path().compareTo(o2.path()); } } + + public record UpdateOperationResult( + boolean modified, EmbeddingUpdateOperation embeddingUpdateOperation) {} } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java index ebc34011e8..debedbce6d 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java @@ -9,8 +9,6 @@ import io.smallrye.mutiny.Uni; import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortClause; import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortExpression; -import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateClause; -import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; import io.stargate.sgv2.jsonapi.api.request.EmbeddingCredentials; import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ErrorCode; @@ -157,45 +155,19 @@ public Uni vectorize(List documents) { } /** - * This method will be used by documentUpdater(updateOne, updateMany, findOneAndUpdate, - * findOneAndReplace) Since we need to vectorize on demand, so vectorization for updateCommands - * will postpone and move into ReadAndUpdateOperation. 
+ * This method will be used to vectorize the $vectorize string content vectorizeContent must be + * not null and not blank text * - * @param document - Document to be vectorized - * @return Uni - have modified the document or not + * @param vectorizeContent - vectorize string to be vectorized + * @return Uni - result vector float array */ - public Uni vectorizeUpdateDocument(JsonNode document) { - if (!document.has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { - return Uni.createFrom().item(false); - } - final JsonNode jsonNode = document.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); - // $vectorize as null value, also update $vector as null, modified - if (jsonNode.isNull()) { - ((ObjectNode) document).put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); - return Uni.createFrom().item(true); - } - // $vectorize is not textual value - if (!jsonNode.isTextual()) { - throw ErrorCode.INVALID_VECTORIZE_VALUE_TYPE.toApiException(); - } - String vectorizeData = jsonNode.asText(); - // $vectorize is blank text value, set $vector as null value, modified - if (vectorizeData.isBlank()) { - ((ObjectNode) document).put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); - return Uni.createFrom().item(true); - } - - // $vectorize is textual and not blank, going to vectorize it - if (embeddingProvider == null) { - throw ErrorCode.EMBEDDING_SERVICE_NOT_CONFIGURED.toApiException( - collectionSettings.collectionName()); - } + public Uni vectorize(String vectorizeContent) { Uni> vectors = embeddingProvider .vectorize( 1, - List.of(vectorizeData), - embeddingCredentials, + List.of(vectorizeContent), + embeddingCredentials, EmbeddingProvider.EmbeddingRequestType.INDEX) .map(EmbeddingProvider.Response::embeddings); return vectors @@ -211,13 +183,7 @@ public Uni vectorizeUpdateDocument(JsonNode document) { collectionSettings.vectorConfig().vectorSize(), vector.length); } - final ArrayNode arrayNode = 
nodeFactory.arrayNode(vector.length); - for (float listValue : vector) { - arrayNode.add(nodeFactory.numberNode(listValue)); - } - ((ObjectNode) document) - .put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, arrayNode); - return true; + return vector; }); } @@ -243,7 +209,7 @@ public Uni vectorize(SortClause sortClause) { .vectorize( 1, List.of(text), - embeddingCredentials, + embeddingCredentials, EmbeddingProvider.EmbeddingRequestType.SEARCH) .map(res -> res.embeddings()); return vectors diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java index 5cf87b1e48..d5722b752f 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java @@ -70,7 +70,7 @@ public DataVectorizer constructDataVectorizer( return new DataVectorizer( embeddingProvider, objectMapper.getNodeFactory(), - dataApiRequestInfo.getEmbeddingCredentials(), + dataApiRequestInfo.getEmbeddingCredentials(), commandContext.collectionSettings()); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java index def09fa455..4e5f3769b7 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java @@ -25,8 +25,7 @@ import java.util.function.Supplier; /** - * This operation method is used for 4 commands findOneAndUpdate, findOneAndReplace, updateOne and - * updateMany + * This operation method is used for 3 commands findOneAndUpdate, updateOne and updateMany * * @param commandContext * @param findOperation @@ -72,7 +71,7 @@ public Uni> execute( findResponse -> { 
pageStateReference.set(findResponse.pageState()); final List docs = findResponse.docs(); - if (upsert() && docs.isEmpty() && matchedCount.get() == 0) { + if (upsert() && docs.size() == 0 && matchedCount.get() == 0) { return Multi.createFrom().item(findOperation().getNewDocument()); } else { matchedCount.addAndGet(docs.size()); @@ -129,7 +128,6 @@ public Uni> execute( .jsonProcessingMetricsReporter() .reportJsonWrittenDocsMetrics( commandContext().commandName(), modifiedCount.get()); - return new UpdateOperationPage( matchedCount.get(), modifiedCount.get(), @@ -146,44 +144,32 @@ private Uni processUpdate( AtomicInteger modifiedCount) { return Uni.createFrom() .item(document) + + // perform update operation and save only if data is modified. .flatMap( readDocument -> { - // if there is no document: return null item + // if there is no document return null item if (readDocument == null) { return Uni.createFrom().nullItem(); } + // upsert if we have no transaction if before boolean upsert = readDocument.txnId() == null; JsonNode originalDocument = upsert ? 
null : readDocument.document(); - // apply document updates: if no changes return null item - // First update, will not vectorize - DocumentUpdater.DocumentUpdaterResponse firstDocumentUpdaterResponse = + DocumentUpdater.DocumentUpdaterResponse documentUpdaterResponse = documentUpdater().apply(readDocument.document().deepCopy(), upsert); - // Second update, will vectorize on demand and update $vectorize and $vector - // accordingly + final DataVectorizer dataVectorizer = dataVectorizerService.constructDataVectorizer(dataApiRequestInfo, commandContext); - return documentUpdater() - .applyUpdateVectorize( - firstDocumentUpdaterResponse.document(), upsert, dataVectorizer) + return documentUpdater + .updateEmbeddingVector(documentUpdaterResponse, dataVectorizer) .onItem() .transformToUni( - secondDocumentUpdaterResponse -> { - // Need to combine two modified result here - return Uni.createFrom() - .item( - new DocumentUpdater.DocumentUpdaterResponse( - secondDocumentUpdaterResponse.document(), - firstDocumentUpdaterResponse.modified() - | secondDocumentUpdaterResponse.modified())); - }) - .onItem() - .transformToUni( - combinedUpdaterResponse -> { + vectorizedDocumentUpdaterResponse -> { // In case no change to document and not an upsert document, short circuit // and return - if (!combinedUpdaterResponse.modified() && !upsert) { + if (!vectorizedDocumentUpdaterResponse.modified() && !upsert) { // If no change return the original document Issue #390 if (returnDocumentInResponse) { resultProjection.applyProjection(originalDocument); @@ -200,7 +186,7 @@ private Uni processUpdate( shredder() .shred( commandContext(), - combinedUpdaterResponse.document(), + vectorizedDocumentUpdaterResponse.document(), readDocument.txnId()); // Have to do this because shredder adds _id field to the document if it @@ -216,17 +202,16 @@ private Uni processUpdate( .transform( v -> { // if not insert increment modified count - if (!upsert) { - modifiedCount.incrementAndGet(); - } + if 
(!upsert) modifiedCount.incrementAndGet(); // resolve doc to return JsonNode documentToReturn = null; if (returnDocumentInResponse) { documentToReturn = returnUpdatedDocument ? updatedDocument : originalDocument; - // operations (findOneAndUpdate) define projection to apply to - // result + // Some operations (findOneAndUpdate) define projection to apply + // to + // result: if (documentToReturn != null) { // null for some Operation tests resultProjection.applyProjection(documentToReturn); } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java index 1e6caa1d95..9a65b5f551 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java @@ -39,9 +39,9 @@ public static DocumentUpdater construct(ObjectNode replaceDocument) { } /** - * This method is the entrance for first level update or replace. first level means it won't - * vectorize and update $vectorize so the updatedDocument returned in DocumentUpdaterResponse will - * leave $vectorize unchanged + * This method is the entrance for first level update or replace. First level means it won't + * vectorize if needed, but will warp an EmbeddingUpdateOperation in the DocumentUpdaterResponse + * to do the following embedding update. 
* * @param readDocument Document to update * @param docInserted True if document was just created (inserted); false if updating existing @@ -50,41 +50,43 @@ public static DocumentUpdater construct(ObjectNode replaceDocument) { public DocumentUpdaterResponse apply(JsonNode readDocument, boolean docInserted) { ObjectNode docToUpdate = (ObjectNode) readDocument; if (UpdateType.UPDATE == updateType) { - boolean modified = update(docToUpdate, docInserted); - return new DocumentUpdaterResponse(readDocument, modified); + return update(docToUpdate, docInserted); } else { - boolean modified = replace(docToUpdate, docInserted); - return new DocumentUpdaterResponse(readDocument, modified); + return replace(docToUpdate, docInserted); } } /** - * Will be used for update commands. This method won't update $vectorize (detail in - * applyUpdateVectorize method) + * Will be used for update commands. This is first level replace. This method will replace the + * document, but won't re-vectorize yet(detail in updateEmbeddingVector method) * * @param docToUpdate * @param docInserted * @return */ - private boolean update(ObjectNode docToUpdate, boolean docInserted) { + private DocumentUpdaterResponse update(ObjectNode docToUpdate, boolean docInserted) { boolean modified = false; + EmbeddingUpdateOperation embeddingUpdateOperation = null; for (UpdateOperation updateOperation : updateOperations) { if (updateOperation.shouldApplyIf(docInserted)) { - modified |= updateOperation.updateDocument(docToUpdate); + final UpdateOperation.UpdateOperationResult updateOperationResult = + updateOperation.updateDocument(docToUpdate); + modified |= updateOperationResult.modified(); + embeddingUpdateOperation = updateOperationResult.embeddingUpdateOperation(); } } - return modified; + return new DocumentUpdaterResponse(docToUpdate, modified, embeddingUpdateOperation); } /** * Will be used for findOneAndReplace. This is first level replace. 
This method will replace the - * document, but won't re-vectorize yet(detail in applyUpdateVectorize method) + * document, but won't re-vectorize yet(detail in updateEmbeddingVector method) * * @param docToUpdate * @param docInserted * @return */ - private boolean replace(ObjectNode docToUpdate, boolean docInserted) { + private DocumentUpdaterResponse replace(ObjectNode docToUpdate, boolean docInserted) { // Do deep clone so we can remove _id field and check ObjectNode compareDoc = docToUpdate.deepCopy(); JsonNode idNode = compareDoc.remove(DocumentConstants.Fields.DOC_ID); @@ -96,21 +98,31 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { } } - // If replaceDocument has $vectorize as null or blank text value, also set $vector as null value - // here. + EmbeddingUpdateOperation embeddingUpdateOperation = null; JsonNode vectorizeNode = replaceDocument.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); - if (vectorizeNode != null - && (vectorizeNode.isNull() - || (vectorizeNode.isTextual() && vectorizeNode.asText().isBlank()))) { - ((ObjectNode) replaceDocument) - .put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + if (vectorizeNode != null) { + // If replaceDocument has $vectorize as null value or blank text value, also set $vector as + // null value here. 
+ if (vectorizeNode.isNull()) { + // if $vectorize is null value, update $vector as null + replaceDocument.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + } else if (!vectorizeNode.isTextual()) { + // if $vectorize is not textual value + throw ErrorCode.INVALID_VECTORIZE_VALUE_TYPE.toApiException(); + } else if (vectorizeNode.asText().isBlank()) { + // $vectorize is blank text value, set $vector as null value, no need to vectorize + replaceDocument.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + } else { + // if $vectorize is textual and not blank, create embeddingUpdateOperation + embeddingUpdateOperation = new EmbeddingUpdateOperation(vectorizeNode.asText()); + } } // In case there is no difference between document return modified as false, so db update // doesn't happen if (JsonUtil.equalsOrdered(compareDoc, replaceDocument())) { - return false; + return new DocumentUpdaterResponse(docToUpdate, false, null); } // remove all data and add _id as first field; either from original document or from replacement docToUpdate.removeAll(); @@ -121,66 +133,41 @@ private boolean replace(ObjectNode docToUpdate, boolean docInserted) { } docToUpdate.setAll(replaceDocument()); // return modified flag as true - return true; + return new DocumentUpdaterResponse(docToUpdate, true, embeddingUpdateOperation); } /** - * This method is the entrance for second level update or replace. This level will vectorize on - * demand and change $vectorize and $vector accordingly. 
+ * This method performs the deferred vectorization, if any. When responseBeforeVectorize carries + * a non-null embeddingUpdateOperation, dataVectorizer is used to vectorize its content and the + * embeddingUpdateOperation is then used to update the document's $vector field. * - * @param readDocument Document to update(This document may has been updated once, detail in first - * level update) - * @param docInserted True if document was just created (inserted); false if updating existing - * document + * @param responseBeforeVectorize response before vectorize * @param dataVectorizer dataVectorizer + * @return Uni */ - public Uni applyUpdateVectorize( - JsonNode readDocument, boolean docInserted, DataVectorizer dataVectorizer) { - if (UpdateType.UPDATE == updateType) { - for (UpdateOperation updateOperation : updateOperations) { - if (updateOperation.shouldApplyIf(docInserted) - && updateOperation instanceof SetOperation setOperation) { - // filtering out the setOperation - // try to vectorize on demand and change $vectorize and $vector accordingly.
- return setOperation - .updateVectorize(readDocument, dataVectorizer) - .onItem() - .transformToUni( - modified -> { - return Uni.createFrom() - .item(new DocumentUpdaterResponse(readDocument, modified)); - }); - } - } + public Uni updateEmbeddingVector( + DocumentUpdaterResponse responseBeforeVectorize, DataVectorizer dataVectorizer) { + final EmbeddingUpdateOperation embeddingUpdateOperation = + responseBeforeVectorize.embeddingUpdateOperation(); + if (embeddingUpdateOperation == null) { + return Uni.createFrom().item(responseBeforeVectorize); } - if (UpdateType.REPLACE == updateType) { - // Only need to vectorize when: - // replaceDocument has $vectorize(not null value, not blank text value), this is consistent - // with previous behaviour - // This means even if $vectorize has no diff between readDoc and replacementDoc, we still - // re-vectorize - final JsonNode replaceVectorizeNode = - replaceDocument.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); - if (replaceVectorizeNode != null) { - // if replace $vectorize is null value or blank text value, no need to vectorize - if (replaceVectorizeNode.isNull() - || (replaceVectorizeNode.isTextual() && replaceVectorizeNode.asText().isBlank())) { - return Uni.createFrom().item(new DocumentUpdaterResponse(readDocument, false)); - } - return dataVectorizer - .vectorizeUpdateDocument(readDocument) - .onItem() - .transformToUni( - modified -> { - return Uni.createFrom().item(new DocumentUpdaterResponse(readDocument, modified)); - }); - } - } - // If there is no return from above, meaning nothing is modified at this second level update - return Uni.createFrom().item(new DocumentUpdaterResponse(readDocument, false)); + return dataVectorizer + .vectorize(embeddingUpdateOperation.vectorizeContent()) + .onItem() + .transformToUni( + vector -> { + embeddingUpdateOperation.updateDocument(responseBeforeVectorize.document, vector); + return Uni.createFrom().item(responseBeforeVectorize); + }); } - public record 
DocumentUpdaterResponse(JsonNode document, boolean modified) {} + /** + * The documentUpdaterResponse has the updated document, boolean flag to indicate the document is + * modified or not, an embeddingUpdateOperation to update the embedding + */ + public record DocumentUpdaterResponse( + JsonNode document, boolean modified, EmbeddingUpdateOperation embeddingUpdateOperation) {} private enum UpdateType { UPDATE, diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java index 97809e3129..f9958bf8c1 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/embedding/operation/DataVectorizerTest.java @@ -1,7 +1,6 @@ package io.stargate.sgv2.jsonapi.service.embedding.operation; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.catchThrowable; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -13,9 +12,6 @@ import io.smallrye.mutiny.helpers.test.UniAssertSubscriber; import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortClause; import io.stargate.sgv2.jsonapi.api.model.command.clause.sort.SortExpression; -import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateClause; -import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; -import io.stargate.sgv2.jsonapi.api.model.command.impl.FindOneAndUpdateCommand; import io.stargate.sgv2.jsonapi.api.request.EmbeddingCredentials; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; @@ -285,9 +281,9 @@ public void sortClauseValues() { } @Nested - public class updateText { + public class vectorizeText { @Test - public void vectorizeUpdate() { + public void vectorize() { final ObjectNode 
document = objectMapper.createObjectNode().put("$vectorize", "test data"); final ArrayNode arrayNode = document.putArray("$vector"); arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); @@ -296,63 +292,15 @@ public void vectorizeUpdate() { new DataVectorizer( testService, objectMapper.getNodeFactory(), embeddingCredentials, collectionSettings); try { - final Boolean modified = - dataVectorizer.vectorizeUpdateDocument(document).subscribe().asCompletionStage().get(); - // modified, since we need to re-vectorize, and update the $vector - assertThat(modified).isTrue(); - } catch (Exception e) { - throw new RuntimeException(e); - } - assertThat(document.has("$vectorize")).isTrue(); - assertThat(document.has("$vector")).isTrue(); - assertThat(document.get("$vector").isArray()).isTrue(); - assertThat(document.get("$vector").size()).isEqualTo(3); // vector updated - } + final float[] testData = + dataVectorizer.vectorize("test data").subscribe().asCompletionStage().get(); + assertThat(testData[0]).isEqualTo(0.25f); + assertThat(testData[1]).isEqualTo(0.25f); + assertThat(testData[2]).isEqualTo(0.25f); - @Test - public void vectorizeBlank() { - final ObjectNode document = objectMapper.createObjectNode().put("$vectorize", ""); - final ArrayNode arrayNode = document.putArray("$vector"); - arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); - arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); - DataVectorizer dataVectorizer = - new DataVectorizer( - testService, objectMapper.getNodeFactory(), embeddingCredentials, collectionSettings); - try { - final Boolean modified = - dataVectorizer.vectorizeUpdateDocument(document).subscribe().asCompletionStage().get(); - // modified, since we need to re-vectorize, and update the $vector - assertThat(modified).isTrue(); } catch (Exception e) { throw new RuntimeException(e); } - assertThat(document.has("$vectorize")).isTrue(); - assertThat(document.has("$vector")).isTrue(); - 
assertThat(document.get("$vector").isNull()).isTrue(); // will set $vector as null value - } - - @Test - public void vectorizeUpdateFailureNonTextual() { - final ObjectNode document = objectMapper.createObjectNode().put("$vectorize", 123); - final ArrayNode arrayNode = document.putArray("$vector"); - arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); - arrayNode.add(objectMapper.getNodeFactory().numberNode(0.11f)); - DataVectorizer dataVectorizer = - new DataVectorizer(testService, objectMapper.getNodeFactory(), Optional.empty(), collectionSettings); - Throwable failure = - catchThrowable( - () -> { - dataVectorizer - .vectorizeUpdateDocument(document) - .subscribe() - .withSubscriber(UniAssertSubscriber.create()) - .awaitFailure() - .getFailure(); - }); - assertThat(failure) - .isInstanceOf(JsonApiException.class) - .hasFieldOrPropertyWithValue("errorCode", ErrorCode.INVALID_VECTORIZE_VALUE_TYPE) - .hasFieldOrPropertyWithValue("message", "$vectorize value needs to be text value"); } } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/AddToSetOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/AddToSetOperationTest.java index dd738d8183..1876a0c357 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/AddToSetOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/AddToSetOperationTest.java @@ -26,7 +26,7 @@ public void addToRootArray() { UpdateOperator.ADD_TO_SET.resolveOperation(objectFromJson("{ \"array\" : 32 }")); assertThat(oper).isInstanceOf(AddToSetOperation.class); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"array\" : [ true ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -42,7 +42,7 @@ public void tryAddExistingValueRoot() { 
ObjectNode doc = objectFromJson("{ \"array\" : [ true, \"foo\", 19 ] }"); ObjectNode expected = doc.deepCopy(); // Won't add since we already had same value - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(expected); } @@ -51,7 +51,7 @@ public void addToNestedArray() { UpdateOperation oper = UpdateOperator.ADD_TO_SET.resolveOperation(objectFromJson("{ \"subdoc.array\" : 32 }")); ObjectNode doc = objectFromJson("{ \"subdoc\" : { \"array\" : [ true ] } }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -68,7 +68,7 @@ public void tryAddExistingValueNested() { ObjectNode doc = objectFromJson("{ \"subdoc\" : { \"array\" : [ \"a\", \"b\", \"c\" ] } }"); ObjectNode expected = doc.deepCopy(); // Already had "b", no change - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(expected); } @@ -78,7 +78,7 @@ public void addToNewArrayRoot() { UpdateOperator.ADD_TO_SET.resolveOperation( objectFromJson("{ \"newArray\" : \"value\" }")); ObjectNode doc = objectFromJson("{ \"a\": 1, \"array\" : [ true ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -93,7 +93,7 @@ public void addToNewArrayNested() { UpdateOperator.ADD_TO_SET.resolveOperation( objectFromJson("{ \"subdoc.newArray\" : \"value\" }")); ObjectNode doc = objectFromJson("{ \"array\" : [ true ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -121,7 +121,7 @@ public void addSubDocIfOrderDifferent() { } """); // Should add, change - assertThat(oper.updateDocument(doc)).isTrue(); + 
assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -149,7 +149,7 @@ public void dontAddSubDocIfSameIncludingOrdering() { } """); // No add, no change - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); ObjectNode expected = objectFromJson( """ @@ -248,7 +248,7 @@ public void withEachToExistingRoot() { { "array" : { "$each" : [ 17, false ] } } """)); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"array\" : [ true ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -266,7 +266,7 @@ public void withEachToExistingNested() { { "nested.array" : { "$each" : [ 17, false ] } } """)); ObjectNode doc = objectFromJson("{ \"nested\": { \"array\" : [ true ] } }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -284,7 +284,7 @@ public void withEachToNonExistingRoot() { { "newArray" : { "$each" : [ -50, "abc" ] } } """)); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"array\" : [ true ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -302,7 +302,7 @@ public void withEachToNonExistingNested() { { "nested.newArray" : { "$each" : [ -50, "abc" ] } } """)); ObjectNode doc = objectFromJson("{ \"nested\": { \"array\" : [ true ] } }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -321,7 +321,7 @@ public void withEachNestedArray() { """ { "array" : { "$each" : [ [ 1, 2], [ 3 ] ] } } """)); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -339,7 
+339,7 @@ public void withEachNestedArrayNonExisting() { { "array" : { "$each" : [ [ 1, 2], [ 3 ] ] } } """)); ObjectNode doc = objectFromJson("{ \"x\" : 1 }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/CurrentDateOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/CurrentDateOperationTest.java index e738cc642c..457339618b 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/CurrentDateOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/CurrentDateOperationTest.java @@ -27,7 +27,7 @@ public void simpleRoot() { UpdateOperator.CURRENT_DATE.resolveOperation( objectFromJson("{ \"createdAt\": true, \"updatedAt\": true}")); final long startTime = System.currentTimeMillis(); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc).hasSize(2); verifyApproximateDate(startTime, doc.path("createdAt")); @@ -42,7 +42,7 @@ public void simpleNested() { UpdateOperator.CURRENT_DATE.resolveOperation( objectFromJson("{ \"item1.a\": true, \"item2.a\":true}")); final long startTime = System.currentTimeMillis(); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc).hasSize(2); verifyApproximateDate(startTime, doc.at("/item1/a")); @@ -55,7 +55,7 @@ public void currentDateInArray() { UpdateOperation oper = UpdateOperator.CURRENT_DATE.resolveOperation(objectFromJson("{\"a.1\":true }")); final long startTime = System.currentTimeMillis(); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc).hasSize(1); 
verifyApproximateDate(startTime, doc.at("/a/1")); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/IncOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/IncOperationTest.java index 50a641a137..e6b7fc845b 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/IncOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/IncOperationTest.java @@ -40,7 +40,7 @@ public void testSimpleIncOfExisting() { """ { "integer" : 1, "fp" : 0.25, "text" : "value" } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -64,7 +64,7 @@ public void testSimpleIncOfNonExisting() { """ { "integer" : 1, "fp" : 0.25, "text" : "value" } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -88,7 +88,7 @@ public void testSimpleIncWithNoChange() { """ { "integer" : 1, "fp" : 0.25, "text" : "value" } """); - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); ObjectNode expected = objectFromJson( """ @@ -121,7 +121,7 @@ public void testIncOfExisting() { "fpArray" : [ 0, 0.25 ] } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -150,7 +150,7 @@ public void testIncOfNonExisting() { "text" : "value" }" """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -187,7 +187,7 @@ public void testIncWithNoChange() { }" """); ObjectNode expected = doc.deepCopy(); - assertThat(oper.updateDocument(doc)).isFalse(); + 
assertThat(oper.updateDocument(doc).modified()).isFalse(); // NOTE: need to use "toPrettyString()" since NumberNode types may differ assertThat(doc.toPrettyString()).isEqualTo(expected.toPrettyString()); } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/MinMaxOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/MinMaxOperationTest.java index bd580b41df..3e9e52fb1e 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/MinMaxOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/MinMaxOperationTest.java @@ -22,7 +22,7 @@ public void testSimpleMinRoot() { // 3 updates: 2 for existing property, one for not UpdateOperation oper = UpdateOperator.MIN.resolveOperation(objectFromJson("{ \"x\": -1, \"y\":99, \"z\":0}")); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{ \"x\": -1, \"y\":2, \"z\":0}"); assertThat(doc).isEqualTo(expected); } @@ -35,7 +35,7 @@ public void testSimpleMinNested() { UpdateOperator.MIN.resolveOperation( objectFromJson( "{ \"subdoc.x\": \"afx\", \"subdoc.y\":\"\", \"subdoc.z\":\"value\"}")); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{\"subdoc\":{\"x\": \"abc\", \"y\":\"\", \"z\":\"value\"}}"); assertThat(doc).isEqualTo(expected); @@ -47,7 +47,7 @@ public void testMinNoChanges() { ObjectNode doc = orig.deepCopy(); UpdateOperation oper = UpdateOperator.MIN.resolveOperation(objectFromJson("{\"a\":2, \"b\":true }")); - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(orig); } @@ -56,7 +56,7 @@ public void testMinMixedTypes() { ObjectNode doc = objectFromJson("{ \"a\":1, 
\"b\":true}"); UpdateOperation oper = UpdateOperator.MIN.resolveOperation(objectFromJson("{\"a\":\"value\", \"b\":123 }")); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{ \"a\":1, \"b\":123}"); assertThat(doc).isEqualTo(expected); } @@ -66,7 +66,7 @@ public void testMinWithArray() { ObjectNode doc = objectFromJson("{ \"a\":[1, true]}"); UpdateOperation oper = UpdateOperator.MIN.resolveOperation(objectFromJson("{\"a\":[1, false] }")); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{\"a\":[1, false] }"); assertThat(doc).isEqualTo(expected); } @@ -80,7 +80,7 @@ public void testSimpleMaxRoot() { // 3 updates: 2 for existing property, one for not UpdateOperation oper = UpdateOperator.MAX.resolveOperation(objectFromJson("{ \"x\": -1, \"y\":99, \"z\":0}")); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{ \"x\": 1, \"y\":99, \"z\":0}"); assertThat(doc).isEqualTo(expected); } @@ -93,7 +93,7 @@ public void testSimpleMaxNested() { UpdateOperator.MAX.resolveOperation( objectFromJson( "{ \"subdoc.x\": \"afx\", \"subdoc.y\":\"\", \"subdoc.z\":\"value\"}")); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{\"subdoc\":{\"x\": \"afx\", \"y\":\"def\", \"z\":\"value\"}}"); assertThat(doc).isEqualTo(expected); @@ -105,7 +105,7 @@ public void testMaxNoChanges() { ObjectNode doc = orig.deepCopy(); UpdateOperation oper = UpdateOperator.MAX.resolveOperation(objectFromJson("{\"a\":0, \"b\":true }")); - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(orig); } @@ -114,7 +114,7 @@ public void testMaxMixedTypes() 
{ ObjectNode doc = objectFromJson("{ \"a\":1, \"b\":true}"); UpdateOperation oper = UpdateOperator.MAX.resolveOperation(objectFromJson("{\"a\":\"value\", \"b\":123 }")); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{ \"a\":\"value\", \"b\":true}"); assertThat(doc).isEqualTo(expected); } @@ -124,7 +124,7 @@ public void testMaxWithArray() { ObjectNode doc = objectFromJson("{ \"arr\":[1, 2]}"); UpdateOperation oper = UpdateOperator.MAX.resolveOperation(objectFromJson("{\"arr\":[1, 2, 3] }")); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{\"arr\":[1, 2, 3] }"); assertThat(doc).isEqualTo(expected); } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/MulOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/MulOperationTest.java index 3b221cb162..7d68566e25 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/MulOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/MulOperationTest.java @@ -37,7 +37,7 @@ public void testSimpleMulOfExisting() { """ { "integer" : 2, "fp" : 0.5, "text" : "value" } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -60,7 +60,7 @@ public void testSimpleMulOfNonExisting() { """ { "integer" : 1, "fp" : 0.25, "text" : "value" } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -85,7 +85,7 @@ public void testSimpleMulWithNoChange() { { "integer" : 5, "fp" : 0.25, "text" : "value" } """); ObjectNode expected = doc.deepCopy(); - 
assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); // NOTE: need to use "toPrettyString()" since NumberNode types may differ assertThat(asPrettyJson(doc)).isEqualTo(asPrettyJson(expected)); } @@ -111,7 +111,7 @@ public void testMulOfExisting() { "fpArray" : [ 0, 0.25 ] } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -138,7 +138,7 @@ public void testMulOfNonExisting() { "text" : "value" }" """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -173,7 +173,7 @@ public void testMulWithNoChange() { }" """); ObjectNode expected = doc.deepCopy(); - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc.toPrettyString()).isEqualTo(expected.toPrettyString()); } } @@ -221,7 +221,7 @@ public void testMulOnRootStringProperty() { Exception e = catchException( () -> { - oper.updateDocument(doc); + oper.updateDocument(doc).modified(); }); assertThat(e) .isInstanceOf(JsonApiException.class) @@ -251,7 +251,7 @@ public void testMulOnExplicitNullProperty() { Exception e = catchException( () -> { - oper.updateDocument(doc); + oper.updateDocument(doc).modified(); }); assertThat(e) .isInstanceOf(JsonApiException.class) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/PopOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/PopOperationTest.java index 584c4cbe8c..4f8c28f1d1 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/PopOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/PopOperationTest.java @@ -29,7 +29,7 @@ public void 
testSimplePopFirstFromExisting() { UpdateOperator.POP.resolveOperation(objectFromJson("{ \"array\" : -1 }")); assertThat(oper).isInstanceOf(PopOperation.class); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"array\" : [ 1, 2, 3 ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -44,7 +44,7 @@ public void testSimplePopLastFromExisting() { UpdateOperator.POP.resolveOperation(objectFromJson("{ \"array\" : 1 }")); assertThat(oper).isInstanceOf(PopOperation.class); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"array\" : [ 1, 2, 3 ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -60,7 +60,7 @@ public void testSimplePopFirstFromEmpty() { assertThat(oper).isInstanceOf(PopOperation.class); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"array\" : [ ] }"); ObjectNode expected = doc.deepCopy(); - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(expected); } @@ -71,7 +71,7 @@ public void testSimplePopLastFromEmpty() { assertThat(oper).isInstanceOf(PopOperation.class); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"array\" : [ ] }"); ObjectNode expected = doc.deepCopy(); - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(expected); } @@ -83,7 +83,7 @@ public void testSimplePopFirstFromNonExisting() { ObjectNode doc = objectFromJson("{ \"a\" : 1}"); ObjectNode expected = doc.deepCopy(); // No changes - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(expected); } @@ -95,7 +95,7 @@ public void testSimplePopLastFromNonExisting() { ObjectNode doc = objectFromJson("{ \"a\" : 1}"); ObjectNode expected = 
doc.deepCopy(); // No changes - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(expected); } } @@ -109,7 +109,7 @@ public void testNestedPopFromExisting() { UpdateOperator.POP.resolveOperation(objectFromJson("{ \"subdoc.array\" : -1 }")); assertThat(oper).isInstanceOf(PopOperation.class); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"subdoc\" : { \"array\" : [ 1, 2, 3 ] } }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -125,7 +125,7 @@ public void testNestedPopFromEmpty() { assertThat(oper).isInstanceOf(PopOperation.class); ObjectNode doc = objectFromJson("{ \"subdoc\" : { \"array\" : [ ] } }"); ObjectNode expected = doc.deepCopy(); - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(expected); } @@ -137,14 +137,14 @@ public void testNestedPopFromNonExisting() { ObjectNode doc = objectFromJson("{ \"a\" : 1, \"doc\" : { } }"); ObjectNode expected = doc.deepCopy(); // No changes - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(expected); // But let's verify longer nesting too oper = UpdateOperator.POP.resolveOperation(objectFromJson("{ \"a.b.c.d\" : -1 }")); doc = objectFromJson("{ }"); // No changes here either - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(objectFromJson("{ }")); } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/PushOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/PushOperationTest.java index 27da087ffb..c48df9bc03 100644 --- 
a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/PushOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/PushOperationTest.java @@ -29,7 +29,7 @@ public void testPushToExistingRoot() { UpdateOperator.PUSH.resolveOperation(objectFromJson("{ \"array\" : 32 }")); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"array\" : [ true ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -44,7 +44,7 @@ public void testPushToExistingNested() { UpdateOperator.PUSH.resolveOperation(objectFromJson("{ \"subdoc.array\" : 32 }")); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"subdoc\" : { \"array\" : [ true ] } }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -59,7 +59,7 @@ public void testPushToNonExistingRoot() { UpdateOperator.PUSH.resolveOperation(objectFromJson("{ \"newArray\" : \"value\" }")); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"a\": 1, \"array\" : [ true ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -75,7 +75,7 @@ public void testPushToNonExistingNested() { objectFromJson("{ \"subdoc.newArray\" : \"value\" }")); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"array\" : [ true ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -93,7 +93,7 @@ public void testPushToNonExistingOrdered() { objectFromJson("{ \"subdoc.newArray\" : \"value\", \"array\": 3 }")); 
assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -120,7 +120,7 @@ public void testPushOnNonArrayProperty() { Exception e = catchException( () -> { - oper.updateDocument(doc); + oper.updateDocument(doc).modified(); }); assertThat(e) .isInstanceOf(JsonApiException.class) @@ -198,7 +198,7 @@ public void withEachToExistingRoot() { """)); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"array\" : [ true ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -217,7 +217,7 @@ public void withEachToExistingNested() { """)); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"nested\": { \"array\" : [ true ] } }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -236,7 +236,7 @@ public void withEachToNonExistingRoot() { """)); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"a\" : 1, \"array\" : [ true ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -255,7 +255,7 @@ public void withEachToNonExistingNested() { """)); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"nested\": { \"array\" : [ true ] } }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -274,7 +274,7 @@ public void withEachNestedArray() { """)); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = 
objectFromJson("{ \"a\" : 1, \"array\" : [ null ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -293,7 +293,7 @@ public void withEachNestedArrayNonExisting() { """)); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"x\" : 1 }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -359,7 +359,7 @@ public void withEachToExistingPositiveRoot() { objectFromJson("{ \"array\": { \"$each\" : [true, false], \"$position\" : 1 } }")); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"array\": [ 1, 2, 3, 4 ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{ \"array\": [ 1, true, false, 2, 3, 4 ] }"); assertThat(doc).isEqualTo(expected); @@ -369,7 +369,7 @@ public void withEachToExistingPositiveRoot() { objectFromJson("{ \"array\": { \"$each\" : [true, false], \"$position\" : 999 } }")); assertThat(oper).isInstanceOf(PushOperation.class); doc = objectFromJson("{ \"array\": [ 1, 2, 3, 4 ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); expected = objectFromJson("{ \"array\": [ 1, 2, 3, 4, true, false ] }"); assertThat(doc).isEqualTo(expected); } @@ -382,7 +382,7 @@ public void withEachToExistingPositiveNested() { "{ \"nested.array\": { \"$each\" : [true, false], \"$position\" : 1 } }")); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"nested\": { \"array\": [ 1, 2, 3, 4 ] } }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{ \"nested\" : { \"array\": [ 1, true, false, 2, 3, 4 ] } 
}"); assertThat(doc).isEqualTo(expected); @@ -396,7 +396,7 @@ public void withEachToExistingNegative() { objectFromJson("{ \"array\": { \"$each\" : [true, false], \"$position\" : -1 } }")); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"array\": [ 1, 2, 3, 4 ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{ \"array\": [ 1, 2, 3, true, false, 4 ] }"); assertThat(doc).isEqualTo(expected); @@ -406,7 +406,7 @@ public void withEachToExistingNegative() { objectFromJson("{ \"array\": { \"$each\" : [true, false], \"$position\" : -999 } }")); assertThat(oper).isInstanceOf(PushOperation.class); doc = objectFromJson("{ \"array\": [ 1, 2, 3, 4 ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); expected = objectFromJson("{ \"array\": [ true, false, 1, 2, 3, 4 ] }"); assertThat(doc).isEqualTo(expected); } @@ -418,7 +418,7 @@ public void withEachToNonExistingRoot() { objectFromJson("{ \"newArray\": { \"$each\" : [true, false], \"$position\": 1 } }")); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ \"array\": [ 1, 2, 3 ] }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{ \"array\": [ 1, 2, 3 ], \"newArray\": [true, false] }"); assertThat(doc).isEqualTo(expected); @@ -432,7 +432,7 @@ public void withEachToNonExistingNested() { "{ \"nested.array\": { \"$each\" : [true, false], \"$position\": 1 } }")); assertThat(oper).isInstanceOf(PushOperation.class); ObjectNode doc = objectFromJson("{ }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{ \"nested\": { \"array\": [true, false] } }"); assertThat(doc).isEqualTo(expected); } diff --git 
a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/RenameOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/RenameOperationTest.java index 330fdd448c..c367ff9dc7 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/RenameOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/RenameOperationTest.java @@ -27,7 +27,7 @@ public void testSimpleRenameOfExisting() { assertThat(oper).isInstanceOf(RenameOperation.class); ObjectNode doc = objectFromJson("{ \"a\": 1 }"); // Should indicate document being modified - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc).isEqualTo(fromJson("{\"b\": 1}")); } @@ -37,7 +37,7 @@ public void testSimpleRenameOfNonExisting() { UpdateOperator.RENAME.resolveOperation(objectFromJson("{\"a\":\"b\"}")); ObjectNode doc = objectFromJson("{ \"b\": 3 }"); // Nothing to rename, no change - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(fromJson("{\"b\": 3}")); } } @@ -57,7 +57,7 @@ public void testRenameOfExistingNested() { } } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); // Will leave empty Object after removing the only property: assertThat(doc) .isEqualTo( @@ -88,7 +88,7 @@ public void testRenameOfMissingNested() { """); ObjectNode origDoc = doc.deepCopy(); // No source property, no change - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(origDoc); } } @@ -101,7 +101,7 @@ public void invalidRenameSourceArray() { UpdateOperation oper = UpdateOperator.RENAME.resolveOperation(objectFromJson("{\"array.0\":\"x\"}")); ObjectNode doc = 
objectFromJson("{\"array\" : [1, 2]}"); - Exception e = catchException(() -> oper.updateDocument(doc)); + Exception e = catchException(() -> oper.updateDocument(doc).modified()); assertThat(e) .isInstanceOf(JsonApiException.class) @@ -116,7 +116,7 @@ public void invalidRenameDestinationArray() { UpdateOperation oper = UpdateOperator.RENAME.resolveOperation(objectFromJson("{\"x\":\"array.0\"}")); ObjectNode doc = objectFromJson("{\"x\":3, \"array\" : [1]}"); - Exception e = catchException(() -> oper.updateDocument(doc)); + Exception e = catchException(() -> oper.updateDocument(doc).modified()); assertThat(e) .isInstanceOf(JsonApiException.class) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/SetOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/SetOperationTest.java index 5d2aef4581..546504cd08 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/SetOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/SetOperationTest.java @@ -36,7 +36,7 @@ public void testSimpleSetOfExisting() { """ { "a" : 1, "c" : true, "b" : 1234 } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc) .isEqualTo( fromJson( @@ -60,7 +60,7 @@ public void testSimpleSetOfVector() { """ { "a" : 1, "c" : true, "$vector" : [0.44, 0.44, 0.66] } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc) .isEqualTo( fromJson( @@ -84,7 +84,7 @@ public void testSimpleSetOfNonExisting() { { "a" : 1, "c" : true, "b" : 1234 } """); // Will append the new property so there is modification - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson( """ @@ -105,7 +105,7 @@ 
public void testOrderedSetOfNonExisting() { assertThat(oper).isInstanceOf(SetOperation.class); // Will append the new property so there is modification ObjectNode doc = objectFromJson("{ }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode expected = objectFromJson("{ \"a\": 1, \"b\": 2 }"); // Important! Compare serialization as that preserves ordering: not ObjectNode @@ -129,7 +129,7 @@ public void testSimpleSetWithoutChange() { """); ObjectNode doc = orig.deepCopy(); // No change, c was already set as true - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); // And document should not be changed assertThat(doc).isEqualTo(orig); } @@ -159,7 +159,7 @@ public void testSetOfExistingNested() { } """); // Should indicate document being modified - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc) .isEqualTo( fromJson( @@ -183,7 +183,7 @@ public void testSetOfMissingNested() { } """)); ObjectNode doc = objectFromJson("{ }"); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc) .isEqualTo( fromJson( @@ -203,7 +203,7 @@ public void testNoChangeWithNested() { ObjectNode exp = doc.deepCopy(); // No change reported, none observed - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(exp); } } @@ -232,7 +232,7 @@ public void testSetOfNestedArrays() { } """); // Should indicate document being modified - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc) .isEqualTo( fromJson( @@ -262,7 +262,7 @@ public void testNoChangeOfNestedArrays() { ObjectNode exp = doc.deepCopy(); // Should indicate NO change; as well as, well, not change :) - 
assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); assertThat(doc).isEqualTo(exp); } @@ -308,7 +308,7 @@ public void testMixedNested() { } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc).isEqualTo(exp); } } @@ -333,7 +333,7 @@ public void testNoChangeForIdenticalObject() { { "people" : { "name":"Bob", "age":42 } } """)); // No actual change - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); // Compare Strings to verify ordering is identical -- ObjectNode.equals() is // order-INsensitive: assertThat(doc.toPrettyString()).isEqualTo(expected.toPrettyString()); @@ -363,7 +363,7 @@ public void testChangeForObjectWithDifferentFieldOrder() { """); // Actual change due to reordering of fields - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); // Compare Strings to verify ordering is identical -- ObjectNode.equals() is // order-INsensitive: assertThat(doc.toPrettyString()).isEqualTo(expected.toPrettyString()); @@ -407,7 +407,7 @@ public void testReplaceDocIdWithSetOnInsert() { """ { "_id": 0, "a": 1 } """); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); assertThat(doc) .isEqualTo( fromJson( diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/UnsetOperationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/UnsetOperationTest.java index 30d4eb1f4d..17b7a4a688 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/UnsetOperationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/model/command/clause/update/UnsetOperationTest.java @@ -35,7 +35,7 @@ public void testSimpleUnsetOfExisting() { 
assertThat(oper).isInstanceOf(UnsetOperation.class); // Should indicate document being modified ObjectNode doc = defaultTestDocABC(); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); // and be left with just one property assertThat(doc) .isEqualTo( @@ -57,7 +57,7 @@ public void testSimpleUnsetOfExistingVector() { assertThat(oper).isInstanceOf(UnsetOperation.class); // Should indicate document being modified ObjectNode doc = defaultTestDocABCVector(); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); // and be left with just one property assertThat(doc).isEqualTo(defaultTestDocABC()); } @@ -74,7 +74,7 @@ public void testSimpleUnsetOfNonExisting() { assertThat(oper).isInstanceOf(UnsetOperation.class); ObjectNode doc = defaultTestDocABC(); // No modifications - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); // and be left with same as original (but get a new copy just to make sure) assertThat(doc).isEqualTo(defaultTestDocABC()); } @@ -115,7 +115,7 @@ public void testNestedPropertiesExist() { "array.x" : 1 } """)); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); ObjectNode exp = objectFromJson( @@ -149,7 +149,7 @@ public void testNestedPropertiesNotExist() { UpdateOperation oper = UpdateOperator.UNSET.resolveOperation(objectFromJson("{\"subdoc.b\": 1, \"x.y\": 1 }")); - assertThat(oper.updateDocument(doc)).isFalse(); + assertThat(oper.updateDocument(doc).modified()).isFalse(); // and no modifications expected assertThat(doc).isEqualTo(orig); } @@ -183,7 +183,7 @@ public void testNestedArrays() { "array2" : 1 } """)); - assertThat(oper.updateDocument(doc)).isTrue(); + assertThat(oper.updateDocument(doc).modified()).isTrue(); // Note: in Array values, placeholder nulls must be added (to retain index positions); // but replacing WHOLE 
array is fine (no null left) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java index 98f963f6bd..0308d1c6ef 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/CommandResolverWithVectorizerTest.java @@ -18,7 +18,6 @@ import io.stargate.sgv2.jsonapi.api.model.command.impl.InsertOneCommand; import io.stargate.sgv2.jsonapi.api.model.command.impl.UpdateOneCommand; import io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; -import io.stargate.sgv2.jsonapi.api.request.EmbeddingCredentials; import io.stargate.sgv2.jsonapi.config.OperationsConfig; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; @@ -39,7 +38,6 @@ import io.stargate.sgv2.jsonapi.service.updater.DocumentUpdater; import io.stargate.sgv2.jsonapi.testresource.NoGlobalResourcesTestProfile; import jakarta.inject.Inject; -import java.util.Optional; import org.apache.commons.lang3.RandomStringUtils; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java index 1518490994..e062e34524 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/resolver/model/impl/UpdateManyCommandResolverTest.java @@ -11,7 +11,6 @@ import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; import io.stargate.sgv2.jsonapi.api.model.command.impl.UpdateManyCommand; import 
io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; -import io.stargate.sgv2.jsonapi.api.request.EmbeddingCredentials; import io.stargate.sgv2.jsonapi.config.OperationsConfig; import io.stargate.sgv2.jsonapi.service.embedding.operation.TestEmbeddingProvider; import io.stargate.sgv2.jsonapi.service.operation.model.Operation; @@ -25,7 +24,6 @@ import io.stargate.sgv2.jsonapi.service.updater.DocumentUpdater; import io.stargate.sgv2.jsonapi.testresource.NoGlobalResourcesTestProfile; import jakarta.inject.Inject; -import java.util.Optional; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java index 3b5e5dfe52..fd679ce9bb 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java @@ -3,6 +3,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.catchThrowable; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -535,28 +536,27 @@ class VectorizeUpdateTest { TestEmbeddingProvider.commandContextWithVectorize.collectionSettings(); @Test - public void two_levels_update() throws Exception { - // First level update will skip $vectorize for setOperation - // vectorization will be done in second level + public void updateVectorize() throws Exception { String updateVectorizeData = """ - {"$vectorize" : "Beijing is a big city", "location" : "Beijing City"} - """; + {"$vectorize" : "Beijing is a big city", "location" : "Beijing City"} + """; DocumentUpdater documentUpdater = DocumentUpdater.construct( DocumentUpdaterUtils.updateClause( 
UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); - String expected_level_1 = + String expected1 = """ - { - "_id": "1", - "location": "Beijing City" - } - """; + { + "_id": "1", + "location": "Beijing City", + "$vectorize": "Beijing is a big city" + } + """; JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON); // location as London - JsonNode expectedData1 = objectMapper.readTree(expected_level_1); + JsonNode expectedData1 = objectMapper.readTree(expected1); DocumentUpdater.DocumentUpdaterResponse firstResponse = documentUpdater.apply(baseData, false); assertThat(firstResponse) @@ -565,20 +565,23 @@ public void two_levels_update() throws Exception { firstResponseNode -> { assertThat(firstResponseNode.document()).isEqualTo(expectedData1); assertThat(firstResponseNode.modified()).isEqualTo(true); // modified location + assertThat(firstResponseNode.embeddingUpdateOperation()).isNotNull(); + assertThat(firstResponseNode.embeddingUpdateOperation().vectorizeContent()) + .isEqualTo("Beijing is a big city"); }); - // Second level update will vectorize in setOperation + // Second update will vectorize DataVectorizer dataVectorizer = new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = documentUpdater - .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) + .updateEmbeddingVector(firstResponse, dataVectorizer) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitItem() .getItem(); - String expected_level_2 = + String expected2 = """ { "_id":"1", @@ -587,7 +590,7 @@ public void two_levels_update() throws Exception { "$vector": [0.25,0.25,0.25] } """; - JsonNode expectedData2 = objectMapper.readTree(expected_level_2); + JsonNode expectedData2 = objectMapper.readTree(expected2); assertThat(secondResponse) .isNotNull() .satisfies( @@ -596,34 +599,99 @@ public void two_levels_update() throws Exception { 
.usingRecursiveComparison() .ignoringFields("order") .isEqualTo(expectedData2); - assertThat(secondResponseNode.modified()) - .isEqualTo(true); // modified $vectorize and $vector + assertThat(secondResponseNode.modified()).isEqualTo(true); // modified $vector }); } @Test - public void not_modified_for_first_update() throws Exception { - // First level update will skip $vectorize for setOperation - // vectorization will be done in second level + public void update_noVectorize() throws Exception { String updateVectorizeData = """ - {"$vectorize" : "Beijing is a big city"} - """; + {"location" : "Beijing City"} + """; DocumentUpdater documentUpdater = DocumentUpdater.construct( DocumentUpdaterUtils.updateClause( UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); - String expected_level_1 = + String expected1 = """ - { - "_id": "1", - "location": "London" - } - """; + { + "_id": "1", + "location": "Beijing City" + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON); // location as London + JsonNode expectedData1 = objectMapper.readTree(expected1); + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + assertThat(firstResponse) + .isNotNull() + .satisfies( + firstResponseNode -> { + assertThat(firstResponseNode.document()).isEqualTo(expectedData1); + assertThat(firstResponseNode.modified()).isEqualTo(true); // modified location + assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); // should be null + }); + } + + @Test + public void update_notModified() throws Exception { + String updateVectorizeData = + """ + {"location" : "London"} + """; + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); + + String expected1 = + """ + { + "_id": "1", + "location": "London" + } + """; JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON); // 
location as London - JsonNode expectedData1 = objectMapper.readTree(expected_level_1); + JsonNode expectedData1 = objectMapper.readTree(expected1); + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + assertThat(firstResponse) + .isNotNull() + .satisfies( + firstResponseNode -> { + assertThat(firstResponseNode.document()).isEqualTo(expectedData1); + assertThat(firstResponseNode.modified()).isEqualTo(false); // not modified + assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); // should be null + }); + } + + @Test + public void update_notModifiedVectorize() throws Exception { + String updateVectorizeData = + """ + {"location" : "London", "$vectorize": "London City"} + """; + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); + + String expected1 = + """ + { + "_id": "1", + "location": "London", + "$vector": [0.11, 0.22, 0.33], + "$vectorize": "London City" + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); // location as London + JsonNode expectedData1 = objectMapper.readTree(expected1); DocumentUpdater.DocumentUpdaterResponse firstResponse = documentUpdater.apply(baseData, false); assertThat(firstResponse) @@ -632,30 +700,100 @@ public void not_modified_for_first_update() throws Exception { firstResponseNode -> { assertThat(firstResponseNode.document()).isEqualTo(expectedData1); assertThat(firstResponseNode.modified()) - .isEqualTo(false); // location is not modified + .isEqualTo(false); // $vectorize has no diff, not modified + assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); // should be null }); + } + + @Test + public void update_modifiedVector() throws Exception { + String updateVectorizeData = + """ + {"location" : "London", "$vector": [0.1,0.5,0.3]} + """; + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + 
DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); - // Second level update will vectorize in setOperation + String expected1 = + """ + { + "_id": "1", + "location": "London", + "$vector": [0.1,0.5,0.3] + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); // location as London + JsonNode expectedData1 = objectMapper.readTree(expected1); + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + assertThat(firstResponse) + .isNotNull() + .satisfies( + firstResponseNode -> { + assertThat(firstResponseNode.document()).isEqualTo(expectedData1); + assertThat(firstResponseNode.modified()) + .isEqualTo(true); // $vector is updated, $vectorize is not + assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); // should be null + }); + } + + @Test + public void update_vectorizeOverwriteVector() throws Exception { + String updateVectorizeData = + """ + {"location" : "London", "$vector": [0.1,0.9,0.6], "$vectorize":"London is rainy"} + """; + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); + + String expected1 = + """ + { + "_id": "1", + "location": "London", + "$vector": [0.1,0.9,0.6], + "$vectorize": "London is rainy" + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); // location as London + JsonNode expectedData1 = objectMapper.readTree(expected1); + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + assertThat(firstResponse) + .isNotNull() + .satisfies( + firstResponseNode -> { + assertThat(firstResponseNode.document()).isEqualTo(expectedData1); + assertThat(firstResponseNode.modified()) + .isEqualTo(true); // $vector is updated but not overwrite, $vectorize is updated + 
assertThat(firstResponseNode.embeddingUpdateOperation()).isNotNull(); // not null + }); + // Second update will vectorize and overwrite $vector DataVectorizer dataVectorizer = new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = documentUpdater - .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) + .updateEmbeddingVector(firstResponse, dataVectorizer) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitItem() .getItem(); - - String expected_level_2 = + String expected2 = """ - { - "_id":"1", - "location": "London", - "$vectorize" : "Beijing is a big city", - "$vector": [0.25,0.25,0.25] - } - """; - JsonNode expectedData2 = objectMapper.readTree(expected_level_2); + { + "_id": "1", + "location": "London", + "$vector": [0.25,0.25,0.25], + "$vectorize": "London is rainy1" + } + """; + JsonNode expectedData2 = objectMapper.readTree(expected2); assertThat(secondResponse) .isNotNull() .satisfies( @@ -664,66 +802,170 @@ public void not_modified_for_first_update() throws Exception { .usingRecursiveComparison() .ignoringFields("order") .isEqualTo(expectedData2); - assertThat(secondResponseNode.modified()) - .isEqualTo(true); // modified $vectorize and $vector + assertThat(secondResponseNode.modified()).isEqualTo(true); }); } @Test - public void update_vector_at_first_level() throws Exception { + public void update_vectorizeBlank() throws JsonProcessingException { + String updateVectorizeData = """ - {"$vectorize" : "Beijing is a big city", "$vector" : [0.2,0.4,0.5]} - """; + {"location" : "London", "$vectorize":""} + """; DocumentUpdater documentUpdater = DocumentUpdater.construct( DocumentUpdaterUtils.updateClause( UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); - String expected_level_1 = + + String expected1 = + """ + { + "_id": "1", + "location": "London", + "$vector": null, + "$vectorize": "" + } + """; + + 
JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); // location as London + JsonNode expectedData1 = objectMapper.readTree(expected1); + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + assertThat(firstResponse) + .isNotNull() + .satisfies( + firstResponseNode -> { + assertThat(firstResponseNode.document()).isEqualTo(expectedData1); + assertThat(firstResponseNode.modified()) + .isEqualTo(true); // $vector is updated , $vectorize is updated + assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); + }); + } + + @Test + public void update_vectorizeNullValue() throws JsonProcessingException { + + String updateVectorizeData = + """ + {"location" : "London", "$vectorize":null} + """; + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, (ObjectNode) objectMapper.readTree(updateVectorizeData))); + + String expected1 = + """ + { + "_id": "1", + "location": "London", + "$vector": null, + "$vectorize": null + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); // location as London + JsonNode expectedData1 = objectMapper.readTree(expected1); + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + assertThat(firstResponse) + .isNotNull() + .satisfies( + firstResponseNode -> { + assertThat(firstResponseNode.document()).isEqualTo(expectedData1); + assertThat(firstResponseNode.modified()) + .isEqualTo(true); // $vector is updated , $vectorize is updated + assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); + }); + } + + @Test + public void update_vectorizeNonTextualFailure() throws JsonProcessingException { + + String updateVectorizeData = + """ + {"location" : "London", "$vectorize":123} + """; + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + DocumentUpdaterUtils.updateClause( + UpdateOperator.SET, (ObjectNode) 
objectMapper.readTree(updateVectorizeData))); + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); // location as London + + Throwable failure = + catchThrowable( + () -> { + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + }); + assertThat(failure) + .isInstanceOf(JsonApiException.class) + .hasFieldOrPropertyWithValue("errorCode", ErrorCode.INVALID_VECTORIZE_VALUE_TYPE) + .hasFieldOrPropertyWithValue("message", "$vectorize value needs to be text value"); + } + } + + @Nested + class replaceVectorizeTest { + + private final EmbeddingProvider testService = new TestEmbeddingProvider(); + private final CollectionSettings collectionSettings = + TestEmbeddingProvider.commandContextWithVectorize.collectionSettings(); + + @Test + public void replaceDocument() throws Exception { + String expected1 = """ { "_id": "1", - "location": "London", - "$vector": [0.2,0.4,0.5] + "$vectorize" : "random text" } """; - JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON); // location as London - JsonNode expectedData1 = objectMapper.readTree(expected_level_1); - DocumentUpdater.DocumentUpdaterResponse firstResponse = + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON); + JsonNode expectedData = objectMapper.readTree(expected1); + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + (ObjectNode) + objectMapper.readTree( + """ + { + "$vectorize" : "random text" + } + """)); + DocumentUpdater.DocumentUpdaterResponse updatedDocument = documentUpdater.apply(baseData, false); - assertThat(firstResponse) + assertThat(updatedDocument) .isNotNull() .satisfies( - firstResponseNode -> { - assertThat(firstResponseNode.document()) - .usingRecursiveComparison() - .ignoringFields("order") - .isEqualTo(expectedData1); - assertThat(firstResponseNode.modified()).isEqualTo(true); // vector is modified + node -> { + assertThat(node.document()).isEqualTo(expectedData); + 
assertThat(node.embeddingUpdateOperation()).isNotNull(); + + assertThat(node.modified()).isEqualTo(true); }); - // Second level update will vectorize in setOperation + // Second update will vectorize DataVectorizer dataVectorizer = new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = documentUpdater - .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) + .updateEmbeddingVector(updatedDocument, dataVectorizer) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitItem() .getItem(); - String expected_level_2 = + String expected2 = """ - { - "_id":"1", - "location": "London", - "$vectorize" : "Beijing is a big city", - "$vector": [0.25,0.25,0.25] - } - """; - JsonNode expectedData2 = objectMapper.readTree(expected_level_2); + { + "_id": "1", + "$vectorize" : "random text", + "$vector": [0.25,0.25,0.25] + } + """; + JsonNode expectedData2 = objectMapper.readTree(expected2); assertThat(secondResponse) .isNotNull() .satisfies( @@ -732,64 +974,246 @@ public void update_vector_at_first_level() throws Exception { .usingRecursiveComparison() .ignoringFields("order") .isEqualTo(expectedData2); - assertThat(secondResponseNode.modified()) - .isEqualTo(true); // modified $vectorize and $vector + assertThat(secondResponseNode.modified()).isEqualTo(true); + // modified $vector }); } @Test - public void two_levels_update_unset() throws Exception { - String updateVectorizeData = + public void replaceDocument_only_replace_vector() throws Exception { + String expected1 = """ - {"$vectorize" : "Beijing is a big city", "location" : "London"} - """; + { + "_id": "1", + "$vector": [0.2,0.5,0.7] + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); + JsonNode expectedData = objectMapper.readTree(expected1); DocumentUpdater documentUpdater = DocumentUpdater.construct( - DocumentUpdaterUtils.updateClause( - UpdateOperator.UNSET, - 
(ObjectNode) - objectMapper.readTree( - updateVectorizeData))); // will unset $vectorize, $vector and location + (ObjectNode) + objectMapper.readTree( + """ + { + "$vector": [0.2,0.5,0.7] + } + """)); + DocumentUpdater.DocumentUpdaterResponse updatedDocument = + documentUpdater.apply(baseData, false); + assertThat(updatedDocument) + .isNotNull() + .satisfies( + node -> { + assertThat(node.document()).isEqualTo(expectedData); + assertThat(node.embeddingUpdateOperation()).isNull(); + assertThat(node.modified()).isEqualTo(true); + }); + } - String expected_level_1 = + @Test + public void replaceDocument_vectorizeBlankTest() throws Exception { + String expected1 = """ - { - "_id": "1" - } - """; + { + "_id": "1", + "$vectorize": "", + "$vector":null + } + """; JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); - JsonNode expectedData1 = objectMapper.readTree(expected_level_1); - DocumentUpdater.DocumentUpdaterResponse firstResponse = + JsonNode expectedData = objectMapper.readTree(expected1); + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + (ObjectNode) + objectMapper.readTree( + """ + { + "$vectorize": "" + } + """)); + DocumentUpdater.DocumentUpdaterResponse updatedDocument = documentUpdater.apply(baseData, false); - assertThat(firstResponse) + assertThat(updatedDocument) .isNotNull() .satisfies( - firstResponseNode -> { - assertThat(firstResponseNode.document()).isEqualTo(expectedData1); - assertThat(firstResponseNode.modified()) - .isEqualTo(true); // modified $vectorize, $vector and location + node -> { + assertThat(node.document()).isEqualTo(expectedData); + assertThat(node.embeddingUpdateOperation()).isNull(); + assertThat(node.modified()).isEqualTo(true); + }); + } + + @Test + public void replaceDocument_vectorizeNonTextFailure() throws Exception { + String expected1 = + """ + { + "_id": "1", + "$vectorize": "", + "$vector":null + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); + JsonNode 
expectedData = objectMapper.readTree(expected1); + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + (ObjectNode) + objectMapper.readTree( + """ + { + "$vectorize": 123 + } + """)); + Throwable failure = + catchThrowable( + () -> { + DocumentUpdater.DocumentUpdaterResponse firstResponse = + documentUpdater.apply(baseData, false); + }); + assertThat(failure) + .isInstanceOf(JsonApiException.class) + .hasFieldOrPropertyWithValue("errorCode", ErrorCode.INVALID_VECTORIZE_VALUE_TYPE) + .hasFieldOrPropertyWithValue("message", "$vectorize value needs to be text value"); + } + + @Test + public void replaceDocument_vectorizeNullValue() throws Exception { + String expected1 = + """ + { + "_id": "1", + "$vectorize": null, + "$vector":null + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); + JsonNode expectedData = objectMapper.readTree(expected1); + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + (ObjectNode) + objectMapper.readTree( + """ + { + "$vectorize": null + } + """)); + DocumentUpdater.DocumentUpdaterResponse updatedDocument = + documentUpdater.apply(baseData, false); + assertThat(updatedDocument) + .isNotNull() + .satisfies( + node -> { + assertThat(node.document()).isEqualTo(expectedData); + assertThat(node.embeddingUpdateOperation()).isNull(); + assertThat(node.modified()).isEqualTo(true); + }); + } + + @Test + public void replaceDocument_allNull() throws Exception { + String expected1 = + """ + { + "_id": "123", + "$vectorize": null, + "$vector":null + } + """; + + String allNull = + """ + { + "_id": "123", + "$vectorize": null, + "$vector":null + } + """; + + JsonNode baseData = objectMapper.readTree(allNull); + JsonNode expectedData = objectMapper.readTree(expected1); + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + (ObjectNode) + objectMapper.readTree( + """ + { + "$vectorize": null + } + """)); + DocumentUpdater.DocumentUpdaterResponse updatedDocument = + 
documentUpdater.apply(baseData, false); + assertThat(updatedDocument) + .isNotNull() + .satisfies( + node -> { + assertThat(node.document()).isEqualTo(expectedData); + assertThat(node.embeddingUpdateOperation()).isNull(); + assertThat(node.modified()).isEqualTo(false); // identical, so not modified + }); + } + + @Test + public void replaceDocument_willVectorizeEvenVectorizeHasNoDiff() throws Exception { + String expected1 = + """ + { + "_id": "1", + "location": "London", + "$vectorize": "London City" + } + """; + + JsonNode baseData = objectMapper.readTree(BASE_DOC_JSON_VECTOR); + JsonNode expectedData = objectMapper.readTree(expected1); + DocumentUpdater documentUpdater = + DocumentUpdater.construct( + (ObjectNode) + objectMapper.readTree( + """ + { + "$vectorize": "London City", + "location": "London" + } + """)); + DocumentUpdater.DocumentUpdaterResponse updatedDocument = + documentUpdater.apply(baseData, false); + assertThat(updatedDocument) + .isNotNull() + .satisfies( + node -> { + assertThat(node.document()).isEqualTo(expectedData); + assertThat(node.embeddingUpdateOperation()).isNotNull(); // need to re-vectorize + assertThat(node.modified()) + .isEqualTo( + true); // not identical, because there is no $vector in replaceDocument }); - // Second level update will try to vectorize, in this test case, will do nothing, since there - // is no setOperation + // Second update will vectorize DataVectorizer dataVectorizer = new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = documentUpdater - .applyUpdateVectorize(firstResponse.document(), false, dataVectorizer) + .updateEmbeddingVector(updatedDocument, dataVectorizer) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitItem() .getItem(); - String expected_level_2 = + String expected2 = """ - { - "_id":"1" - } - """; - JsonNode expectedData2 = objectMapper.readTree(expected_level_2); + { + "_id": "1", + 
"location": "London", + "$vectorize": "London City", + "$vector": [0.25,0.25,0.25] + } + """; + JsonNode expectedData2 = objectMapper.readTree(expected2); assertThat(secondResponse) .isNotNull() .satisfies( @@ -798,7 +1222,8 @@ public void two_levels_update_unset() throws Exception { .usingRecursiveComparison() .ignoringFields("order") .isEqualTo(expectedData2); - assertThat(secondResponseNode.modified()).isEqualTo(false); // nothing is modified + assertThat(secondResponseNode.modified()).isEqualTo(true); + // modified $vector }); } } From fc5b0fc881ce6f9426b8c02a66179a2929fcfaf8 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Tue, 16 Jul 2024 14:51:56 -0700 Subject: [PATCH 13/18] fix --- .../clause/update/EmbeddingUpdateOperation.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java index edfb61bada..f05f4f660e 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java @@ -7,14 +7,16 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; +/** + * This operation will be used to pass along $vectorize content and provides a method to update a + * document with new vector + * + * @param vectorizeContent, $vectorize content needed to be vectorized + */ public record EmbeddingUpdateOperation(String vectorizeContent) { - // UpdateOperation.UpdateOperationResult - // updateDocument(ObjectNode doc, float[] vector) /** - * // TODO 一定会换的,因为我们已经知道 vectorize 有 diff了 , 而且你已经拿到vector array了 - * - *

update the document with corresponding vector + * update the document with corresponding vector * * @param doc Document to update * @param dataVectorizer dataVectorizer From 7d3e0df20162b95cf81ee4c3a985a68484f9c8a4 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Mon, 22 Jul 2024 12:22:04 -0700 Subject: [PATCH 14/18] fix comments --- .../clause/update/AddToSetOperation.java | 2 +- .../clause/update/CurrentDateOperation.java | 2 +- .../update/EmbeddingUpdateOperation.java | 1 - .../command/clause/update/IncOperation.java | 2 +- .../clause/update/MinMaxOperation.java | 2 +- .../command/clause/update/MulOperation.java | 2 +- .../clause/update/RenameOperation.java | 2 +- .../command/clause/update/SetOperation.java | 15 ++-- .../command/clause/update/UnsetOperation.java | 2 +- .../clause/update/UpdateOperation.java | 2 +- .../model/impl/ReadAndUpdateOperation.java | 15 ++-- .../service/updater/DocumentUpdater.java | 90 ++++++++++++------- .../service/updater/DocumentUpdaterTest.java | 51 ++++++----- 13 files changed, 112 insertions(+), 76 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/AddToSetOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/AddToSetOperation.java index b41d2717e5..623d439d40 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/AddToSetOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/AddToSetOperation.java @@ -136,7 +136,7 @@ public UpdateOperationResult updateDocument(ObjectNode doc) { } } - return new UpdateOperationResult(modified, null); + return new UpdateOperationResult(modified, List.of()); } private boolean addToSet(ArrayNode set, JsonNode elementToAdd) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/CurrentDateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/CurrentDateOperation.java index 7cf1726fbd..66e9786c61 
100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/CurrentDateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/CurrentDateOperation.java @@ -66,7 +66,7 @@ public UpdateOperationResult updateDocument(ObjectNode doc) { modified = true; } } - return new UpdateOperationResult(modified, null); + return new UpdateOperationResult(modified, List.of()); } record Action(PathMatchLocator locator) implements ActionWithLocator {} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java index f05f4f660e..8087464123 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/EmbeddingUpdateOperation.java @@ -23,7 +23,6 @@ public record EmbeddingUpdateOperation(String vectorizeContent) { * @return Uni modified */ public void updateDocument(JsonNode doc, float[] vector) { - // TODO can I do this instancitation? 
ObjectMapper objectMapper = new ObjectMapper(); JsonNodeFactory nodeFactory = objectMapper.getNodeFactory(); final JsonNode vectorJsonNode = doc.get(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/IncOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/IncOperation.java index fbc346ef5f..f0bf25c2d3 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/IncOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/IncOperation.java @@ -77,7 +77,7 @@ public UpdateOperationResult updateDocument(ObjectNode doc) { } } - return new UpdateOperationResult(modified, null); + return new UpdateOperationResult(modified, List.of()); } private JsonNode addNumbers(ObjectNode doc, NumericNode nr1, NumericNode nr2) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MinMaxOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MinMaxOperation.java index 5963ad876a..82fdce131b 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MinMaxOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MinMaxOperation.java @@ -66,7 +66,7 @@ public UpdateOperationResult updateDocument(ObjectNode doc) { } } - return new UpdateOperationResult(modified, null); + return new UpdateOperationResult(modified, List.of()); } private boolean shouldReplace(JsonNode oldValue, JsonNode newValue) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MulOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MulOperation.java index d8fe754f4f..74704e8177 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MulOperation.java +++ 
b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/MulOperation.java @@ -76,7 +76,7 @@ public UpdateOperationResult updateDocument(ObjectNode doc) { } } - return new UpdateOperationResult(modified, null); + return new UpdateOperationResult(modified, List.of()); } private JsonNode multiply(ObjectNode doc, JsonNode oldValue, JsonNode multiplierValue) { diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/RenameOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/RenameOperation.java index 28355d12a1..cb11d61039 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/RenameOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/RenameOperation.java @@ -78,7 +78,7 @@ public UpdateOperationResult updateDocument(ObjectNode doc) { dst.replaceValue(value); } } - return new UpdateOperationResult(modified, null); + return new UpdateOperationResult(modified, List.of()); } // Unlike most operations, we have 2 locators (src, dest), use explicit names diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java index 4c5369f922..5afaa78b49 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/SetOperation.java @@ -7,11 +7,7 @@ import io.stargate.sgv2.jsonapi.util.JsonUtil; import io.stargate.sgv2.jsonapi.util.PathMatch; import io.stargate.sgv2.jsonapi.util.PathMatchLocator; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Objects; -import java.util.Set; +import java.util.*; /** * Implementation of {@code $set} update operation used to assign values to document fields; also @@ -78,7 +74,7 @@ public UpdateOperationResult 
updateDocument(ObjectNode doc) { boolean modified = false; Set setPaths = new HashSet<>(); actions.stream().forEach(action -> setPaths.add(action.locator().path())); - EmbeddingUpdateOperation embeddingUpdateOperation = null; + List embeddingUpdateOperationList = new ArrayList<>(); for (Action action : actions) { PathMatch target = action.locator().findOrCreate(doc); @@ -100,21 +96,22 @@ public UpdateOperationResult updateDocument(ObjectNode doc) { if (newValue.isNull()) { // if $vectorize is null value, update $vector as null doc.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + doc.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); } else if (!newValue.isTextual()) { // if $vectorize is not textual value throw ErrorCode.INVALID_VECTORIZE_VALUE_TYPE.toApiException(); } else if (newValue.asText().isBlank()) { // $vectorize is blank text value, set $vector as null value, no need to vectorize - doc.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + doc.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); } else { // if $vectorize is textual and not blank, create embeddingUpdateOperation - embeddingUpdateOperation = new EmbeddingUpdateOperation(newValue.asText()); + embeddingUpdateOperationList.add(new EmbeddingUpdateOperation(newValue.asText())); } } modified = true; } } - return new UpdateOperationResult(modified, embeddingUpdateOperation); + return new UpdateOperationResult(modified, embeddingUpdateOperationList); } // Needed because some unit tests check for equality diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java index efe74dccf8..ebbc97dcc3 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UnsetOperation.java @@ -44,7 +44,7 @@ public 
UpdateOperationResult updateDocument(ObjectNode doc) { if (modified && unsetPaths.contains(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { doc.remove(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); } - return new UpdateOperationResult(modified, null); + return new UpdateOperationResult(modified, List.of()); } record Action(PathMatchLocator locator) implements ActionWithLocator {} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java index b33c4a2b58..52f5133869 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java @@ -112,5 +112,5 @@ public int compare(ActionWithLocator o1, ActionWithLocator o2) { } public record UpdateOperationResult( - boolean modified, EmbeddingUpdateOperation embeddingUpdateOperation) {} + boolean modified, List embeddingUpdateOperations) {} } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java index 4e5f3769b7..cd1d6be19a 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/model/impl/ReadAndUpdateOperation.java @@ -10,7 +10,6 @@ import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor; import io.stargate.sgv2.jsonapi.service.cqldriver.serializer.CQLBindValues; -import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.operation.model.ModifyOperation; import 
io.stargate.sgv2.jsonapi.service.operation.model.ReadOperation; @@ -160,10 +159,12 @@ private Uni processUpdate( DocumentUpdater.DocumentUpdaterResponse documentUpdaterResponse = documentUpdater().apply(readDocument.document().deepCopy(), upsert); - final DataVectorizer dataVectorizer = - dataVectorizerService.constructDataVectorizer(dataApiRequestInfo, commandContext); - return documentUpdater - .updateEmbeddingVector(documentUpdaterResponse, dataVectorizer) + return documentUpdaterResponse + .updateEmbeddingVector( + documentUpdaterResponse, + dataVectorizerService, + dataApiRequestInfo, + commandContext) .onItem() .transformToUni( vectorizedDocumentUpdaterResponse -> { @@ -202,7 +203,9 @@ private Uni processUpdate( .transform( v -> { // if not insert increment modified count - if (!upsert) modifiedCount.incrementAndGet(); + if (!upsert) { + modifiedCount.incrementAndGet(); + } // resolve doc to return JsonNode documentToReturn = null; diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java index 9a65b5f551..1281bdca39 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java @@ -3,12 +3,16 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import io.smallrye.mutiny.Uni; +import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.*; +import io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import 
io.stargate.sgv2.jsonapi.util.JsonUtil; +import java.util.ArrayList; import java.util.List; /** Updates the document read from the database with the updates came as part of the request. */ @@ -66,16 +70,16 @@ public DocumentUpdaterResponse apply(JsonNode readDocument, boolean docInserted) */ private DocumentUpdaterResponse update(ObjectNode docToUpdate, boolean docInserted) { boolean modified = false; - EmbeddingUpdateOperation embeddingUpdateOperation = null; + List embeddingUpdateOperationList = new ArrayList<>(); for (UpdateOperation updateOperation : updateOperations) { if (updateOperation.shouldApplyIf(docInserted)) { final UpdateOperation.UpdateOperationResult updateOperationResult = updateOperation.updateDocument(docToUpdate); modified |= updateOperationResult.modified(); - embeddingUpdateOperation = updateOperationResult.embeddingUpdateOperation(); + embeddingUpdateOperationList.addAll(updateOperationResult.embeddingUpdateOperations()); } } - return new DocumentUpdaterResponse(docToUpdate, modified, embeddingUpdateOperation); + return new DocumentUpdaterResponse(docToUpdate, modified, embeddingUpdateOperationList); } /** @@ -106,13 +110,13 @@ private DocumentUpdaterResponse replace(ObjectNode docToUpdate, boolean docInser // null value here. 
if (vectorizeNode.isNull()) { // if $vectorize is null value, update $vector as null - replaceDocument.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + replaceDocument.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); } else if (!vectorizeNode.isTextual()) { // if $vectorize is not textual value throw ErrorCode.INVALID_VECTORIZE_VALUE_TYPE.toApiException(); } else if (vectorizeNode.asText().isBlank()) { // $vectorize is blank text value, set $vector as null value, no need to vectorize - replaceDocument.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null); + replaceDocument.putNull(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD); } else { // if $vectorize is textual and not blank, create embeddingUpdateOperation embeddingUpdateOperation = new EmbeddingUpdateOperation(vectorizeNode.asText()); @@ -136,38 +140,60 @@ private DocumentUpdaterResponse replace(ObjectNode docToUpdate, boolean docInser return new DocumentUpdaterResponse(docToUpdate, true, embeddingUpdateOperation); } - /** - * This method is used for potential vectorize There may exist a not-null embeddingUpdateOperation - * in responseBeforeVectorize param, then use dataVectorizer to vectorize the content and then use - * embeddingUpdateOperation to update the document's $vector field - * - * @param responseBeforeVectorize response before vectorize - * @param dataVectorizer dataVectorizer - * @return Uni - */ - public Uni updateEmbeddingVector( - DocumentUpdaterResponse responseBeforeVectorize, DataVectorizer dataVectorizer) { - final EmbeddingUpdateOperation embeddingUpdateOperation = - responseBeforeVectorize.embeddingUpdateOperation(); - if (embeddingUpdateOperation == null) { - return Uni.createFrom().item(responseBeforeVectorize); - } - return dataVectorizer - .vectorize(embeddingUpdateOperation.vectorizeContent()) - .onItem() - .transformToUni( - vector -> { - embeddingUpdateOperation.updateDocument(responseBeforeVectorize.document, vector); - return 
Uni.createFrom().item(responseBeforeVectorize); - }); - } - /** * The documentUpdaterResponse has the updated document, boolean flag to indicate the document is * modified or not, an embeddingUpdateOperation to update the embedding */ public record DocumentUpdaterResponse( - JsonNode document, boolean modified, EmbeddingUpdateOperation embeddingUpdateOperation) {} + JsonNode document, + boolean modified, + List embeddingUpdateOperations) { + + /** + * This method is used for potential vectorize There may exist a not-null + * embeddingUpdateOperation in responseBeforeVectorize param, then use dataVectorizer to + * vectorize the content and then use embeddingUpdateOperation to update the document's $vector + * field. + * + * @param responseBeforeVectorize response before vectorization + * @param DataVectorizerService dataVectorizerService + * @param DataApiRequestInfo dataApiRequestInfo + * @param CommandContext commandContext + * @return Uni + */ + public Uni updateEmbeddingVector( + DocumentUpdaterResponse responseBeforeVectorize, + DataVectorizerService dataVectorizerService, + DataApiRequestInfo dataApiRequestInfo, + CommandContext commandContext) { + + List embeddingUpdateOperations = + responseBeforeVectorize.embeddingUpdateOperations(); + if (embeddingUpdateOperations.isEmpty()) { + return Uni.createFrom().item(responseBeforeVectorize); + } + // lazy construct the dataVectorizer, only when embeddingUpdateOperation is not null + final DataVectorizer dataVectorizer = + dataVectorizerService.constructDataVectorizer(dataApiRequestInfo, commandContext); + // TODO: only SetOperation and Replacement may create one embeddingUpdateOperation, Refactor + // when there are multiple + final EmbeddingUpdateOperation embeddingUpdateOperation = embeddingUpdateOperations.get(0); + return dataVectorizer + .vectorize(embeddingUpdateOperation.vectorizeContent()) + .onItem() + .transformToUni( + vector -> { + 
embeddingUpdateOperation.updateDocument(responseBeforeVectorize.document, vector); + // create new DocumentUpdaterResponse, set embeddingUpdateOperation as null. + return Uni.createFrom() + .item( + new DocumentUpdaterResponse( + responseBeforeVectorize.document, + responseBeforeVectorize.modified, + List.of())); + }); + } + } private enum UpdateType { UPDATE, diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java index fd679ce9bb..5d2476382b 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java @@ -12,15 +12,17 @@ import io.smallrye.mutiny.helpers.test.UniAssertSubscriber; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateClause; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator; +import io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSettings; -import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.embedding.operation.EmbeddingProvider; import io.stargate.sgv2.jsonapi.service.embedding.operation.TestEmbeddingProvider; import io.stargate.sgv2.jsonapi.service.testutil.DocumentUpdaterUtils; import io.stargate.sgv2.jsonapi.testresource.NoGlobalResourcesTestProfile; import jakarta.inject.Inject; +import java.util.Optional; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @@ -28,6 +30,7 @@ @TestProfile(NoGlobalResourcesTestProfile.Impl.class) public class DocumentUpdaterTest { @Inject ObjectMapper objectMapper; + @Inject DataVectorizerService 
dataVectorizerService; private static String BASE_DOC_JSON = """ @@ -571,11 +574,13 @@ public void updateVectorize() throws Exception { }); // Second update will vectorize - DataVectorizer dataVectorizer = - new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = - documentUpdater - .updateEmbeddingVector(firstResponse, dataVectorizer) + firstResponse + .updateEmbeddingVector( + firstResponse, + dataVectorizerService, + new DataApiRequestInfo(Optional.of("testTenant")), + TestEmbeddingProvider.commandContextWithVectorize) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitItem() @@ -775,11 +780,13 @@ public void update_vectorizeOverwriteVector() throws Exception { assertThat(firstResponseNode.embeddingUpdateOperation()).isNotNull(); // not null }); // Second update will vectorize and overwrite $vector - DataVectorizer dataVectorizer = - new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = - documentUpdater - .updateEmbeddingVector(firstResponse, dataVectorizer) + firstResponse + .updateEmbeddingVector( + firstResponse, + dataVectorizerService, + new DataApiRequestInfo(Optional.of("testTenant")), + TestEmbeddingProvider.commandContextWithVectorize) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitItem() @@ -933,7 +940,7 @@ public void replaceDocument() throws Exception { { "$vectorize" : "random text" } - """)); + """)); DocumentUpdater.DocumentUpdaterResponse updatedDocument = documentUpdater.apply(baseData, false); assertThat(updatedDocument) @@ -947,11 +954,13 @@ public void replaceDocument() throws Exception { }); // Second update will vectorize - DataVectorizer dataVectorizer = - new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = - 
documentUpdater - .updateEmbeddingVector(updatedDocument, dataVectorizer) + updatedDocument + .updateEmbeddingVector( + updatedDocument, + dataVectorizerService, + new DataApiRequestInfo(Optional.of("testTenant")), + TestEmbeddingProvider.commandContextWithVectorize) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitItem() @@ -964,7 +973,7 @@ public void replaceDocument() throws Exception { "$vectorize" : "random text", "$vector": [0.25,0.25,0.25] } - """; + """; JsonNode expectedData2 = objectMapper.readTree(expected2); assertThat(secondResponse) .isNotNull() @@ -1033,7 +1042,7 @@ public void replaceDocument_vectorizeBlankTest() throws Exception { { "$vectorize": "" } - """)); + """)); DocumentUpdater.DocumentUpdaterResponse updatedDocument = documentUpdater.apply(baseData, false); assertThat(updatedDocument) @@ -1194,11 +1203,13 @@ public void replaceDocument_willVectorizeEvenVectorizeHasNoDiff() throws Excepti }); // Second update will vectorize - DataVectorizer dataVectorizer = - new DataVectorizer(testService, objectMapper.getNodeFactory(), null, collectionSettings); final DocumentUpdater.DocumentUpdaterResponse secondResponse = - documentUpdater - .updateEmbeddingVector(updatedDocument, dataVectorizer) + updatedDocument + .updateEmbeddingVector( + updatedDocument, + dataVectorizerService, + new DataApiRequestInfo(Optional.of("testTenant")), + TestEmbeddingProvider.commandContextWithVectorize) .subscribe() .withSubscriber(UniAssertSubscriber.create()) .awaitItem() From 2eec55b064ae73b63db76da979bcf1f82ac3fd3b Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Mon, 22 Jul 2024 13:42:24 -0700 Subject: [PATCH 15/18] fix comments --- .../service/updater/DocumentUpdater.java | 6 ++-- .../service/updater/DocumentUpdaterTest.java | 36 +++++++++++-------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java 
b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java index 1281bdca39..071b60b0a3 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java @@ -102,6 +102,7 @@ private DocumentUpdaterResponse replace(ObjectNode docToUpdate, boolean docInser } } + List embeddingUpdateOperationList = new ArrayList<>(); EmbeddingUpdateOperation embeddingUpdateOperation = null; JsonNode vectorizeNode = replaceDocument.get(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD); @@ -120,13 +121,14 @@ private DocumentUpdaterResponse replace(ObjectNode docToUpdate, boolean docInser } else { // if $vectorize is textual and not blank, create embeddingUpdateOperation embeddingUpdateOperation = new EmbeddingUpdateOperation(vectorizeNode.asText()); + embeddingUpdateOperationList.add(embeddingUpdateOperation); } } // In case there is no difference between document return modified as false, so db update // doesn't happen if (JsonUtil.equalsOrdered(compareDoc, replaceDocument())) { - return new DocumentUpdaterResponse(docToUpdate, false, null); + return new DocumentUpdaterResponse(docToUpdate, false, List.of()); } // remove all data and add _id as first field; either from original document or from replacement docToUpdate.removeAll(); @@ -137,7 +139,7 @@ private DocumentUpdaterResponse replace(ObjectNode docToUpdate, boolean docInser } docToUpdate.setAll(replaceDocument()); // return modified flag as true - return new DocumentUpdaterResponse(docToUpdate, true, embeddingUpdateOperation); + return new DocumentUpdaterResponse(docToUpdate, true, embeddingUpdateOperationList); } /** diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java index 5d2476382b..6bb67e1b09 100644 --- 
a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java @@ -568,8 +568,9 @@ public void updateVectorize() throws Exception { firstResponseNode -> { assertThat(firstResponseNode.document()).isEqualTo(expectedData1); assertThat(firstResponseNode.modified()).isEqualTo(true); // modified location - assertThat(firstResponseNode.embeddingUpdateOperation()).isNotNull(); - assertThat(firstResponseNode.embeddingUpdateOperation().vectorizeContent()) + assertThat(firstResponseNode.embeddingUpdateOperations()).isNotEmpty(); + ; + assertThat(firstResponseNode.embeddingUpdateOperations().get(0).vectorizeContent()) .isEqualTo("Beijing is a big city"); }); @@ -637,7 +638,8 @@ public void update_noVectorize() throws Exception { firstResponseNode -> { assertThat(firstResponseNode.document()).isEqualTo(expectedData1); assertThat(firstResponseNode.modified()).isEqualTo(true); // modified location - assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); // should be null + assertThat(firstResponseNode.embeddingUpdateOperations()) + .isEmpty(); // should be null }); } @@ -670,7 +672,8 @@ public void update_notModified() throws Exception { firstResponseNode -> { assertThat(firstResponseNode.document()).isEqualTo(expectedData1); assertThat(firstResponseNode.modified()).isEqualTo(false); // not modified - assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); // should be null + assertThat(firstResponseNode.embeddingUpdateOperations()) + .isEmpty(); // should be null }); } @@ -706,7 +709,8 @@ public void update_notModifiedVectorize() throws Exception { assertThat(firstResponseNode.document()).isEqualTo(expectedData1); assertThat(firstResponseNode.modified()) .isEqualTo(false); // $vectorize has no diff, not modified - assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); // should be null + 
assertThat(firstResponseNode.embeddingUpdateOperations()) + .isEmpty(); // should be null }); } @@ -741,7 +745,8 @@ public void update_modifiedVector() throws Exception { assertThat(firstResponseNode.document()).isEqualTo(expectedData1); assertThat(firstResponseNode.modified()) .isEqualTo(true); // $vector is updated, $vectorize is not - assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); // should be null + assertThat(firstResponseNode.embeddingUpdateOperations()) + .isEmpty(); // should be null }); } @@ -777,7 +782,8 @@ public void update_vectorizeOverwriteVector() throws Exception { assertThat(firstResponseNode.document()).isEqualTo(expectedData1); assertThat(firstResponseNode.modified()) .isEqualTo(true); // $vector is updated but not overwrite, $vectorize is updated - assertThat(firstResponseNode.embeddingUpdateOperation()).isNotNull(); // not null + assertThat(firstResponseNode.embeddingUpdateOperations()).isNotEmpty(); + ; // not null }); // Second update will vectorize and overwrite $vector final DocumentUpdater.DocumentUpdaterResponse secondResponse = @@ -846,7 +852,7 @@ public void update_vectorizeBlank() throws JsonProcessingException { assertThat(firstResponseNode.document()).isEqualTo(expectedData1); assertThat(firstResponseNode.modified()) .isEqualTo(true); // $vector is updated , $vectorize is updated - assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); + assertThat(firstResponseNode.embeddingUpdateOperations()).isEmpty(); }); } @@ -883,7 +889,7 @@ public void update_vectorizeNullValue() throws JsonProcessingException { assertThat(firstResponseNode.document()).isEqualTo(expectedData1); assertThat(firstResponseNode.modified()) .isEqualTo(true); // $vector is updated , $vectorize is updated - assertThat(firstResponseNode.embeddingUpdateOperation()).isNull(); + assertThat(firstResponseNode.embeddingUpdateOperations()).isEmpty(); }); } @@ -948,7 +954,7 @@ public void replaceDocument() throws Exception { .satisfies( node -> 
{ assertThat(node.document()).isEqualTo(expectedData); - assertThat(node.embeddingUpdateOperation()).isNotNull(); + assertThat(node.embeddingUpdateOperations()).isNotEmpty(); assertThat(node.modified()).isEqualTo(true); }); @@ -1016,7 +1022,7 @@ public void replaceDocument_only_replace_vector() throws Exception { .satisfies( node -> { assertThat(node.document()).isEqualTo(expectedData); - assertThat(node.embeddingUpdateOperation()).isNull(); + assertThat(node.embeddingUpdateOperations()).isEmpty(); assertThat(node.modified()).isEqualTo(true); }); } @@ -1050,7 +1056,7 @@ public void replaceDocument_vectorizeBlankTest() throws Exception { .satisfies( node -> { assertThat(node.document()).isEqualTo(expectedData); - assertThat(node.embeddingUpdateOperation()).isNull(); + assertThat(node.embeddingUpdateOperations()).isEmpty(); assertThat(node.modified()).isEqualTo(true); }); } @@ -1118,7 +1124,7 @@ public void replaceDocument_vectorizeNullValue() throws Exception { .satisfies( node -> { assertThat(node.document()).isEqualTo(expectedData); - assertThat(node.embeddingUpdateOperation()).isNull(); + assertThat(node.embeddingUpdateOperations()).isEmpty(); assertThat(node.modified()).isEqualTo(true); }); } @@ -1161,7 +1167,7 @@ public void replaceDocument_allNull() throws Exception { .satisfies( node -> { assertThat(node.document()).isEqualTo(expectedData); - assertThat(node.embeddingUpdateOperation()).isNull(); + assertThat(node.embeddingUpdateOperations()).isEmpty(); assertThat(node.modified()).isEqualTo(false); // identical, so not modified }); } @@ -1196,7 +1202,7 @@ public void replaceDocument_willVectorizeEvenVectorizeHasNoDiff() throws Excepti .satisfies( node -> { assertThat(node.document()).isEqualTo(expectedData); - assertThat(node.embeddingUpdateOperation()).isNotNull(); // need to re-vectorize + assertThat(node.embeddingUpdateOperations()).isNotEmpty(); // need to re-vectorize assertThat(node.modified()) .isEqualTo( true); // not identical, because there is no 
$vector in replaceDocument From f2f6a21119ed971ba66de7b3c2a29bc52ed15a0c Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Mon, 22 Jul 2024 14:00:10 -0700 Subject: [PATCH 16/18] fix IT --- .../jsonapi/api/model/command/clause/update/PopOperation.java | 2 +- .../jsonapi/api/model/command/clause/update/PushOperation.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PopOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PopOperation.java index 54106a13c0..2a133954f3 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PopOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PopOperation.java @@ -89,7 +89,7 @@ public UpdateOperationResult updateDocument(ObjectNode doc) { + value.getNodeType()); } } - return new UpdateOperationResult(changes, null); + return new UpdateOperationResult(changes, List.of()); } /** Value class for per-field Pop operation definitions. 
*/ diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PushOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PushOperation.java index a79ef757c9..fe4393b5df 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PushOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/PushOperation.java @@ -174,7 +174,7 @@ public UpdateOperationResult updateDocument(ObjectNode doc) { } // Every valid update operation modifies document so need just one: - return new UpdateOperationResult(!actions.isEmpty(), null); + return new UpdateOperationResult(!actions.isEmpty(), List.of()); } // Just needed for tests From 577703f404bacfabf1463dcaeada116b38f62220 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Tue, 23 Jul 2024 10:08:03 -0700 Subject: [PATCH 17/18] merged from tables, fix conflicts --- .../service/embedding/DataVectorizer.java | 10 +-- .../embedding/DataVectorizerService.java | 2 +- .../ReadAndUpdateCollectionOperation.java | 17 +++-- .../FindOneAndReplaceCommandResolver.java | 16 ++-- .../FindOneAndUpdateCommandResolver.java | 7 +- .../resolver/UpdateManyCommandResolver.java | 6 +- .../resolver/UpdateOneCommandResolver.java | 6 +- ...AndUpdateCollectionOperationRetryTest.java | 3 +- .../ReadAndUpdateCollectionOperationTest.java | 1 - ...erialConsistencyOverrideOperationTest.java | 6 +- .../CommandResolverWithVectorizerTest.java | 2 - .../FindOneAndReplaceCommandResolverTest.java | 9 +-- .../UpdateManyCommandResolverTest.java | 75 ------------------- .../service/updater/DocumentUpdaterTest.java | 10 --- 14 files changed, 27 insertions(+), 143 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java index 8692fce249..cab5eecc5f 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java +++ 
b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizer.java @@ -179,8 +179,8 @@ public Uni vectorize(String vectorizeContent) { if (vector.length != schemaObject.vectorConfig().vectorSize()) { throw EMBEDDING_PROVIDER_UNEXPECTED_RESPONSE.toApiException( "Embedding provider '%s' did not return expected embedding length. Expect: '%d'. Actual: '%d'", - schemaObject.vectorConfig().vectorizeConfig().provider(), - schemaObject.vectorConfig().vectorSize(), + schemaObject.vectorConfig().vectorizeConfig().provider(), + schemaObject.vectorConfig().vectorSize(), vector.length); } return vector; @@ -202,7 +202,7 @@ public Uni vectorize(SortClause sortClause) { String text = expression.vectorize(); if (embeddingProvider == null) { throw ErrorCode.EMBEDDING_SERVICE_NOT_CONFIGURED.toApiException( - schemaObject.name.table()); + schemaObject.name.table()); } Uni> vectors = embeddingProvider @@ -221,8 +221,8 @@ public Uni vectorize(SortClause sortClause) { if (vector.length != schemaObject.vectorConfig().vectorSize()) { throw EMBEDDING_PROVIDER_UNEXPECTED_RESPONSE.toApiException( "Embedding provider '%s' did not return expected embedding length. Expect: '%d'. 
Actual: '%d'", - schemaObject.vectorConfig().vectorizeConfig().provider(), - schemaObject.vectorConfig().vectorSize(), + schemaObject.vectorConfig().vectorizeConfig().provider(), + schemaObject.vectorConfig().vectorSize(), vector.length); } sortExpressions.clear(); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java index 267436d858..372843006d 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/embedding/DataVectorizerService.java @@ -55,7 +55,7 @@ public Uni vectorize( .transform(flag -> command); } - public DataVectorizer constructDataVectorizer( + public DataVectorizer constructDataVectorizer( DataApiRequestInfo dataApiRequestInfo, CommandContext commandContext) { EmbeddingProvider embeddingProvider = Optional.ofNullable(commandContext.embeddingProvider()) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/ReadAndUpdateCollectionOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/ReadAndUpdateCollectionOperation.java index 0fefd9667e..1637fa3d73 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/ReadAndUpdateCollectionOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/collections/ReadAndUpdateCollectionOperation.java @@ -11,10 +11,8 @@ import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSchemaObject; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.QueryExecutor; import io.stargate.sgv2.jsonapi.service.cqldriver.serializer.CQLBindValues; -import io.stargate.sgv2.jsonapi.service.operation.filters.collection.IDCollectionFilter; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; -import io.stargate.sgv2.jsonapi.service.operation.model.ModifyOperation; -import 
io.stargate.sgv2.jsonapi.service.operation.model.ReadOperation; +import io.stargate.sgv2.jsonapi.service.operation.filters.collection.IDCollectionFilter; import io.stargate.sgv2.jsonapi.service.projection.DocumentProjector; import io.stargate.sgv2.jsonapi.service.shredding.collections.DocumentId; import io.stargate.sgv2.jsonapi.service.shredding.collections.DocumentShredder; @@ -188,20 +186,23 @@ private Uni processUpdate( return Uni.createFrom() .item( new UpdatedDocument( - readDocument.id().orElseThrow(), upsert, originalDocument, null)); + readDocument.id().orElseThrow(), + upsert, + originalDocument, + null)); } else { return Uni.createFrom().nullItem(); } } final WritableShreddedDocument writableShreddedDocument = - documentShredder() + documentShredder() .shred( commandContext(), vectorizedDocumentUpdaterResponse.document(), - readDocument - .txnId() - .orElse(null) ); // will be empty when this is a upsert'd doc + readDocument + .txnId() + .orElse(null)); // will be empty when this is a upsert'd doc // Have to do this because shredder adds _id field to the document if it // doesn't exist diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/FindOneAndReplaceCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/FindOneAndReplaceCommandResolver.java index fcfc1fb764..e25f24994e 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/FindOneAndReplaceCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/FindOneAndReplaceCommandResolver.java @@ -9,19 +9,15 @@ import io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; import io.stargate.sgv2.jsonapi.api.v1.metrics.JsonApiMetricsConfig; import io.stargate.sgv2.jsonapi.config.OperationsConfig; +import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; +import io.stargate.sgv2.jsonapi.exception.ErrorCode; +import io.stargate.sgv2.jsonapi.exception.JsonApiException; import 
io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSchemaObject; +import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.operation.Operation; import io.stargate.sgv2.jsonapi.service.operation.collections.CollectionReadType; import io.stargate.sgv2.jsonapi.service.operation.collections.FindCollectionOperation; import io.stargate.sgv2.jsonapi.service.operation.collections.ReadAndUpdateCollectionOperation; -import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; -import io.stargate.sgv2.jsonapi.exception.ErrorCode; -import io.stargate.sgv2.jsonapi.exception.JsonApiException; -import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; -import io.stargate.sgv2.jsonapi.service.operation.model.Operation; -import io.stargate.sgv2.jsonapi.service.operation.model.ReadType; -import io.stargate.sgv2.jsonapi.service.operation.model.impl.FindOperation; -import io.stargate.sgv2.jsonapi.service.operation.model.impl.ReadAndUpdateOperation; import io.stargate.sgv2.jsonapi.service.projection.DocumentProjector; import io.stargate.sgv2.jsonapi.service.resolver.matcher.FilterableResolver; import io.stargate.sgv2.jsonapi.service.shredding.collections.DocumentShredder; @@ -47,7 +43,7 @@ public class FindOneAndReplaceCommandResolver extends FilterableResolver { - assertThat(op.commandContext()) - .isEqualTo(TestEmbeddingProvider.commandContextWithVectorize); - assertThat(op.returnDocumentInResponse()).isFalse(); - assertThat(op.returnUpdatedDocument()).isFalse(); - assertThat(op.upsert()).isFalse(); - assertThat(op.documentShredder()).isEqualTo(documentShredder); - assertThat(op.updateLimit()).isEqualTo(20); - assertThat(op.retryLimit()).isEqualTo(operationsConfig.lwt().retries()); - assertThat(op.documentUpdater()) - .isInstanceOfSatisfying( - DocumentUpdater.class, - updater -> { - assertThat(updater.updateOperations()) - .isEqualTo(updateClause.buildOperations()); - }); - 
assertThat(op.findCollectionOperation()) - .isInstanceOfSatisfying( - FindCollectionOperation.class, - find -> { - TextCollectionFilter filter = - new TextCollectionFilter( - "col", MapCollectionFilter.Operator.EQ, "val"); - - assertThat(find.objectMapper()).isEqualTo(objectMapper); - assertThat(find.commandContext()) - .isEqualTo(TestEmbeddingProvider.commandContextWithVectorize); - assertThat(find.pageSize()).isEqualTo(20); - assertThat(find.limit()).isEqualTo(Integer.MAX_VALUE); - assertThat(find.pageState()).isNull(); - assertThat(find.readType()).isEqualTo(CollectionReadType.DOCUMENT); - assertThat( - find.logicalExpression() - .comparisonExpressions - .get(0) - .getDbFilters() - .get(0)) - .isEqualTo(filter); - assertThat(find.singleResponse()).isFalse(); - }); - }); - } - @Test public void withUpsert() throws Exception { String json = diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java index 6bb67e1b09..63d0de0ebd 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdaterTest.java @@ -15,9 +15,7 @@ import io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.JsonApiException; -import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSettings; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; -import io.stargate.sgv2.jsonapi.service.embedding.operation.EmbeddingProvider; import io.stargate.sgv2.jsonapi.service.embedding.operation.TestEmbeddingProvider; import io.stargate.sgv2.jsonapi.service.testutil.DocumentUpdaterUtils; import io.stargate.sgv2.jsonapi.testresource.NoGlobalResourcesTestProfile; @@ -534,10 +532,6 @@ public void replaceEmpty() throws Exception { @Nested class VectorizeUpdateTest { - 
private final EmbeddingProvider testService = new TestEmbeddingProvider(); - private final CollectionSettings collectionSettings = - TestEmbeddingProvider.commandContextWithVectorize.collectionSettings(); - @Test public void updateVectorize() throws Exception { String updateVectorizeData = @@ -922,10 +916,6 @@ public void update_vectorizeNonTextualFailure() throws JsonProcessingException { @Nested class replaceVectorizeTest { - private final EmbeddingProvider testService = new TestEmbeddingProvider(); - private final CollectionSettings collectionSettings = - TestEmbeddingProvider.commandContextWithVectorize.collectionSettings(); - @Test public void replaceDocument() throws Exception { String expected1 = From ca9d04f59a0a32de21a4788dff6b71daa74c5b35 Mon Sep 17 00:00:00 2001 From: Yuqi Du Date: Fri, 26 Jul 2024 13:32:28 -0700 Subject: [PATCH 18/18] fix comments --- .../clause/update/UpdateOperation.java | 7 ++- .../UpdateClauseDeserializer.java | 2 +- .../FindOneAndReplaceCommandResolver.java | 3 +- .../service/updater/DocumentUpdater.java | 44 ++++++++++++------- 4 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java index 9bd449cb59..091a419a64 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/clause/update/UpdateOperation.java @@ -27,7 +27,7 @@ public List actions() { * Method called to apply operation to given document. * * @param doc Document to apply operation to - * @return True if document was modified by operation; false if not. 
+ * @return UpdateOperationResult */ public abstract UpdateOperationResult updateDocument(ObjectNode doc); @@ -100,6 +100,11 @@ public int compare(ActionWithLocator o1, ActionWithLocator o2) { } } + /** + * Abstract method updateDocument will return an UpdateOperationResult. UpdateOperationResult + * indicates whether the doc is modified or not, and holds a List of embeddingUpdateOperations + * that is empty if there are not any embeddingUpdateOperations. + */ public record UpdateOperationResult( boolean modified, List embeddingUpdateOperations) {} } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializer.java b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializer.java index 9edc2dfb93..cf9cd41577 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializer.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/api/model/command/deserializers/UpdateClauseDeserializer.java @@ -64,7 +64,7 @@ public void validateUpdateDefs(EnumMap updateDefs) { if (checkUpdateOperationNode != null && checkUpdateOperationNode.has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) { if (checkUpdateOperationNode.has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) { - throw new JsonApiException(ErrorCode.INVALID_USAGE_OF_VECTORIZE); + throw ErrorCode.INVALID_USAGE_OF_VECTORIZE.toApiException(); } } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/FindOneAndReplaceCommandResolver.java b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/FindOneAndReplaceCommandResolver.java index e25f24994e..d2deb60edc 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/FindOneAndReplaceCommandResolver.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/resolver/FindOneAndReplaceCommandResolver.java @@ -11,7 +11,6 @@ import io.stargate.sgv2.jsonapi.config.OperationsConfig; import 
io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ErrorCode; -import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSchemaObject; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.service.operation.Operation; @@ -70,7 +69,7 @@ public Operation resolveCollectionCommand( // Add $vector and $vectorize replacement validation here if (command.replacementDocument().has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD) && command.replacementDocument().has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) { - throw new JsonApiException(ErrorCode.INVALID_USAGE_OF_VECTORIZE); + throw ErrorCode.INVALID_USAGE_OF_VECTORIZE.toApiException(); } // diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java index fe5a34a568..7af0b43e62 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/updater/DocumentUpdater.java @@ -2,13 +2,13 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import io.smallrye.mutiny.Multi; import io.smallrye.mutiny.Uni; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.model.command.clause.update.*; import io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; import io.stargate.sgv2.jsonapi.exception.ErrorCode; -import io.stargate.sgv2.jsonapi.exception.JsonApiException; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizer; import io.stargate.sgv2.jsonapi.service.embedding.DataVectorizerService; import io.stargate.sgv2.jsonapi.util.JsonUtil; @@ -178,23 +178,33 @@ public Uni 
updateEmbeddingVector( // lazy construct the dataVectorizer, only when embeddingUpdateOperation is not null final DataVectorizer dataVectorizer = dataVectorizerService.constructDataVectorizer(dataApiRequestInfo, commandContext); - // TODO: only SetOperation and Replacement may create one embeddingUpdateOperation, Refactor - // when there are multiple - final EmbeddingUpdateOperation embeddingUpdateOperation = embeddingUpdateOperations.get(0); - return dataVectorizer - .vectorize(embeddingUpdateOperation.vectorizeContent()) + // currently, there is only one $vectorize for document + return Multi.createFrom() + .iterable(embeddingUpdateOperations) .onItem() - .transformToUni( - vector -> { - embeddingUpdateOperation.updateDocument(responseBeforeVectorize.document, vector); - // create new DocumentUpdaterResponse, set embeddingUpdateOperation as null. - return Uni.createFrom() - .item( - new DocumentUpdaterResponse( - responseBeforeVectorize.document, - responseBeforeVectorize.modified, - List.of())); - }); + .transformToUniAndConcatenate( + embeddingUpdateOperation -> + dataVectorizer + .vectorize(embeddingUpdateOperation.vectorizeContent()) + .onItem() + .transform( + vector -> { + embeddingUpdateOperation.updateDocument( + responseBeforeVectorize.document, vector); + // Return null since we don't need individual results + return null; + })) + .collect() + .asList() + .onItem() + .transform( + ignored -> + new DocumentUpdaterResponse( + responseBeforeVectorize.document, + responseBeforeVectorize.modified, + List.of() // Assuming the embeddingUpdateOperations are not needed in the + // final response + )); } }