From 8d9059d45bd42f420fcc8738e6a1215a331a675e Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Tue, 12 Mar 2024 18:33:00 -0700 Subject: [PATCH] Implement #922: allow ObjectId/UUID auto-generation (#959) --- .../executor/CollectionSettings.java | 5 + .../executor/CollectionSettingsV1Reader.java | 6 +- .../jsonapi/service/shredding/Shredder.java | 43 +++++- .../ShredderWithExtendedTypesTest.java | 143 +++++++++++++++++- 4 files changed, 187 insertions(+), 10 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettings.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettings.java index aec5c51ab8..3bd663ad92 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettings.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettings.java @@ -45,6 +45,11 @@ public static CollectionSettings empty() { return EMPTY; } + public CollectionSettings withIdType(IdType idType) { + return new CollectionSettings( + collectionName, new IdConfig(idType), vectorConfig, indexingConfig); + } + public record IdConfig(IdType idType) { public static IdConfig defaultIdConfig() { return new IdConfig(IdType.UNDEFINED); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettingsV1Reader.java b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettingsV1Reader.java index 8b9f337dc6..ae6fd49a75 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettingsV1Reader.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/cqldriver/executor/CollectionSettingsV1Reader.java @@ -29,13 +29,15 @@ public CollectionSettings readCollectionSettings( indexingConfig = CollectionSettings.IndexingConfig.fromJson(indexing); } // construct collectionSettings idConfig, default idType as uuid - CollectionSettings.IdConfig idConfig = 
CollectionSettings.IdConfig.defaultIdConfig(); + final CollectionSettings.IdConfig idConfig; JsonNode idConfigNode = collectionOptionsNode.path(TableCommentConstants.DEFAULT_ID_KEY); // should always have idConfigNode in table comment since schema v1 - if (!idConfigNode.isMissingNode() && idConfigNode.has("type")) { + if (idConfigNode.has("type")) { idConfig = new CollectionSettings.IdConfig( CollectionSettings.IdType.fromString(idConfigNode.get("type").asText())); + } else { + idConfig = CollectionSettings.IdConfig.defaultIdConfig(); } return new CollectionSettings(collectionName, idConfig, vectorConfig, indexingConfig); diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/shredding/Shredder.java b/src/main/java/io/stargate/sgv2/jsonapi/service/shredding/Shredder.java index d98be81bda..d278a67686 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/shredding/Shredder.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/shredding/Shredder.java @@ -3,7 +3,10 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.uuid.Generators; +import com.fasterxml.uuid.NoArgGenerator; import io.stargate.sgv2.jsonapi.api.model.command.CommandContext; import io.stargate.sgv2.jsonapi.api.v1.metrics.JsonProcessingMetricsReporter; import io.stargate.sgv2.jsonapi.config.DocumentLimitsConfig; @@ -24,6 +27,7 @@ import java.util.OptionalInt; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; +import org.bson.types.ObjectId; /** * Shred an incoming JSON document into the data we need to store in the DB, and then de-shred. 
@@ -37,6 +41,10 @@ */ @ApplicationScoped public class Shredder { + private static final NoArgGenerator UUID_V4_GENERATOR = Generators.randomBasedGenerator(); + private static final NoArgGenerator UUID_V6_GENERATOR = Generators.timeBasedReorderedGenerator(); + private static final NoArgGenerator UUID_V7_GENERATOR = Generators.timeBasedEpochGenerator(); + private final ObjectMapper objectMapper; private final DocumentLimitsConfig documentLimits; @@ -89,7 +97,7 @@ public WritableShreddedDocument shred( "document to shred must be a JSON Object, instead got %s", doc.getNodeType()); } - final ObjectNode docWithId = normalizeDocumentId((ObjectNode) doc); + final ObjectNode docWithId = normalizeDocumentId(collectionSettings, (ObjectNode) doc); final DocumentId docId = DocumentId.fromJson(docWithId.get(DocumentConstants.Fields.DOC_ID)); final String docJson; @@ -143,16 +151,17 @@ public WritableShreddedDocument shred( * is the very first property in the document (reordering as needed). Note that a new document is * created and returned; input document is never modified. * + * @param collectionSettings Collection settings to use for document id generation * @param doc Document to use as the base * @return Document that has _id as its first property */ - private ObjectNode normalizeDocumentId(ObjectNode doc) { + private ObjectNode normalizeDocumentId(CollectionSettings collectionSettings, ObjectNode doc) { // First: see if we have Object Id present or not JsonNode idNode = doc.get(DocumentConstants.Fields.DOC_ID); // If not, generate one if (idNode == null) { - idNode = generateDocumentId(); + idNode = generateDocumentId(collectionSettings); } // Either way we need to construct actual document with _id as the first property; // unfortunately there is no way to reorder properties in-place. 
@@ -163,10 +172,30 @@ private ObjectNode normalizeDocumentId(ObjectNode doc) { return docWithIdAsFirstProperty; } - private JsonNode generateDocumentId() { - // Currently we generate UUID-as-String; alternatively could use and create - // ObjectId-compatible values for better interoperability - return objectMapper.getNodeFactory().textNode(UUID.randomUUID().toString()); + private JsonNode generateDocumentId(CollectionSettings collectionSettings) { + CollectionSettings.IdType idType = collectionSettings.idConfig().idType(); + if (idType == null) { + idType = CollectionSettings.IdType.UNDEFINED; + } + final JsonNodeFactory jnf = objectMapper.getNodeFactory(); + switch (idType) { + case OBJECT_ID: + return wrapExtensionType(jnf, JsonExtensionType.OBJECT_ID, new ObjectId()); + case UUID: + return wrapExtensionType(jnf, JsonExtensionType.UUID, UUID_V4_GENERATOR.generate()); + case UUID_V6: + return wrapExtensionType(jnf, JsonExtensionType.UUID, UUID_V6_GENERATOR.generate()); + case UUID_V7: + return wrapExtensionType(jnf, JsonExtensionType.UUID, UUID_V7_GENERATOR.generate()); + case UNDEFINED: + } + // Default for "undefined"/"unspecified" is legacy unwrapped UUIDv4 (random) + return jnf.textNode(UUID_V4_GENERATOR.generate().toString()); + } + + private static JsonNode wrapExtensionType( + JsonNodeFactory jnf, JsonExtensionType etype, Object value) { + return jnf.objectNode().put(etype.encodedName(), value.toString()); } /** diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/shredding/ShredderWithExtendedTypesTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/shredding/ShredderWithExtendedTypesTest.java index 70b837c3c8..8a83155968 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/shredding/ShredderWithExtendedTypesTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/shredding/ShredderWithExtendedTypesTest.java @@ -6,12 +6,15 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import 
com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.uuid.impl.UUIDUtil; import io.quarkus.test.InjectMock; import io.quarkus.test.junit.QuarkusTest; import io.quarkus.test.junit.TestProfile; import io.stargate.sgv2.common.testprofiles.NoGlobalResourcesTestProfile; import io.stargate.sgv2.jsonapi.api.request.DataApiRequestInfo; import io.stargate.sgv2.jsonapi.exception.ErrorCode; +import io.stargate.sgv2.jsonapi.service.cqldriver.executor.CollectionSettings; +import io.stargate.sgv2.jsonapi.service.projection.DocumentProjector; import io.stargate.sgv2.jsonapi.service.shredding.model.DocValueHasher; import io.stargate.sgv2.jsonapi.service.shredding.model.DocumentId; import io.stargate.sgv2.jsonapi.service.shredding.model.JsonExtensionType; @@ -43,7 +46,7 @@ public class ShredderWithExtendedTypesTest { @InjectMock protected DataApiRequestInfo bogusRequestInfo; @Nested - class OkCasesId { + class OkCasesExplicitId { @Test public void shredSimpleWithUUIDKeyAndValue() throws Exception { final String idUUID = defaultTestUUID().toString(); @@ -190,6 +193,144 @@ public void shredSimpleWithoutId() throws Exception { } } + @Nested + class OkCasesGeneratedId { + @Test + public void shredSimpleWithoutIdGenLegacyUUID() throws Exception { + final String inputJson = "{\"value\": 42}"; + final JsonNode inputDoc = objectMapper.readTree(inputJson); + WritableShreddedDocument doc = + shredder.shred( + inputDoc, + null, + DocumentProjector.identityProjector(), + "test", + CollectionSettings.empty().withIdType(CollectionSettings.IdType.UNDEFINED)); + + DocumentId docId = doc.id(); + // Legacy UUID generated as "plain" String id + assertThat(docId).isInstanceOf(DocumentId.StringId.class); + + // should be auto-generated UUID: verify by constructing from String representation: + UUID typedId = UUIDUtil.uuid(((DocumentId.StringId) docId).key()); + assertThat(typedId).isNotNull(); + List expPaths = Arrays.asList(JsonPath.from("_id"), JsonPath.from("value")); + +
assertThat(doc.existKeys()).isEqualTo(new HashSet<>(expPaths)); + assertThat(doc.arraySize()).isEmpty(); + assertThat(doc.arrayContains()).containsExactlyInAnyOrder("value N42"); + + // Also, the document should be the same, including _id added: + ObjectNode jsonFromShredded = (ObjectNode) objectMapper.readTree(doc.docJson()); + JsonNode idNode = jsonFromShredded.get("_id"); + + assertThat(idNode.asText()).isEqualTo(typedId.toString()); + + // Then atomic value containers + assertThat(doc.queryBoolValues()).isEmpty(); + assertThat(doc.queryNullValues()).isEmpty(); + assertThat(doc.queryNumberValues()) + .isEqualTo(Map.of(JsonPath.from("value"), BigDecimal.valueOf(42))); + assertThat(doc.queryTextValues()).isEqualTo(Map.of(JsonPath.from("_id"), typedId.toString())); + } + + @Test + public void shredSimpleWithoutIdGenObjectId() throws Exception { + final String inputJson = "{\"value\": 42}"; + final JsonNode inputDoc = objectMapper.readTree(inputJson); + WritableShreddedDocument doc = + shredder.shred( + inputDoc, + null, + DocumentProjector.identityProjector(), + "test", + CollectionSettings.empty().withIdType(CollectionSettings.IdType.OBJECT_ID)); + + DocumentId docId = doc.id(); + assertThat(docId).isInstanceOf(DocumentId.ExtensionTypeId.class); + + // should be auto-generated ObjectId: verify by constructing from String representation: + ObjectId typedId = new ObjectId(((DocumentId.ExtensionTypeId) docId).valueAsString()); + assertThat(typedId).isNotNull(); + List expPaths = Arrays.asList(JsonPath.from("_id"), JsonPath.from("value")); + + assertThat(doc.existKeys()).isEqualTo(new HashSet<>(expPaths)); + assertThat(doc.arraySize()).isEmpty(); + assertThat(doc.arrayContains()).containsExactlyInAnyOrder("value N42"); + + // Also, the document should be the same, including _id added: + ObjectNode jsonFromShredded = (ObjectNode) objectMapper.readTree(doc.docJson()); + JsonNode idNode = jsonFromShredded.get("_id"); + + 
assertThat(idNode).isNotNull().isInstanceOf(ObjectNode.class).hasSize(1); + assertThat(objectMapper.createObjectNode().put("$objectId", typedId.toString())) + .isEqualTo(idNode); + + // Then atomic value containers + assertThat(doc.queryBoolValues()).isEmpty(); + assertThat(doc.queryNullValues()).isEmpty(); + assertThat(doc.queryNumberValues()) + .isEqualTo(Map.of(JsonPath.from("value"), BigDecimal.valueOf(42))); + assertThat(doc.queryTextValues()).isEqualTo(Map.of(JsonPath.from("_id"), typedId.toString())); + } + + @Test + public void shredSimpleWithoutIdGenUUIDv4() throws Exception { + _testShredUUIDAutoGeneration(CollectionSettings.IdType.UUID, 4); + } + + @Test + public void shredSimpleWithoutIdGenUUIDv6() throws Exception { + _testShredUUIDAutoGeneration(CollectionSettings.IdType.UUID_V6, 6); + } + + @Test + public void shredSimpleWithoutIdGenUUIDv7() throws Exception { + _testShredUUIDAutoGeneration(CollectionSettings.IdType.UUID_V7, 7); + } + + private void _testShredUUIDAutoGeneration(CollectionSettings.IdType idType, int uuidVersion) + throws Exception { + final String inputJson = "{\"value\": 42}"; + final JsonNode inputDoc = objectMapper.readTree(inputJson); + WritableShreddedDocument doc = + shredder.shred( + inputDoc, + null, + DocumentProjector.identityProjector(), + "test", + CollectionSettings.empty().withIdType(idType)); + + DocumentId docId = doc.id(); + assertThat(docId).isInstanceOf(DocumentId.ExtensionTypeId.class); + + // should be auto-generated UUID of the expected version: verify by constructing from String + // representation + UUID typedId = UUIDUtil.uuid(((DocumentId.ExtensionTypeId) docId).valueAsString()); + assertThat(typedId.version()).isEqualTo(uuidVersion); + List expPaths = Arrays.asList(JsonPath.from("_id"), JsonPath.from("value")); + + assertThat(doc.existKeys()).isEqualTo(new HashSet<>(expPaths)); + assertThat(doc.arraySize()).isEmpty(); + assertThat(doc.arrayContains()).containsExactlyInAnyOrder("value N42"); + + // Also, the document
should be the same, including _id added: + ObjectNode jsonFromShredded = (ObjectNode) objectMapper.readTree(doc.docJson()); + JsonNode idNode = jsonFromShredded.get("_id"); + + assertThat(idNode).isNotNull().isInstanceOf(ObjectNode.class).hasSize(1); + assertThat(objectMapper.createObjectNode().put("$uuid", typedId.toString())) + .isEqualTo(idNode); + + // Then atomic value containers + assertThat(doc.queryBoolValues()).isEmpty(); + assertThat(doc.queryNullValues()).isEmpty(); + assertThat(doc.queryNumberValues()) + .isEqualTo(Map.of(JsonPath.from("value"), BigDecimal.valueOf(42))); + assertThat(doc.queryTextValues()).isEqualTo(Map.of(JsonPath.from("_id"), typedId.toString())); + } + } + @Nested class ErrorCasesDocId { @Test