From fb22cf0714543d1303e2783c193dcebf1c2b8376 Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Tue, 17 Sep 2024 11:26:50 -0700 Subject: [PATCH] Fix #1364: add tests, fixes to text value codecs (to CQL) (#1419) --- .../catchable/ToCQLCodecException.java | 32 ++++++++-- .../filters/table/codecs/JSONCodec.java | 31 ++++++++++ .../table/codecs/JSONCodecRegistry.java | 14 ++--- .../filters/table/codecs/JSONCodecs.java | 2 +- .../AbstractTableIntegrationTestBase.java | 4 +- .../tables/FindOneTableIntegrationTest.java | 58 +++++++++++++++++++ .../api/v1/util/DataApiResponseValidator.java | 19 ++++++ .../table/codecs/JSONCodecRegistryTest.java | 48 ++++++++++++++- .../codecs/JSONCodecRegistryTestData.java | 11 ++++ 9 files changed, 202 insertions(+), 17 deletions(-) diff --git a/src/main/java/io/stargate/sgv2/jsonapi/exception/catchable/ToCQLCodecException.java b/src/main/java/io/stargate/sgv2/jsonapi/exception/catchable/ToCQLCodecException.java index ef8cc22135..1f59981f89 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/exception/catchable/ToCQLCodecException.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/exception/catchable/ToCQLCodecException.java @@ -13,13 +13,35 @@ public class ToCQLCodecException extends CheckedApiException { public final Object value; public final DataType targetCQLType; - public ToCQLCodecException(Object value, DataType targetCQLType, Exception cause) { + public ToCQLCodecException(Object value, DataType targetCQLType, Exception rootCause) { super( - String.format( - "Error trying to convert to targetCQLType `%s` from value.class `%s`, value %s. Root cause: %s", - targetCQLType, value.getClass().getName(), value, cause.getMessage()), - cause); + formatMessage(value, targetCQLType, (rootCause == null) ? "NULL" : rootCause.getMessage()), + rootCause); this.value = value; this.targetCQLType = targetCQLType; } + + public ToCQLCodecException(Object value, DataType targetCQLType, String rootCauseMessage) { + super(formatMessage(value, targetCQLType, rootCauseMessage)); + this.value = value; + this.targetCQLType = targetCQLType; + } + + private static String formatMessage( + Object value, DataType targetCQLType, String rootCauseMessage) { + return String.format( + "Error trying to convert to targetCQLType `%s` from value.class `%s`, value %s. Root cause: %s", + targetCQLType, value.getClass().getName(), valueDesc(value), rootCauseMessage); + } + + // Add a place to slightly massage value; can be further improved + private static String valueDesc(Object value) { + if (value == null) { + return "null"; + } + if (value instanceof String) { + return "\"" + value + "\""; + } + return String.valueOf(value); + } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodec.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodec.java index 156a92a4c2..478e70cb2e 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodec.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodec.java @@ -5,6 +5,7 @@ import com.datastax.oss.driver.api.core.type.reflect.GenericType; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Preconditions; import io.stargate.sgv2.jsonapi.exception.catchable.ToCQLCodecException; import io.stargate.sgv2.jsonapi.exception.catchable.ToJSONCodecException; import io.stargate.sgv2.jsonapi.service.shredding.tables.RowShredder; @@ -167,6 +168,36 @@ static ToCQL unsafeIdentity() { return (toCQLType, value) -> value; } + /** + * Returns given String if (and only if) it only contains 7-bit ASCII characters; otherwise it + * will throw an {@link ToCQLCodecException} + */ + static String safeAscii(DataType targetTextType, String value) throws ToCQLCodecException { + // Not sure if this is strictly needed, but for now assert that the target type is ASCII + Preconditions.checkArgument( + targetTextType == DataTypes.ASCII, + "Should only be called for type DataTypes.ASCII, was called on %s", + targetTextType); + + for (int i = 0, len = value.length(); i < len; ++i) { + /* Check if the character is ASCII: internally Unicode characters in Java are 16-bit + * UCS-2 (similar to UTF-16) encoded, so only characters in the range 0x0000 to 0x007F are ASCII characters. + * This is not only true for "regular" (Basic-Multilingual Plane, BMP) characters, but also for + * Unicode surrogate pairs, which are used to encode characters outside the BMP. In latter + * case chars are in range above 0xD400, never confused with ASCII characters. + */ + if (value.charAt(i) > 0x7F) { + throw new ToCQLCodecException( + value, + targetTextType, + String.format( + "String contains non-ASCII character at index #%d (length=%d): \\u%04X", + i, len, (int) value.charAt(i))); + } + } + return value; + } + /** * Returns an instance that converts the value to the target type, catching any arithmetic * exceptions and throwing them as a {@link ToCQLCodecException} diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistry.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistry.java index 75eaaadc38..aab424d797 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistry.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistry.java @@ -113,15 +113,13 @@ public JSONCodec codecToJSON(DataType targetCQLType) } /** - * Same as {@link #internalCodecForToCQL(DataType, Object)} + * Like {@link #internalCodecForToCQL(DataType, Object)} but for converting in the opposite + * direction, from CQL type to JSON. * - * @param targetCQLType - * @return + * @param fromCQLType + * @return Codec to use for conversion, or `null` if none found. */ - private JSONCodec internalCodecForToJSON(DataType targetCQLType) { - return codecs.stream() - .filter(codec -> codec.testToJSON(targetCQLType)) - .findFirst() - .orElse(null); + private JSONCodec internalCodecForToJSON(DataType fromCQLType) { + return codecs.stream().filter(codec -> codec.testToJSON(fromCQLType)).findFirst().orElse(null); } } diff --git a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecs.java b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecs.java index d36a6e070d..54fc022e6d 100644 --- a/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecs.java +++ b/src/main/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecs.java @@ -204,7 +204,7 @@ public abstract class JSONCodecs { new JSONCodec<>( GenericType.STRING, DataTypes.ASCII, - JSONCodec.ToCQL.unsafeIdentity(), + JSONCodec.ToCQL::safeAscii, JSONCodec.ToJSON.unsafeNodeFactory(JsonNodeFactory.instance::textNode)); public static final JSONCodec TEXT = diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/AbstractTableIntegrationTestBase.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/AbstractTableIntegrationTestBase.java index 2638e8a0e6..b7fe7b385d 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/AbstractTableIntegrationTestBase.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/AbstractTableIntegrationTestBase.java @@ -48,8 +48,8 @@ protected DataApiResponseValidator deleteTable(String tableName) { .body("status.ok", is(1)); } - protected void insertOneInTable(String tableName, String documentJSON) { - DataApiCommandSenders.assertTableCommand(keyspaceName, tableName) + protected DataApiResponseValidator insertOneInTable(String tableName, String documentJSON) { + return DataApiCommandSenders.assertTableCommand(keyspaceName, tableName) .postInsertOne(documentJSON) .hasNoErrors() .body("status.insertedIds", hasSize(1)); diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/FindOneTableIntegrationTest.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/FindOneTableIntegrationTest.java index cf4a55f799..d39e941714 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/FindOneTableIntegrationTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/tables/FindOneTableIntegrationTest.java @@ -3,6 +3,7 @@ import io.quarkus.test.common.WithTestResource; import io.quarkus.test.junit.QuarkusIntegrationTest; import io.stargate.sgv2.jsonapi.api.v1.util.DataApiCommandSenders; +import io.stargate.sgv2.jsonapi.exception.DocumentException; import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1; import io.stargate.sgv2.jsonapi.testresource.DseTestResource; import java.util.Map; @@ -18,6 +19,7 @@ @TestClassOrder(ClassOrderer.OrderAnnotation.class) public class FindOneTableIntegrationTest extends AbstractTableIntegrationTestBase { static final String TABLE_WITH_STRING_ID_AGE_NAME = "findOneSingleStringKeyTable"; + static final String TABLE_WITH_TEXT_COLUMNS = "findOneTextColumnsTable"; @BeforeAll public final void createDefaultTables() { @@ -31,6 +33,16 @@ public final void createDefaultTables() { "name", Map.of("type", "text")), "id"); + createTableWithColumns( + TABLE_WITH_TEXT_COLUMNS, + Map.of( + "idText", + Map.of("type", "text"), + "asciiText", + Map.of("type", "ascii"), + "varcharText", + Map.of("type", "text")), + "idText"); } // On-empty tests to be run before ones that populate tables @@ -193,6 +205,52 @@ public void findOneDocIdKey() { @Nested @Order(3) + @TestClassOrder(ClassOrderer.OrderAnnotation.class) + class FindOneTextColumns { + public final String STRING_UTF8_WITH_2BYTE_CHAR = "utf8-2-byte-\u00a2"; // cent symbol + public final String STRING_UTF8_WITH_3BYTE_CHAR = "utf8-3-byte-\u20ac"; // euro symbol + + @Test + void insertWithTextColumnsAndFind() { + final String DOC_JSON = + """ + { + "idText": "abc", + "asciiText": "safe value", + "varcharText": "%s/%s" + } + """ + .formatted(STRING_UTF8_WITH_2BYTE_CHAR, STRING_UTF8_WITH_3BYTE_CHAR); + insertOneInTable(TABLE_WITH_TEXT_COLUMNS, DOC_JSON); + + DataApiCommandSenders.assertTableCommand(keyspaceName, TABLE_WITH_TEXT_COLUMNS) + .postFindOne("{ \"filter\": { \"idText\": \"abc\" } }") + .hasNoErrors() + .hasJSONField("data.document", DOC_JSON); + } + + @Test + void failTryingToInsertNonAscii() { + final String DOC_JSON = + """ + { + "idText": "def", + "asciiText": "%s", + "varcharText": "safe value" + } + """ + .formatted(STRING_UTF8_WITH_2BYTE_CHAR); + DataApiCommandSenders.assertTableCommand(keyspaceName, TABLE_WITH_TEXT_COLUMNS) + .postInsertOne(DOC_JSON) + .hasSingleApiError( + DocumentException.Code.INVALID_COLUMN_VALUES, + DocumentException.class, + "String contains non-ASCII character"); + } + } + + @Nested + @Order(4) class FindOneFail { @Test @Order(1) diff --git a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/util/DataApiResponseValidator.java b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/util/DataApiResponseValidator.java index 542d81799d..19ede549c8 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/api/v1/util/DataApiResponseValidator.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/api/v1/util/DataApiResponseValidator.java @@ -7,6 +7,8 @@ import static org.hamcrest.Matchers.nullValue; import io.restassured.response.ValidatableResponse; +import io.stargate.sgv2.jsonapi.exception.APIException; +import io.stargate.sgv2.jsonapi.exception.ErrorCode; import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1; import org.hamcrest.Matcher; @@ -52,6 +54,23 @@ public DataApiResponseValidator hasSingleApiError( return hasSingleApiError(errorCode).body("errors[0].message", messageMatcher); } + public DataApiResponseValidator hasSingleApiError( + ErrorCode errorCode, Class errorClass) { + return body("errors", hasSize(1)) + .body("errors[0].exceptionClass", is(errorClass.getSimpleName())) + .body("errors[0].errorCode", is(errorCode.toString())); + } + + public DataApiResponseValidator hasSingleApiError( + ErrorCode errorCode, Class errorClass, String messageSnippet) { + return hasSingleApiError(errorCode, errorClass, containsString(messageSnippet)); + } + + public DataApiResponseValidator hasSingleApiError( + ErrorCode errorCode, Class errorClass, Matcher messageMatcher) { + return hasSingleApiError(errorCode, errorClass).body("errors[0].message", messageMatcher); + } + // // // API-aware validation: non-error content // // // public DataApiResponseValidator hasNoField(String path) { diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistryTest.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistryTest.java index 371ccce190..74ce045ae2 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistryTest.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistryTest.java @@ -115,7 +115,19 @@ private static Stream validCodecToCQLTestCases() { Arguments.of(DataTypes.DOUBLE, BigDecimal.valueOf(0.25), Double.valueOf(0.25)), Arguments.of(DataTypes.FLOAT, 123L, Float.valueOf(123L)), Arguments.of(DataTypes.FLOAT, BigInteger.valueOf(34567L), Float.valueOf(34567L)), - Arguments.of(DataTypes.FLOAT, BigDecimal.valueOf(0.25), Float.valueOf(0.25f))); + Arguments.of(DataTypes.FLOAT, BigDecimal.valueOf(0.25), Float.valueOf(0.25f)), + + // Textual types: ASCII, TEXT (VARCHAR is an alias for TEXT). + Arguments.of(DataTypes.ASCII, TEST_DATA.STRING_ASCII_SAFE, TEST_DATA.STRING_ASCII_SAFE), + Arguments.of(DataTypes.TEXT, TEST_DATA.STRING_ASCII_SAFE, TEST_DATA.STRING_ASCII_SAFE), + Arguments.of( + DataTypes.TEXT, + TEST_DATA.STRING_WITH_2BYTE_UTF8_CHAR, + TEST_DATA.STRING_WITH_2BYTE_UTF8_CHAR), + Arguments.of( + DataTypes.TEXT, + TEST_DATA.STRING_WITH_3BYTE_UTF8_CHAR, + TEST_DATA.STRING_WITH_3BYTE_UTF8_CHAR)); } @Test @@ -276,4 +288,38 @@ private static Stream nonExactToCqlIntegerTestCases() { Arguments.of(DataTypes.TINYINT, TEST_DATA.NOT_EXACT_AS_INTEGER), Arguments.of(DataTypes.VARINT, TEST_DATA.NOT_EXACT_AS_INTEGER)); } + + @ParameterizedTest + @MethodSource("nonAsciiValueFailTestCases") + public void nonAsciiValueFail(String valueToTest) { + var codec = assertGetCodecToCQL(DataTypes.ASCII, valueToTest); + + var error = + assertThrowsExactly( + ToCQLCodecException.class, + () -> codec.toCQL(valueToTest), + String.format( + "Throw ToCQLCodecException when attempting to convert `%s` from non-ASCII value %s", + DataTypes.ASCII, valueToTest)); + + assertThat(error) + .satisfies( + e -> { + assertThat(e.targetCQLType).isEqualTo(DataTypes.ASCII); + assertThat(e.value).isEqualTo(valueToTest); + + assertThat(e.getMessage()) + .contains(DataTypes.ASCII.toString()) + .contains(valueToTest.getClass().getName()) + .contains(valueToTest.toString()) + .contains("Root cause: String contains non-ASCII character at index"); + }); + } + + private static Stream nonAsciiValueFailTestCases() { + return Stream.of( + Arguments.of(TEST_DATA.STRING_WITH_2BYTE_UTF8_CHAR), + Arguments.of(TEST_DATA.STRING_WITH_3BYTE_UTF8_CHAR), + Arguments.of(TEST_DATA.STRING_WITH_4BYTE_SURROGATE_CHAR)); + } } diff --git a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistryTestData.java b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistryTestData.java index cdee691f6d..1c29f6c9b2 100644 --- a/src/test/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistryTestData.java +++ b/src/test/java/io/stargate/sgv2/jsonapi/service/operation/filters/table/codecs/JSONCodecRegistryTestData.java @@ -38,6 +38,17 @@ public class JSONCodecRegistryTestData { public final BigDecimal NOT_EXACT_AS_INTEGER = new BigDecimal("1.25"); + public final String STRING_ASCII_SAFE = "ascii-safe-string"; + + public final String STRING_WITH_2BYTE_UTF8_CHAR = "text-with-2-byte-utf8-\u00a2"; // cent symbol + + public final String STRING_WITH_3BYTE_UTF8_CHAR = "text-with-3-byte-utf8-\u20ac"; // euro symbol + + // 4 byte / 2 UCS-2 char Unicode Surrogate Pair characters (see + // https://codepoints.net/U+10437?lang=en) + public final String STRING_WITH_4BYTE_SURROGATE_CHAR = + "text-with-4-byte-surrogate-\uD801\uDC37"; // Deseret + /** * Returns a mocked {@link TableMetadata} that has a column of the specified type. *