Skip to content

Commit

Permalink
Fix #1364: add tests, fixes to text value codecs (to CQL) (#1419)
Browse files Browse the repository at this point in the history
  • Loading branch information
tatu-at-datastax authored Sep 17, 2024
1 parent 292ff5d commit fb22cf0
Show file tree
Hide file tree
Showing 9 changed files with 202 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,35 @@ public class ToCQLCodecException extends CheckedApiException {
public final Object value;
public final DataType targetCQLType;

public ToCQLCodecException(Object value, DataType targetCQLType, Exception cause) {
public ToCQLCodecException(Object value, DataType targetCQLType, Exception rootCause) {
super(
String.format(
"Error trying to convert to targetCQLType `%s` from value.class `%s`, value %s. Root cause: %s",
targetCQLType, value.getClass().getName(), value, cause.getMessage()),
cause);
formatMessage(value, targetCQLType, (rootCause == null) ? "NULL" : rootCause.getMessage()),
rootCause);
this.value = value;
this.targetCQLType = targetCQLType;
}

public ToCQLCodecException(Object value, DataType targetCQLType, String rootCauseMessage) {
super(formatMessage(value, targetCQLType, rootCauseMessage));
this.value = value;
this.targetCQLType = targetCQLType;
}

private static String formatMessage(
Object value, DataType targetCQLType, String rootCauseMessage) {
return String.format(
"Error trying to convert to targetCQLType `%s` from value.class `%s`, value %s. Root cause: %s",
targetCQLType, value.getClass().getName(), valueDesc(value), rootCauseMessage);
}

// Add a place to slightly massage value; can be further improved
private static String valueDesc(Object value) {
if (value == null) {
return "null";
}
if (value instanceof String) {
return "\"" + value + "\"";
}
return String.valueOf(value);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.datastax.oss.driver.api.core.type.reflect.GenericType;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import io.stargate.sgv2.jsonapi.exception.catchable.ToCQLCodecException;
import io.stargate.sgv2.jsonapi.exception.catchable.ToJSONCodecException;
import io.stargate.sgv2.jsonapi.service.shredding.tables.RowShredder;
Expand Down Expand Up @@ -167,6 +168,36 @@ static <JavaT> ToCQL<JavaT, JavaT> unsafeIdentity() {
return (toCQLType, value) -> value;
}

/**
* Returns given String if (and only if) it only contains 7-bit ASCII characters; otherwise it
* will throw an {@link ToCQLCodecException}
*/
static String safeAscii(DataType targetTextType, String value) throws ToCQLCodecException {
// Not sure if this is strictly needed, but for now assert that the target type is ASCII
Preconditions.checkArgument(
targetTextType == DataTypes.ASCII,
"Should only be called for type DataTypes.ASCII, was called on %s",
targetTextType);

for (int i = 0, len = value.length(); i < len; ++i) {
/* Check if the character is ASCII: internally Unicode characters in Java are 16-bit
* UCS-2 (similar to UTF-16) encoded, so only characters in the range 0x0000 to 0x007F are ASCII characters.
* This is not only true for "regular" (Basic-Multilingual Plane, BMP) characters, but also for
* Unicode surrogate pairs, which are used to encode characters outside the BMP. In latter
* case chars are in range above 0xD400, never confused with ASCII characters.
*/
if (value.charAt(i) > 0x7F) {
throw new ToCQLCodecException(
value,
targetTextType,
String.format(
"String contains non-ASCII character at index #%d (length=%d): \\u%04X",
i, len, (int) value.charAt(i)));
}
}
return value;
}

/**
* Returns an instance that converts the value to the target type, catching any arithmetic
* exceptions and throwing them as a {@link ToCQLCodecException}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,13 @@ public <JavaT, CqlT> JSONCodec<JavaT, CqlT> codecToJSON(DataType targetCQLType)
}

/**
* Same as {@link #internalCodecForToCQL(DataType, Object)}
* Like {@link #internalCodecForToCQL(DataType, Object)} but for converting in the opposite
* direction, from CQL type to JSON.
*
* @param targetCQLType
* @return
* @param fromCQLType
* @return Codec to use for conversion, or `null` if none found.
*/
private JSONCodec<?, ?> internalCodecForToJSON(DataType targetCQLType) {
return codecs.stream()
.filter(codec -> codec.testToJSON(targetCQLType))
.findFirst()
.orElse(null);
private JSONCodec<?, ?> internalCodecForToJSON(DataType fromCQLType) {
return codecs.stream().filter(codec -> codec.testToJSON(fromCQLType)).findFirst().orElse(null);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ public abstract class JSONCodecs {
new JSONCodec<>(
GenericType.STRING,
DataTypes.ASCII,
JSONCodec.ToCQL.unsafeIdentity(),
JSONCodec.ToCQL::safeAscii,
JSONCodec.ToJSON.unsafeNodeFactory(JsonNodeFactory.instance::textNode));

public static final JSONCodec<String, String> TEXT =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ protected DataApiResponseValidator deleteTable(String tableName) {
.body("status.ok", is(1));
}

protected void insertOneInTable(String tableName, String documentJSON) {
DataApiCommandSenders.assertTableCommand(keyspaceName, tableName)
protected DataApiResponseValidator insertOneInTable(String tableName, String documentJSON) {
return DataApiCommandSenders.assertTableCommand(keyspaceName, tableName)
.postInsertOne(documentJSON)
.hasNoErrors()
.body("status.insertedIds", hasSize(1));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import io.quarkus.test.common.WithTestResource;
import io.quarkus.test.junit.QuarkusIntegrationTest;
import io.stargate.sgv2.jsonapi.api.v1.util.DataApiCommandSenders;
import io.stargate.sgv2.jsonapi.exception.DocumentException;
import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1;
import io.stargate.sgv2.jsonapi.testresource.DseTestResource;
import java.util.Map;
Expand All @@ -18,6 +19,7 @@
@TestClassOrder(ClassOrderer.OrderAnnotation.class)
public class FindOneTableIntegrationTest extends AbstractTableIntegrationTestBase {
static final String TABLE_WITH_STRING_ID_AGE_NAME = "findOneSingleStringKeyTable";
static final String TABLE_WITH_TEXT_COLUMNS = "findOneTextColumnsTable";

@BeforeAll
public final void createDefaultTables() {
Expand All @@ -31,6 +33,16 @@ public final void createDefaultTables() {
"name",
Map.of("type", "text")),
"id");
createTableWithColumns(
TABLE_WITH_TEXT_COLUMNS,
Map.of(
"idText",
Map.of("type", "text"),
"asciiText",
Map.of("type", "ascii"),
"varcharText",
Map.of("type", "text")),
"idText");
}

// On-empty tests to be run before ones that populate tables
Expand Down Expand Up @@ -193,6 +205,52 @@ public void findOneDocIdKey() {

@Nested
@Order(3)
@TestClassOrder(ClassOrderer.OrderAnnotation.class)
class FindOneTextColumns {
public final String STRING_UTF8_WITH_2BYTE_CHAR = "utf8-2-byte-\u00a2"; // cent symbol
public final String STRING_UTF8_WITH_3BYTE_CHAR = "utf8-3-byte-\u20ac"; // euro symbol

@Test
void insertWithTextColumnsAndFind() {
final String DOC_JSON =
"""
{
"idText": "abc",
"asciiText": "safe value",
"varcharText": "%s/%s"
}
"""
.formatted(STRING_UTF8_WITH_2BYTE_CHAR, STRING_UTF8_WITH_3BYTE_CHAR);
insertOneInTable(TABLE_WITH_TEXT_COLUMNS, DOC_JSON);

DataApiCommandSenders.assertTableCommand(keyspaceName, TABLE_WITH_TEXT_COLUMNS)
.postFindOne("{ \"filter\": { \"idText\": \"abc\" } }")
.hasNoErrors()
.hasJSONField("data.document", DOC_JSON);
}

@Test
void failTryingToInsertNonAscii() {
final String DOC_JSON =
"""
{
"idText": "def",
"asciiText": "%s",
"varcharText": "safe value"
}
"""
.formatted(STRING_UTF8_WITH_2BYTE_CHAR);
DataApiCommandSenders.assertTableCommand(keyspaceName, TABLE_WITH_TEXT_COLUMNS)
.postInsertOne(DOC_JSON)
.hasSingleApiError(
DocumentException.Code.INVALID_COLUMN_VALUES,
DocumentException.class,
"String contains non-ASCII character");
}
}

@Nested
@Order(4)
class FindOneFail {
@Test
@Order(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import static org.hamcrest.Matchers.nullValue;

import io.restassured.response.ValidatableResponse;
import io.stargate.sgv2.jsonapi.exception.APIException;
import io.stargate.sgv2.jsonapi.exception.ErrorCode;
import io.stargate.sgv2.jsonapi.exception.ErrorCodeV1;
import org.hamcrest.Matcher;

Expand Down Expand Up @@ -52,6 +54,23 @@ public DataApiResponseValidator hasSingleApiError(
return hasSingleApiError(errorCode).body("errors[0].message", messageMatcher);
}

public <T extends APIException> DataApiResponseValidator hasSingleApiError(
ErrorCode<T> errorCode, Class<T> errorClass) {
return body("errors", hasSize(1))
.body("errors[0].exceptionClass", is(errorClass.getSimpleName()))
.body("errors[0].errorCode", is(errorCode.toString()));
}

public <T extends APIException> DataApiResponseValidator hasSingleApiError(
ErrorCode<T> errorCode, Class<T> errorClass, String messageSnippet) {
return hasSingleApiError(errorCode, errorClass, containsString(messageSnippet));
}

public <T extends APIException> DataApiResponseValidator hasSingleApiError(
ErrorCode<T> errorCode, Class<T> errorClass, Matcher<String> messageMatcher) {
return hasSingleApiError(errorCode, errorClass).body("errors[0].message", messageMatcher);
}

// // // API-aware validation: non-error content // // //

public DataApiResponseValidator hasNoField(String path) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,19 @@ private static Stream<Arguments> validCodecToCQLTestCases() {
Arguments.of(DataTypes.DOUBLE, BigDecimal.valueOf(0.25), Double.valueOf(0.25)),
Arguments.of(DataTypes.FLOAT, 123L, Float.valueOf(123L)),
Arguments.of(DataTypes.FLOAT, BigInteger.valueOf(34567L), Float.valueOf(34567L)),
Arguments.of(DataTypes.FLOAT, BigDecimal.valueOf(0.25), Float.valueOf(0.25f)));
Arguments.of(DataTypes.FLOAT, BigDecimal.valueOf(0.25), Float.valueOf(0.25f)),

// Textual types: ASCII, TEXT (VARCHAR is an alias for TEXT).
Arguments.of(DataTypes.ASCII, TEST_DATA.STRING_ASCII_SAFE, TEST_DATA.STRING_ASCII_SAFE),
Arguments.of(DataTypes.TEXT, TEST_DATA.STRING_ASCII_SAFE, TEST_DATA.STRING_ASCII_SAFE),
Arguments.of(
DataTypes.TEXT,
TEST_DATA.STRING_WITH_2BYTE_UTF8_CHAR,
TEST_DATA.STRING_WITH_2BYTE_UTF8_CHAR),
Arguments.of(
DataTypes.TEXT,
TEST_DATA.STRING_WITH_3BYTE_UTF8_CHAR,
TEST_DATA.STRING_WITH_3BYTE_UTF8_CHAR));
}

@Test
Expand Down Expand Up @@ -276,4 +288,38 @@ private static Stream<Arguments> nonExactToCqlIntegerTestCases() {
Arguments.of(DataTypes.TINYINT, TEST_DATA.NOT_EXACT_AS_INTEGER),
Arguments.of(DataTypes.VARINT, TEST_DATA.NOT_EXACT_AS_INTEGER));
}

@ParameterizedTest
@MethodSource("nonAsciiValueFailTestCases")
public void nonAsciiValueFail(String valueToTest) {
var codec = assertGetCodecToCQL(DataTypes.ASCII, valueToTest);

var error =
assertThrowsExactly(
ToCQLCodecException.class,
() -> codec.toCQL(valueToTest),
String.format(
"Throw ToCQLCodecException when attempting to convert `%s` from non-ASCII value %s",
DataTypes.ASCII, valueToTest));

assertThat(error)
.satisfies(
e -> {
assertThat(e.targetCQLType).isEqualTo(DataTypes.ASCII);
assertThat(e.value).isEqualTo(valueToTest);

assertThat(e.getMessage())
.contains(DataTypes.ASCII.toString())
.contains(valueToTest.getClass().getName())
.contains(valueToTest.toString())
.contains("Root cause: String contains non-ASCII character at index");
});
}

private static Stream<Arguments> nonAsciiValueFailTestCases() {
return Stream.of(
Arguments.of(TEST_DATA.STRING_WITH_2BYTE_UTF8_CHAR),
Arguments.of(TEST_DATA.STRING_WITH_3BYTE_UTF8_CHAR),
Arguments.of(TEST_DATA.STRING_WITH_4BYTE_SURROGATE_CHAR));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,17 @@ public class JSONCodecRegistryTestData {

public final BigDecimal NOT_EXACT_AS_INTEGER = new BigDecimal("1.25");

public final String STRING_ASCII_SAFE = "ascii-safe-string";

public final String STRING_WITH_2BYTE_UTF8_CHAR = "text-with-2-byte-utf8-\u00a2"; // cent symbol

public final String STRING_WITH_3BYTE_UTF8_CHAR = "text-with-3-byte-utf8-\u20ac"; // euro symbol

// 4 byte / 2 UCS-2 char Unicode Surrogate Pair characters (see
// https://codepoints.net/U+10437?lang=en)
public final String STRING_WITH_4BYTE_SURROGATE_CHAR =
"text-with-4-byte-surrogate-\uD801\uDC37"; // Deseret

/**
* Returns a mocked {@link TableMetadata} that has a column of the specified type.
*
Expand Down

0 comments on commit fb22cf0

Please sign in to comment.