From e55f807ac344c5b147075dde924432f6b682642f Mon Sep 17 00:00:00 2001 From: Leonid Startsev Date: Mon, 7 Aug 2023 19:35:38 +0200 Subject: [PATCH] Test & fix several exception messages from Json parser To avoid cryptic and incorrect ones, such as `Expected quotation mark '"', but had '"' instead` or `unexpected token: 10`. Fixes #2360 Fixes #2399 Also remove @PublishedApi from BATCH_SIZE to remove it from public API dump. --- docs/basic-serialization.md | 2 +- .../json/JsonErrorMessagesTest.kt | 165 ++++++++++++++++++ .../features/JsonSequencePathTest.kt | 2 +- .../json/api/kotlinx-serialization-json.api | 4 - .../json/internal/JsonTreeReader.kt | 2 +- .../json/internal/TreeJsonDecoder.kt | 4 +- .../json/internal/lexer/AbstractJsonLexer.kt | 60 ++++--- .../json/internal/lexer/JsonLexer.kt | 1 - .../json/internal/lexer/StringJsonLexer.kt | 8 +- guide/test/BasicSerializationTest.kt | 2 +- 10 files changed, 217 insertions(+), 33 deletions(-) create mode 100644 formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonErrorMessagesTest.kt diff --git a/docs/basic-serialization.md b/docs/basic-serialization.md index a897de6727..3853376eee 100644 --- a/docs/basic-serialization.md +++ b/docs/basic-serialization.md @@ -534,7 +534,7 @@ the `null` value to it. ```text Exception in thread "main" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected string literal but 'null' literal was found at path: $.language -Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values. +Use 'coerceInputValues = true' in 'Json {}' builder to coerce nulls to default values. ``` diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonErrorMessagesTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonErrorMessagesTest.kt new file mode 100644 index 0000000000..8c16ac01f2 --- /dev/null +++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonErrorMessagesTest.kt @@ -0,0 +1,165 @@ +/* + * Copyright 2017-2023 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license. + */ + + +package kotlinx.serialization.json + +import kotlinx.serialization.* +import kotlin.test.* + + +class JsonErrorMessagesTest : JsonTestBase() { + + @Test + fun testJsonTokensAreProperlyReported() = parametrizedTest { mode -> + val input1 = """{"boxed":4}""" + val input2 = """{"boxed":"str"}""" + + val serString = serializer>() + val serInt = serializer>() + + checkSerializationException({ + default.decodeFromString(serString, input1, mode) + }, { message -> + if (mode == JsonTestingMode.TREE) + assertContains(message, "String literal for key 'boxed' should be quoted.") + else + assertContains( + message, + "Unexpected JSON token at offset 9: Expected quotation mark '\"', but had '4' instead at path: \$.boxed" + ) + }) + + checkSerializationException({ + default.decodeFromString(serInt, input2, mode) + }, { message -> + if (mode != JsonTestingMode.TREE) + // we allow number values to be quoted, so the message pointing to 's' is correct + assertContains( + message, + "Unexpected JSON token at offset 9: Unexpected symbol 's' in numeric literal at path: \$.boxed" + ) + else + assertContains(message, "Failed to parse literal as 'int' value") + }) + } + + @Test + fun testMissingClosingQuote() = parametrizedTest { mode -> + val input1 = """{"boxed:4}""" + val input2 = """{"boxed":"str}""" + val input3 = """{"boxed:"str"}""" + val serString = serializer>() + val serInt = serializer>() + + checkSerializationException({ + default.decodeFromString(serInt, input1, mode) + }, { message -> + // For discussion: + // Technically, both of these messages are correct despite them being completely different. + // A `:` instead of `"` is a good guess, but `:`/`}` is a perfectly valid token inside Json string — for example, + // it can be some kind of path `{"foo:bar:baz":"my:resource:locator:{123}"}` or even URI used as a string key/value. + // So if the closing quote is missing, there's really no way to correctly tell where the key or value is supposed to end. + // Although we may try to unify these messages for consistency. + if (mode in setOf(JsonTestingMode.STREAMING, JsonTestingMode.TREE)) + assertContains( + message, + "Unexpected JSON token at offset 7: Expected quotation mark '\"', but had ':' instead at path: \$" + ) + else + assertContains( + message, "Unexpected EOF at path: \$" + ) + }) + + checkSerializationException({ + default.decodeFromString(serString, input2, mode) + }, { message -> + if (mode in setOf(JsonTestingMode.STREAMING, JsonTestingMode.TREE)) + assertContains( + message, + "Unexpected JSON token at offset 13: Expected quotation mark '\"', but had '}' instead at path: \$" + ) + else + assertContains(message, "Unexpected EOF at path: \$.boxed") + }) + + checkSerializationException({ + default.decodeFromString(serString, input3, mode) + }, { message -> + assertContains( + message, + "Unexpected JSON token at offset 9: Expected colon ':', but had 's' instead at path: \$" + ) + }) + } + + @Test + fun testUnquoted() = parametrizedTest { mode -> + val input1 = """{boxed:str}""" + val input2 = """{"boxed":str}""" + val ser = serializer>() + + checkSerializationException({ + default.decodeFromString(ser, input1, mode) + }, { message -> + assertContains( + message, + """Unexpected JSON token at offset 1: Expected quotation mark '"', but had 'b' instead at path: ${'$'}""" + ) + }) + + checkSerializationException({ + default.decodeFromString(ser, input2, mode) + }, { message -> + if (mode == JsonTestingMode.TREE) assertContains( + message, + """String literal for key 'boxed' should be quoted.""" + ) + else assertContains( + message, + """Unexpected JSON token at offset 9: Expected quotation mark '"', but had 's' instead at path: ${'$'}.boxed""" + ) + }) + } + + @Test + fun testNullLiteralForNotNull() = parametrizedTest { mode -> + val input = """{"boxed":null}""" + val ser = serializer>() + checkSerializationException({ + default.decodeFromString(ser, input, mode) + }, { message -> + if (mode == JsonTestingMode.TREE) + assertContains(message, "Unexpected 'null' literal when non-nullable string was expected") + else + assertContains( + message, + "Unexpected JSON token at offset 9: Expected string literal but 'null' literal was found at path: \$.boxed" + ) + }) + } + + @Test + fun testEof() = parametrizedTest { mode -> + val input = """{"boxed":""" + checkSerializationException({ + default.decodeFromString>(input, mode) + }, { message -> + if (mode == JsonTestingMode.TREE) + assertContains(message, "Cannot read Json element because of unexpected end of the input at path: $") + else + assertContains(message, "Expected quotation mark '\"', but had 'EOF' instead at path: \$.boxed") + + }) + + } + + private fun checkSerializationException(action: () -> Unit, assertions: SerializationException.(String) -> Unit) { + val e = assertFailsWith(SerializationException::class, action) + assertNotNull(e.message) + e.assertions(e.message!!) + } + +} diff --git a/formats/json-tests/jvmTest/src/kotlinx/serialization/features/JsonSequencePathTest.kt b/formats/json-tests/jvmTest/src/kotlinx/serialization/features/JsonSequencePathTest.kt index 397caffdf8..554deab16e 100644 --- a/formats/json-tests/jvmTest/src/kotlinx/serialization/features/JsonSequencePathTest.kt +++ b/formats/json-tests/jvmTest/src/kotlinx/serialization/features/JsonSequencePathTest.kt @@ -25,7 +25,7 @@ class JsonSequencePathTest { val iterator = Json.decodeToSequence(source).iterator() iterator.next() // Ignore assertFailsWithMessage( - "Expected quotation mark '\"', but had '2' instead at path: \$.data.s" + "Expected quotation mark '\"', but had '4' instead at path: \$.data.s" ) { iterator.next() } } diff --git a/formats/json/api/kotlinx-serialization-json.api b/formats/json/api/kotlinx-serialization-json.api index 3b97d17474..e6a2328dcd 100644 --- a/formats/json/api/kotlinx-serialization-json.api +++ b/formats/json/api/kotlinx-serialization-json.api @@ -383,10 +383,6 @@ public final class kotlinx/serialization/json/JvmStreamsKt { public static final fun encodeToStream (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/SerializationStrategy;Ljava/lang/Object;Ljava/io/OutputStream;)V } -public final class kotlinx/serialization/json/internal/JsonLexerKt { - public static final field BATCH_SIZE I -} - public final class kotlinx/serialization/json/internal/JsonStreamsKt { public static final fun decodeByReader (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/DeserializationStrategy;Lkotlinx/serialization/json/internal/SerialReader;)Ljava/lang/Object; public static final fun decodeToSequenceByReader (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/json/internal/SerialReader;Lkotlinx/serialization/DeserializationStrategy;Lkotlinx/serialization/json/DecodeSequenceMode;)Lkotlin/sequences/Sequence; diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt index 7c01daa8fc..060c36bd2d 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonTreeReader.kt @@ -101,7 +101,7 @@ internal class JsonTreeReader( result } TC_BEGIN_LIST -> readArray() - else -> lexer.fail("Cannot begin reading element, unexpected token: $token") + else -> lexer.fail("Cannot read Json element because of unexpected ${tokenDescription(token)}") } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt index 365ad2eca0..40a3e41f54 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt @@ -143,7 +143,7 @@ private sealed class AbstractJsonTreeDecoder( } private fun unparsedPrimitive(primitive: String): Nothing { - throw JsonDecodingException(-1, "Failed to parse '$primitive'", currentObject().toString()) + throw JsonDecodingException(-1, "Failed to parse literal as '$primitive' value", currentObject().toString()) } override fun decodeTaggedString(tag: String): String { @@ -159,7 +159,7 @@ private sealed class AbstractJsonTreeDecoder( } private fun JsonPrimitive.asLiteral(type: String): JsonLiteral { - return this as? JsonLiteral ?: throw JsonDecodingException(-1, "Unexpected 'null' when $type was expected") + return this as? JsonLiteral ?: throw JsonDecodingException(-1, "Unexpected 'null' literal when non-nullable $type was expected") } override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder = diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt index 3e2c6ad425..a1a6a5eed1 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt @@ -10,8 +10,8 @@ import kotlin.js.* import kotlin.jvm.* import kotlin.math.* -internal const val lenientHint = "Use 'isLenient = true' in 'Json {}` builder to accept non-compliant JSON." -internal const val coerceInputValuesHint = "Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values." +internal const val lenientHint = "Use 'isLenient = true' in 'Json {}' builder to accept non-compliant JSON." +internal const val coerceInputValuesHint = "Use 'coerceInputValues = true' in 'Json {}' builder to coerce nulls to default values." internal const val specialFlowingValuesHint = "It is possible to deserialize them using 'JsonBuilder.allowSpecialFloatingPointValues = true'" internal const val ignoreUnknownKeysHint = "Use 'ignoreUnknownKeys = true' in 'Json {}' builder to ignore unknown keys." @@ -56,6 +56,20 @@ private const val ESC2C_MAX = 0x75 internal const val asciiCaseMask = 1 shl 5 +internal fun tokenDescription(token: Byte) = when (token) { + TC_STRING -> "quotation mark '\"'" + TC_STRING_ESC -> "string escape sequence '\\'" + TC_COMMA -> "comma ','" + TC_COLON -> "colon ':'" + TC_BEGIN_OBJ -> "start of the object '{'" + TC_END_OBJ -> "end of the object '}'" + TC_BEGIN_LIST -> "start of the array '['" + TC_END_LIST -> "end of the array ']'" + TC_EOF -> "end of the input" + TC_INVALID -> "invalid token" + else -> "valid token" // should never happen +} + // object instead of @SharedImmutable because there is mutual initialization in [initC2ESC] and [initC2TC] internal object CharMappings { @JvmField @@ -200,28 +214,23 @@ internal abstract class AbstractJsonLexer { } protected fun unexpectedToken(expected: Char) { - --currentPosition // To properly handle null - if (currentPosition >= 0 && expected == STRING && consumeStringLenient() == NULL) { - fail("Expected string literal but 'null' literal was found", currentPosition - 4, coerceInputValuesHint) + if (currentPosition > 0 && expected == STRING) { + val inputLiteral = withPositionRollback { + currentPosition-- + consumeStringLenient() + } + if (inputLiteral == NULL) + fail("Expected string literal but 'null' literal was found", currentPosition - 1, coerceInputValuesHint) } fail(charToTokenClass(expected)) } - internal fun fail(expectedToken: Byte): Nothing { - // We know that the token was consumed prior to this call + internal fun fail(expectedToken: Byte, wasConsumed: Boolean = true): Nothing { // Slow path, never called in normal code, can avoid optimizing it - val expected = when (expectedToken) { - TC_STRING -> "quotation mark '\"'" - TC_COMMA -> "comma ','" - TC_COLON -> "colon ':'" - TC_BEGIN_OBJ -> "start of the object '{'" - TC_END_OBJ -> "end of the object '}'" - TC_BEGIN_LIST -> "start of the array '['" - TC_END_LIST -> "end of the array ']'" - else -> "valid token" // should never happen - } - val s = if (currentPosition == source.length || currentPosition <= 0) "EOF" else source[currentPosition - 1].toString() - fail("Expected $expected, but had '$s' instead", currentPosition - 1) + val expected = tokenDescription(expectedToken) + val position = if (wasConsumed) currentPosition - 1 else currentPosition + val s = if (currentPosition == source.length || position < 0) "EOF" else source[position].toString() + fail("Expected $expected, but had '$s' instead", position) } fun peekNextToken(): Byte { @@ -385,7 +394,7 @@ internal abstract class AbstractJsonLexer { usedAppend = true currentPosition = prefetchOrEof(appendEscape(lastPosition, currentPosition)) if (currentPosition == -1) - fail("EOF", currentPosition) + fail("Unexpected EOF", currentPosition) lastPosition = currentPosition } else if (++currentPosition >= source.length) { usedAppend = true @@ -393,7 +402,7 @@ internal abstract class AbstractJsonLexer { appendRange(lastPosition, currentPosition) currentPosition = prefetchOrEof(currentPosition) if (currentPosition == -1) - fail("EOF", currentPosition) + fail("Unexpected EOF", currentPosition) lastPosition = currentPosition } char = source[currentPosition] @@ -743,4 +752,13 @@ internal abstract class AbstractJsonLexer { currentPosition = current + literalSuffix.length } + + private inline fun withPositionRollback(action: () -> T): T { + val snapshot = currentPosition + try { + return action() + } finally { + currentPosition = snapshot + } + } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt index 85ef13ab95..3c785f86e9 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt @@ -6,7 +6,6 @@ package kotlinx.serialization.json.internal -@PublishedApi internal const val BATCH_SIZE: Int = 16 * 1024 private const val DEFAULT_THRESHOLD = 128 diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt index 717932c7d5..9f2e519020 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt @@ -73,6 +73,7 @@ internal class StringJsonLexer(override val source: String) : AbstractJsonLexer( if (c == expected) return unexpectedToken(expected) } + currentPosition = -1 // for correct EOF reporting unexpectedToken(expected) // EOF } @@ -85,7 +86,12 @@ internal class StringJsonLexer(override val source: String) : AbstractJsonLexer( consumeNextToken(STRING) val current = currentPosition val closingQuote = source.indexOf('"', current) - if (closingQuote == -1) fail(TC_STRING) + if (closingQuote == -1) { + // advance currentPosition to a token after the end of the string to guess position in the error msg + // (not always correct, as `:`/`,` are valid contents of the string, but good guess anyway) + consumeStringLenient() + fail(TC_STRING, wasConsumed = false) + } // Now we _optimistically_ know where the string ends (it might have been an escaped quote) for (i in current until closingQuote) { // Encountered escape sequence, should fallback to "slow" path and symbolic scanning diff --git a/guide/test/BasicSerializationTest.kt b/guide/test/BasicSerializationTest.kt index abb7eb87be..dc89feb69c 100644 --- a/guide/test/BasicSerializationTest.kt +++ b/guide/test/BasicSerializationTest.kt @@ -110,7 +110,7 @@ class BasicSerializationTest { fun testExampleClasses12() { captureOutput("ExampleClasses12") { example.exampleClasses12.main() }.verifyOutputLinesStart( "Exception in thread \"main\" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected string literal but 'null' literal was found at path: $.language", - "Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values." + "Use 'coerceInputValues = true' in 'Json {}' builder to coerce nulls to default values." ) }