Skip to content

Commit

Permalink
Test & fix several exception messages from Json parser
Browse files Browse the repository at this point in the history
To avoid cryptic and incorrect ones, such as `Expected quotation mark '"', but had '"' instead` or `unexpected token: 10`.

Fixes #2360
Fixes #2399

Also remove @PublishedApi from BATCH_SIZE to remove it from public API dump.
  • Loading branch information
sandwwraith committed Aug 11, 2023
1 parent 7bf105e commit e55f807
Show file tree
Hide file tree
Showing 10 changed files with 217 additions and 33 deletions.
2 changes: 1 addition & 1 deletion docs/basic-serialization.md
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ the `null` value to it.

```text
Exception in thread "main" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected string literal but 'null' literal was found at path: $.language
Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values.
Use 'coerceInputValues = true' in 'Json {}' builder to coerce nulls to default values.
```

<!--- TEST LINES_START -->
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/*
* Copyright 2017-2023 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
*/


package kotlinx.serialization.json

import kotlinx.serialization.*
import kotlin.test.*


class JsonErrorMessagesTest : JsonTestBase() {

@Test
fun testJsonTokensAreProperlyReported() = parametrizedTest { mode ->
val input1 = """{"boxed":4}"""
val input2 = """{"boxed":"str"}"""

val serString = serializer<Box<String>>()
val serInt = serializer<Box<Int>>()

checkSerializationException({
default.decodeFromString(serString, input1, mode)
}, { message ->
if (mode == JsonTestingMode.TREE)
assertContains(message, "String literal for key 'boxed' should be quoted.")
else
assertContains(
message,
"Unexpected JSON token at offset 9: Expected quotation mark '\"', but had '4' instead at path: \$.boxed"
)
})

checkSerializationException({
default.decodeFromString(serInt, input2, mode)
}, { message ->
if (mode != JsonTestingMode.TREE)
// we allow number values to be quoted, so the message pointing to 's' is correct
assertContains(
message,
"Unexpected JSON token at offset 9: Unexpected symbol 's' in numeric literal at path: \$.boxed"
)
else
assertContains(message, "Failed to parse literal as 'int' value")
})
}

@Test
fun testMissingClosingQuote() = parametrizedTest { mode ->
val input1 = """{"boxed:4}"""
val input2 = """{"boxed":"str}"""
val input3 = """{"boxed:"str"}"""
val serString = serializer<Box<String>>()
val serInt = serializer<Box<Int>>()

checkSerializationException({
default.decodeFromString(serInt, input1, mode)
}, { message ->
// For discussion:
// Technically, both of these messages are correct despite them being completely different.
// A `:` instead of `"` is a good guess, but `:`/`}` is a perfectly valid token inside Json string — for example,
// it can be some kind of path `{"foo:bar:baz":"my:resource:locator:{123}"}` or even URI used as a string key/value.
// So if the closing quote is missing, there's really no way to correctly tell where the key or value is supposed to end.
// Although we may try to unify these messages for consistency.
if (mode in setOf(JsonTestingMode.STREAMING, JsonTestingMode.TREE))
assertContains(
message,
"Unexpected JSON token at offset 7: Expected quotation mark '\"', but had ':' instead at path: \$"
)
else
assertContains(
message, "Unexpected EOF at path: \$"
)
})

checkSerializationException({
default.decodeFromString(serString, input2, mode)
}, { message ->
if (mode in setOf(JsonTestingMode.STREAMING, JsonTestingMode.TREE))
assertContains(
message,
"Unexpected JSON token at offset 13: Expected quotation mark '\"', but had '}' instead at path: \$"
)
else
assertContains(message, "Unexpected EOF at path: \$.boxed")
})

checkSerializationException({
default.decodeFromString(serString, input3, mode)
}, { message ->
assertContains(
message,
"Unexpected JSON token at offset 9: Expected colon ':', but had 's' instead at path: \$"
)
})
}

@Test
fun testUnquoted() = parametrizedTest { mode ->
val input1 = """{boxed:str}"""
val input2 = """{"boxed":str}"""
val ser = serializer<Box<String>>()

checkSerializationException({
default.decodeFromString(ser, input1, mode)
}, { message ->
assertContains(
message,
"""Unexpected JSON token at offset 1: Expected quotation mark '"', but had 'b' instead at path: ${'$'}"""
)
})

checkSerializationException({
default.decodeFromString(ser, input2, mode)
}, { message ->
if (mode == JsonTestingMode.TREE) assertContains(
message,
"""String literal for key 'boxed' should be quoted."""
)
else assertContains(
message,
"""Unexpected JSON token at offset 9: Expected quotation mark '"', but had 's' instead at path: ${'$'}.boxed"""
)
})
}

@Test
fun testNullLiteralForNotNull() = parametrizedTest { mode ->
val input = """{"boxed":null}"""
val ser = serializer<Box<String>>()
checkSerializationException({
default.decodeFromString(ser, input, mode)
}, { message ->
if (mode == JsonTestingMode.TREE)
assertContains(message, "Unexpected 'null' literal when non-nullable string was expected")
else
assertContains(
message,
"Unexpected JSON token at offset 9: Expected string literal but 'null' literal was found at path: \$.boxed"
)
})
}

@Test
fun testEof() = parametrizedTest { mode ->
val input = """{"boxed":"""
checkSerializationException({
default.decodeFromString<Box<String>>(input, mode)
}, { message ->
if (mode == JsonTestingMode.TREE)
assertContains(message, "Cannot read Json element because of unexpected end of the input at path: $")
else
assertContains(message, "Expected quotation mark '\"', but had 'EOF' instead at path: \$.boxed")

})

}

private fun checkSerializationException(action: () -> Unit, assertions: SerializationException.(String) -> Unit) {
val e = assertFailsWith(SerializationException::class, action)
assertNotNull(e.message)
e.assertions(e.message!!)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class JsonSequencePathTest {
val iterator = Json.decodeToSequence<Data>(source).iterator()
iterator.next() // Ignore
assertFailsWithMessage<SerializationException>(
"Expected quotation mark '\"', but had '2' instead at path: \$.data.s"
"Expected quotation mark '\"', but had '4' instead at path: \$.data.s"
) { iterator.next() }
}

Expand Down
4 changes: 0 additions & 4 deletions formats/json/api/kotlinx-serialization-json.api
Original file line number Diff line number Diff line change
Expand Up @@ -383,10 +383,6 @@ public final class kotlinx/serialization/json/JvmStreamsKt {
public static final fun encodeToStream (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/SerializationStrategy;Ljava/lang/Object;Ljava/io/OutputStream;)V
}

public final class kotlinx/serialization/json/internal/JsonLexerKt {
public static final field BATCH_SIZE I
}

public final class kotlinx/serialization/json/internal/JsonStreamsKt {
public static final fun decodeByReader (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/DeserializationStrategy;Lkotlinx/serialization/json/internal/SerialReader;)Ljava/lang/Object;
public static final fun decodeToSequenceByReader (Lkotlinx/serialization/json/Json;Lkotlinx/serialization/json/internal/SerialReader;Lkotlinx/serialization/DeserializationStrategy;Lkotlinx/serialization/json/DecodeSequenceMode;)Lkotlin/sequences/Sequence;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ internal class JsonTreeReader(
result
}
TC_BEGIN_LIST -> readArray()
else -> lexer.fail("Cannot begin reading element, unexpected token: $token")
else -> lexer.fail("Cannot read Json element because of unexpected ${tokenDescription(token)}")
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ private sealed class AbstractJsonTreeDecoder(
}

private fun unparsedPrimitive(primitive: String): Nothing {
throw JsonDecodingException(-1, "Failed to parse '$primitive'", currentObject().toString())
throw JsonDecodingException(-1, "Failed to parse literal as '$primitive' value", currentObject().toString())
}

override fun decodeTaggedString(tag: String): String {
Expand All @@ -159,7 +159,7 @@ private sealed class AbstractJsonTreeDecoder(
}

private fun JsonPrimitive.asLiteral(type: String): JsonLiteral {
return this as? JsonLiteral ?: throw JsonDecodingException(-1, "Unexpected 'null' when $type was expected")
return this as? JsonLiteral ?: throw JsonDecodingException(-1, "Unexpected 'null' literal when non-nullable $type was expected")
}

override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import kotlin.js.*
import kotlin.jvm.*
import kotlin.math.*

internal const val lenientHint = "Use 'isLenient = true' in 'Json {}` builder to accept non-compliant JSON."
internal const val coerceInputValuesHint = "Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values."
internal const val lenientHint = "Use 'isLenient = true' in 'Json {}' builder to accept non-compliant JSON."
internal const val coerceInputValuesHint = "Use 'coerceInputValues = true' in 'Json {}' builder to coerce nulls to default values."
internal const val specialFlowingValuesHint =
"It is possible to deserialize them using 'JsonBuilder.allowSpecialFloatingPointValues = true'"
internal const val ignoreUnknownKeysHint = "Use 'ignoreUnknownKeys = true' in 'Json {}' builder to ignore unknown keys."
Expand Down Expand Up @@ -56,6 +56,20 @@ private const val ESC2C_MAX = 0x75

internal const val asciiCaseMask = 1 shl 5

internal fun tokenDescription(token: Byte) = when (token) {
TC_STRING -> "quotation mark '\"'"
TC_STRING_ESC -> "string escape sequence '\\'"
TC_COMMA -> "comma ','"
TC_COLON -> "colon ':'"
TC_BEGIN_OBJ -> "start of the object '{'"
TC_END_OBJ -> "end of the object '}'"
TC_BEGIN_LIST -> "start of the array '['"
TC_END_LIST -> "end of the array ']'"
TC_EOF -> "end of the input"
TC_INVALID -> "invalid token"
else -> "valid token" // should never happen
}

// object instead of @SharedImmutable because there is mutual initialization in [initC2ESC] and [initC2TC]
internal object CharMappings {
@JvmField
Expand Down Expand Up @@ -200,28 +214,23 @@ internal abstract class AbstractJsonLexer {
}

protected fun unexpectedToken(expected: Char) {
--currentPosition // To properly handle null
if (currentPosition >= 0 && expected == STRING && consumeStringLenient() == NULL) {
fail("Expected string literal but 'null' literal was found", currentPosition - 4, coerceInputValuesHint)
if (currentPosition > 0 && expected == STRING) {
val inputLiteral = withPositionRollback {
currentPosition--
consumeStringLenient()
}
if (inputLiteral == NULL)
fail("Expected string literal but 'null' literal was found", currentPosition - 1, coerceInputValuesHint)
}
fail(charToTokenClass(expected))
}

internal fun fail(expectedToken: Byte): Nothing {
// We know that the token was consumed prior to this call
internal fun fail(expectedToken: Byte, wasConsumed: Boolean = true): Nothing {
// Slow path, never called in normal code, can avoid optimizing it
val expected = when (expectedToken) {
TC_STRING -> "quotation mark '\"'"
TC_COMMA -> "comma ','"
TC_COLON -> "colon ':'"
TC_BEGIN_OBJ -> "start of the object '{'"
TC_END_OBJ -> "end of the object '}'"
TC_BEGIN_LIST -> "start of the array '['"
TC_END_LIST -> "end of the array ']'"
else -> "valid token" // should never happen
}
val s = if (currentPosition == source.length || currentPosition <= 0) "EOF" else source[currentPosition - 1].toString()
fail("Expected $expected, but had '$s' instead", currentPosition - 1)
val expected = tokenDescription(expectedToken)
val position = if (wasConsumed) currentPosition - 1 else currentPosition
val s = if (currentPosition == source.length || position < 0) "EOF" else source[position].toString()
fail("Expected $expected, but had '$s' instead", position)
}

fun peekNextToken(): Byte {
Expand Down Expand Up @@ -385,15 +394,15 @@ internal abstract class AbstractJsonLexer {
usedAppend = true
currentPosition = prefetchOrEof(appendEscape(lastPosition, currentPosition))
if (currentPosition == -1)
fail("EOF", currentPosition)
fail("Unexpected EOF", currentPosition)
lastPosition = currentPosition
} else if (++currentPosition >= source.length) {
usedAppend = true
// end of chunk
appendRange(lastPosition, currentPosition)
currentPosition = prefetchOrEof(currentPosition)
if (currentPosition == -1)
fail("EOF", currentPosition)
fail("Unexpected EOF", currentPosition)
lastPosition = currentPosition
}
char = source[currentPosition]
Expand Down Expand Up @@ -743,4 +752,13 @@ internal abstract class AbstractJsonLexer {

currentPosition = current + literalSuffix.length
}

private inline fun <T> withPositionRollback(action: () -> T): T {
val snapshot = currentPosition
try {
return action()
} finally {
currentPosition = snapshot
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

package kotlinx.serialization.json.internal

@PublishedApi
internal const val BATCH_SIZE: Int = 16 * 1024
private const val DEFAULT_THRESHOLD = 128

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ internal class StringJsonLexer(override val source: String) : AbstractJsonLexer(
if (c == expected) return
unexpectedToken(expected)
}
currentPosition = -1 // for correct EOF reporting
unexpectedToken(expected) // EOF
}

Expand All @@ -85,7 +86,12 @@ internal class StringJsonLexer(override val source: String) : AbstractJsonLexer(
consumeNextToken(STRING)
val current = currentPosition
val closingQuote = source.indexOf('"', current)
if (closingQuote == -1) fail(TC_STRING)
if (closingQuote == -1) {
// advance currentPosition to a token after the end of the string to guess position in the error msg
// (not always correct, as `:`/`,` are valid contents of the string, but good guess anyway)
consumeStringLenient()
fail(TC_STRING, wasConsumed = false)
}
// Now we _optimistically_ know where the string ends (it might have been an escaped quote)
for (i in current until closingQuote) {
// Encountered escape sequence, should fallback to "slow" path and symbolic scanning
Expand Down
2 changes: 1 addition & 1 deletion guide/test/BasicSerializationTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class BasicSerializationTest {
fun testExampleClasses12() {
captureOutput("ExampleClasses12") { example.exampleClasses12.main() }.verifyOutputLinesStart(
"Exception in thread \"main\" kotlinx.serialization.json.internal.JsonDecodingException: Unexpected JSON token at offset 52: Expected string literal but 'null' literal was found at path: $.language",
"Use 'coerceInputValues = true' in 'Json {}` builder to coerce nulls to default values."
"Use 'coerceInputValues = true' in 'Json {}' builder to coerce nulls to default values."
)
}

Expand Down

0 comments on commit e55f807

Please sign in to comment.