diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts
index 593f759..cf9b1bc 100644
--- a/buildSrc/build.gradle.kts
+++ b/buildSrc/build.gradle.kts
@@ -8,5 +8,5 @@ dependencies {
     implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlinVer")
    implementation("org.jetbrains.kotlin:kotlin-allopen:$kotlinVer")
     implementation("org.jetbrains.kotlinx:kotlinx-benchmark-plugin:0.4.8")
-    implementation("org.jetbrains.dokka:dokka-gradle-plugin:1.8.20")
+    implementation("org.jetbrains.dokka:dokka-gradle-plugin:1.9.0")
 }
diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/ParseResult.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/ParseResult.kt
index a0bc858..3a0a7ce 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/ParseResult.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/ParseResult.kt
@@ -2,6 +2,7 @@ package me.alllex.parsus.parser
 
 import me.alllex.parsus.token.Token
 import me.alllex.parsus.token.TokenMatch
+import me.alllex.parsus.util.replaceNonPrintable
 
 /**
  * Result of a parse that is either a [parsed value][ParsedValue]
@@ -23,20 +24,96 @@ abstract class ParseError : ParseResult<Nothing>() {
      */
     abstract val offset: Int
 
-    override fun toString(): String = "ParseError"
+    open val contextProvider: ParseErrorContextProvider? get() = null
+
+    abstract fun describe(): String
+
+    override fun toString(): String = describe()
+
+    protected fun format(message: String, messageAtOffset: String): String = buildString {
+        append(message)
+        contextProvider?.getParseErrorContext(offset)?.run {
+            appendLine()
+            append(" ".repeat(lookBehind)).append(messageAtOffset)
+            appendLine()
+            append(" ".repeat(lookBehind)).append("| offset=$offset (or after ignored tokens)")
+            appendLine()
+            appendLine(replaceNonPrintable(inputSection))
+            if (previousTokenMatch != null) {
+                append("^".repeat(previousTokenMatch.length.coerceAtLeast(1)))
+                append(" Previous token: ${previousTokenMatch.token} at offset=${previousTokenMatch.offset}")
+                appendLine()
+            }
+        }
+    }
 }
 
-data class UnmatchedToken(val expected: Token, override val offset: Int) : ParseError()
+data class ParseErrorContext(
+    val inputSection: String,
+    val lookBehind: Int,
+    val lookAhead: Int,
+    val previousTokenMatch: TokenMatch?,
+)
 
-data class MismatchedToken(val expected: Token, val found: TokenMatch) : ParseError() {
+fun interface ParseErrorContextProvider {
+    fun getParseErrorContext(offset: Int): ParseErrorContext?
+}
+
+data class UnmatchedToken(
+    val expected: Token,
+    override val offset: Int,
+    override val contextProvider: ParseErrorContextProvider? = null
+) : ParseError() {
+
+    override fun toString(): String = describe()
+
+    override fun describe(): String = format(
+        message = "Unmatched token at offset=$offset, when expected: $expected",
+        messageAtOffset = "Expected token: $expected"
+    )
+}
+
+data class MismatchedToken(
+    val expected: Token,
+    val found: TokenMatch,
+    override val contextProvider: ParseErrorContextProvider? = null,
+) : ParseError() {
     override val offset: Int get() = found.offset
+    override fun toString(): String = describe()
+    override fun describe(): String = format(
+        message = "Mismatched token at offset=$offset, when expected: $expected, got: ${found.token}",
+        messageAtOffset = "Expected token: $expected at offset=$offset, got: ${found.token}"
+    )
+}
+
+data class NoMatchingToken(
+    override val offset: Int,
+) : ParseError() {
+
+    override fun toString(): String = describe()
+    override fun describe(): String = format(
+        message = "No matching token at offset=$offset",
+        messageAtOffset = "No matching token"
+    )
+}
+
+data class NoViableAlternative(
+    override val offset: Int,
+) : ParseError() {
+    override fun toString(): String = describe()
+    override fun describe(): String = format(
+        message = "None of the alternatives succeeded at offset=$offset",
+        messageAtOffset = "None of the alternatives succeeded"
+    )
+}
+
+data class NotEnoughRepetition(override val offset: Int, val expectedAtLeast: Int, val actualCount: Int) : ParseError() {
+    override fun toString(): String = describe()
+    override fun describe(): String = "Expected at least $expectedAtLeast, found $actualCount"
 }
 
-data class NoMatchingToken(override val offset: Int) : ParseError()
-data class NoViableAlternative(override val offset: Int) : ParseError()
-data class NotEnoughRepetition(override val offset: Int, val expectedAtLeast: Int, val actualCount: Int) : ParseError()
-
 class ParseException(val error: ParseError) : Exception() {
-    override fun toString(): String = "ParseException($error)"
+    override fun toString(): String = "ParseException: ${error.describe()}"
 }
 
 inline fun <T, R> ParseResult<T>.map(f: (T) -> R): ParseResult<R> {
diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
index 3b67e62..c038f86 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
@@ -20,9 +20,12 @@ internal class ParsingContext(
     private val debugMode: Boolean = false
 ) : ParsingScope {
 
+    private val inputLength = tokenizer.input.length
+
     private var backtrackCont: Continuation<ParseError>? = null
     private var cont: Continuation<Any?>? = null
     private var position: Int = 0
+    private var lastTokenMatchContext = LastTokenMatchContext(tokenizer.input, currentOffset = 0)
     private var result: Result<Any?> = PENDING_RESULT
 
     fun <T> runParser(parser: Parser<T>): ParseResult<T> {
@@ -60,13 +63,23 @@ internal class ParsingContext(
     override fun tryParse(token: Token): ParseResult<TokenMatch> {
         val fromIndex = this.position
         val match = tokenizer.findMatchOf(fromIndex, token)
-            ?: return UnmatchedToken(token, fromIndex)
-        // TODO: clean up, as this should not happen anymore
-        if (match.token != token) return MismatchedToken(token, match)
-        this.position = match.offset + match.length
+            ?: return UnmatchedToken(token, fromIndex, getParseErrorContextProviderOrNull())
+
+        // This can only happen with EagerTokenizer
+        if (match.token != token) return MismatchedToken(token, match, getParseErrorContextProviderOrNull())
+
+        val newPosition = match.nextOffset.coerceAtMost(inputLength)
+        this.position = newPosition
+        this.lastTokenMatchContext.currentOffset = newPosition
+        this.lastTokenMatchContext.lastMatch = match
+
         return ParsedValue(match)
     }
 
+    private fun getParseErrorContextProviderOrNull(): ParseErrorContextProvider {
+        return this.lastTokenMatchContext
+    }
+
     override suspend fun fail(error: ParseError): Nothing {
         suspendCoroutineUninterceptedOrReturn {
             withCont(backtrackCont) // may be null
@@ -164,3 +177,39 @@ internal class ParsingContext(
         }
     }
 }
+
+internal class LastTokenMatchContext(
+    val input: String,
+    var currentOffset: Int,
+    var lastMatch: TokenMatch? = null,
+) : ParseErrorContextProvider {
+
+    override fun toString() = "LastTokenMatchContext(currentOffset=$currentOffset, lastMatch=$lastMatch)"
+
+    override fun getParseErrorContext(offset: Int): ParseErrorContext? {
+        if (offset != currentOffset) {
+            return null
+        }
+
+        val lastMatch = this.lastMatch
+        val lookAhead = 20
+        return if (lastMatch == null || lastMatch.nextOffset != offset) {
+            ParseErrorContext(
+                inputSection = getInputSection(offset, offset + lookAhead),
+                lookBehind = 0,
+                lookAhead = lookAhead,
+                previousTokenMatch = null
+            )
+        } else {
+            ParseErrorContext(
+                inputSection = getInputSection(lastMatch.offset, lastMatch.nextOffset + lookAhead),
+                lookBehind = lastMatch.length,
+                lookAhead = lookAhead,
+                previousTokenMatch = lastMatch
+            )
+        }
+    }
+
+    private fun getInputSection(inputSectionStart: Int, inputSectionStop: Int) =
+        input.substring(inputSectionStart, inputSectionStop.coerceAtMost(input.length))
+}
diff --git a/src/commonMain/kotlin/me/alllex/parsus/token/TokenMatch.kt b/src/commonMain/kotlin/me/alllex/parsus/token/TokenMatch.kt
index c4a2b85..2eb26d6 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/token/TokenMatch.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/token/TokenMatch.kt
@@ -8,4 +8,9 @@ data class TokenMatch(
     val token: Token,
     val offset: Int,
     val length: Int,
-)
+) {
+    /**
+     * Offset of the next character after the match.
+     */
+    val nextOffset: Int get() = offset + length
+}
diff --git a/src/commonMain/kotlin/me/alllex/parsus/trace/TokenMatchingTrace.kt b/src/commonMain/kotlin/me/alllex/parsus/trace/TokenMatchingTrace.kt
index b66fa4e..da5a1a7 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/trace/TokenMatchingTrace.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/trace/TokenMatchingTrace.kt
@@ -3,6 +3,7 @@ package me.alllex.parsus.trace
 import me.alllex.parsus.annotations.ExperimentalParsusApi
 import me.alllex.parsus.token.Token
 import me.alllex.parsus.token.TokenMatch
+import me.alllex.parsus.util.replaceNonPrintable
 
 
 @ExperimentalParsusApi
@@ -82,13 +83,3 @@ fun formatTokenMatchingTrace(
     }
     return sb.toString()
 }
-
-private fun replaceNonPrintable(char: Char): Char {
-    return when (char) {
-        ' ' -> '␣' // U+2423 OPEN BOX
-        '\n' -> '␤' // U+2424 SYMBOL FOR NEWLINE
-        '\r' -> '␍' // U+240D SYMBOL FOR CARRIAGE RETURN
-        '\t' -> '␉' // U+2409 SYMBOL FOR HORIZONTAL TABULATION
-        else -> char
-    }
-}
diff --git a/src/commonMain/kotlin/me/alllex/parsus/util/text.kt b/src/commonMain/kotlin/me/alllex/parsus/util/text.kt
new file mode 100644
index 0000000..5fa94a7
--- /dev/null
+++ b/src/commonMain/kotlin/me/alllex/parsus/util/text.kt
@@ -0,0 +1,19 @@
+package me.alllex.parsus.util
+
+internal fun replaceNonPrintable(string: String): String {
+    return buildString {
+        for (char in string) {
+            append(replaceNonPrintable(char))
+        }
+    }
+}
+
+internal fun replaceNonPrintable(char: Char): Char {
+    return when (char) {
+        ' ' -> '␣' // U+2423 OPEN BOX
+        '\n' -> '␤' // U+2424 SYMBOL FOR NEWLINE
+        '\r' -> '␍' // U+240D SYMBOL FOR CARRIAGE RETURN
+        '\t' -> '␉' // U+2409 SYMBOL FOR HORIZONTAL TABULATION
+        else -> char
+    }
+}
diff --git a/src/commonTest/kotlin/me/alllex/parsus/ParseErrorTest.kt b/src/commonTest/kotlin/me/alllex/parsus/ParseErrorTest.kt
new file mode 100644
index 0000000..2712c4e
--- /dev/null
+++ b/src/commonTest/kotlin/me/alllex/parsus/ParseErrorTest.kt
@@ -0,0 +1,97 @@
+package me.alllex.parsus
+
+import assertk.assertions.isEqualTo
+import assertk.assertions.prop
+import me.alllex.parsus.parser.Grammar
+import me.alllex.parsus.parser.ParseError
+import me.alllex.parsus.parser.map
+import me.alllex.parsus.parser.times
+import me.alllex.parsus.token.literalToken
+import me.alllex.parsus.token.regexToken
+import kotlin.test.Test
+
+class ParseErrorTest {
+
+    @Test
+    fun unmatchedTokenErrorsProvideUserFriendlyDescriptions() {
+        object : Grammar<String>() {
+            val ab by literalToken("ab")
+            val cd by literalToken("cd")
+            override val root by ab * cd map { (v1, v2) -> "${v1.text}-${v2.text}" }
+        }.run {
+
+            assertParsed("abcd").isEqualTo("ab-cd")
+
+            assertNotParsed("abab").prop(ParseError::describe).isEqualTo(
+                "Unmatched token at offset=2, when expected: LiteralToken('cd')\n" + """
+                      Expected token: LiteralToken('cd')
+                      | offset=2 (or after ignored tokens)
+                    abab
+                    ^^ Previous token: LiteralToken('ab') at offset=0
+                """.trimIndent() + "\n"
+            )
+
+            assertNotParsed("cd").prop(ParseError::describe).isEqualTo(
+                "Unmatched token at offset=0, when expected: LiteralToken('ab')\n" + """
+                    Expected token: LiteralToken('ab')
+                    | offset=0 (or after ignored tokens)
+                    cd
+                """.trimIndent() + "\n"
+            )
+
+            assertNotParsed("abcdab").prop(ParseError::describe).isEqualTo(
+                "Unmatched token at offset=4, when expected: Token(EOF)\n" + """
+                      Expected token: Token(EOF)
+                      | offset=4 (or after ignored tokens)
+                    cdab
+                    ^^ Previous token: LiteralToken('cd') at offset=2
+                """.trimIndent() + "\n"
+            )
+        }
+    }
+
+    @Test
+    fun lastMatchDescriptionIsPresentWhenThereAreIgnoredTokensInBetween() {
+        object : Grammar<String>() {
+            val ws by literalToken(" ", ignored = true)
+            val ab by literalToken("ab")
+            val cd by literalToken("cd")
+            override val root by ab * cd map { (v1, v2) -> "${v1.text}-${v2.text}" }
+        }.run {
+            assertParsed("ab cd").isEqualTo("ab-cd")
+
+            assertNotParsed("ab ab").prop(ParseError::describe).isEqualTo(
+                "Unmatched token at offset=2, when expected: LiteralToken('cd')\n" + """
+                      Expected token: LiteralToken('cd')
+                      | offset=2 (or after ignored tokens)
+                    ab␣ab
+                    ^^ Previous token: LiteralToken('ab') at offset=0
+                """.trimIndent() + "\n"
+            )
+        }
+    }
+
+    @Test
+    fun unprintableCharactersAreReplacedInErrors() {
+        object : Grammar<String>() {
+            val ws by regexToken("\\s+")
+            val ab by literalToken("ab")
+            @Suppress("unused")
+            val cd by literalToken("cd")
+            override val root by ws * ab map { (v1, v2) -> "${v1.text}-${v2.text}" }
+        }.run {
+            assertParsed(" \t\r\nab").isEqualTo(" \t\r\n-ab")
+
+            assertNotParsed(" \t\r\ncd").prop(ParseError::describe).isEqualTo(
+                "Unmatched token at offset=4, when expected: LiteralToken('ab')\n" + """
+                        Expected token: LiteralToken('ab')
+                        | offset=4 (or after ignored tokens)
+                    ␣␉␍␤cd
+                    ^^^^ Previous token: RegexToken(ws [\s+]) at offset=0
+                """.trimIndent() + "\n"
+            )
+        }
+
+    }
+
+}
diff --git a/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt b/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
index 8a5e0dd..4f10fe2 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
@@ -83,4 +83,19 @@ class TokenTests {
         }
     }
 
+    @Test
+    fun explicitEofMatchesDoNotOverflowInputLength() {
+        object : Grammar<List<TokenMatch>>() {
+            val ab by literalToken("ab")
+            val eof by EofToken
+            override val root by ab * eof * eof map { it.toList() }
+        }.run {
+            assertParsed("ab").isEqualTo(listOf(
+                TokenMatch(ab, 0, 2),
+                TokenMatch(EofToken, 2, 1),
+                TokenMatch(EofToken, 2, 1),
+            ))
+        }
+    }
+
 }
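
Example of the improved error reporting. This is a minimal sketch and not part of the change set above: the grammar, input, and expected describe() output are lifted from ParseErrorTest, the AbCdGrammar name is purely illustrative, and a public Grammar.parse(input) entry point returning a ParseResult is assumed.

// Sketch only: prints the new ParseError.describe() output for the "abab" case
// from ParseErrorTest. Assumes Grammar.parse(input) returning ParseResult.
import me.alllex.parsus.parser.Grammar
import me.alllex.parsus.parser.ParseError
import me.alllex.parsus.parser.ParsedValue
import me.alllex.parsus.parser.map
import me.alllex.parsus.parser.times
import me.alllex.parsus.token.literalToken

object AbCdGrammar : Grammar<String>() {
    val ab by literalToken("ab")
    val cd by literalToken("cd")
    override val root by ab * cd map { (v1, v2) -> "${v1.text}-${v2.text}" }
}

fun main() {
    when (val result = AbCdGrammar.parse("abab")) {
        is ParsedValue -> println(result.value)
        is ParseError -> println(result.describe())
    }
    // Expected output, as asserted in ParseErrorTest:
    // Unmatched token at offset=2, when expected: LiteralToken('cd')
    //   Expected token: LiteralToken('cd')
    //   | offset=2 (or after ignored tokens)
    // abab
    // ^^ Previous token: LiteralToken('ab') at offset=0
}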
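
A second sketch, also not part of the change set, showing why tryParse now advances with match.nextOffset.coerceAtMost(inputLength): an explicit EOF match is reported with length 1, so its nextOffset points one character past the end of the input. EofToken is assumed to be importable from me.alllex.parsus.token, as it is used in TokenTests.

// Sketch only: illustrates TokenMatch.nextOffset for an explicit EOF match.
import me.alllex.parsus.token.EofToken
import me.alllex.parsus.token.TokenMatch

fun main() {
    val input = "ab"
    // TokenTests expects the EOF match for "ab" to be TokenMatch(EofToken, 2, 1)
    val eofMatch = TokenMatch(EofToken, offset = 2, length = 1)
    println(eofMatch.nextOffset)                             // 3, one past the end of the input
    println(eofMatch.nextOffset.coerceAtMost(input.length))  // 2, the clamped parser position
}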