Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

User-friendly unmatched token errors #25

Merged
merged 7 commits into from
Oct 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion buildSrc/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ dependencies {
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlinVer")
implementation("org.jetbrains.kotlin:kotlin-allopen:$kotlinVer")
implementation("org.jetbrains.kotlinx:kotlinx-benchmark-plugin:0.4.8")
implementation("org.jetbrains.dokka:dokka-gradle-plugin:1.8.20")
implementation("org.jetbrains.dokka:dokka-gradle-plugin:1.9.0")
}
91 changes: 84 additions & 7 deletions src/commonMain/kotlin/me/alllex/parsus/parser/ParseResult.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package me.alllex.parsus.parser

import me.alllex.parsus.token.Token
import me.alllex.parsus.token.TokenMatch
import me.alllex.parsus.util.replaceNonPrintable

/**
* Result of a parse that is either a [parsed value][ParsedValue]
Expand All @@ -23,20 +24,96 @@ abstract class ParseError : ParseResult<Nothing>() {
*/
abstract val offset: Int

override fun toString(): String = "ParseError"
open val contextProvider: ParseErrorContextProvider? get() = null

abstract fun describe(): String

override fun toString(): String = describe()

/**
 * Builds the human-readable description of this error.
 *
 * The result always starts with [message]. When [contextProvider] yields a context for
 * [offset], these lines are appended:
 *  - [messageAtOffset], indented by the context's `lookBehind` spaces;
 *  - an `| offset=...` marker line with the same indentation;
 *  - the context's input section with whitespace characters made visible;
 *  - if a previous token match is known, a `^` underline followed by that token's description.
 *
 * @param message headline of the error
 * @param messageAtOffset text placed on the indented line above the offset marker
 * @return [message] alone when no context is available, otherwise the multi-line description
 *   (which ends with a line break)
 */
protected fun format(message: String, messageAtOffset: String): String {
    // Without a context there is nothing to draw: the headline is the whole description.
    val context = contextProvider?.getParseErrorContext(offset) ?: return message

    val indent = " ".repeat(context.lookBehind)
    val out = StringBuilder(message)
    out.appendLine()
    out.append(indent).append(messageAtOffset).appendLine()
    out.append(indent).append("| offset=$offset (or after ignored tokens)").appendLine()
    out.appendLine(replaceNonPrintable(context.inputSection))

    context.previousTokenMatch?.let { previous ->
        // At least one caret is drawn, even for a zero-length match.
        out.append("^".repeat(previous.length.coerceAtLeast(1)))
        out.append(" Previous token: ${previous.token} at offset=${previous.offset}")
        out.appendLine()
    }
    return out.toString()
}
}

data class UnmatchedToken(val expected: Token, override val offset: Int) : ParseError()
/**
 * Snippet of the parsed input that surrounds a parse error, used to render
 * a multi-line error description.
 *
 * @property inputSection the part of the input shown in the description
 * @property lookBehind number of characters of [inputSection] that precede the error offset;
 *   the error formatter indents its marker lines by this many spaces
 * @property lookAhead number of characters requested after the error offset
 *   (NOTE(review): [inputSection] may be shorter when the input ends earlier — confirm with the provider)
 * @property previousTokenMatch the token match that ended right before the error offset,
 *   or `null` when none is known
 */
data class ParseErrorContext(
val inputSection: String,
val lookBehind: Int,
val lookAhead: Int,
val previousTokenMatch: TokenMatch?,
)

data class MismatchedToken(val expected: Token, val found: TokenMatch) : ParseError() {
/**
 * Source of [ParseErrorContext] for an error reported at a given input offset.
 */
fun interface ParseErrorContextProvider {
/**
 * Returns the context for an error at [offset], or `null` when no context is available.
 */
fun getParseErrorContext(offset: Int): ParseErrorContext?
}

/**
 * Error reported when the [expected] token could not be matched at [offset].
 *
 * @property expected the token the parser tried to match
 * @property offset input offset at which the match was attempted
 * @property contextProvider optional source of the input snippet shown by [describe]
 */
data class UnmatchedToken(
    val expected: Token,
    override val offset: Int,
    override val contextProvider: ParseErrorContextProvider? = null,
) : ParseError() {

    /** Describes the error, including the surrounding input when a context is available. */
    override fun describe(): String {
        val headline = "Unmatched token at offset=$offset, when expected: $expected"
        val marker = "Expected token: $expected"
        return format(message = headline, messageAtOffset = marker)
    }

    // Declared explicitly so the data-class generated toString() is not used.
    override fun toString(): String = describe()
}

/**
 * Error reported when a token was matched, but it is not the [expected] one.
 *
 * @property expected the token the parser tried to match
 * @property found the match that was actually produced
 * @property contextProvider optional source of the input snippet shown by [describe]
 */
data class MismatchedToken(
    val expected: Token,
    val found: TokenMatch,
    override val contextProvider: ParseErrorContextProvider? = null,
) : ParseError() {

    /** The error is located where the unexpected match starts. */
    override val offset: Int
        get() = found.offset

    /** Describes the error, including the surrounding input when a context is available. */
    override fun describe(): String {
        val headline = "Mismatched token at offset=$offset, when expected: $expected, got: ${found.token}"
        val marker = "Expected token: $expected at offset=$offset, got: ${found.token}"
        return format(message = headline, messageAtOffset = marker)
    }

    // Declared explicitly so the data-class generated toString() is not used.
    override fun toString(): String = describe()
}

/**
 * Error reported when no token matches the input at [offset].
 *
 * This class does not override `contextProvider`, so the inherited value applies.
 *
 * @property offset input offset at which no token matched
 */
data class NoMatchingToken(override val offset: Int) : ParseError() {

    /** Describes the error through the shared formatter. */
    override fun describe(): String {
        val headline = "No matching token at offset=$offset"
        return format(message = headline, messageAtOffset = "No matching token")
    }

    // Declared explicitly so the data-class generated toString() is not used.
    override fun toString(): String = describe()
}

/**
 * Error reported when none of the alternatives tried at [offset] succeeded.
 *
 * This class does not override `contextProvider`, so the inherited value applies.
 *
 * @property offset input offset at which the alternatives were tried
 */
data class NoViableAlternative(override val offset: Int) : ParseError() {

    /** Describes the error through the shared formatter. */
    override fun describe(): String {
        val headline = "None of the alternatives succeeded at offset=$offset"
        return format(message = headline, messageAtOffset = "None of the alternatives succeeded")
    }

    // Declared explicitly so the data-class generated toString() is not used.
    override fun toString(): String = describe()
}

/**
 * Error reported when a repeated element matched fewer times than required.
 *
 * @property offset input offset associated with the error
 * @property expectedAtLeast minimum number of repetitions that was required
 * @property actualCount number of repetitions that were actually found
 */
data class NotEnoughRepetition(
    override val offset: Int,
    val expectedAtLeast: Int,
    val actualCount: Int,
) : ParseError() {

    /** Describes the shortfall; the offset is not part of this message. */
    override fun describe(): String {
        return "Expected at least $expectedAtLeast, found $actualCount"
    }

    // Declared explicitly so the data-class generated toString() is not used.
    override fun toString(): String = describe()
}
data class NoMatchingToken(override val offset: Int) : ParseError()
data class NoViableAlternative(override val offset: Int) : ParseError()
data class NotEnoughRepetition(override val offset: Int, val expectedAtLeast: Int, val actualCount: Int) : ParseError()

class ParseException(val error: ParseError) : Exception() {
override fun toString(): String = "ParseException($error)"
override fun toString(): String = "ParseException: ${error.describe()}"
}

inline fun <T, R> ParseResult<T>.map(f: (T) -> R): ParseResult<R> {
Expand Down
57 changes: 53 additions & 4 deletions src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ internal class ParsingContext(
private val debugMode: Boolean = false
) : ParsingScope {

private val inputLength = tokenizer.input.length

private var backtrackCont: Continuation<ParseError>? = null
private var cont: Continuation<Any?>? = null
private var position: Int = 0
private var lastTokenMatchContext = LastTokenMatchContext(tokenizer.input, currentOffset = 0)
private var result: Result<Any?> = PENDING_RESULT

fun <T> runParser(parser: Parser<T>): ParseResult<T> {
Expand Down Expand Up @@ -60,13 +63,23 @@ internal class ParsingContext(
override fun tryParse(token: Token): ParseResult<TokenMatch> {
val fromIndex = this.position
val match = tokenizer.findMatchOf(fromIndex, token)
?: return UnmatchedToken(token, fromIndex)
// TODO: clean up, as this should not happen anymore
if (match.token != token) return MismatchedToken(token, match)
this.position = match.offset + match.length
?: return UnmatchedToken(token, fromIndex, getParseErrorContextProviderOrNull())

// This can only happen with EagerTokenizer
if (match.token != token) return MismatchedToken(token, match, getParseErrorContextProviderOrNull())

val newPosition = match.nextOffset.coerceAtMost(inputLength)
this.position = newPosition
this.lastTokenMatchContext.currentOffset = newPosition
this.lastTokenMatchContext.lastMatch = match

return ParsedValue(match)
}

// Supplies the provider attached to token errors created by this context.
// NOTE(review): despite the "OrNull" suffix the return type is non-nullable;
// the shared last-token-match tracker is always returned.
private fun getParseErrorContextProviderOrNull(): ParseErrorContextProvider = lastTokenMatchContext

override suspend fun fail(error: ParseError): Nothing {
suspendCoroutineUninterceptedOrReturn<Any?> {
withCont(backtrackCont) // may be null
Expand Down Expand Up @@ -164,3 +177,39 @@ internal class ParsingContext(
}
}
}

/**
 * Mutable tracker of the most recent token match, able to produce a [ParseErrorContext]
 * for errors that occur at the tracked offset.
 *
 * @property input the text the error snippets are cut from
 * @property currentOffset the only offset for which a context is produced
 * @property lastMatch the most recent token match, or `null` if there is none yet
 */
internal class LastTokenMatchContext(
    val input: String,
    var currentOffset: Int,
    var lastMatch: TokenMatch? = null,
) : ParseErrorContextProvider {

    override fun toString(): String =
        "LastTokenMatchContext(currentOffset=$currentOffset, lastMatch=$lastMatch)"

    /**
     * Returns a snippet of [input] around [offset], or `null` when [offset] is not
     * the [currentOffset] tracked by this object.
     *
     * The snippet extends up to 20 characters past [offset], clipped to the end of the input.
     * When [lastMatch] ends exactly at [offset], the snippet also starts at the beginning
     * of that match and the match is reported as the previous token.
     */
    override fun getParseErrorContext(offset: Int): ParseErrorContext? {
        if (offset != currentOffset) return null

        val lookAhead = 20
        // The last match is only shown when it ends exactly where the error is reported.
        val adjacentMatch = lastMatch?.takeIf { it.nextOffset == offset }

        val sectionStart = adjacentMatch?.offset ?: offset
        val sectionStop = (offset + lookAhead).coerceAtMost(input.length)

        return ParseErrorContext(
            inputSection = input.substring(sectionStart, sectionStop),
            lookBehind = adjacentMatch?.length ?: 0,
            lookAhead = lookAhead,
            previousTokenMatch = adjacentMatch,
        )
    }
}
7 changes: 6 additions & 1 deletion src/commonMain/kotlin/me/alllex/parsus/token/TokenMatch.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,9 @@ data class TokenMatch(
val token: Token,
val offset: Int,
val length: Int,
)
) {
/**
* Offset of the next character after the match.
*/
val nextOffset: Int get() = offset + length
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package me.alllex.parsus.trace
import me.alllex.parsus.annotations.ExperimentalParsusApi
import me.alllex.parsus.token.Token
import me.alllex.parsus.token.TokenMatch
import me.alllex.parsus.util.replaceNonPrintable


@ExperimentalParsusApi
Expand Down Expand Up @@ -82,13 +83,3 @@ fun formatTokenMatchingTrace(
}
return sb.toString()
}

/**
 * Maps a whitespace character to a visible stand-in symbol.
 *
 * Space, line feed, carriage return and tab are replaced; every other character
 * is returned unchanged.
 */
private fun replaceNonPrintable(char: Char): Char {
    // The two strings are parallel: position i of the first maps to position i of the second.
    // Symbols, in order: U+2423 OPEN BOX, U+2424 SYMBOL FOR NEWLINE,
    // U+240D SYMBOL FOR CARRIAGE RETURN, U+2409 SYMBOL FOR HORIZONTAL TABULATION.
    val index = " \n\r\t".indexOf(char)
    return if (index >= 0) "␣␤␍␉"[index] else char
}
19 changes: 19 additions & 0 deletions src/commonMain/kotlin/me/alllex/parsus/util/text.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package me.alllex.parsus.util

/**
 * Returns a copy of [string] in which every character has been passed through
 * the single-character `replaceNonPrintable` overload.
 *
 * The result has the same length as the input.
 */
internal fun replaceNonPrintable(string: String): String =
    string.map { replaceNonPrintable(it) }.joinToString(separator = "")

/**
 * Maps a whitespace character to a visible stand-in symbol.
 *
 * Space, line feed, carriage return and tab are replaced; every other character
 * is returned unchanged.
 */
internal fun replaceNonPrintable(char: Char): Char {
    if (char == ' ') return '␣' // U+2423 OPEN BOX
    if (char == '\n') return '␤' // U+2424 SYMBOL FOR NEWLINE
    if (char == '\r') return '␍' // U+240D SYMBOL FOR CARRIAGE RETURN
    if (char == '\t') return '␉' // U+2409 SYMBOL FOR HORIZONTAL TABULATION
    return char
}
97 changes: 97 additions & 0 deletions src/commonTest/kotlin/me/alllex/parsus/ParseErrorTest.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package me.alllex.parsus

import assertk.assertions.isEqualTo
import assertk.assertions.prop
import me.alllex.parsus.parser.Grammar
import me.alllex.parsus.parser.ParseError
import me.alllex.parsus.parser.map
import me.alllex.parsus.parser.times
import me.alllex.parsus.token.literalToken
import me.alllex.parsus.token.regexToken
import kotlin.test.Test

/**
 * Checks the text returned by [ParseError.describe] for unmatched-token errors.
 *
 * Each expectation is the headline followed by a raw string run through `trimIndent()`,
 * so the whitespace inside the raw strings is part of the expected value.
 * NOTE(review): `assertParsed` / `assertNotParsed` are not defined in this file;
 * presumably they are test helpers from the same package — confirm.
 */
class ParseErrorTest {

/**
 * Grammar `ab * cd`: covers a failure after a previous token, a failure at the very start
 * of the input, and unexpected trailing input.
 */
@Test
fun unmatchedTokenErrorsProvideUserFriendlyDescriptions() {
object : Grammar<String>() {
val ab by literalToken("ab")
val cd by literalToken("cd")
override val root by ab * cd map { (v1, v2) -> "${v1.text}-${v2.text}" }
}.run {

// Sanity check: the grammar accepts well-formed input.
assertParsed("abcd").isEqualTo("ab-cd")

// Second token is wrong: the description underlines the previously matched 'ab'.
assertNotParsed("abab").prop(ParseError::describe).isEqualTo(
"Unmatched token at offset=2, when expected: LiteralToken('cd')\n" + """
Expected token: LiteralToken('cd')
| offset=2 (or after ignored tokens)
abab
^^ Previous token: LiteralToken('ab') at offset=0
""".trimIndent() + "\n"
)

// First token is wrong: there is no previous token, so no underline line is expected.
assertNotParsed("cd").prop(ParseError::describe).isEqualTo(
"Unmatched token at offset=0, when expected: LiteralToken('ab')\n" + """
Expected token: LiteralToken('ab')
| offset=0 (or after ignored tokens)
cd
""".trimIndent() + "\n"
)

// Trailing input after a complete parse: the expected token is reported as Token(EOF).
assertNotParsed("abcdab").prop(ParseError::describe).isEqualTo(
"Unmatched token at offset=4, when expected: Token(EOF)\n" + """
Expected token: Token(EOF)
| offset=4 (or after ignored tokens)
cdab
^^ Previous token: LiteralToken('cd') at offset=2
""".trimIndent() + "\n"
)
}
}

/**
 * Same grammar with an ignored single-space token: the error is still reported at
 * offset=2 (right after 'ab') and the previous-token line is still present.
 */
@Test
fun lastMatchDescriptionIsPresentWhenThereAreIgnoredTokensInBetween() {
object : Grammar<String>() {
val ws by literalToken(" ", ignored = true)
val ab by literalToken("ab")
val cd by literalToken("cd")
override val root by ab * cd map { (v1, v2) -> "${v1.text}-${v2.text}" }
}.run {
assertParsed("ab cd").isEqualTo("ab-cd")

// The space in the echoed input is expected to be rendered as the visible '␣' symbol.
assertNotParsed("ab ab").prop(ParseError::describe).isEqualTo(
"Unmatched token at offset=2, when expected: LiteralToken('cd')\n" + """
Expected token: LiteralToken('cd')
| offset=2 (or after ignored tokens)
ab␣ab
^^ Previous token: LiteralToken('ab') at offset=0
""".trimIndent() + "\n"
)
}
}

/**
 * Grammar `ws * ab` where `ws` is a regular (not ignored) whitespace token: space, tab,
 * carriage return and line feed in the echoed input are expected as visible symbols.
 */
@Test
fun unprintableCharactersAreReplacedInErrors() {
object : Grammar<String>() {
val ws by regexToken("\\s+")
val ab by literalToken("ab")
// Declared but not referenced by the root parser, hence the suppression.
@Suppress("unused")
val cd by literalToken("cd")
override val root by ws * ab map { (v1, v2) -> "${v1.text}-${v2.text}" }
}.run {
assertParsed(" \t\r\nab").isEqualTo(" \t\r\n-ab")

// The four whitespace characters matched by `ws` are underlined with four carets.
assertNotParsed(" \t\r\ncd").prop(ParseError::describe).isEqualTo(
"Unmatched token at offset=4, when expected: LiteralToken('ab')\n" + """
Expected token: LiteralToken('ab')
| offset=4 (or after ignored tokens)
␣␉␍␤cd
^^^^ Previous token: RegexToken(ws [\s+]) at offset=0
""".trimIndent() + "\n"
)
}

}

}
15 changes: 15 additions & 0 deletions src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,19 @@ class TokenTests {
}
}

/**
 * Matching an explicit EOF token twice in a row succeeds, and both matches are
 * reported at the end of the input (offset 2 for "ab") rather than beyond it.
 */
@Test
fun explicitEofMatchesDoNotOverflowInputLength() {
    val grammar = object : Grammar<List<TokenMatch>>() {
        val ab by literalToken("ab")
        val eof by EofToken
        override val root by ab * eof * eof map { it.toList() }
    }

    with(grammar) {
        // Both EOF matches share offset 2: the second one does not start past the input.
        val expectedMatches = listOf(
            TokenMatch(ab, 0, 2),
            TokenMatch(EofToken, 2, 1),
            TokenMatch(EofToken, 2, 1),
        )
        assertParsed("ab").isEqualTo(expectedMatches)
    }
}

}
Loading