Skip to content

Commit

Permalink
Merge pull request #25 from alllex/error-descriptions
Browse files Browse the repository at this point in the history
User-friendly unmatched token errors
  • Loading branch information
alllex committed Oct 7, 2023
2 parents 801f4d7 + 4876834 commit bc6ead1
Show file tree
Hide file tree
Showing 8 changed files with 276 additions and 23 deletions.
2 changes: 1 addition & 1 deletion buildSrc/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ dependencies {
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlinVer")
implementation("org.jetbrains.kotlin:kotlin-allopen:$kotlinVer")
implementation("org.jetbrains.kotlinx:kotlinx-benchmark-plugin:0.4.8")
implementation("org.jetbrains.dokka:dokka-gradle-plugin:1.8.20")
implementation("org.jetbrains.dokka:dokka-gradle-plugin:1.9.0")
}
91 changes: 84 additions & 7 deletions src/commonMain/kotlin/me/alllex/parsus/parser/ParseResult.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package me.alllex.parsus.parser

import me.alllex.parsus.token.Token
import me.alllex.parsus.token.TokenMatch
import me.alllex.parsus.util.replaceNonPrintable

/**
* Result of a parse that is either a [parsed value][ParsedValue]
Expand All @@ -23,20 +24,96 @@ abstract class ParseError : ParseResult<Nothing>() {
*/
abstract val offset: Int

override fun toString(): String = "ParseError"
/**
 * Optional source of input context that [format] uses to enrich the description.
 * Returns `null` unless a subclass overrides it.
 */
open val contextProvider: ParseErrorContextProvider? get() = null

/**
 * Returns a human-readable description of this error.
 */
abstract fun describe(): String

/**
 * Delegates to [describe].
 */
override fun toString(): String = describe()

/**
 * Builds an error description that starts with [message].
 *
 * When [contextProvider] yields a context for [offset], the message is followed by:
 *  - [messageAtOffset], left-padded with `lookBehind` spaces;
 *  - an offset marker line, padded the same way;
 *  - the input section with non-printable characters replaced;
 *  - a `^` underline plus details of the previous token match, when there is one.
 *
 * Without a context the result is just [message].
 */
protected fun format(message: String, messageAtOffset: String): String {
    val description = StringBuilder(message)
    val context = contextProvider?.getParseErrorContext(offset)
        ?: return description.toString()

    // Both annotation lines are shifted right by the same amount.
    val padding = " ".repeat(context.lookBehind)
    description.appendLine()
    description.append(padding).append(messageAtOffset)
    description.appendLine()
    description.append(padding).append("| offset=$offset (or after ignored tokens)")
    description.appendLine()
    description.appendLine(replaceNonPrintable(context.inputSection))

    val previous = context.previousTokenMatch
    if (previous != null) {
        // Always draw at least one caret, even for a zero-length match.
        val underline = "^".repeat(previous.length.coerceAtLeast(1))
        description.append(underline)
        description.append(" Previous token: ${previous.token} at offset=${previous.offset}")
        description.appendLine()
    }
    return description.toString()
}
}

data class UnmatchedToken(val expected: Token, override val offset: Int) : ParseError()
/**
 * Piece of the input around a parse error, used when rendering an error description.
 *
 * @property inputSection part of the input text shown in the description
 * @property lookBehind number of spaces used to left-pad the annotation lines printed above [inputSection]
 * @property lookAhead number of characters after the error offset that were requested for [inputSection]
 * @property previousTokenMatch token match shown as the "previous token" in the description, if any
 */
data class ParseErrorContext(
val inputSection: String,
val lookBehind: Int,
val lookAhead: Int,
val previousTokenMatch: TokenMatch?,
)

data class MismatchedToken(val expected: Token, val found: TokenMatch) : ParseError() {
/**
 * Supplies the input context for a parse error at a given offset.
 */
fun interface ParseErrorContextProvider {
/**
 * Returns the context for an error at [offset], or `null` when no context is available.
 */
fun getParseErrorContext(offset: Int): ParseErrorContext?
}

/**
 * Error reported when the [expected] token could not be matched at [offset].
 *
 * [toString] is overridden explicitly so that it returns [describe]
 * instead of the representation a data class would otherwise generate.
 */
data class UnmatchedToken(
    val expected: Token,
    override val offset: Int,
    override val contextProvider: ParseErrorContextProvider? = null
) : ParseError() {

    override fun describe(): String {
        val summary = "Unmatched token at offset=$offset, when expected: $expected"
        val pointer = "Expected token: $expected"
        return format(summary, pointer)
    }

    override fun toString(): String = describe()
}

/**
 * Error reported when a token was [found] that differs from the [expected] one.
 * The error offset is the offset of the [found] match.
 *
 * [toString] is overridden explicitly so that it returns [describe]
 * instead of the representation a data class would otherwise generate.
 */
data class MismatchedToken(
    val expected: Token,
    val found: TokenMatch,
    override val contextProvider: ParseErrorContextProvider? = null,
) : ParseError() {

    override val offset: Int
        get() = found.offset

    override fun describe(): String {
        val summary = "Mismatched token at offset=$offset, when expected: $expected, got: ${found.token}"
        val pointer = "Expected token: $expected at offset=$offset, got: ${found.token}"
        return format(summary, pointer)
    }

    override fun toString(): String = describe()
}

/**
 * Error reported when no token matches at [offset].
 *
 * [toString] is overridden explicitly so that it returns [describe]
 * instead of the representation a data class would otherwise generate.
 */
data class NoMatchingToken(
    override val offset: Int,
) : ParseError() {

    override fun describe(): String {
        val summary = "No matching token at offset=$offset"
        val pointer = "No matching token"
        return format(summary, pointer)
    }

    override fun toString(): String = describe()
}

/**
 * Error reported when none of the alternatives succeeded at [offset].
 *
 * [toString] is overridden explicitly so that it returns [describe]
 * instead of the representation a data class would otherwise generate.
 */
data class NoViableAlternative(
    override val offset: Int,
) : ParseError() {

    override fun describe(): String {
        val summary = "None of the alternatives succeeded at offset=$offset"
        val pointer = "None of the alternatives succeeded"
        return format(summary, pointer)
    }

    override fun toString(): String = describe()
}

/**
 * Error reported when fewer repetitions were found than required:
 * [actualCount] instead of at least [expectedAtLeast].
 *
 * [toString] is overridden explicitly so that it returns [describe]
 * instead of the representation a data class would otherwise generate.
 */
data class NotEnoughRepetition(
    override val offset: Int,
    val expectedAtLeast: Int,
    val actualCount: Int,
) : ParseError() {

    override fun describe(): String = buildString {
        append("Expected at least ").append(expectedAtLeast)
        append(", found ").append(actualCount)
    }

    override fun toString(): String = describe()
}
data class NoMatchingToken(override val offset: Int) : ParseError()
data class NoViableAlternative(override val offset: Int) : ParseError()
data class NotEnoughRepetition(override val offset: Int, val expectedAtLeast: Int, val actualCount: Int) : ParseError()

class ParseException(val error: ParseError) : Exception() {
override fun toString(): String = "ParseException($error)"
override fun toString(): String = "ParseException: ${error.describe()}"
}

inline fun <T, R> ParseResult<T>.map(f: (T) -> R): ParseResult<R> {
Expand Down
57 changes: 53 additions & 4 deletions src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ internal class ParsingContext(
private val debugMode: Boolean = false
) : ParsingScope {

private val inputLength = tokenizer.input.length

private var backtrackCont: Continuation<ParseError>? = null
private var cont: Continuation<Any?>? = null
private var position: Int = 0
private var lastTokenMatchContext = LastTokenMatchContext(tokenizer.input, currentOffset = 0)
private var result: Result<Any?> = PENDING_RESULT

fun <T> runParser(parser: Parser<T>): ParseResult<T> {
Expand Down Expand Up @@ -60,13 +63,23 @@ internal class ParsingContext(
override fun tryParse(token: Token): ParseResult<TokenMatch> {
val fromIndex = this.position
val match = tokenizer.findMatchOf(fromIndex, token)
?: return UnmatchedToken(token, fromIndex)
// TODO: clean up, as this should not happen anymore
if (match.token != token) return MismatchedToken(token, match)
this.position = match.offset + match.length
?: return UnmatchedToken(token, fromIndex, getParseErrorContextProviderOrNull())

// This can only happen with EagerTokenizer
if (match.token != token) return MismatchedToken(token, match, getParseErrorContextProviderOrNull())

val newPosition = match.nextOffset.coerceAtMost(inputLength)
this.position = newPosition
this.lastTokenMatchContext.currentOffset = newPosition
this.lastTokenMatchContext.lastMatch = match

return ParsedValue(match)
}

/**
 * Returns the provider attached to token errors created by this context.
 * Despite the `OrNull` suffix, this always returns the last-token-match context.
 */
private fun getParseErrorContextProviderOrNull(): ParseErrorContextProvider = lastTokenMatchContext

override suspend fun fail(error: ParseError): Nothing {
suspendCoroutineUninterceptedOrReturn<Any?> {
withCont(backtrackCont) // may be null
Expand Down Expand Up @@ -164,3 +177,39 @@ internal class ParsingContext(
}
}
}

/**
 * Tracks the current parsing offset and the most recent token match over [input],
 * and turns them into a [ParseErrorContext] on request.
 */
internal class LastTokenMatchContext(
    val input: String,
    var currentOffset: Int,
    var lastMatch: TokenMatch? = null,
) : ParseErrorContextProvider {

    override fun toString() = "LastTokenMatchContext(currentOffset=$currentOffset, lastMatch=$lastMatch)"

    /**
     * Returns the context for an error at [offset], or `null` when [offset] is not the
     * current offset.
     *
     * The section shows up to 20 characters from [offset] onwards. When the last match
     * ends exactly at [offset], the section also starts at that match and reports it
     * as the previous token.
     */
    override fun getParseErrorContext(offset: Int): ParseErrorContext? {
        if (offset != currentOffset) return null

        val lookAhead = 20
        // Only a match that ends right at the error offset is shown as the previous token.
        val adjacentMatch = lastMatch?.takeIf { it.nextOffset == offset }
        val sectionStart = adjacentMatch?.offset ?: offset
        val sectionStop = (offset + lookAhead).coerceAtMost(input.length)

        return ParseErrorContext(
            inputSection = input.substring(sectionStart, sectionStop),
            lookBehind = adjacentMatch?.length ?: 0,
            lookAhead = lookAhead,
            previousTokenMatch = adjacentMatch,
        )
    }
}
7 changes: 6 additions & 1 deletion src/commonMain/kotlin/me/alllex/parsus/token/TokenMatch.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,9 @@ data class TokenMatch(
val token: Token,
val offset: Int,
val length: Int,
)
) {
/**
* Offset of the next character after the match.
*/
val nextOffset: Int get() = offset + length
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package me.alllex.parsus.trace
import me.alllex.parsus.annotations.ExperimentalParsusApi
import me.alllex.parsus.token.Token
import me.alllex.parsus.token.TokenMatch
import me.alllex.parsus.util.replaceNonPrintable


@ExperimentalParsusApi
Expand Down Expand Up @@ -82,13 +83,3 @@ fun formatTokenMatchingTrace(
}
return sb.toString()
}

/**
 * Maps a whitespace character to a visible Unicode stand-in so that it shows up in
 * printed output; any other character is returned unchanged.
 *
 * The replacements are written as `\u` escapes so that they survive tools that
 * drop non-ASCII characters.
 */
private fun replaceNonPrintable(char: Char): Char {
    return when (char) {
        ' ' -> '\u2423' // U+2423 OPEN BOX
        '\n' -> '\u2424' // U+2424 SYMBOL FOR NEWLINE
        '\r' -> '\u240D' // U+240D SYMBOL FOR CARRIAGE RETURN
        '\t' -> '\u2409' // U+2409 SYMBOL FOR HORIZONTAL TABULATION
        else -> char
    }
}
19 changes: 19 additions & 0 deletions src/commonMain/kotlin/me/alllex/parsus/util/text.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package me.alllex.parsus.util

/**
 * Returns a copy of [string] in which every character has been passed through
 * the single-character `replaceNonPrintable` overload.
 */
internal fun replaceNonPrintable(string: String): String =
    string.map { ch -> replaceNonPrintable(ch) }.joinToString(separator = "")

/**
 * Maps a whitespace character to a visible Unicode stand-in so that it shows up in
 * printed output; any other character is returned unchanged.
 *
 * The replacements are written as `\u` escapes so that they survive tools that
 * drop non-ASCII characters.
 */
internal fun replaceNonPrintable(char: Char): Char {
    return when (char) {
        ' ' -> '\u2423' // U+2423 OPEN BOX
        '\n' -> '\u2424' // U+2424 SYMBOL FOR NEWLINE
        '\r' -> '\u240D' // U+240D SYMBOL FOR CARRIAGE RETURN
        '\t' -> '\u2409' // U+2409 SYMBOL FOR HORIZONTAL TABULATION
        else -> char
    }
}
97 changes: 97 additions & 0 deletions src/commonTest/kotlin/me/alllex/parsus/ParseErrorTest.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package me.alllex.parsus

import assertk.assertions.isEqualTo
import assertk.assertions.prop
import me.alllex.parsus.parser.Grammar
import me.alllex.parsus.parser.ParseError
import me.alllex.parsus.parser.map
import me.alllex.parsus.parser.times
import me.alllex.parsus.token.literalToken
import me.alllex.parsus.token.regexToken
import kotlin.test.Test

/**
 * Checks the text produced by [ParseError.describe] for unmatched-token errors.
 *
 * In the expected descriptions, the "Expected token" and "| offset" lines are indented
 * by the length of the previous token match, so that they line up with the error
 * position in the input line printed below them. The relative indentation inside the
 * raw strings is therefore significant.
 */
class ParseErrorTest {

    /** Descriptions with and without a previous token, and for trailing input before EOF. */
    @Test
    fun unmatchedTokenErrorsProvideUserFriendlyDescriptions() {
        object : Grammar<String>() {
            val ab by literalToken("ab")
            val cd by literalToken("cd")
            override val root by ab * cd map { (v1, v2) -> "${v1.text}-${v2.text}" }
        }.run {

            assertParsed("abcd").isEqualTo("ab-cd")

            // Previous token "ab" has length 2, so the annotation lines are padded by 2 spaces.
            assertNotParsed("abab").prop(ParseError::describe).isEqualTo(
                "Unmatched token at offset=2, when expected: LiteralToken('cd')\n" + """
                  Expected token: LiteralToken('cd')
                  | offset=2 (or after ignored tokens)
                abab
                ^^ Previous token: LiteralToken('ab') at offset=0
            """.trimIndent() + "\n"
            )

            // No previous token: no padding and no "Previous token" line.
            assertNotParsed("cd").prop(ParseError::describe).isEqualTo(
                "Unmatched token at offset=0, when expected: LiteralToken('ab')\n" + """
                Expected token: LiteralToken('ab')
                | offset=0 (or after ignored tokens)
                cd
            """.trimIndent() + "\n"
            )

            // The input section starts at the previous token "cd", not at the start of the input.
            assertNotParsed("abcdab").prop(ParseError::describe).isEqualTo(
                "Unmatched token at offset=4, when expected: Token(EOF)\n" + """
                  Expected token: Token(EOF)
                  | offset=4 (or after ignored tokens)
                cdab
                ^^ Previous token: LiteralToken('cd') at offset=2
            """.trimIndent() + "\n"
            )
        }
    }

    /** The previous token is still reported when ignored tokens follow it. */
    @Test
    fun lastMatchDescriptionIsPresentWhenThereAreIgnoredTokensInBetween() {
        object : Grammar<String>() {
            val ws by literalToken(" ", ignored = true)
            val ab by literalToken("ab")
            val cd by literalToken("cd")
            override val root by ab * cd map { (v1, v2) -> "${v1.text}-${v2.text}" }
        }.run {
            assertParsed("ab cd").isEqualTo("ab-cd")

            assertNotParsed("ab ab").prop(ParseError::describe).isEqualTo(
                "Unmatched token at offset=2, when expected: LiteralToken('cd')\n" + """
                  Expected token: LiteralToken('cd')
                  | offset=2 (or after ignored tokens)
                ab␣ab
                ^^ Previous token: LiteralToken('ab') at offset=0
            """.trimIndent() + "\n"
            )
        }
    }

    /** Whitespace in the input section is rendered with visible stand-in characters. */
    @Test
    fun unprintableCharactersAreReplacedInErrors() {
        object : Grammar<String>() {
            val ws by regexToken("\\s+")
            val ab by literalToken("ab")
            @Suppress("unused")
            val cd by literalToken("cd")
            override val root by ws * ab map { (v1, v2) -> "${v1.text}-${v2.text}" }
        }.run {
            assertParsed(" \t\r\nab").isEqualTo(" \t\r\n-ab")

            // Previous token is 4 whitespace characters long, so the padding is 4 spaces.
            assertNotParsed(" \t\r\ncd").prop(ParseError::describe).isEqualTo(
                "Unmatched token at offset=4, when expected: LiteralToken('ab')\n" + """
                    Expected token: LiteralToken('ab')
                    | offset=4 (or after ignored tokens)
                ␣␉␍␤cd
                ^^^^ Previous token: RegexToken(ws [\s+]) at offset=0
            """.trimIndent() + "\n"
            )
        }

    }

}
15 changes: 15 additions & 0 deletions src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,19 @@ class TokenTests {
}
}

// Parses "ab" followed by two explicit EOF tokens and expects both EOF matches
// to be reported at offset 2, i.e. the second one does not start past the first.
@Test
fun explicitEofMatchesDoNotOverflowInputLength() {
object : Grammar<List<TokenMatch>>() {
val ab by literalToken("ab")
val eof by EofToken
override val root by ab * eof * eof map { it.toList() }
}.run {
assertParsed("ab").isEqualTo(listOf(
TokenMatch(ab, 0, 2),
TokenMatch(EofToken, 2, 1),
TokenMatch(EofToken, 2, 1),
))
}
}

}

0 comments on commit bc6ead1

Please sign in to comment.