Merge pull request #24 from alllex/parsing-trace
Introduce token tracing
alllex authored Oct 5, 2023
2 parents 804e749 + 2993b36 commit 725491b
Showing 6 changed files with 227 additions and 5 deletions.
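The API introduced by this PR is Grammar.parseTracingTokenMatching, which records every token-matching attempt made by the lexer and returns it alongside the normal parse result. A minimal usage sketch, modeled on the test at the bottom of this diff; the grammar, input, and main function are invented for illustration:

import me.alllex.parsus.annotations.ExperimentalParsusApi
import me.alllex.parsus.parser.*
import me.alllex.parsus.token.TokenMatch
import me.alllex.parsus.token.literalToken
import me.alllex.parsus.trace.formatTokenMatchingTrace

@OptIn(ExperimentalParsusApi::class)
fun main() {
    // Illustrative two-token grammar; any Grammar works the same way.
    val grammar = object : Grammar<TokenMatch>() {
        val a by literalToken("a")
        val b by literalToken("b")
        override val root by parser { a(); b() }
    }

    // New in this PR: parse while recording every token-matching attempt.
    val traced = grammar.parseTracingTokenMatching("ab")
    println(traced.result)                          // the usual ParseResult, as before
    println(formatTokenMatchingTrace(traced.trace)) // human-readable trace of token matches
}

Running this would print the parse result followed by a matching trace in the format asserted by the new test.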
10 changes: 10 additions & 0 deletions src/commonMain/kotlin/me/alllex/parsus/annotations/ExperimentalParsusApi.kt
@@ -0,0 +1,10 @@
package me.alllex.parsus.annotations


@RequiresOptIn(
    level = RequiresOptIn.Level.WARNING,
    message = "This API is experimental. It may be changed in the future without notice."
)
@Retention(AnnotationRetention.BINARY)
@Target(AnnotationTarget.CLASS, AnnotationTarget.FUNCTION)
annotation class ExperimentalParsusApi
31 changes: 28 additions & 3 deletions src/commonMain/kotlin/me/alllex/parsus/parser/Grammar.kt
@@ -1,7 +1,10 @@
package me.alllex.parsus.parser

import me.alllex.parsus.annotations.ExperimentalParsusApi
import me.alllex.parsus.token.EofToken
import me.alllex.parsus.token.Token
import me.alllex.parsus.trace.TokenMatchingTrace
import me.alllex.parsus.trace.TracedParseResult
import kotlin.reflect.KProperty

/**
@@ -109,6 +112,11 @@ abstract class Grammar<out V>(
        return parseOrThrow(input)
    }

    @ExperimentalParsusApi
    fun parseTracingTokenMatching(input: String): TracedParseResult<V, TokenMatchingTrace> {
        return parseTracingEntire(root, input)
    }

    override fun toString(): String {
        return "Grammar(${_tokens.size} tokens, root = $root)"
    }
@@ -150,16 +158,33 @@ abstract class Grammar<out V>(
protected operator fun <R> Parser<R>.getValue(thisRef: Grammar<*>, property: KProperty<*>): Parser<R> = this

    private fun <T> parseEntire(parser: Parser<T>, input: String): ParseResult<T> {
-        freezeTokens = true
+        beforeParsing()
        val lexer = Lexer(input, _tokens)
        val parsingContext = ParsingContext(lexer, debugMode)
        return parsingContext.runParser(createUntilEofParser(parser))
    }

    @ExperimentalParsusApi
    private fun <T> parseTracingEntire(parser: Parser<T>, input: String): TracedParseResult<T, TokenMatchingTrace> {
        beforeParsing()
        val lexer = Lexer(input, _tokens, traceTokenMatching = true)
        val parsingContext = ParsingContext(lexer, debugMode)
        val result = parsingContext.runParser(createUntilEofParser(parser))
        val trace = lexer.getTokenMatchingTrace() ?: error("Token matching trace is not available")
        return TracedParseResult(result, trace)
    }

    private fun beforeParsing() {
        freezeTokens = true
    }

    private fun <T> createUntilEofParser(parser: Parser<T>): Parser<T> {
        val untilEofParser = parser {
            val r = parser()
            EofToken()
            r
        }

-        return parsingContext.runParser(untilEofParser)
+        return untilEofParser
    }
}

29 changes: 27 additions & 2 deletions src/commonMain/kotlin/me/alllex/parsus/parser/Lexer.kt
@@ -1,21 +1,28 @@
package me.alllex.parsus.parser

import me.alllex.parsus.annotations.ExperimentalParsusApi
import me.alllex.parsus.token.Token
import me.alllex.parsus.token.TokenMatch
import me.alllex.parsus.trace.TokenMatchingEvent
import me.alllex.parsus.trace.TokenMatchingTrace

/**
* Lexer is responsible for [finding][findMatch] token-matches in the given position
* in the input string.
*/
@OptIn(ExperimentalParsusApi::class)
internal class Lexer(
    val input: String,
    private val tokens: List<Token>,
    traceTokenMatching: Boolean = false,
) {

    private val tokensByFirstChar: Map<Char, List<Token>>
    private var cachedFromIndex: Int = -1
    private var cachedTokenMatch: TokenMatch? = null

    private val traceEvents: MutableList<TokenMatchingEvent>? = if (traceTokenMatching) mutableListOf() else null

    init {
        tokensByFirstChar = mutableMapOf<Char, MutableList<Token>>()
        val unknownFirstCharTokens = mutableListOf<Token>()
@@ -36,6 +43,10 @@ internal class Lexer(
        }
    }

    internal fun getTokenMatchingTrace(): TokenMatchingTrace? {
        return traceEvents?.let { TokenMatchingTrace(input, it) }
    }

    fun findMatch(fromIndex: Int): TokenMatch? {
        if (fromIndex == cachedFromIndex && cachedTokenMatch != null) {
            return cachedTokenMatch
@@ -77,7 +88,21 @@

    private fun matchImpl(fromIndex: Int, token: Token): TokenMatch? {
        val length = token.match(input, fromIndex)
-        if (length == 0) return null
-        return TokenMatch(token, fromIndex, length)
+        if (length == 0) {
+            traceMismatch(token, fromIndex)
+            return null
+        }
+
+        val match = TokenMatch(token, fromIndex, length)
+        traceMatch(token, match)
+        return match
    }

    private fun traceMismatch(token: Token, offset: Int) {
        traceEvents?.add(TokenMatchingEvent(token, offset, null))
    }

    private fun traceMatch(token: Token, match: TokenMatch) {
        traceEvents?.add(TokenMatchingEvent(token, match.offset, match))
    }
}
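The tracing hook is opt-in: when traceTokenMatching is false, traceEvents stays null and both trace helpers reduce to a cheap no-op via the safe-call operator, so the non-tracing path pays essentially nothing. The same pattern shown in isolation as a standalone illustration; Recorder is not part of this PR:

// Illustrative sketch of the conditional-recording pattern used by Lexer above.
class Recorder(enabled: Boolean) {
    // Null when disabled, so nothing is allocated or retained.
    private val events: MutableList<String>? = if (enabled) mutableListOf() else null

    fun record(event: String) {
        events?.add(event) // no-op when recording is disabled
    }

    fun dump(): List<String>? = events?.toList()
}

fun main() {
    val off = Recorder(enabled = false)
    off.record("ignored")
    println(off.dump()) // null

    val on = Recorder(enabled = true)
    on.record("kept")
    println(on.dump()) // [kept]
}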
94 changes: 94 additions & 0 deletions src/commonMain/kotlin/me/alllex/parsus/trace/TokenMatchingTrace.kt
@@ -0,0 +1,94 @@
package me.alllex.parsus.trace

import me.alllex.parsus.annotations.ExperimentalParsusApi
import me.alllex.parsus.token.Token
import me.alllex.parsus.token.TokenMatch


@ExperimentalParsusApi
data class TokenMatchingEvent(
    val token: Token,
    val offset: Int,
    val match: TokenMatch?,
)

@ExperimentalParsusApi
data class TokenMatchingTrace(
    val input: String,
    val events: List<TokenMatchingEvent>,
)

@ExperimentalParsusApi
fun formatTokenMatchingTrace(
    trace: TokenMatchingTrace,
    lookBehind: Int = 5,
    lookAhead: Int = 20,
): String {

    val input = trace.input.let { rawInput ->
        buildString {
            for (char in rawInput) {
                append(replaceNonPrintable(char))
            }
        }
    }

    val sb = StringBuilder()
    var lastMismatchOffset = -1
    for (event in trace.events) {
        val offset = event.offset
        val match = event.match
        val matchLength = match?.length ?: 0

        // avoid re-printing the input line, when the previous event was *also* a mismatch at the same offset
        if (match != null || offset != lastMismatchOffset) {
            val rawToOffset = offset + matchLength + lookAhead
            val toOffset = rawToOffset.coerceAtMost(input.length)
            val inputDisplayLineLength = lookBehind + (matchLength + lookAhead).coerceAtMost(input.length) + 1
            sb.append("_".repeat(inputDisplayLineLength))
            sb.appendLine()

            var inputDisplayLinePrintedLength = 0
            val prefix = when {
                offset <= lookBehind -> "·".repeat(lookBehind - offset + 1) + input.substring(0, offset)
                else -> "…" + input.substring(offset - lookBehind, offset)
            }
            sb.append(prefix)
            inputDisplayLinePrintedLength += prefix.length

            val inputChunkAtOffset = input.substring(offset, toOffset)
            sb.append(inputChunkAtOffset)
            inputDisplayLinePrintedLength += inputChunkAtOffset.length

            if (toOffset < input.length) {
                sb.append("…")
                inputDisplayLinePrintedLength += 1
            }

            if (inputDisplayLinePrintedLength < inputDisplayLineLength) {
                sb.append("·".repeat(inputDisplayLineLength - inputDisplayLinePrintedLength))
            }
            sb.appendLine()
        }

        lastMismatchOffset = if (match != null) -1 else offset

        val matchSymbol = if (match != null) "^" else "x"
        sb.append(" ".repeat(lookBehind + 1))
        sb.append(matchSymbol.repeat(matchLength.coerceAtLeast(1)))
        sb.append(" [$offset").append(if (match != null) " - ${offset + matchLength - 1}" else "")
            .append("] ").append(event.token)
        sb.appendLine()
    }
    return sb.toString()
}

private fun replaceNonPrintable(char: Char): Char {
    return when (char) {
        ' ' -> '␣' // U+2423 OPEN BOX
        '\n' -> '␤' // U+2424 SYMBOL FOR NEWLINE
        '\r' -> '␍' // U+240D SYMBOL FOR CARRIAGE RETURN
        '\t' -> '␉' // U+2409 SYMBOL FOR HORIZONTAL TABULATION
        else -> char
    }
}
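For each matching attempt, the formatter prints a separator line of underscores, a window of the input around the attempt (padded with "·" and with whitespace made visible by replaceNonPrintable), and a marker line in which "^" underlines the matched text and "x" marks a failed attempt, followed by the offset range and the token. The window size is controlled by lookBehind and lookAhead. A hypothetical convenience wrapper; the function name and the values 10 and 40 are invented for this sketch:

import me.alllex.parsus.annotations.ExperimentalParsusApi
import me.alllex.parsus.trace.TokenMatchingTrace
import me.alllex.parsus.trace.formatTokenMatchingTrace

// Widen the context window around each matching attempt beyond the 5/20 defaults.
@OptIn(ExperimentalParsusApi::class)
fun formatWideTrace(trace: TokenMatchingTrace): String =
    formatTokenMatchingTrace(trace, lookBehind = 10, lookAhead = 40)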
10 changes: 10 additions & 0 deletions src/commonMain/kotlin/me/alllex/parsus/trace/TracedParseResult.kt
@@ -0,0 +1,10 @@
package me.alllex.parsus.trace

import me.alllex.parsus.annotations.ExperimentalParsusApi
import me.alllex.parsus.parser.ParseResult

@ExperimentalParsusApi
class TracedParseResult<out R, T>(
    val result: ParseResult<R>,
    val trace: T,
)
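TracedParseResult simply pairs the ordinary ParseResult with the collected trace, so callers decide when the trace is worth printing. A sketch of the intended debugging flow, assuming ParsedValue is the success case of ParseResult as used in the test below; parseOrPrintTrace is a made-up helper:

import me.alllex.parsus.annotations.ExperimentalParsusApi
import me.alllex.parsus.parser.Grammar
import me.alllex.parsus.parser.ParsedValue
import me.alllex.parsus.trace.formatTokenMatchingTrace

@OptIn(ExperimentalParsusApi::class)
fun <V> parseOrPrintTrace(grammar: Grammar<V>, input: String) {
    val traced = grammar.parseTracingTokenMatching(input)
    when (val result = traced.result) {
        is ParsedValue -> println("OK: $result")
        else -> {
            println("Failed: $result") // the parse error
            // On failure, the recorded trace shows every token the lexer tried and where.
            println(formatTokenMatchingTrace(traced.trace))
        }
    }
}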
58 changes: 58 additions & 0 deletions src/commonTest/kotlin/me/alllex/parsus/TokenMatchingTraceTest.kt
@@ -0,0 +1,58 @@
package me.alllex.parsus

import assertk.assertThat
import assertk.assertions.isEqualTo
import me.alllex.parsus.annotations.ExperimentalParsusApi
import me.alllex.parsus.parser.*
import me.alllex.parsus.token.literalToken
import me.alllex.parsus.trace.formatTokenMatchingTrace
import me.alllex.parsus.tree.SyntaxTree
import me.alllex.parsus.tree.lexeme
import me.alllex.parsus.tree.plus
import kotlin.test.Test

@OptIn(ExperimentalParsusApi::class)
class TokenMatchingTraceTest {

    @Test
    fun tokenMatchingTraceIsFormatted() {
        object : Grammar<SyntaxTree>() {
            val a by literalToken("a")
            val b by literalToken("b")
            val cd by literalToken("cd")
            val ab by parser { node(lexeme(a) + lexeme(b)) }
            override val root by ab * parlex(cd) map { (v1, v2) -> node(v1, v2) }
        }.run {
            val input = "abcd"
            val tracedResult = parseTracingTokenMatching(input)
            assertThat(tracedResult.result).isEqualTo(ParsedValue(node(node(a.lex(0), b.lex(1)), cd.lex(2))))
            val formattedTrace = formatTokenMatchingTrace(tracedResult.trace)
            assertThat("\n" + formattedTrace).isEqualTo(
                """
__________
······abcd
      x [0] Token(EOF)
__________
······abcd
      ^ [0 - 0] LiteralToken('a')
__________
·····abcd·
      x [1] Token(EOF)
__________
·····abcd·
      ^ [1 - 1] LiteralToken('b')
__________
····abcd··
      x [2] Token(EOF)
__________
····abcd··
      ^^ [2 - 3] LiteralToken('cd')
__________
··abcd····
      ^ [4 - 4] Token(EOF)
"""
            )
        }
    }

}
