-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #24 from alllex/parsing-trace
Introduce token tracing
- Loading branch information
Showing
6 changed files
with
227 additions
and
5 deletions.
There are no files selected for viewing
10 changes: 10 additions & 0 deletions
10
src/commonMain/kotlin/me/alllex/parsus/annotations/ExperimentalParsusApi.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package me.alllex.parsus.annotations | ||
|
||
|
||
/**
 * Opt-in marker for experimental parts of the Parsus API.
 *
 * Using a declaration annotated with this marker without opting in
 * (for example via `@OptIn(ExperimentalParsusApi::class)`) is reported by the compiler as a warning.
 * The marker can be placed on classes and functions.
 */
@Target(AnnotationTarget.CLASS, AnnotationTarget.FUNCTION)
@Retention(AnnotationRetention.BINARY)
@RequiresOptIn(
    message = "This API is experimental. It may be changed in the future without notice.",
    level = RequiresOptIn.Level.WARNING,
)
annotation class ExperimentalParsusApi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
94 changes: 94 additions & 0 deletions
94
src/commonMain/kotlin/me/alllex/parsus/trace/TokenMatchingTrace.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
package me.alllex.parsus.trace | ||
|
||
import me.alllex.parsus.annotations.ExperimentalParsusApi | ||
import me.alllex.parsus.token.Token | ||
import me.alllex.parsus.token.TokenMatch | ||
|
||
|
||
/**
 * A single attempt to match a [token] against the input.
 *
 * @property token the token that was tried
 * @property offset the input offset at which the attempt was made
 * @property match the resulting match, or `null` when the token did not match at [offset]
 */
@ExperimentalParsusApi
data class TokenMatchingEvent(
    val token: Token,
    val offset: Int,
    val match: TokenMatch?,
)
|
||
/**
 * The [input] text together with the token matching [events] recorded for it.
 *
 * @property input the text the events refer to; event offsets index into this string
 * @property events the recorded matching attempts, in the order they are to be reported
 */
@ExperimentalParsusApi
data class TokenMatchingTrace(
    val input: String,
    val events: List<TokenMatchingEvent>,
)
|
||
/**
 * Renders [trace] as a multi-line, human-readable report of token matching attempts.
 *
 * Every event produces a marker line: `^` repeated over the matched length for a match,
 * or a single `x` for a mismatch, followed by the offset (or the inclusive offset range
 * of the match) and the token. Above the marker line, a separator of underscores and a
 * window of the input around the event offset are printed. The window is omitted when the
 * previous event was also a mismatch at the same offset.
 *
 * Space, newline, carriage return and tab characters of the input are shown as visible
 * replacement symbols.
 *
 * @param trace the input and the matching events to render
 * @param lookBehind number of input characters shown before the event offset
 * @param lookAhead number of input characters shown after the match (or after the offset on a mismatch)
 * @return the rendered report; every line, including the last one, ends with a line break
 */
@ExperimentalParsusApi
fun formatTokenMatchingTrace(
    trace: TokenMatchingTrace,
    lookBehind: Int = 5,
    lookAhead: Int = 20,
): String {
    // Replacement is char-for-char, so event offsets stay valid for the displayed text.
    val printable = trace.input.map { replaceNonPrintable(it) }.joinToString(separator = "")

    return buildString {
        var previousMismatchAt = -1
        for (event in trace.events) {
            val at = event.offset
            val isMatch = event.match != null
            val span = event.match?.length ?: 0

            // A run of mismatches at one offset shares a single input window.
            val repeatsPreviousMismatch = !isMatch && at == previousMismatchAt
            if (!repeatsPreviousMismatch) {
                val windowEnd = minOf(at + span + lookAhead, printable.length)
                // NOTE(review): this width does not include the trailing "…" appended below,
                // so the separator is one character shorter than a right-truncated window line.
                val lineWidth = lookBehind + minOf(span + lookAhead, printable.length) + 1
                append("_".repeat(lineWidth))
                appendLine()

                // Left part: always lookBehind + 1 characters wide, padded or marked as truncated.
                val leading = if (at <= lookBehind) {
                    "·".repeat(lookBehind - at + 1) + printable.substring(0, at)
                } else {
                    "…" + printable.substring(at - lookBehind, at)
                }
                val window = printable.substring(at, windowEnd)
                val truncatedRight = windowEnd < printable.length

                append(leading)
                append(window)
                if (truncatedRight) {
                    append("…")
                }

                // Pad short windows so that the line is as wide as the separator.
                val printed = leading.length + window.length + (if (truncatedRight) 1 else 0)
                if (printed < lineWidth) {
                    append("·".repeat(lineWidth - printed))
                }
                appendLine()
            }

            previousMismatchAt = if (isMatch) -1 else at

            val marker = if (isMatch) "^" else "x"
            val position = if (isMatch) "[$at - ${at + span - 1}]" else "[$at]"
            // The marker is aligned with the character at the event offset in the window above.
            append(" ".repeat(lookBehind + 1))
            append(marker.repeat(span.coerceAtLeast(1)))
            append(" ").append(position).append(" ").append(event.token)
            appendLine()
        }
    }
}
|
||
/**
 * Maps whitespace characters that are invisible or disruptive in a single-line display
 * to visible stand-in symbols; every other character is returned unchanged.
 *
 * The symbols are written as Unicode escapes so that they survive tools that strip or
 * mangle non-ASCII glyphs in source text.
 *
 * @param char the character to display
 * @return the visible replacement for space, newline, carriage return and tab, otherwise [char]
 */
private fun replaceNonPrintable(char: Char): Char = when (char) {
    ' ' -> '\u2423' // ␣ OPEN BOX
    '\n' -> '\u2424' // ␤ SYMBOL FOR NEWLINE
    '\r' -> '\u240D' // ␍ SYMBOL FOR CARRIAGE RETURN
    '\t' -> '\u2409' // ␉ SYMBOL FOR HORIZONTAL TABULATION
    else -> char
}
10 changes: 10 additions & 0 deletions
10
src/commonMain/kotlin/me/alllex/parsus/trace/TracedParseResult.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package me.alllex.parsus.trace | ||
|
||
import me.alllex.parsus.annotations.ExperimentalParsusApi | ||
import me.alllex.parsus.parser.ParseResult | ||
|
||
/**
 * A [ParseResult] bundled with the trace that accompanies it.
 *
 * @param R the type carried by the parse result
 * @param T the type of the trace
 * @property result the outcome of parsing
 * @property trace the trace delivered together with [result]
 */
@ExperimentalParsusApi
class TracedParseResult<out R, T>(
    val result: ParseResult<R>,
    val trace: T,
)
58 changes: 58 additions & 0 deletions
58
src/commonTest/kotlin/me/alllex/parsus/TokenMatchingTraceTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package me.alllex.parsus | ||
|
||
import assertk.assertThat | ||
import assertk.assertions.isEqualTo | ||
import me.alllex.parsus.annotations.ExperimentalParsusApi | ||
import me.alllex.parsus.parser.* | ||
import me.alllex.parsus.token.literalToken | ||
import me.alllex.parsus.trace.formatTokenMatchingTrace | ||
import me.alllex.parsus.tree.SyntaxTree | ||
import me.alllex.parsus.tree.lexeme | ||
import me.alllex.parsus.tree.plus | ||
import kotlin.test.Test | ||
|
||
/**
 * Checks the textual rendering produced by [formatTokenMatchingTrace].
 */
@OptIn(ExperimentalParsusApi::class)
class TokenMatchingTraceTest {

    /**
     * Parses `"abcd"` with a three-token grammar and pins the complete formatted trace.
     */
    @Test
    fun tokenMatchingTraceIsFormatted() {
        object : Grammar<SyntaxTree>() {
            val a by literalToken("a")
            val b by literalToken("b")
            val cd by literalToken("cd")
            val ab by parser { node(lexeme(a) + lexeme(b)) }
            override val root by ab * parlex(cd) map { (v1, v2) -> node(v1, v2) }
        }.run {
            val input = "abcd"
            val tracedResult = parseTracingTokenMatching(input)
            assertThat(tracedResult.result).isEqualTo(ParsedValue(node(node(a.lex(0), b.lex(1)), cd.lex(2))))

            val formattedTrace = formatTokenMatchingTrace(tracedResult.trace)

            // Marker lines start with lookBehind + 1 = 6 spaces (default lookBehind is 5).
            // The margin prefix keeps that leading whitespace explicit and independent of
            // how this file is indented. The final bare `|` stands for the trailing line break.
            val expectedTrace = """
                |__________
                |······abcd
                |      x [0] Token(EOF)
                |__________
                |······abcd
                |      ^ [0 - 0] LiteralToken('a')
                |__________
                |·····abcd·
                |      x [1] Token(EOF)
                |__________
                |·····abcd·
                |      ^ [1 - 1] LiteralToken('b')
                |__________
                |····abcd··
                |      x [2] Token(EOF)
                |__________
                |····abcd··
                |      ^^ [2 - 3] LiteralToken('cd')
                |__________
                |··abcd····
                |      ^ [4 - 4] Token(EOF)
                |
            """.trimMargin()
            assertThat(formattedTrace).isEqualTo(expectedTrace)
        }
    }

}