From d9604e025d100bf9ecdcf5c2794afbb65086126b Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Wed, 4 Oct 2023 08:58:20 +0200
Subject: [PATCH 01/11] Implement scannerless parsing

---
 .../me/alllex/parsus/parser/ChoiceParser.kt   |  5 ++-
 .../kotlin/me/alllex/parsus/parser/Lexer.kt   | 37 +++++++++++++++----
 .../me/alllex/parsus/parser/ParseResult.kt    |  2 +
 .../me/alllex/parsus/parser/ParsingContext.kt | 12 ++++--
 4 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt
index 0cf8c46..4dbbdbc 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt
@@ -29,8 +29,9 @@ internal class ChoiceParser<T>(
     private val unknownFirstTokenParsers = parsers.filter { it.hasUnknownFirstTokens() }
 
     override suspend fun ParsingScope.parse(): T {
-        val currentToken = currentToken?.token ?: fail(NoMatchingToken(currentOffset))
-        val parsers = parsersByFirstToken[currentToken] ?: unknownFirstTokenParsers
+        // TODO: clean up
+//        val currentToken = currentToken?.token ?: fail(NoMatchingToken(currentOffset))
+//        val parsers = parsersByFirstToken[currentToken] ?: unknownFirstTokenParsers
         for (parser in parsers) {
             val r = tryParse(parser)
             if (r is ParsedValue) return r.value
diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/Lexer.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/Lexer.kt
index 87ed8b4..74da9bc 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/Lexer.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/Lexer.kt
@@ -12,9 +12,10 @@ internal class Lexer(
     private val tokens: List<Token>,
 ) {
 
+    private val ignoredTokens = tokens.filter { it.ignored }
     private val tokensByFirstChar: Map<Char, List<Token>>
-    private var cachedFromIndex: Int = -1
-    private var cachedTokenMatch: TokenMatch? = null
+//    private var cachedFromIndex: Int = -1
+//    private var cachedTokenMatch: TokenMatch? = null
 
     init {
         tokensByFirstChar = mutableMapOf<Char, MutableList<Token>>()
@@ -36,14 +37,36 @@ internal class Lexer(
         }
     }
 
-    fun findMatch(fromIndex: Int): TokenMatch? {
-        if (fromIndex == cachedFromIndex && cachedTokenMatch != null) {
-            return cachedTokenMatch
+    fun findMatchOf(fromIndex: Int, targetToken: Token): TokenMatch? {
+        var pos = fromIndex
+        while (true) {
+            matchImpl(pos, targetToken)?.let { return it }
+
+            val preIgnorePos = pos
+            for (ignoredToken in ignoredTokens) {
+                val ignoredMatch = matchImpl(pos, ignoredToken)
+                if (ignoredMatch != null) {
+                    pos = ignoredMatch.offset + ignoredMatch.length
+                    break
+                }
+            }
+
+            if (preIgnorePos == pos) {
+                // No ignored tokens matched, so we can't find the target token
+                return null
+            }
         }
+        // The loop will exit via a mismatch, because no tokens can match "after the end of input"
+    }
+
+    fun findMatch(fromIndex: Int): TokenMatch? {
+//        if (fromIndex == cachedFromIndex && cachedTokenMatch != null) {
+//            return cachedTokenMatch
+//        }
 
         val foundTokenMatch = findMatchIgnoring(fromIndex)
-        cachedFromIndex = fromIndex
-        cachedTokenMatch = foundTokenMatch
+//        cachedFromIndex = fromIndex
+//        cachedTokenMatch = foundTokenMatch
         return foundTokenMatch
     }
diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/ParseResult.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/ParseResult.kt
index e1f7da0..a0bc858 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/ParseResult.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/ParseResult.kt
@@ -26,6 +26,8 @@ abstract class ParseError : ParseResult<Nothing>() {
     override fun toString(): String = "ParseError"
 }
 
+data class UnmatchedToken(val expected: Token, override val offset: Int) : ParseError()
+
 data class MismatchedToken(val expected: Token, val found: TokenMatch) : ParseError() {
     override val offset: Int get() = found.offset
 }
diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
index 2bb99dd..c002631 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
@@ -5,7 +5,9 @@ import me.alllex.parsus.token.TokenMatch
 import kotlin.coroutines.Continuation
 import kotlin.coroutines.CoroutineContext
 import kotlin.coroutines.EmptyCoroutineContext
-import kotlin.coroutines.intrinsics.*
+import kotlin.coroutines.intrinsics.COROUTINE_SUSPENDED
+import kotlin.coroutines.intrinsics.createCoroutineUnintercepted
+import kotlin.coroutines.intrinsics.suspendCoroutineUninterceptedOrReturn
 
 /**
  * Executes parsers, keeping track of current position in the input and error-continuations.
@@ -39,8 +41,9 @@ internal class ParsingContext(
 
     override val currentOffset: Int get() = position
 
+    // TODO: clean up
     override val currentToken: TokenMatch?
-        get() = lexer.findMatch(position)
+        get() = null // lexer.findMatch(position)
 
     override suspend fun <R> Parser<R>.invoke(): R = parse()
 
@@ -55,8 +58,9 @@ internal class ParsingContext(
 
     override fun tryParse(token: Token): ParseResult<TokenMatch> {
         val fromIndex = this.position
-        val match = lexer.findMatch(fromIndex)
-            ?: return NoMatchingToken(fromIndex)
+        val match = lexer.findMatchOf(fromIndex, token)
+            ?: return UnmatchedToken(token, fromIndex)
+        // TODO: clean up, as this should not happen anymore
         if (match.token != token) return MismatchedToken(token, match)
         this.position = match.offset + match.length
         return ParsedValue(match)
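The core idea of the patch above, restated as a compact standalone sketch (names and signatures here are illustrative, not part of the library): instead of eagerly scanning for whatever token happens to match next, the lexer is asked for the specific token the parser currently expects, and ignored tokens are skipped one at a time between attempts. Assuming a matcher is modeled as a function from a position to a match length, with 0 meaning "no match" (mirroring `Token.match`):

```
// Illustrative model of Lexer.findMatchOf from the patch above.
fun findMatchOf(fromIndex: Int, target: (Int) -> Int, ignored: List<(Int) -> Int>): IntRange? {
    var pos = fromIndex
    while (true) {
        val len = target(pos)
        if (len > 0) return pos until pos + len // the expected token always gets the first try
        // No target match here: skip a single ignored token (e.g. whitespace) and retry.
        val skipped = ignored.firstNotNullOfOrNull { m -> m(pos).takeIf { it > 0 } } ?: return null
        pos += skipped
    }
}
```

This is what gives the expected token the highest effective priority: it is re-tried at every position before anything else, and the loop terminates because past the end of input neither the target token nor any ignored token can match.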
From c72512106a8d06b40013fb3d93165d70854506b5 Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Wed, 4 Oct 2023 08:58:46 +0200
Subject: [PATCH 02/11] Fix tests

---
 .../kotlin/me/alllex/parsus/GrammarTests.kt     |  2 +-
 .../kotlin/me/alllex/parsus/IgnoreCaseTests.kt  |  4 ++--
 src/commonTest/kotlin/me/alllex/parsus/Tests.kt | 14 +++++++-------
 .../kotlin/me/alllex/parsus/TokenTests.kt       | 14 ++++++++++++++
 src/commonTest/kotlin/me/alllex/parsus/util.kt  |  8 ++++++++
 5 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/src/commonTest/kotlin/me/alllex/parsus/GrammarTests.kt b/src/commonTest/kotlin/me/alllex/parsus/GrammarTests.kt
index 09db2f7..34ecc98 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/GrammarTests.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/GrammarTests.kt
@@ -75,7 +75,7 @@ class GrammarTests {
         override val root = parser { lexeme(a) }
     }.run {
         assertParsed("a").isEqualTo(a.lex())
-        assertThatParsing("b").failedWithTokenMismatch(a, b, 0)
+        assertThatParsing("b").failedWithUnmatchedToken(a, 0)
         assertThat(parse(nonRootParser, "b").getOrThrow()).isEqualTo(b.lex())
     }
 }
diff --git a/src/commonTest/kotlin/me/alllex/parsus/IgnoreCaseTests.kt b/src/commonTest/kotlin/me/alllex/parsus/IgnoreCaseTests.kt
index dd47672..c0e944d 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/IgnoreCaseTests.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/IgnoreCaseTests.kt
@@ -2,7 +2,7 @@ package me.alllex.parsus
 
 import assertk.assertions.isEqualTo
 import me.alllex.parsus.parser.Grammar
-import me.alllex.parsus.parser.NoMatchingToken
+import me.alllex.parsus.parser.NoViableAlternative
 import me.alllex.parsus.parser.or
 import me.alllex.parsus.parser.parser
 import me.alllex.parsus.token.literalToken
@@ -72,7 +72,7 @@ class IgnoreCaseTests {
         assertParsed("f").isEqualTo(lam.lex("f"))
         assertParsed("F").isEqualTo(lam.lex("F"))
         assertParsed("g").isEqualTo(lamStrict.lex("g"))
-        assertNotParsed("G").failedWith(NoMatchingToken(0))
+        assertNotParsed("G").failedWith(NoViableAlternative(0))
     }
 }
diff --git a/src/commonTest/kotlin/me/alllex/parsus/Tests.kt b/src/commonTest/kotlin/me/alllex/parsus/Tests.kt
index 8718a5c..fdfd52e 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/Tests.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/Tests.kt
@@ -21,9 +21,9 @@ class Tests {
             override val root = parser { node(lexeme(a) + lexeme(b)) }
         }
 
-        assertThat(g.parse("bb")).failedWithTokenMismatch(expected = g.a, actual = g.b, offset = 0)
-        assertThat(g.parse("aa")).failedWithTokenMismatch(expected = g.b, actual = EofToken, offset = 2)
-        assertThat(g.parse("aabbaa")).failedWithTokenMismatch(expected = EofToken, actual = g.a, offset = 4)
+        assertThat(g.parse("bb")).failedWithUnmatchedToken(expected = g.a, offset = 0)
+        assertThat(g.parse("aa")).failedWithUnmatchedToken(expected = g.b, offset = 2)
+        assertThat(g.parse("aabbaa")).failedWithUnmatchedToken(expected = EofToken, offset = 4)
     }
 
     @Test
@@ -256,7 +256,7 @@ class Tests {
         }
     }.run {
         assertParsed("ab").isEqualTo(b.lex(1))
-        assertThatParsing("b").failedWithTokenMismatch(a, b, offset = 0)
+        assertThatParsing("b").failedWithUnmatchedToken(a, offset = 0)
     }
 }
 
@@ -275,7 +275,7 @@ class Tests {
         assertParsed("a").isEqualTo(true to false)
         assertParsed("b").isEqualTo(false to true)
         assertParsed("").isEqualTo(false to false)
-        assertThatParsing("aa").failedWithTokenMismatch(EofToken, a, offset = 1)
+        assertThatParsing("aa").failedWithUnmatchedToken(EofToken, offset = 1)
     }
 }
 
@@ -291,7 +291,7 @@ class Tests {
         assertThat(g.parseOrThrow("b")).isEqualTo(node(g.b))
         assertThat(g.parseOrThrow("ab")).isEqualTo(node(g.a, g.b))
         assertThat(g.parseOrThrow("aab")).isEqualTo(node(g.a, g.a, g.b))
-        assertThat(g.parse("")).failedWithTokenMismatch(g.b, EofToken, offset = 0)
+        assertThat(g.parse("")).failedWithUnmatchedToken(g.b, offset = 0)
     }
 }
 
@@ -322,7 +322,7 @@ class Tests {
         assertThat(g.parseOrThrow("baab")).isEqualTo(node(g.b, g.a, g.a, g.b))
         assertThat(g.parseOrThrow("baaab")).isEqualTo(node(g.b, g.a, g.a, g.a, g.b))
         assertThat(g.parse("bab")).failedWithNotEnoughRepetition(1, 2, 1)
-        assertThat(g.parse("baaaab")).failedWithTokenMismatch(g.b, g.a, offset = 4)
+        assertThat(g.parse("baaaab")).failedWithUnmatchedToken(g.b, offset = 4)
     }
 
     object : Grammar<SyntaxTree>() {
diff --git a/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt b/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
index 776a8d2..d89ee04 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
@@ -4,6 +4,7 @@ import assertk.assertions.isEqualTo
 import me.alllex.parsus.parser.Grammar
 import me.alllex.parsus.parser.map
 import me.alllex.parsus.parser.or
+import me.alllex.parsus.token.TokenMatch
 import me.alllex.parsus.token.literalToken
 import me.alllex.parsus.token.regexToken
 import kotlin.test.Test
@@ -21,4 +22,17 @@ class TokenTests {
         }
     }
 
+    @Test
+    fun tokenPriorityIsDrivenByParser() {
+        object : Grammar<TokenMatch>() {
+            val single by literalToken("<")
+            val double by literalToken("<<")
+
+            // even though single token is declared first, it is not matched first
+            override val root by double or single
+        }.run {
+            assertParsed("<<").isEqualTo(TokenMatch(double, 0, 2))
+        }
+    }
+
 }
diff --git a/src/commonTest/kotlin/me/alllex/parsus/util.kt b/src/commonTest/kotlin/me/alllex/parsus/util.kt
index 8557668..e7436a8 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/util.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/util.kt
@@ -82,3 +82,11 @@ fun Assert<ParseResult<*>>.failedWithTokenMismatch(expected: Token, actual:
         }
     }
 }
+
+fun Assert<ParseResult<*>>.failedWithUnmatchedToken(expected: Token, offset: Int) {
+    isInstanceOf(UnmatchedToken::class)
+        .all {
+            prop("expected token", UnmatchedToken::expected).isEqualTo(expected)
+            prop("offset", UnmatchedToken::offset).isEqualTo(offset)
+        }
+}

From 3857b9eefdab937d261002aa40fe58c9cd1d13bd Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Wed, 4 Oct 2023 09:24:38 +0200
Subject: [PATCH 03/11] Add reflect to tests for better error reporting

---
 build.gradle.kts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/build.gradle.kts b/build.gradle.kts
index 388d773..220281c 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -16,6 +16,7 @@ kotlin {
             dependencies {
                 implementation(kotlin("test"))
                 implementation("com.willowtreeapps.assertk:assertk:0.26.1")
+                runtimeOnly(kotlin("reflect"))
             }
         }
     }
From 04d32685e342b01b161eb2673e8611047d4df16c Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Thu, 5 Oct 2023 21:36:37 +0200
Subject: [PATCH 04/11] Refactor tokenizers and switch to Scannerless

---
 .../me/alllex/parsus/parser/ChoiceParser.kt   |  41 ++++--
 .../kotlin/me/alllex/parsus/parser/Grammar.kt |  13 +-
 .../kotlin/me/alllex/parsus/parser/Lexer.kt   | 131 ------------------
 .../me/alllex/parsus/parser/ParsingContext.kt |  10 +-
 .../parsus/tokenizer/AbstractTokenizer.kt     |  41 ++++++
 .../alllex/parsus/tokenizer/EagerTokenizer.kt |  88 ++++++++++++
 .../parsus/tokenizer/ScannerlessTokenizer.kt  |  42 ++++++
 .../me/alllex/parsus/tokenizer/Tokenizer.kt   |  14 ++
 .../alllex/parsus/trace/TokenMatchingTrace.kt |   2 +-
 .../alllex/parsus/TokenMatchingTraceTest.kt   |  69 +++++----
 10 files changed, 271 insertions(+), 180 deletions(-)
 delete mode 100644 src/commonMain/kotlin/me/alllex/parsus/parser/Lexer.kt
 create mode 100644 src/commonMain/kotlin/me/alllex/parsus/tokenizer/AbstractTokenizer.kt
 create mode 100644 src/commonMain/kotlin/me/alllex/parsus/tokenizer/EagerTokenizer.kt
 create mode 100644 src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt
 create mode 100644 src/commonMain/kotlin/me/alllex/parsus/tokenizer/Tokenizer.kt

diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt
index 4dbbdbc..7183f63 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt
@@ -2,12 +2,19 @@ package me.alllex.parsus.parser
 
 import me.alllex.parsus.token.Token
 
-internal class ChoiceParser<T>(
+private fun Parser<*>.hasUnknownFirstTokens() = firstTokens.isEmpty()
+private fun List<Parser<*>>.hasUnknownFirstTokens() = any { it.hasUnknownFirstTokens() }
+
+internal abstract class AbstractChoiceParser<T>(
     val parsers: List<Parser<T>>,
 ) : ParserImpl<T>(
     null,
     firstTokens = if (parsers.hasUnknownFirstTokens()) emptySet() else parsers.flatMap { it.firstTokens }.toSet()
-) {
+)
+
+internal class EagerChoiceParser<T>(
+    parsers: List<Parser<T>>,
+) : AbstractChoiceParser<T>(parsers) {
 
     private val parsersByFirstToken: Map<Token, List<Parser<T>>> =
         mutableMapOf<Token, MutableList<Parser<T>>>()
@@ -29,8 +36,34 @@ internal class ChoiceParser<T>(
     private val unknownFirstTokenParsers = parsers.filter { it.hasUnknownFirstTokens() }
 
     override suspend fun ParsingScope.parse(): T {
-        // TODO: clean up
-//        val currentToken = currentToken?.token ?: fail(NoMatchingToken(currentOffset))
-//        val parsers = parsersByFirstToken[currentToken] ?: unknownFirstTokenParsers
+        val currentToken = currentToken?.token ?: fail(NoMatchingToken(currentOffset))
+        val parsers = parsersByFirstToken[currentToken] ?: unknownFirstTokenParsers
         for (parser in parsers) {
             val r = tryParse(parser)
             if (r is ParsedValue) return r.value
         }
         fail(NoViableAlternative(currentOffset))
     }
+}
 
-    companion object {
-        private fun Parser<*>.hasUnknownFirstTokens() = firstTokens.isEmpty()
-        private fun List<Parser<*>>.hasUnknownFirstTokens() = any { it.hasUnknownFirstTokens() }
+internal class ScannerlessChoiceParser<T>(
+    parsers: List<Parser<T>>,
+) : AbstractChoiceParser<T>(parsers) {
+
+    override suspend fun ParsingScope.parse(): T {
+        for (parser in parsers) {
+            val r = tryParse(parser)
+            if (r is ParsedValue) return r.value
+        }
+        fail(NoViableAlternative(currentOffset))
+    }
 }
 
+@Suppress("FunctionName")
+private fun <T> ChoiceParser(parsers: List<Parser<T>>) =
+    // EagerChoiceParser can only be used with EagerTokenizer
+    ScannerlessChoiceParser(parsers)
+
 /**
  * Creates a combined parser that will try the receiver parser first,
  * and fall back to the other parser in case of a parse error.
@@ -56,8 +75,8 @@ internal class ChoiceParser<T>(
  * ```
  */
 infix fun <R> Parser<R>.or(p: Parser<R>): Parser<R> = when {
-    this is ChoiceParser && p is ChoiceParser -> ChoiceParser(parsers + p.parsers)
-    this is ChoiceParser -> ChoiceParser(parsers + p)
-    p is ChoiceParser -> ChoiceParser(listOf(this) + p.parsers)
+    this is AbstractChoiceParser && p is AbstractChoiceParser -> ChoiceParser(parsers + p.parsers)
+    this is AbstractChoiceParser -> ChoiceParser(parsers + p)
+    p is AbstractChoiceParser -> ChoiceParser(listOf(this) + p.parsers)
     else -> ChoiceParser(listOf(this, p))
 }
diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/Grammar.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/Grammar.kt
index 1b607ee..69f3595 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/Grammar.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/Grammar.kt
@@ -3,6 +3,7 @@ package me.alllex.parsus.parser
 import me.alllex.parsus.annotations.ExperimentalParsusApi
 import me.alllex.parsus.token.EofToken
 import me.alllex.parsus.token.Token
+import me.alllex.parsus.tokenizer.ScannerlessTokenizer
 import me.alllex.parsus.trace.TokenMatchingTrace
 import me.alllex.parsus.trace.TracedParseResult
 import kotlin.reflect.KProperty
@@ -159,18 +160,20 @@ abstract class Grammar<out V>(
 
     private fun <T> parseEntire(parser: Parser<T>, input: String): ParseResult<T> {
         beforeParsing()
-        val lexer = Lexer(input, _tokens)
-        val parsingContext = ParsingContext(lexer, debugMode)
+        // If tokenizer impl is changed to EagerTokenizer, then ChoiceParser impl has to be changed to EagerChoiceParser
+        val tokenizer = ScannerlessTokenizer(input, _tokens)
+        val parsingContext = ParsingContext(tokenizer, debugMode)
         return parsingContext.runParser(createUntilEofParser(parser))
     }
 
     @ExperimentalParsusApi
     private fun <T> parseTracingEntire(parser: Parser<T>, input: String): TracedParseResult<T, TokenMatchingTrace> {
         beforeParsing()
-        val lexer = Lexer(input, _tokens, traceTokenMatching = true)
-        val parsingContext = ParsingContext(lexer, debugMode)
+        // If tokenizer impl is changed to EagerTokenizer, then ChoiceParser impl has to be changed to EagerChoiceParser
+        val tokenizer = ScannerlessTokenizer(input, _tokens, traceTokenMatching = true)
+        val parsingContext = ParsingContext(tokenizer, debugMode)
         val result = parsingContext.runParser(createUntilEofParser(parser))
-        val trace = lexer.getTokenMatchingTrace() ?: error("Token matching trace is not available")
+        val trace = tokenizer.getTokenMatchingTrace() ?: error("Token matching trace is not available")
         return TracedParseResult(result, trace)
     }
diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/Lexer.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/Lexer.kt
deleted file mode 100644
index 4566d3d..0000000
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/Lexer.kt
+++ /dev/null
@@ -1,131 +0,0 @@
-package me.alllex.parsus.parser
-
-import me.alllex.parsus.annotations.ExperimentalParsusApi
-import me.alllex.parsus.token.Token
-import me.alllex.parsus.token.TokenMatch
-import me.alllex.parsus.trace.TokenMatchingEvent
-import me.alllex.parsus.trace.TokenMatchingTrace
-
-/**
- * Lexer is responsible for [finding][findMatch] token-matches in the given position
- * in the input string.
- */
-@OptIn(ExperimentalParsusApi::class)
-internal class Lexer(
-    val input: String,
-    private val tokens: List<Token>,
-    traceTokenMatching: Boolean = false,
-) {
-
-    private val ignoredTokens = tokens.filter { it.ignored }
-    private val tokensByFirstChar: Map<Char, List<Token>>
-//    private var cachedFromIndex: Int = -1
-//    private var cachedTokenMatch: TokenMatch? = null
-
-    private val traceEvents: MutableList<TokenMatchingEvent>? = if (traceTokenMatching) mutableListOf() else null
-
-    init {
-        tokensByFirstChar = mutableMapOf<Char, MutableList<Token>>()
-        val unknownFirstCharTokens = mutableListOf<Token>()
-        for (token in tokens) {
-            val firstChars = token.firstChars
-            if (firstChars.isEmpty()) {
-                // If the token first char is unknown, then the first char heuristic cannot be applied.
-                // Therefore, we assume that such tokens can start with any character and put them in appropriate buckets
-                // to ensure the token priority correctness.
-                unknownFirstCharTokens += token
-                tokensByFirstChar.values.forEach { it += token }
-            } else {
-                for (c in firstChars) {
-                    tokensByFirstChar.getOrPut(c) { unknownFirstCharTokens.toMutableList() }
-                        .add(token)
-                }
-            }
-        }
-    }
-
-    internal fun getTokenMatchingTrace(): TokenMatchingTrace? {
-        return traceEvents?.let { TokenMatchingTrace(input, it) }
-    }
-
-    fun findMatchOf(fromIndex: Int, targetToken: Token): TokenMatch? {
-        var pos = fromIndex
-        while (true) {
-            matchImpl(pos, targetToken)?.let { return it }
-
-            val preIgnorePos = pos
-            for (ignoredToken in ignoredTokens) {
-                val ignoredMatch = matchImpl(pos, ignoredToken)
-                if (ignoredMatch != null) {
-                    pos = ignoredMatch.offset + ignoredMatch.length
-                    break
-                }
-            }
-
-            if (preIgnorePos == pos) {
-                // No ignored tokens matched, so we can't find the target token
-                return null
-            }
-        }
-        // The loop will exit via a mismatch, because no tokens can match "after the end of input"
-    }
-
-    fun findMatch(fromIndex: Int): TokenMatch? {
-//        if (fromIndex == cachedFromIndex && cachedTokenMatch != null) {
-//            return cachedTokenMatch
-//        }
-
-        val foundTokenMatch = findMatchIgnoring(fromIndex)
-//        cachedFromIndex = fromIndex
-//        cachedTokenMatch = foundTokenMatch
-        return foundTokenMatch
-    }
-
-    private fun findMatchIgnoring(fromIndex: Int): TokenMatch? {
-        var pos = fromIndex
-        while (true) {
-            val lex = findMatchImpl(pos) ?: return null
-            if (lex.token.ignored) {
-                pos = lex.offset + lex.length
-                continue
-            }
-
-            return lex
-        }
-    }
-
-    private fun findMatchImpl(fromIndex: Int): TokenMatch? {
-        if (fromIndex < input.length) {
-            val nextChar = input[fromIndex]
-            val byFirstChar = tokensByFirstChar[nextChar].orEmpty()
-            for (token in byFirstChar) {
-                matchImpl(fromIndex, token)?.let { return it }
-            }
-        }
-
-        for (token in tokens) {
-            matchImpl(fromIndex, token)?.let { return it }
-        }
-        return null
-    }
-
-    private fun matchImpl(fromIndex: Int, token: Token): TokenMatch? {
-        val length = token.match(input, fromIndex)
-        if (length == 0) {
-            traceMismatch(token, fromIndex)
-            return null
-        }
-
-        val match = TokenMatch(token, fromIndex, length)
-        traceMatch(token, match)
-        return match
-    }
-
-    private fun traceMismatch(token: Token, offset: Int) {
-        traceEvents?.add(TokenMatchingEvent(token, offset, null))
-    }
-
-    private fun traceMatch(token: Token, match: TokenMatch) {
-        traceEvents?.add(TokenMatchingEvent(token, match.offset, match))
-    }
-}
diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
index c002631..edb8c01 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingContext.kt
@@ -2,6 +2,7 @@ package me.alllex.parsus.parser
 
 import me.alllex.parsus.token.Token
 import me.alllex.parsus.token.TokenMatch
+import me.alllex.parsus.tokenizer.Tokenizer
 import kotlin.coroutines.Continuation
 import kotlin.coroutines.CoroutineContext
 import kotlin.coroutines.EmptyCoroutineContext
@@ -15,7 +16,7 @@ import kotlin.coroutines.intrinsics.suspendCoroutineUninterceptedOrReturn
  * For each [run][runParser] a new context must be created.
  */
 internal class ParsingContext(
-    private val lexer: Lexer,
+    private val tokenizer: Tokenizer,
     private val debugMode: Boolean = false
 ) : ParsingScope {
@@ -37,13 +38,12 @@ internal class ParsingContext(
         return result.getOrThrow() as ParseResult<T>
     }
 
-    override val TokenMatch.text: String get() = lexer.input.substring(offset, offset + length)
+    override val TokenMatch.text: String get() = tokenizer.input.substring(offset, offset + length)
 
     override val currentOffset: Int get() = position
 
-    // TODO: clean up
     override val currentToken: TokenMatch?
-        get() = null // lexer.findMatch(position)
+        get() = tokenizer.findContextFreeMatch(position)
 
     override suspend fun <R> Parser<R>.invoke(): R = parse()
 
@@ -59,8 +59,8 @@ internal class ParsingContext(
 
     override fun tryParse(token: Token): ParseResult<TokenMatch> {
         val fromIndex = this.position
-        val match = lexer.findMatchOf(fromIndex, token)
+        val match = tokenizer.findMatchOf(fromIndex, token)
             ?: return UnmatchedToken(token, fromIndex)
         // TODO: clean up, as this should not happen anymore
         if (match.token != token) return MismatchedToken(token, match)
         this.position = match.offset + match.length
         return ParsedValue(match)
diff --git a/src/commonMain/kotlin/me/alllex/parsus/tokenizer/AbstractTokenizer.kt b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/AbstractTokenizer.kt
new file mode 100644
index 0000000..0f7e3a3
--- /dev/null
+++ b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/AbstractTokenizer.kt
@@ -0,0 +1,41 @@
+package me.alllex.parsus.tokenizer
+
+import me.alllex.parsus.annotations.ExperimentalParsusApi
+import me.alllex.parsus.token.Token
+import me.alllex.parsus.token.TokenMatch
+import me.alllex.parsus.trace.TokenMatchingEvent
+import me.alllex.parsus.trace.TokenMatchingTrace
+
+@OptIn(ExperimentalParsusApi::class)
+internal abstract class AbstractTokenizer(
+    override val input: String,
+    protected val tokens: List<Token>,
+    traceTokenMatching: Boolean = false,
+) : Tokenizer {
+
+    private val traceEvents: MutableList<TokenMatchingEvent>? = if (traceTokenMatching) mutableListOf() else null
+
+    override fun getTokenMatchingTrace(): TokenMatchingTrace? {
+        return traceEvents?.let { TokenMatchingTrace(input, it) }
+    }
+
+    protected fun matchImpl(fromIndex: Int, token: Token): TokenMatch? {
+        val length = token.match(input, fromIndex)
+        if (length == 0) {
+            traceMismatch(token, fromIndex)
+            return null
+        }
+
+        val match = TokenMatch(token, fromIndex, length)
+        traceMatch(token, match)
+        return match
+    }
+
+    private fun traceMismatch(token: Token, offset: Int) {
+        traceEvents?.add(TokenMatchingEvent(token, offset, null))
+    }
+
+    private fun traceMatch(token: Token, match: TokenMatch) {
+        traceEvents?.add(TokenMatchingEvent(token, match.offset, match))
+    }
+}
diff --git a/src/commonMain/kotlin/me/alllex/parsus/tokenizer/EagerTokenizer.kt b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/EagerTokenizer.kt
new file mode 100644
index 0000000..65f565c
--- /dev/null
+++ b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/EagerTokenizer.kt
@@ -0,0 +1,88 @@
+package me.alllex.parsus.tokenizer
+
+import me.alllex.parsus.token.Token
+import me.alllex.parsus.token.TokenMatch
+
+/**
+ * This tokenizer eagerly tries to match tokens from the input,
+ * based on the full token set ordered by priority.
+ * It deterministically matches tokens from the input,
+ * not taking into account tokens expected by parsers.
+ */
+internal class EagerTokenizer(
+    input: String,
+    tokens: List<Token>,
+    traceTokenMatching: Boolean = false,
+) : AbstractTokenizer(input, tokens, traceTokenMatching) {
+
+    private val tokensByFirstChar: Map<Char, List<Token>>
+    private var cachedFromIndex: Int = -1
+    private var cachedTokenMatch: TokenMatch? = null
+
+    init {
+        tokensByFirstChar = mutableMapOf<Char, MutableList<Token>>()
+        val unknownFirstCharTokens = mutableListOf<Token>()
+        for (token in tokens) {
+            val firstChars = token.firstChars
+            if (firstChars.isEmpty()) {
+                // If the token first char is unknown, then the first char heuristic cannot be applied.
+                // Therefore, we assume that such tokens can start with any character and put them in appropriate buckets
+                // to ensure the token priority correctness.
+                unknownFirstCharTokens += token
+                tokensByFirstChar.values.forEach { it += token }
+            } else {
+                for (c in firstChars) {
+                    tokensByFirstChar.getOrPut(c) { unknownFirstCharTokens.toMutableList() }
+                        .add(token)
+                }
+            }
+        }
+    }
+
+    override fun findContextFreeMatch(fromIndex: Int): TokenMatch? {
+        return findMatchCaching(fromIndex)
+    }
+
+    override fun findMatchOf(fromIndex: Int, targetToken: Token): TokenMatch? {
+        return findMatchCaching(fromIndex)
+    }
+
+    private fun findMatchCaching(fromIndex: Int): TokenMatch? {
+        if (fromIndex == cachedFromIndex && cachedTokenMatch != null) {
+            return cachedTokenMatch
+        }
+
+        val foundTokenMatch = findMatchIgnoring(fromIndex)
+        cachedFromIndex = fromIndex
+        cachedTokenMatch = foundTokenMatch
+        return foundTokenMatch
+    }
+
+    private fun findMatchIgnoring(fromIndex: Int): TokenMatch? {
+        var pos = fromIndex
+        while (true) {
+            val lex = findMatchImpl(pos) ?: return null
+            if (lex.token.ignored) {
+                pos = lex.offset + lex.length
+                continue
+            }
+
+            return lex
+        }
+    }
+
+    private fun findMatchImpl(fromIndex: Int): TokenMatch? {
+        if (fromIndex < input.length) {
+            val nextChar = input[fromIndex]
+            val byFirstChar = tokensByFirstChar[nextChar].orEmpty()
+            for (token in byFirstChar) {
+                matchImpl(fromIndex, token)?.let { return it }
+            }
+        }
+
+        for (token in tokens) {
+            matchImpl(fromIndex, token)?.let { return it }
+        }
+        return null
+    }
+}
diff --git a/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt
new file mode 100644
index 0000000..9d204c7
--- /dev/null
+++ b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt
@@ -0,0 +1,42 @@
+package me.alllex.parsus.tokenizer
+
+import me.alllex.parsus.token.Token
+import me.alllex.parsus.token.TokenMatch
+
+/**
+ * Scannerless tokenizer tries to parse the target token at the given position.
+ *
+ * It treats the target token as having higher priority than all other tokens.
+ */
+internal class ScannerlessTokenizer(
+    input: String,
+    tokens: List<Token>,
+    traceTokenMatching: Boolean = false,
+): AbstractTokenizer(input, tokens, traceTokenMatching) {
+
+    private val ignoredTokens = tokens.filter { it.ignored }
+
+    override fun findContextFreeMatch(fromIndex: Int): TokenMatch? = null
+
+    override fun findMatchOf(fromIndex: Int, targetToken: Token): TokenMatch? {
+        var pos = fromIndex
+        while (true) {
+            matchImpl(pos, targetToken)?.let { return it }
+
+            val preIgnorePos = pos
+            for (ignoredToken in ignoredTokens) {
+                val ignoredMatch = matchImpl(pos, ignoredToken)
+                if (ignoredMatch != null) {
+                    pos = ignoredMatch.offset + ignoredMatch.length
+                    break
+                }
+            }
+
+            if (preIgnorePos == pos) {
+                // No ignored tokens matched, so we can't find the target token
+                return null
+            }
+        }
+        // The loop will exit via a mismatch, because no tokens can match "after the end of input"
+    }
+}
diff --git a/src/commonMain/kotlin/me/alllex/parsus/tokenizer/Tokenizer.kt b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/Tokenizer.kt
new file mode 100644
index 0000000..4f4ca77
--- /dev/null
+++ b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/Tokenizer.kt
@@ -0,0 +1,14 @@
+package me.alllex.parsus.tokenizer
+
+import me.alllex.parsus.annotations.ExperimentalParsusApi
+import me.alllex.parsus.token.Token
+import me.alllex.parsus.token.TokenMatch
+import me.alllex.parsus.trace.TokenMatchingTrace
+
+@OptIn(ExperimentalParsusApi::class)
+internal interface Tokenizer {
+    val input: String
+    fun getTokenMatchingTrace(): TokenMatchingTrace?
+    fun findContextFreeMatch(fromIndex: Int): TokenMatch?
+    fun findMatchOf(fromIndex: Int, targetToken: Token): TokenMatch?
+}
diff --git a/src/commonMain/kotlin/me/alllex/parsus/trace/TokenMatchingTrace.kt b/src/commonMain/kotlin/me/alllex/parsus/trace/TokenMatchingTrace.kt
index d1ffae3..b66fa4e 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/trace/TokenMatchingTrace.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/trace/TokenMatchingTrace.kt
@@ -77,7 +77,7 @@ fun formatTokenMatchingTrace(
         sb.append(" ".repeat(lookBehind + 1))
         sb.append(matchSymbol.repeat(matchLength.coerceAtLeast(1)))
         sb.append(" [$offset").append(if (match != null) " - ${offset + matchLength - 1}" else "")
-            .append("] ").append(event.token)
+            .append("] ").append(event.token.name?.let { "$it " } ?: "").append(event.token)
         sb.appendLine()
     }
     return sb.toString()
diff --git a/src/commonTest/kotlin/me/alllex/parsus/TokenMatchingTraceTest.kt b/src/commonTest/kotlin/me/alllex/parsus/TokenMatchingTraceTest.kt
index 6d323b8..e6a389f 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/TokenMatchingTraceTest.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/TokenMatchingTraceTest.kt
@@ -5,10 +5,9 @@ import assertk.assertions.isEqualTo
 import me.alllex.parsus.annotations.ExperimentalParsusApi
 import me.alllex.parsus.parser.*
 import me.alllex.parsus.token.literalToken
+import me.alllex.parsus.token.regexToken
 import me.alllex.parsus.trace.formatTokenMatchingTrace
 import me.alllex.parsus.tree.SyntaxTree
-import me.alllex.parsus.tree.lexeme
-import me.alllex.parsus.tree.plus
 import kotlin.test.Test
 
 @OptIn(ExperimentalParsusApi::class)
@@ -17,39 +16,55 @@ class TokenMatchingTraceTest {
     @Test
     fun tokenMatchingTraceIsFormatted() {
         object : Grammar<SyntaxTree>() {
+            @Suppress("unused")
+            val ws by regexToken("\\s+", ignored = true)
             val a by literalToken("a")
             val b by literalToken("b")
             val cd by literalToken("cd")
-            val ab by parser { node(lexeme(a) + lexeme(b)) }
-            override val root by ab * parlex(cd) map { (v1, v2) -> node(v1, v2) }
+            val ef by literalToken("ef")
+            val aOrB by parlex(a) or parlex(b)
+            val cdOrEf by parlex(cd) or parlex(ef)
+            val p by aOrB * cdOrEf map { (v1, v2) -> node(v1, v2) }
+            override val root by oneOrMore(p) map { node(it) }
         }.run {
-            val input = "abcd"
+            val input = "aefbcd"
             val tracedResult = parseTracingTokenMatching(input)
-            assertThat(tracedResult.result).isEqualTo(ParsedValue(node(node(a.lex(0), b.lex(1)), cd.lex(2))))
+            assertThat(tracedResult.result).isEqualTo(ParsedValue(
+                node(node(a.lex(0), ef.lex(1)), node(b.lex(3), cd.lex(4))))
+            )
             val formattedTrace = formatTokenMatchingTrace(tracedResult.trace)
+            println(formattedTrace)
             assertThat("\n" + formattedTrace).isEqualTo(
                 """
-__________
-······abcd
-      x [0] Token(EOF)
-__________
-······abcd
-      ^ [0 - 0] LiteralToken('a')
-__________
-·····abcd·
-      x [1] Token(EOF)
-__________
-·····abcd·
-      ^ [1 - 1] LiteralToken('b')
-__________
-····abcd··
-      x [2] Token(EOF)
-__________
-····abcd··
-      ^^ [2 - 3] LiteralToken('cd')
-__________
-··abcd····
-      ^ [4 - 4] Token(EOF)
+____________
+······aefbcd
+      ^ [0 - 0] a LiteralToken('a')
+____________
+·····aefbcd·
+      x [1] cd LiteralToken('cd')
+      x [1] ws RegexToken(ws [\s+] [ignored])
+____________
+·····aefbcd·
+      ^^ [1 - 2] ef LiteralToken('ef')
+____________
+···aefbcd···
+      x [3] a LiteralToken('a')
+      x [3] ws RegexToken(ws [\s+] [ignored])
+____________
+···aefbcd···
+      ^ [3 - 3] b LiteralToken('b')
+____________
+··aefbcd····
+      ^^ [4 - 5] cd LiteralToken('cd')
+____________
+…efbcd······
+      x [6] a LiteralToken('a')
+      x [6] ws RegexToken(ws [\s+] [ignored])
+      x [6] b LiteralToken('b')
+      x [6] ws RegexToken(ws [\s+] [ignored])
+____________
+…efbcd······
+      ^ [6 - 6] EOF Token(EOF)
 """
             )
         }
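A note on the seam introduced by this refactoring: `ParsingContext` now talks only to the `Tokenizer` interface, and `findContextFreeMatch` is the hook that decides whether eager, parser-independent matching is available (`ScannerlessTokenizer` answers `null`, which is also why `EagerChoiceParser` must not be paired with it). As a rough illustration of how small a strategy against this seam can be — a hypothetical tokenizer, not part of the patch, that consults only the requested token and supports no ignored tokens:

```
// Hypothetical minimal strategy built on AbstractTokenizer (illustrative only).
internal class DirectTokenizer(
    input: String,
    tokens: List<Token>,
) : AbstractTokenizer(input, tokens) {
    // No context-free answer: choice parsers must probe their alternatives themselves.
    override fun findContextFreeMatch(fromIndex: Int): TokenMatch? = null
    // Try exactly the token the parser asked for, at exactly this position.
    override fun findMatchOf(fromIndex: Int, targetToken: Token): TokenMatch? =
        matchImpl(fromIndex, targetToken)
}
```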
From b33bc5b41fdfa745e6258f48edd0e49f45536969 Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Thu, 5 Oct 2023 21:40:53 +0200
Subject: [PATCH 05/11] Add tests that check support for ignored token parsing

---
 .../kotlin/me/alllex/parsus/TokenTests.kt | 36 +++++++++++++++++--
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt b/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
index d89ee04..50cc966 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
@@ -1,12 +1,12 @@
 package me.alllex.parsus
 
 import assertk.assertions.isEqualTo
-import me.alllex.parsus.parser.Grammar
-import me.alllex.parsus.parser.map
-import me.alllex.parsus.parser.or
+import me.alllex.parsus.parser.*
 import me.alllex.parsus.token.TokenMatch
 import me.alllex.parsus.token.literalToken
 import me.alllex.parsus.token.regexToken
+import me.alllex.parsus.tree.SyntaxTree
+import me.alllex.parsus.tree.lexeme
 import kotlin.test.Test
 
 class TokenTests {
@@ -35,4 +35,34 @@ class TokenTests {
         }
     }
 
+    @Test
+    fun explicitIgnoredTokenParsing() {
+        object : Grammar<SyntaxTree>() {
+            val ws by regexToken("\\s+", ignored = true)
+            val a by literalToken("a")
+            override val root by parser {
+                val a1 = lexeme(a)
+                val w = lexeme(ws)
+                val a2 = lexeme(a)
+                node(a1, w, a2)
+            }
+        }.run {
+            assertParsed("a a").isEqualTo(node(a.lex("a", 0), ws.lex(" ", 1), a.lex("a", 2)))
+            assertParsed(" a a ").isEqualTo(node(a.lex("a", 1), ws.lex(" ", 2), a.lex("a", 3)))
+            assertNotParsed("aa").failedWithUnmatchedToken(ws, 1)
+            assertNotParsed(" aa").failedWithUnmatchedToken(ws, 2)
+        }
+
+        object : Grammar<SyntaxTree>() {
+            val ws by regexToken("\\s+", ignored = true)
+            val a by literalToken("a")
+            override val root by parlex(a) and (-ws * parlex(a)) map { node(it.first, it.second) }
+        }.run {
+            assertParsed("a a").isEqualTo(node(a.lex("a", 0), a.lex("a", 2)))
+            assertParsed(" a a ").isEqualTo(node(a.lex("a", 1), a.lex("a", 3)))
+            assertNotParsed("aa").failedWithUnmatchedToken(ws, 1)
+            assertNotParsed(" aa").failedWithUnmatchedToken(ws, 2)
+        }
+    }
+
 }
From 22cb43ef2cff2bac4a4eff9ae73c8d23cf895d15 Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Thu, 5 Oct 2023 22:02:41 +0200
Subject: [PATCH 06/11] Scannerless: cache ignored token last matching result

---
 .../parsus/tokenizer/ScannerlessTokenizer.kt | 51 ++++++++++++++-----
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt
index 9d204c7..fc13179 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt
@@ -16,27 +16,54 @@ internal class ScannerlessTokenizer(
 
     private val ignoredTokens = tokens.filter { it.ignored }
 
+    private var cachedIgnoredFromIndex: Int = -1
+    private var cachedIgnoredTokenMatch: TokenMatch? = null
+
     override fun findContextFreeMatch(fromIndex: Int): TokenMatch? = null
 
     override fun findMatchOf(fromIndex: Int, targetToken: Token): TokenMatch? {
         var pos = fromIndex
         while (true) {
-            matchImpl(pos, targetToken)?.let { return it }
-
-            val preIgnorePos = pos
-            for (ignoredToken in ignoredTokens) {
-                val ignoredMatch = matchImpl(pos, ignoredToken)
-                if (ignoredMatch != null) {
-                    pos = ignoredMatch.offset + ignoredMatch.length
-                    break
-                }
-            }
+            matchTarget(pos, targetToken)?.let { return it }
 
-            if (preIgnorePos == pos) {
-                // No ignored tokens matched, so we can't find the target token
+            val ignoredMatch = matchIgnored(pos)
+            @Suppress("LiftReturnOrAssignment")
+            if (ignoredMatch != null) {
+                val posAfterIgnored = ignoredMatch.offset + ignoredMatch.length
+                if (posAfterIgnored > pos) {
+                    pos = posAfterIgnored
+                    continue
+                } else {
+                    // An ignored token matched, but it did not advance the position.
+                    // This should not happen normally, but this is a safeguard.
+                    return null
+                }
+            } else {
+                // No ignored tokens matched at the current position either,
+                // so it is a mismatch overall
                 return null
             }
         }
         // The loop will exit via a mismatch, because no tokens can match "after the end of input"
     }
+
+    private fun matchIgnored(fromIndex: Int): TokenMatch? {
+        if (fromIndex == cachedIgnoredFromIndex) {
+            return cachedIgnoredTokenMatch
+        }
+
+        var match: TokenMatch? = null
+        for (ignoredToken in ignoredTokens) {
+            match = matchImpl(fromIndex, ignoredToken)
+            if (match != null) {
+                break
+            }
+        }
+
+        cachedIgnoredFromIndex = fromIndex
+        cachedIgnoredTokenMatch = match
+        return match
+    }
+
+    private fun matchTarget(pos: Int, targetToken: Token) = matchImpl(pos, targetToken)
 }

From 2d1e1a56c85c732731131dd268b319629fe46dd3 Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Thu, 5 Oct 2023 22:33:22 +0200
Subject: [PATCH 07/11] Scannerless: cache last ignored mismatch separately

---
 .../parsus/tokenizer/ScannerlessTokenizer.kt | 29 +++++++++++++++++--
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt
index fc13179..d747dec 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/tokenizer/ScannerlessTokenizer.kt
@@ -12,10 +12,24 @@ internal class ScannerlessTokenizer(
     input: String,
     tokens: List<Token>,
     traceTokenMatching: Boolean = false,
-): AbstractTokenizer(input, tokens, traceTokenMatching) {
+) : AbstractTokenizer(input, tokens, traceTokenMatching) {
 
     private val ignoredTokens = tokens.filter { it.ignored }
 
+    // We cache one mismatch and one match of ignored tokens.
+    // This is for the frequent case when there is exactly one ignored token before the target token.
+    // Example:
+    //   parser = t1 or t2 or t3, ws = ignored whitespace
+    //   input = " t3"
+    // In this example, t1 will fail to match at 0, but ws will match at 0, so we cache the match.
+    // Then t1 will try to match at 1, but it will fail again, so we try ignored tokens again,
+    // but this time we get a mismatch, which we cache separately. This fails the t1 branch of the parser.
+    // Now, we backtrack and try t2 at 0, which fails.
+    // But we can avoid rematching ws at 0, because we cached this match.
+    // Then we try t2 at position 1, which fails. But we don't retry ws, because we cached the mismatch.
+    // In the last t3 branch, we try t3 at 0, which fails, but then we skip rematching ws at 0,
+    // because it is still cached. Then t3 succeeds at 1, and parsing succeeds.
+    private var cacheIgnoredMismatchFromIndex = -1
     private var cachedIgnoredFromIndex: Int = -1
     private var cachedIgnoredTokenMatch: TokenMatch? = null
 
@@ -48,6 +62,11 @@ internal class ScannerlessTokenizer(
     }
 
     private fun matchIgnored(fromIndex: Int): TokenMatch? {
+        require(fromIndex >= 0) { "fromIndex must be non-negative, but was $fromIndex" }
+
+        if (fromIndex == cacheIgnoredMismatchFromIndex) {
+            return null
+        }
         if (fromIndex == cachedIgnoredFromIndex) {
             return cachedIgnoredTokenMatch
         }
@@ -60,8 +79,12 @@ internal class ScannerlessTokenizer(
             }
         }
 
-        cachedIgnoredFromIndex = fromIndex
-        cachedIgnoredTokenMatch = match
+        if (match == null) {
+            cacheIgnoredMismatchFromIndex = fromIndex
+        } else {
+            cachedIgnoredFromIndex = fromIndex
+            cachedIgnoredTokenMatch = match
+        }
         return match
     }
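The comment above is the heart of this commit, so a self-contained model of the two-slot cache may help; a single shared slot would thrash, because backtracking alternatives keep probing two positions alternately (the start of the ignored run and the position right after it). Illustrative only — the real tokenizer caches `TokenMatch` objects, not lengths:

```
// Models the ignored-token cache: one slot for the last matching position,
// one for the last mismatching position.
class IgnoredTokenCache(private val compute: (Int) -> Int?) {
    private var missAt = -1   // last position where no ignored token matched
    private var hitAt = -1    // last position where an ignored token matched
    private var hitLength = 0
    fun matchIgnored(pos: Int): Int? {
        if (pos == missAt) return null      // cached mismatch
        if (pos == hitAt) return hitLength  // cached match
        val len = compute(pos)
        if (len == null) missAt = pos else { hitAt = pos; hitLength = len }
        return len
    }
}
```

In the `" t3"` walkthrough from the comment, `hitAt` stays pinned to 0 and `missAt` to 1 across all three alternatives, so the ignored tokens are matched against the input only twice in total.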
From 3368259b5b5eebc84895e6acc1e7ef495bcfad19 Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Thu, 5 Oct 2023 22:33:43 +0200
Subject: [PATCH 08/11] Fix TokenMatchingTraceTest

---
 .../alllex/parsus/TokenMatchingTraceTest.kt | 103 +++++++++++++-----
 1 file changed, 73 insertions(+), 30 deletions(-)

diff --git a/src/commonTest/kotlin/me/alllex/parsus/TokenMatchingTraceTest.kt b/src/commonTest/kotlin/me/alllex/parsus/TokenMatchingTraceTest.kt
index e6a389f..507d72d 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/TokenMatchingTraceTest.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/TokenMatchingTraceTest.kt
@@ -27,44 +27,87 @@ class TokenMatchingTraceTest {
             val p by aOrB * cdOrEf map { (v1, v2) -> node(v1, v2) }
             override val root by oneOrMore(p) map { node(it) }
         }.run {
-            val input = "aefbcd"
+            val input = " a ef  b cd "
             val tracedResult = parseTracingTokenMatching(input)
             assertThat(tracedResult.result).isEqualTo(ParsedValue(
-                node(node(a.lex(0), ef.lex(1)), node(b.lex(3), cd.lex(4))))
+                node(node(a.lex(1), ef.lex(3)), node(b.lex(7), cd.lex(9))))
             )
             val formattedTrace = formatTokenMatchingTrace(tracedResult.trace)
             println(formattedTrace)
             assertThat("\n" + formattedTrace).isEqualTo(
                 """
+__________________
+······␣a␣ef␣␣b␣cd␣
+      x [0] a LiteralToken('a')
+__________________
+······␣a␣ef␣␣b␣cd␣
+      ^ [0 - 0] ws RegexToken(ws [\s+] [ignored])
+__________________
+·····␣a␣ef␣␣b␣cd␣·
+      ^ [1 - 1] a LiteralToken('a')
+__________________
+····␣a␣ef␣␣b␣cd␣··
+      x [2] cd LiteralToken('cd')
+__________________
+····␣a␣ef␣␣b␣cd␣··
+      ^ [2 - 2] ws RegexToken(ws [\s+] [ignored])
+__________________
+···␣a␣ef␣␣b␣cd␣···
+      x [3] cd LiteralToken('cd')
+      x [3] ws RegexToken(ws [\s+] [ignored])
+__________________
+····␣a␣ef␣␣b␣cd␣··
+      x [2] ef LiteralToken('ef')
+__________________
+···␣a␣ef␣␣b␣cd␣···
+      ^^ [3 - 4] ef LiteralToken('ef')
+__________________
+·␣a␣ef␣␣b␣cd␣·····
+      x [5] a LiteralToken('a')
+__________________
+·␣a␣ef␣␣b␣cd␣·····
+      ^^ [5 - 6] ws RegexToken(ws [\s+] [ignored])
+__________________
+…␣ef␣␣b␣cd␣·······
+      x [7] a LiteralToken('a')
+      x [7] ws RegexToken(ws [\s+] [ignored])
+__________________
+·␣a␣ef␣␣b␣cd␣·····
+      x [5] b LiteralToken('b')
+__________________
+…␣ef␣␣b␣cd␣·······
+      ^ [7 - 7] b LiteralToken('b')
+__________________
+…ef␣␣b␣cd␣········
+      x [8] cd LiteralToken('cd')
+__________________
+…ef␣␣b␣cd␣········
+      ^ [8 - 8] ws RegexToken(ws [\s+] [ignored])
+__________________
+…f␣␣b␣cd␣·········
+      ^^ [9 - 10] cd LiteralToken('cd')
+__________________
+…␣b␣cd␣···········
+      x [11] a LiteralToken('a')
+__________________
+…␣b␣cd␣···········
+      ^ [11 - 11] ws RegexToken(ws [\s+] [ignored])
+__________________
+…b␣cd␣············
+      x [12] a LiteralToken('a')
+      x [12] ws RegexToken(ws [\s+] [ignored])
+__________________
+…␣b␣cd␣···········
+      x [11] b LiteralToken('b')
+__________________
+…b␣cd␣············
+      x [12] b LiteralToken('b')
+__________________
+…␣b␣cd␣···········
+      x [11] EOF Token(EOF)
+__________________
+…b␣cd␣············
+      ^ [12 - 12] EOF Token(EOF)
 """
             )
         }
From 915edc84836a0e78280d00df514080dbc157da64 Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Thu, 5 Oct 2023 22:36:39 +0200
Subject: [PATCH 09/11] Add naive JSON grammar test

---
 .../kotlin/me/alllex/parsus/GrammarTests.kt | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/src/commonTest/kotlin/me/alllex/parsus/GrammarTests.kt b/src/commonTest/kotlin/me/alllex/parsus/GrammarTests.kt
index 34ecc98..f223a7b 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/GrammarTests.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/GrammarTests.kt
@@ -4,6 +4,7 @@ import assertk.assertThat
 import assertk.assertions.isEqualTo
 import me.alllex.parsus.parser.*
 import me.alllex.parsus.token.literalToken
+import me.alllex.parsus.token.regexToken
 import me.alllex.parsus.tree.SyntaxTree
 import me.alllex.parsus.tree.lexeme
 import kotlin.test.Test
@@ -80,4 +81,56 @@ class GrammarTests {
         }
     }
 
+    @Test
+    fun naiveJsonGrammarTest() {
+        NaiveJsonGrammar.run {
+            assertParsed("""{${'\n'}"a": 1,${'\n'}"b": {"c":false}${'\n'}}""").isEqualTo(
+                Json.Obj(
+                    mapOf(
+                        "a" to Json.Num(1.0),
+                        "b" to Json.Obj(mapOf("c" to Json.Bool(false)))
+                    )
+                )
+            )
+        }
+    }
+
+    sealed class Json {
+        object Null : Json() {
+            override fun toString(): String = "Null"
+        }
+
+        data class Bool(val value: Boolean) : Json()
+        data class Num(val value: Double) : Json()
+        data class Str(val value: String) : Json()
+        data class Arr(val values: List<Json>) : Json()
+        data class Obj(val values: Map<String, Json>) : Json()
+    }
+
+    object NaiveJsonGrammar : Grammar<Json>() {
+        init {
+            regexToken("\\s+", ignored = true)
+        }
+
+        private val comma by literalToken(",")
+        private val colon by literalToken(":")
+        private val lbrace by literalToken("{")
+        private val rbrace by literalToken("}")
+        private val lbracket by literalToken("[")
+        private val rbracket by literalToken("]")
+        private val str by regexToken("\"[^\\\\\"]*(\\\\[\"nrtbf\\\\][^\\\\\"]*)*\"") map { it.text.run { substring(1, lastIndex) } }
+        private val jsonTrue by literalToken("true") map { Json.Bool(true) }
+        private val jsonFalse by literalToken("false") map { Json.Bool(false) }
+        private val jsonNull by literalToken("null") map Json.Null
+        private val jsonNum by regexToken("-?(?:0|[1-9]\\d*)(?:\\.\\d+)?(?:[eE][+-]?\\d+)?") map { Json.Num(it.text.toDouble()) }
+        private val jsonStr by str map { Json.Str(it) }
+
+        private val keyValue by str * -colon and ref(::jsonValue) map { it.toPair() }
+        private val jsonObj by -lbrace * separated(keyValue, comma) * -rbrace map { Json.Obj(it.toMap()) }
+
+        private val jsonArr by -lbracket * separated(ref(::jsonValue), comma) * -rbracket map { Json.Arr(it) }
+        private val jsonValue: Parser<Json> by jsonNull or jsonTrue or jsonFalse or jsonNum or jsonStr or jsonArr or jsonObj
+        override val root by jsonValue
+    }
+
 }
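A quick usage sketch for the grammar above (hypothetical, not part of the test suite): thanks to `separated` and the `ref(::jsonValue)` recursion, nested values parse into the expected tree, with ignored whitespace handled by the scannerless tokenizer.

```
// Assuming the NaiveJsonGrammar defined in the patch above:
val value = GrammarTests.NaiveJsonGrammar.parseOrThrow("""{"a": [1, true, null]}""")
// value == Json.Obj(mapOf("a" to Json.Arr(listOf(Json.Num(1.0), Json.Bool(true), Json.Null))))
```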
From 30669a60af56e3e2b0a23101a8842f27e604eeaa Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Thu, 5 Oct 2023 22:39:41 +0200
Subject: [PATCH 10/11] Extend the test that validates token priority is driven by parsers

---
 .../kotlin/me/alllex/parsus/TokenTests.kt | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt b/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
index 50cc966..8a5e0dd 100644
--- a/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
+++ b/src/commonTest/kotlin/me/alllex/parsus/TokenTests.kt
@@ -2,6 +2,7 @@ package me.alllex.parsus
 
 import assertk.assertions.isEqualTo
 import me.alllex.parsus.parser.*
+import me.alllex.parsus.token.EofToken
 import me.alllex.parsus.token.TokenMatch
 import me.alllex.parsus.token.literalToken
 import me.alllex.parsus.token.regexToken
@@ -25,14 +26,31 @@ class TokenTests {
     @Test
     fun tokenPriorityIsDrivenByParser() {
         object : Grammar<TokenMatch>() {
-            val single by literalToken("<")
+            // double declared first
             val double by literalToken("<<")
+            val single by literalToken("<")
+            override val root by double or single
+        }.run {
+            assertParsed("<<").isEqualTo(TokenMatch(double, 0, 2))
+        }
 
+        object : Grammar<TokenMatch>() {
+            val single by literalToken("<")
+            val double by literalToken("<<")
             // even though single token is declared first, it is not matched first
             override val root by double or single
         }.run {
             assertParsed("<<").isEqualTo(TokenMatch(double, 0, 2))
         }
+
+        object : Grammar<TokenMatch>() {
+            val single by literalToken("<")
+            val double by literalToken("<<")
+            // if the order in the parser is "wrong", then the parsing will fail too
+            override val root by single or double
+        }.run {
+            assertNotParsed("<<").failedWithUnmatchedToken(EofToken, 1)
+        }
     }

From 4f6501dbf2fc418293e073b3996be46075ea886d Mon Sep 17 00:00:00 2001
From: Alex Semin
Date: Thu, 5 Oct 2023 22:46:27 +0200
Subject: [PATCH 11/11] Deprecate currentToken

---
 src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt | 1 +
 src/commonMain/kotlin/me/alllex/parsus/parser/ParsingScope.kt | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt
index 7183f63..f467280 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/ChoiceParser.kt
@@ -35,6 +35,7 @@ internal class EagerChoiceParser<T>(
 
     private val unknownFirstTokenParsers = parsers.filter { it.hasUnknownFirstTokens() }
 
+    @Suppress("DEPRECATION")
     override suspend fun ParsingScope.parse(): T {
         val currentToken = currentToken?.token ?: fail(NoMatchingToken(currentOffset))
         val parsers = parsersByFirstToken[currentToken] ?: unknownFirstTokenParsers
diff --git a/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingScope.kt b/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingScope.kt
index 7437166..cc8c5e7 100644
--- a/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingScope.kt
+++ b/src/commonMain/kotlin/me/alllex/parsus/parser/ParsingScope.kt
@@ -45,6 +45,7 @@ interface ParsingScope {
     /**
      * The token at the current offset in the input.
      */
+    @Deprecated("The new \"scannerless\" parsing approach does not eagerly tokenize the input. The `currentToken` is always null.")
     val currentToken: TokenMatch?
 
     /**