Skip to content

Commit

Permalink
Merge pull request #13 from alllex/ignore-case
Browse files Browse the repository at this point in the history
Support case-insensitive literal tokens
  • Loading branch information
alllex committed Jul 26, 2023
2 parents d6fe534 + 5d77cd4 commit caa9c11
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 11 deletions.
1 change: 1 addition & 0 deletions src/commonMain/kotlin/me/alllex/parsus/parser/Grammar.kt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ interface GrammarContext
* ```
*/
abstract class Grammar<out V>(
val ignoreCase: Boolean = false,
private val debugMode: Boolean = false,
) : GrammarContext {

Expand Down
10 changes: 6 additions & 4 deletions src/commonMain/kotlin/me/alllex/parsus/token/LiteralToken.kt
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@ import me.alllex.parsus.parser.Grammar
class LiteralToken(
val string: String,
name: String? = null,
ignored: Boolean = false
ignored: Boolean = false,
val ignoreCase: Boolean = false,
) : Token(name, ignored) {

init {
require(string.isNotEmpty()) { "text must not be empty" }
}

override fun match(input: CharSequence, fromIndex: Int): Int {
if (input.startsWith(string, fromIndex)) {
if (input.startsWith(string, fromIndex, ignoreCase)) {
return string.length
}
return 0
Expand All @@ -37,5 +38,6 @@ class LiteralToken(
fun Grammar<*>.literalToken(
text: String,
name: String? = null,
ignored: Boolean = false
): LiteralToken = LiteralToken(text, name, ignored).also { register(it) }
ignored: Boolean = false,
ignoreCase: Boolean = this.ignoreCase,
): LiteralToken = LiteralToken(text, name, ignored, ignoreCase).also { register(it) }
16 changes: 12 additions & 4 deletions src/commonMain/kotlin/me/alllex/parsus/token/RegexToken.kt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class RegexToken(
override fun toString(): String = "RegexToken(${name ?: ""} [$pattern]${if (ignored) " [ignored]" else ""})"
}

/**
 * Returns a regex that carries [RegexOption.IGNORE_CASE] when [ignoreCase] is requested.
 *
 * The receiver itself is returned when [ignoreCase] is `false` or when the option is
 * already present; otherwise a new [Regex] is built from the same pattern with the
 * existing options plus [RegexOption.IGNORE_CASE].
 */
private fun Regex.withIgnoreCase(ignoreCase: Boolean): Regex {
    // Nothing to do: case-insensitivity was not requested.
    if (!ignoreCase) return this
    // Nothing to do: the regex is already case-insensitive.
    if (RegexOption.IGNORE_CASE in options) return this
    return Regex(pattern, options + RegexOption.IGNORE_CASE)
}

// TODO: Add a @Language annotation to automatically highlight the pattern as a regex in the IDE
// see: https://github.com/kotest/kotest/pull/3397
/**
Expand All @@ -36,8 +40,9 @@ fun Grammar<*>.regexToken(
@Language("RegExp", "", "")
pattern: String,
name: String? = null,
ignored: Boolean = false
): RegexToken = RegexToken(Regex(pattern), name, ignored).also { register(it) }
ignored: Boolean = false,
ignoreCase: Boolean = this.ignoreCase,
): RegexToken = regexToken(Regex(pattern), name, ignored, ignoreCase)

/**
* Creates and registers a regex token in this grammar.
Expand All @@ -47,5 +52,8 @@ fun Grammar<*>.regexToken(
fun Grammar<*>.regexToken(
regex: Regex,
name: String? = null,
ignored: Boolean = false
): RegexToken = RegexToken(regex, name, ignored).also { register(it) }
ignored: Boolean = false,
ignoreCase: Boolean = this.ignoreCase,
): RegexToken = RegexToken(regex.withIgnoreCase(ignoreCase), name, ignored).also { register(it) }


71 changes: 68 additions & 3 deletions src/commonTest/kotlin/me/alllex/parsus/Tests.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ package me.alllex.parsus
import assertk.Assert
import assertk.all
import assertk.assertThat
import assertk.assertions.*
import assertk.assertions.isEqualTo
import assertk.assertions.isInstanceOf
import assertk.assertions.isNull
import assertk.assertions.prop
import me.alllex.parsus.parser.*
import me.alllex.parsus.token.*
import me.alllex.parsus.tree.*
Expand Down Expand Up @@ -451,6 +454,68 @@ class Tests {
}
}

// A literal token declared with `ignoreCase = true` is expected to parse its text in any letter casing.
@Test
fun literalTokenIgnoreCase() {
    val grammar = object : Grammar<SyntaxTree>() {
        val data by literalToken("data", ignoreCase = true)
        override val root by parser { lexeme(data) }
    }
    with(grammar) {
        // Exact-case input: the expected lexeme text is the literal's own string.
        assertParsed("data").isEqualTo(data.lex())
        // Differently cased input: the expected lexeme carries the text as written in the input.
        for (text in listOf("DATA", "Data", "dAtA")) {
            assertParsed(text).isEqualTo(data.lex(text))
        }
    }
}

// A regex token built from a pattern string with `ignoreCase = true` is expected to accept both letter cases.
@Test
fun regexTokenIgnoreCase() {
    val grammar = object : Grammar<SyntaxTree>() {
        val data by regexToken("[ab]", ignoreCase = true)
        override val root by parser { lexeme(data) }
    }
    with(grammar) {
        // Lower-case inputs match the pattern directly; upper-case ones rely on ignoreCase.
        for (text in listOf("a", "b", "A", "B")) {
            assertParsed(text).isEqualTo(data.lex(text))
        }
    }
}

// A regex token built from a ready-made Regex with `ignoreCase = true` is expected to accept both letter cases.
@Test
fun explicitRegexTokenIgnoreCase() {
    val grammar = object : Grammar<SyntaxTree>() {
        val data by regexToken(Regex("[ab]"), ignoreCase = true)
        override val root by parser { lexeme(data) }
    }
    with(grammar) {
        // The Regex passed in has no IGNORE_CASE option of its own; upper-case inputs rely on the flag.
        for (text in listOf("a", "b", "A", "B")) {
            assertParsed(text).isEqualTo(data.lex(text))
        }
    }
}

// Checks how a grammar-level `ignoreCase = true` affects literal, regex and lambda-based tokens.
@Test
fun ignoreCaseGrammar() {
    val grammar = object : Grammar<SyntaxTree>(ignoreCase = true) {
        val lit by literalToken("a")
        val reLit by regexToken("[bc]")
        val re by regexToken(Regex("[de]"))

        // Lambda token that consults the grammar's ignoreCase flag itself.
        val lam by token { s, i -> if (s[i] == 'f' || (ignoreCase && s[i] == 'F')) 1 else 0 }

        // Lambda token that never looks at the flag, so it stays case-sensitive.
        val lamStrict by token { s, i -> if (s[i] == 'g') 1 else 0 }

        override val root by parser { lexeme(lit) } or
            parser { lexeme(reLit) } or
            parser { lexeme(re) } or
            parser { lexeme(lam) } or
            parser { lexeme(lamStrict) }
    }
    with(grammar) {
        // Each input paired with the token expected to produce its lexeme.
        val accepted: List<Pair<String, Token>> = listOf(
            "a" to lit,
            "A" to lit,
            "b" to reLit,
            "C" to reLit,
            "D" to re,
            "e" to re,
            "f" to lam,
            "F" to lam,
            "g" to lamStrict,
        )
        for ((text, expectedToken) in accepted) {
            assertParsed(text).isEqualTo(expectedToken.lex(text))
        }
        // Upper-case 'G' is rejected: the strict lambda token only compares against 'g'.
        assertThat(parseEntire("G")).failedWith(NoMatchingToken(0))
    }
}

companion object {

private fun <T> Grammar<T>.assertParsed(text: String): Assert<T> = assertThat(parseEntireOrThrow(text))
Expand All @@ -471,11 +536,11 @@ class Tests {

private fun node(children: List<SyntaxTree>) = Node(children)

private fun LiteralToken.lex(offset: Int): Lexeme {
private fun LiteralToken.lex(offset: Int = 0): Lexeme {
return Lexeme(TokenMatch(this, offset, string.length), string)
}

private fun Token.lex(text: String, offset: Int): Lexeme {
private fun Token.lex(text: String, offset: Int = 0): Lexeme {
return Lexeme(TokenMatch(this, offset, text.length), text)
}

Expand Down

0 comments on commit caa9c11

Please sign in to comment.