Skip to content

Commit

Permalink
Add internally stored Regex to Token to preserve originally used Regex
Browse files Browse the repository at this point in the history
Make Parsed.remainder public
Optimize TokenizerMatchesSequence to reduce the number of objects.
Push version to 0.3.2
  • Loading branch information
h0tk3y committed Dec 2, 2017
1 parent 3b99d45 commit 7388582
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 23 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
group 'com.github.h0tk3y.betterParse'
version '0.3.1'
version '0.3.2'

buildscript {
ext.kotlin_version = '1.1.51'
Expand Down
12 changes: 6 additions & 6 deletions src/main/kotlin/com/github/h0tk3y/betterParse/grammar/Grammar.kt
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ abstract class Grammar<out T> : Parser<T> {
open val declaredParsers get() = (_parsers + _tokens + rootParser).toSet()

/** Creates an unnamed [Token] from a regex [pattern] string; [ignore] marks it as skippable during parsing. */
fun token(@Language("RegExp") @RegExp pattern: String, ignore: Boolean = false) = Token(null, pattern, ignore)

/** Creates an unnamed [Token] from a [Pattern], converted via `toRegex()` so the originally compiled pattern (including its flags) is preserved rather than flattened to a string. */
fun token(pattern: Pattern, ignore: Boolean = false) = Token(null, pattern.toRegex(), ignore)

/** Creates an unnamed [Token] from a [Regex], stored as-is so the originally used [Regex] is preserved. */
fun token(pattern: Regex, ignore: Boolean = false) = Token(null, pattern, ignore)

/** A [Lexer] that is built with the [Token]s defined within this [Grammar], in their order of declaration */
open val tokenizer: Tokenizer by lazy { DefaultTokenizer(tokens) }
Expand All @@ -59,12 +59,12 @@ abstract class Grammar<out T> : Parser<T> {
}

/** Creates a named [Token] from a regex [pattern] string; [ignore] marks it as skippable during parsing. */
fun token(name: String, @Language("RegExp") @RegExp pattern: String, ignore: Boolean = false) = Token(name, pattern, ignore)

/** Creates a named [Token] from a [Pattern], converted via `toRegex()` so the originally compiled pattern (including its flags) is preserved. */
fun token(name: String, pattern: Pattern, ignore: Boolean = false) = Token(name, pattern.toRegex(), ignore)

/** Creates a named [Token] from a [Regex], stored as-is so the originally used [Regex] is preserved. */
fun token(name: String, pattern: Regex, ignore: Boolean = false) = Token(name, pattern, ignore)

/** Creates an unnamed [Token] from a regex [pattern] string. */
fun token(@Language("RegExp") @RegExp pattern: String, ignore: Boolean = false) = Token(null, pattern, ignore)

/** Creates an unnamed [Token] from a [Pattern], converted via `toRegex()` to preserve the original pattern. */
fun token(pattern: Pattern, ignore: Boolean = false) = Token(null, pattern.toRegex(), ignore)

/** Creates an unnamed [Token] from a [Regex], stored as-is. */
fun token(pattern: Regex, ignore: Boolean = false) = Token(null, pattern, ignore)

/** A convenience function to use for referencing a parser that is not initialized up to this moment. */
fun <T> parser(block: () -> Parser<T>): Parser<T> = ParserReference(block)
Expand Down
25 changes: 19 additions & 6 deletions src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Token.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,28 @@ import org.intellij.lang.annotations.RegExp
* Parses to [TokenMatch].
* The [name] only provides additional information.
*/
class Token(
name: String?,
@RegExp @Language("RegExp") val pattern: String,
val ignored: Boolean = false
) : Parser<TokenMatch> {
class Token : Parser<TokenMatch> {
val pattern: String
val regex: Regex?
val ignored: Boolean

var name: String? = name
var name: String? = null
internal set

constructor(name: String?, @RegExp @Language("RegExp") patternString: String, ignored: Boolean = false) {
this.name = name
this.ignored = ignored
pattern = patternString
regex = null
}

constructor(name: String?, regex: Regex, ignored: Boolean = false) {
this.name = name
this.ignored = ignored
pattern = regex.pattern
this.regex = regex
}

/** Human-readable form: `"name (pattern)"` when a name is set, just the pattern otherwise, with an `[ignorable]` suffix for ignored tokens. */
override fun toString(): String {
    val display = if (name != null) "$name ($pattern)" else pattern
    return display + if (ignored) " [ignorable]" else ""
}
Expand Down
13 changes: 7 additions & 6 deletions src/main/kotlin/com/github/h0tk3y/betterParse/lexer/Tokenizer.kt
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
package com.github.h0tk3y.betterParse.lexer

import com.github.h0tk3y.betterParse.utils.CachedSequence
import com.github.h0tk3y.betterParse.utils.cached
import java.io.InputStream
import java.util.*
import kotlin.coroutines.experimental.buildSequence

/**
 * A cached sequence of [TokenMatch]es that also carries the [tokenizer] which produced it.
 * Extends [CachedSequence] directly (rather than wrapping one) to reduce the number of
 * objects allocated per tokenization.
 */
internal class TokenizerMatchesSequence(
    iterator: Iterator<TokenMatch>,
    val tokenizer: Tokenizer,
    cache: ArrayList<TokenMatch> = arrayListOf(),
    startAt: Int = 0
) : CachedSequence<TokenMatch>(iterator, cache, startAt)

interface Tokenizer {
val tokens: List<Token>
Expand All @@ -34,7 +35,7 @@ class DefaultTokenizer(override val tokens: List<Token>) : Tokenizer {
require(tokens.isNotEmpty()) { "The tokens list should not be empty" }
}

// Each token paired with a compiled Pattern; prefers the token's originally supplied
// Regex (when present) over recompiling its pattern string, preserving regex options.
val patterns = tokens.map { it to (it.regex?.toPattern() ?: it.pattern.toPattern()) }

/** Tokenizes the [input] from a [String] into a [TokenizerMatchesSequence]. */
override fun tokenize(input: String) = tokenize(Scanner(input))
Expand Down Expand Up @@ -81,5 +82,5 @@ class DefaultTokenizer(override val tokens: List<Token>) : Tokenizer {

yield(result)
}
}.constrainOnce().cached().let { TokenizerMatchesSequence(it as CachedSequence, this) }
}.constrainOnce().iterator().let { TokenizerMatchesSequence(it, this) }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ sealed class ParseResult<out T>

/** Represents a successful parsing result of a [Parser] that produced [value] and left a
* possibly empty input sequence [remainder] unprocessed.*/
data class Parsed<out T>(val value: T, val remainder: Sequence<TokenMatch>) : ParseResult<T>() {
    // Deliberately omits the (possibly unbounded) remainder from the rendering.
    override fun toString(): String = "Parsed($value)"
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package com.github.h0tk3y.betterParse.utils

import com.github.h0tk3y.betterParse.lexer.TokenizerMatchesSequence
import com.github.h0tk3y.betterParse.lexer.TokenMatch
import com.github.h0tk3y.betterParse.lexer.TokenizerMatchesSequence
import java.util.*

internal class CachedSequence<T> constructor(
internal open class CachedSequence<T> constructor(
val source: Iterator<T>,
val cache: ArrayList<T>,
val startAt: Int
Expand All @@ -28,7 +28,7 @@ internal class CachedSequence<T> constructor(
}

/**
 * Returns a sequence equivalent to this one with its first [TokenMatch] dropped.
 * For cached sequences this only advances the start index over the shared cache
 * instead of allocating a dropping wrapper; other sequences fall back to [drop].
 */
internal fun Sequence<TokenMatch>.skipOne(): Sequence<TokenMatch> = when (this) {
    is TokenizerMatchesSequence -> TokenizerMatchesSequence(source, tokenizer, cache, startAt + 1)
    is CachedSequence -> CachedSequence(source, cache, startAt + 1)
    else -> drop(1)
}
Expand Down

0 comments on commit 7388582

Please sign in to comment.