Skip to content

Commit

Permalink
Support Trim Operator for Strings / Refactor Lexer (#198)
Browse files Browse the repository at this point in the history
  • Loading branch information
jhnaldo committed Jan 8, 2024
1 parent 92501a3 commit 15e1d94
Show file tree
Hide file tree
Showing 27 changed files with 154 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,6 @@
"precision": [
"language/expressions/does-not-equals/bigint-and-number-extremes",
"language/expressions/equals/bigint-and-number-extremes",
"language/expressions/modulus/S11.5.3_A4_T7.js"
"language/expressions/modulus/S11.5.3_A4_T7"
]
}
1 change: 1 addition & 0 deletions src/main/resources/result/complete-funcs
Original file line number Diff line number Diff line change
Expand Up @@ -2303,6 +2303,7 @@ ToUint16
ToUint32
ToUint8
TriggerPromiseReactions
TrimString
TryStatement[0,0].ContainsDuplicateLabels
TryStatement[0,0].ContainsUndefinedBreakTarget
TryStatement[0,0].ContainsUndefinedContinueTarget
Expand Down
12 changes: 6 additions & 6 deletions src/main/resources/result/spec-summary
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
- numeric string: 17
- syntactic: 195
- extended productions for web: 28
- algorithms: 2656 (88.48%)
- complete: 2350
- incomplete: 306
- algorithm steps: 19559 (96.53%)
- complete: 18881
- incomplete: 678
- algorithms: 2656 (88.52%)
- complete: 2351
- incomplete: 305
- algorithm steps: 19559 (96.55%)
- complete: 18884
- incomplete: 675
- types: 7000 (91.49%)
- known: 6404
- yet: 596
Expand Down
4 changes: 4 additions & 0 deletions src/main/scala/esmeta/analyzer/AbsTransfer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,10 @@ trait AbsTransfer extends Optimized with PruneHelper {
f <- transfer(from)
t <- transfer(to)
} yield v.substring(f, t)
case ETrim(expr, leading, trailing) =>
  // Evaluate the operand abstractly, then ask the resulting abstract value
  // to trim itself on the requested side(s).
  transfer(expr).map(_.trim(leading, trailing))
case ERef(ref) =>
for {
rv <- transfer(ref)
Expand Down
10 changes: 10 additions & 0 deletions src/main/scala/esmeta/analyzer/domain/value/BasicDomain.scala
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,16 @@ object BasicDomain extends value.Domain {
else if (t.isValidInt) apply(s.substring(f.toInt, t.toInt))
else Bot
case _ => Bot
/** Trims white space from this abstract value when it is a single string.
  *
  * The characters removed are the ECMAScript WhiteSpace and LineTerminator
  * code points, on the side(s) selected by the flags. `String.trim` and the
  * `\s` regex class are deliberately not used: the former strips every
  * character up to U+0020 and the latter only ASCII white space, so neither
  * matches the ECMAScript definition (NBSP, U+FEFF, the Zs spaces, LS and PS
  * would be left in place).
  *
  * @param leading
  *   remove white space at the start of the string
  * @param trailing
  *   remove white space at the end of the string
  * @return
  *   `exploded("ETrim")` when `getSingle` is `Many`, the trimmed string when
  *   it is a single `Str`, and `Bot` in every other case
  */
def trim(leading: Boolean, trailing: Boolean): Elem = elem.getSingle match
  case Many => exploded("ETrim")
  case One(Str(s)) =>
    // Every ECMAScript white space / line terminator code point lies in the
    // BMP, so a per-Char test is sufficient and can never match half of a
    // surrogate pair.
    val blanks: Set[Char] = (
      Set(
        0x0009, 0x000b, 0x000c, 0x0020, 0x00a0, 0xfeff, // TAB VT FF SP NBSP ZWNBSP
        0x1680, 0x202f, 0x205f, 0x3000, // USP (category Zs), with the range below
        0x000a, 0x000d, 0x2028, 0x2029, // LF CR LS PS
      ) ++ (0x2000 to 0x200a)
    ).map(_.toChar)
    val front = if (leading) s.dropWhile(blanks) else s
    // lastIndexWhere yields -1 when only white space remains, so take(0)
    // correctly produces the empty string.
    val kept =
      if (trailing) front.take(front.lastIndexWhere(!blanks(_)) + 1)
      else front
    apply(kept)
  case _ => Bot
def clamp(lower: Elem, upper: Elem): Elem =
(elem.getSingle, lower.getSingle, upper.getSingle) match
case (Zero, _, _) | (_, Zero, _) | (_, _, Zero) => Bot
Expand Down
1 change: 1 addition & 0 deletions src/main/scala/esmeta/analyzer/domain/value/Domain.scala
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ trait Domain extends domain.Domain[AValue] {
def duplicated(st: AbsState): Elem
def substring(from: Elem): Elem
def substring(from: Elem, to: Elem): Elem
/** trim operation for string values; the two flags select whether the
  * leading end, the trailing end, or both ends are trimmed
  */
def trim(leading: Boolean, trailing: Boolean): Elem
def clamp(lower: Elem, upper: Elem): Elem
def isArrayIndex: Elem

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ object TypeDomain extends value.Domain {
def duplicated(st: AbsState): Elem = boolTop
def substring(from: Elem): Elem = strTop
def substring(from: Elem, to: Elem): Elem = strTop
// always yields `strTop`; neither flag influences the result
def trim(leading: Boolean, trailing: Boolean): Elem = strTop
def clamp(lower: Elem, upper: Elem): Elem = mathTop
def isArrayIndex: Elem = boolTop

Expand Down
2 changes: 2 additions & 0 deletions src/main/scala/esmeta/compiler/Compiler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,8 @@ class Compiler(
compile(fb, from),
to.map(compile(fb, _)),
)
// a trim expression becomes an IR `ETrim` over the compiled operand; the
// leading/trailing flags are passed through unchanged
case TrimExpression(expr, leading, trailing) =>
ETrim(compile(fb, expr), leading, trailing)
case NumberOfExpression(ReferenceExpression(ref)) =>
toStrERef(compile(fb, ref), "length")
case NumberOfExpression(expr) =>
Expand Down
12 changes: 12 additions & 0 deletions src/main/scala/esmeta/interpreter/Interpreter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,18 @@ class Interpreter(
case Math(n) if s.length < n => s.substring(f)
case v => s.substring(f, v.asInt),
))
case ETrim(expr, leading, trailing) =>
  // Work on code points rather than UTF-16 units so that characters outside
  // the BMP are kept in one piece.
  val codePoints = eval(expr).asStr.codePoints.toArray
  // The strippable set: the parser's white space plus its line terminators.
  val blanks = esParser.WhiteSpaceCPs ++ esParser.LineTerminatorCPs
  val front = if (leading) codePoints.dropWhile(blanks) else codePoints
  // lastIndexWhere is -1 when nothing but blanks remains, giving take(0).
  val kept =
    if (trailing) front.take(front.lastIndexWhere(!blanks(_)) + 1)
    else front
  Str(new String(kept, 0, kept.length))
case ERef(ref) =>
st(eval(ref))
case EUnary(uop, expr) =>
Expand Down
1 change: 1 addition & 0 deletions src/main/scala/esmeta/ir/Expr.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ case class EYet(msg: String) extends Expr
case class EContains(list: Expr, expr: Expr, field: Option[(Type, String)])
extends Expr
case class ESubstring(expr: Expr, from: Expr, to: Option[Expr]) extends Expr
// trim of the string produced by `expr`; `leading` / `trailing` select which
// end(s) are trimmed
case class ETrim(expr: Expr, leading: Boolean, trailing: Boolean) extends Expr
case class ERef(ref: Ref) extends Expr
case class EUnary(uop: UOp, expr: Expr) extends Expr
case class EBinary(bop: BOp, left: Expr, right: Expr) extends Expr
Expand Down
6 changes: 6 additions & 0 deletions src/main/scala/esmeta/ir/util/Parser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ trait Parsers extends TyParsers {
case l ~ e ~ f => EContains(l, e, f)
} | "(" ~ "substring" ~> expr ~ expr ~ opt(expr) <~ ")" ^^ {
case e ~ f ~ t => ESubstring(e, f, t)
} | "(" ~ "trim-start" ~> expr <~ ")" ^^ {
case e => ETrim(e, true, false)
} | "(" ~ "trim-end" ~> expr <~ ")" ^^ {
case e => ETrim(e, false, true)
} | "(" ~ "trim" ~> expr <~ ")" ^^ {
case e => ETrim(e, true, true)
} | "(" ~> uop ~ expr <~ ")" ^^ {
case u ~ e => EUnary(u, e)
} | "(" ~> bop ~ expr ~ expr <~ ")" ^^ {
Expand Down
6 changes: 6 additions & 0 deletions src/main/scala/esmeta/ir/util/Stringifier.scala
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ class Stringifier(detail: Boolean, location: Boolean) {
app >> "(substring " >> expr >> " " >> from
to.map(app >> " " >> _)
app >> ")"
case ETrim(expr, leading, trailing) =>
  // With neither flag set there is no operator to print, so only the
  // operand is emitted.
  if (!leading && !trailing) app >> expr
  else {
    val opener =
      if (!trailing) "(trim-start "
      else if (!leading) "(trim-end "
      else "(trim "
    app >> opener >> expr >> ")"
  }
case ERef(ref) =>
app >> ref
case EUnary(uop, expr) =>
Expand Down
2 changes: 2 additions & 0 deletions src/main/scala/esmeta/ir/util/UnitWalker.scala
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ trait UnitWalker extends BasicUnitWalker {
walkOpt(field, { case (t, f) => walk(t) })
case ESubstring(expr, from, to) =>
walk(expr); walk(from); walkOpt(to, walk)
// visit the operand, then both Boolean flags
// NOTE(review): the flag visits rely on a `walk` overload accepting Boolean
// that is defined outside this excerpt — confirm it exists
case ETrim(expr, leading, trailing) =>
walk(expr); walk(leading); walk(trailing)
case ERef(ref) =>
walk(ref)
case EUnary(uop, expr) =>
Expand Down
2 changes: 2 additions & 0 deletions src/main/scala/esmeta/ir/util/Walker.scala
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ trait Walker extends BasicWalker {
)
case ESubstring(expr, from, to) =>
ESubstring(walk(expr), walk(from), walkOpt(to, walk))
// rebuild the node from the walked operand and the walked Boolean flags
// NOTE(review): the flag walks rely on a `walk` overload accepting Boolean
// that is defined outside this excerpt — confirm it exists
case ETrim(expr, leading, trailing) =>
ETrim(walk(expr), walk(leading), walk(trailing))
case ERef(ref) =>
ERef(walk(ref))
case EUnary(uop, expr) =>
Expand Down
7 changes: 7 additions & 0 deletions src/main/scala/esmeta/lang/Expression.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ case class SubstringExpression(
to: Option[Expression],
) extends Expression

// trim expressions: a copy of the string denoted by `expr` with white space
// removed from its leading end, its trailing end, or both, as selected by
// the two flags
case class TrimExpression(
expr: Expression,
leading: Boolean,
trailing: Boolean,
) extends Expression

// `the number of elements in <list>` expressions
case class NumberOfExpression(expr: Expression) extends Expression

Expand Down
12 changes: 12 additions & 0 deletions src/main/scala/esmeta/lang/util/Parser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ trait Parsers extends IndentParsers {
recordExpr |
lengthExpr |
substrExpr |
trimExpr |
numberOfExpr |
sourceTextExpr |
coveredByExpr |
Expand Down Expand Up @@ -401,6 +402,17 @@ trait Parsers extends IndentParsers {
("from" ~> expr) ~
opt("to" ~> expr) ^^ { case e ~ f ~ t => SubstringExpression(e, f, t) }

// trim expressions
// Accepts "the String value that is a copy of <expr> with <side> white space
// removed", where <side> is one of the three phrases below; each phrase is
// mapped to its (leading, trailing) flag pair.
lazy val trimExpr: PL[TrimExpression] =
("the String value that is a copy of" ~> expr) ~
("with" ~> (
"leading" ^^^ (true, false) | // start only
"trailing" ^^^ (false, true) | // end only
"both leading and trailing" ^^^ (true, true) // both ends
) <~ "white space removed") ^^ {
case e ~ (l, t) => TrimExpression(e, l, t)
}

// `the number of elements in` expressions
lazy val numberOfExpr: PL[NumberOfExpression] =
("the number of elements" ~ ("in" | "of") ~ opt("the List") ~> expr) ^^ {
Expand Down
9 changes: 9 additions & 0 deletions src/main/scala/esmeta/lang/util/Stringifier.scala
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,15 @@ class Stringifier(detail: Boolean, location: Boolean) {
case SubstringExpression(expr, from, to) =>
app >> "the substring of " >> expr >> " from " >> from
to.fold(app)(app >> " to " >> _)
case TrimExpression(expr, leading, trailing) =>
  // Choose the wording for the side(s) being trimmed; "no" is used when
  // neither flag is set.
  val side =
    if (leading && trailing) "both leading and trailing"
    else if (leading) "leading"
    else if (trailing) "trailing"
    else "no"
  app >> "the String value that is a copy of " >> expr >> " with " >> side >>
    " white space removed"
case NumberOfExpression(expr) =>
app >> "the number of elements in " >> expr
case SourceTextExpression(expr) =>
Expand Down
2 changes: 2 additions & 0 deletions src/main/scala/esmeta/lang/util/UnitWalker.scala
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ trait UnitWalker extends BasicUnitWalker {
walk(expr)
case SubstringExpression(expr, from, to) =>
walk(expr); walk(from); walkOpt(to, walk)
// visit the operand, then both Boolean flags
// NOTE(review): the flag visits rely on a `walk` overload accepting Boolean
// that is defined outside this excerpt — confirm it exists
case TrimExpression(expr, leading, trailing) =>
walk(expr); walk(leading); walk(trailing)
case NumberOfExpression(expr) =>
walk(expr)
case SourceTextExpression(expr) =>
Expand Down
2 changes: 2 additions & 0 deletions src/main/scala/esmeta/lang/util/Walker.scala
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ trait Walker extends BasicWalker {
LengthExpression(walk(expr))
case SubstringExpression(expr, from, to) =>
SubstringExpression(walk(expr), walk(from), walkOpt(to, walk))
// rebuild the node from the walked operand and the walked Boolean flags
// NOTE(review): the flag walks rely on a `walk` overload accepting Boolean
// that is defined outside this excerpt — confirm it exists
case TrimExpression(expr, leading, trailing) =>
TrimExpression(walk(expr), walk(leading), walk(trailing))
case NumberOfExpression(expr) =>
NumberOfExpression(walk(expr))
case SourceTextExpression(expr) =>
Expand Down
29 changes: 20 additions & 9 deletions src/main/scala/esmeta/parser/Lexer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,24 @@ trait Lexer extends UnicodeParsers {
)
}

// special code points
// white space: the USP set together with TAB, VT, FF, SP, NBSP and ZWNBSP
lazy val WhiteSpaceCPs = USP ++ Set(TAB, VT, FF, SP, NBSP, ZWNBSP)
// line terminators: LF, CR, LS and PS
lazy val LineTerminatorCPs = Set(LF, CR, LS, PS)
// white space with the line terminators removed
// NOTE(review): the two sets above look disjoint, in which case this equals
// WhiteSpaceCPs — confirm that is intended
lazy val NoLineTerminatorCPs = WhiteSpaceCPs -- LineTerminatorCPs

// special lexers
lazy val WhiteSpace = TAB | VT | FF | SP | NBSP | ZWNBSP | USP
lazy val LineTerminator = LF | CR | LS | PS
lazy val WhiteSpace = toParser(WhiteSpaceCPs)
lazy val LineTerminator = toParser(LineTerminatorCPs)
lazy val LineTerminatorSequence =
LF | CR <~ not(LF) | LS | PS | CR % LF
val cr = toParser(CR)
val lf = toParser(LF)
toParser(LF, LS, PS) | cr <~ not(lf) | cr % lf
lazy val Comment =
"""/\*+[^*]*\*+(?:[^/*][^*]*\*+)*/|//[^\u000A\u000D\u2028\u2029]*""".r
lazy val empty = "".r
lazy val Skip =
rep(WhiteSpace | LineTerminator | Comment) ^^ { _.mkString }
lazy val Skip = rep(WhiteSpace | LineTerminator | Comment) ^^ { _.mkString }
lazy val strNoLineTerminator =
val lines = LineTerminatorCPs.map(_.toChar).mkString("[", "", "]").r
"" <~ guard(Skip.filter(s => lines.findFirstIn(s).isEmpty))

// lexers
Expand Down Expand Up @@ -113,10 +120,14 @@ trait Lexer extends UnicodeParsers {
case NoLineTerminator => strNoLineTerminator
case CodePoint(cp, desc) => cp.toChar.toString.r
case CodePointAbbr(abbr) =>
abbrCPs.getOrElse(
abbr,
error(s"unknown code point abbreviation: <$abbr>"),
)
abbrCPs
.getOrElse(
abbr,
error(s"unknown code point abbreviation: <$abbr>"),
)
.map(_.toChar)
.mkString("[", "", "]")
.r
case UnicodeSet(None) => Any
case UnicodeSet(Some("with the Unicode property “ID_Start”")) => IDStart
case UnicodeSet(Some("with the Unicode property “ID_Continue”")) =>
Expand Down
59 changes: 29 additions & 30 deletions src/main/scala/esmeta/parser/UnicodeParsers.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,43 +9,41 @@ import scala.util.matching.Regex

/** ECMAScript special unicodes */
trait UnicodeParsers extends BasicParsers with EPackratParsers {
val ZWNJ = "\u200C".r
val ZWJ = "\u200D".r
val ZWNBSP = "\uFEFF".r
val ZWNJ = 0x200c
val ZWJ = 0x200d
val ZWNBSP = 0xfeff
// white spaces
val TAB = "\u0009".r
val VT = "\u000B".r
val FF = "\u000C".r
val SP = "\u0020".r
val NBSP = "\u00A0".r
val TAB = 0x0009
val VT = 0x000b
val FF = 0x000c
val SP = 0x0020
val NBSP = 0x00a0
// TODO automatically extract category "Zs"
val USP =
"[\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000]".r
val USP = Set(
0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007,
0x2008, 0x2009, 0x200a, 0x202f, 0x205f, 0x3000,
)
// line terminators
val LF = "\u000A".r
val CR = "\u000D".r
val LS = "\u2028".r
val PS = "\u2029".r
val LF = 0x000a
val CR = 0x000d
val LS = 0x2028
val PS = 0x2029

lazy val lines = "[\u000A\u000D\u2028\u2029]".r
lazy val lines = toParser(LF, CR, LS, PS)
lazy val Any = "(?s).".r
lazy val IDStart =
Any.filter(s => Unicode.IDStart contains toCodePoint(s))
lazy val IDContinue =
Any.filter(s => Unicode.IDContinue contains toCodePoint(s))
lazy val IDStart = toParser(Unicode.IDStart)
lazy val IDContinue = toParser(Unicode.IDContinue)

/** First Unicode code point of `s`.
  *
  * Reads the code point at index 0 directly instead of materialising an
  * array of every code point in the string just to take its head. A
  * surrogate pair at the start is returned as one supplementary code point.
  *
  * @param s
  *   a non-empty string; an empty string raises
  *   `StringIndexOutOfBoundsException`
  * @return
  *   the code point starting at index 0
  */
protected inline def toCodePoint(s: String): Int = s.codePointAt(0)

protected def toCodePoint(str: String): Int =
def check4B(i: Int): Boolean =
str.codePointCount(i, str.length min (i + 2)) == 1
def aux(i: Int, acc: Int): Int =
if (i >= str.length) acc
else
val nextAcc = str.codePointAt(i) + (acc * (1 << 16))
aux(if (check4B(i)) i + 2 else i + 1, nextAcc)
aux(0, 0)
// parser for any one of the listed code points; delegates to the Set overload
protected inline def toParser(seq: Int*): Parser[String] = toParser(seq.toSet)
// parser for exactly the code point `cp`: filters `Any` by the code point of
// the matched string
protected inline def toParser(cp: Int): Parser[String] =
Any.filter(s => toCodePoint(s) == cp)
// parser for any code point in `set`: filters `Any` by set membership of the
// matched string's code point
protected inline def toParser(set: Set[Int]): Parser[String] =
Any.filter(s => set contains toCodePoint(s))

// abbreviated code points mapping
val abbrCPs: Map[String, Regex] = Map(
val abbrCPs: Map[String, Set[Int]] = Map(
"ZWNJ" -> ZWNJ,
"ZWJ" -> ZWJ,
"ZWNBSP" -> ZWNBSP,
Expand All @@ -54,10 +52,11 @@ trait UnicodeParsers extends BasicParsers with EPackratParsers {
"FF" -> FF,
"SP" -> SP,
"NBSP" -> NBSP,
"USP" -> USP,
"LF" -> LF,
"CR" -> CR,
"LS" -> LS,
"PS" -> PS,
).map((k, v) => k -> Set(v)) + (
"USP" -> USP
)
}
3 changes: 3 additions & 0 deletions src/test/scala/esmeta/ir/IRTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ object IRTest {
lazy val containsField = EContains(xExpr, xExpr, Some(ty, "Value"))
lazy val substring = ESubstring(xExpr, xExpr, None)
lazy val substringTo = ESubstring(xExpr, xExpr, Some(xExpr))
lazy val trim = ETrim(xExpr, true, true)
lazy val trimStart = ETrim(xExpr, true, false)
lazy val trimEnd = ETrim(xExpr, false, true)
lazy val xExpr = ERef(x)
lazy val yExpr = ERef(y)
lazy val unary = EUnary(UOp.Neg, xExpr)
Expand Down
3 changes: 3 additions & 0 deletions src/test/scala/esmeta/ir/JsonTinyTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ class JsonTinyTest extends IRTest {
containsField -> "(contains x x: Number Value)",
substring -> "(substring x x)",
substringTo -> "(substring x x x)",
trim -> "(trim x)",
trimStart -> "(trim-start x)",
trimEnd -> "(trim-end x)",
xExpr -> "x",
unary -> "(- x)",
binary -> "(+ x x)",
Expand Down
3 changes: 3 additions & 0 deletions src/test/scala/esmeta/ir/StringifyTinyTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ class StringifyTinyTest extends IRTest {
containsField -> "(contains x x: Number Value)",
substring -> "(substring x x)",
substringTo -> "(substring x x x)",
trim -> "(trim x)",
trimStart -> "(trim-start x)",
trimEnd -> "(trim-end x)",
xExpr -> "x",
unary -> "(- x)",
binary -> "(+ x x)",
Expand Down
Loading

0 comments on commit 15e1d94

Please sign in to comment.