From 2131202b5a8c3838983fc8c0b36717008834b38b Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Sun, 30 Jan 2022 16:53:01 -1000 Subject: [PATCH 01/15] checkpoint with compiling --- .../src/main/scala/cats/parse/Parser.scala | 144 +++++++++++------- .../src/main/scala/cats/parse/RadixNode.scala | 26 ++++ .../test/scala/cats/parse/ParserTest.scala | 14 +- .../test/scala/cats/parse/RadixNodeTest.scala | 13 +- 4 files changed, 138 insertions(+), 59 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index eabb67cb..16a94873 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -1002,19 +1002,18 @@ object Parser { flatten(rest, acc += notOneOf) } - val flattened = flatten(parsers, new ListBuffer) + val flat0 = flatten(parsers, new ListBuffer) // we unmap if we can to make merging work better - val isStr = flattened.forall(Impl.matchesString) - val maybeUnmap = if (isStr) flattened.map(Impl.unmap) else flattened + val isStr = flat0.forall(Impl.matchesString) + val flat = if (isStr) flat0.map(Impl.unmap) else flat0 - val cs = Impl.mergeCharIn[Any, Parser[Any]](maybeUnmap) - val res = Impl.mergeStrIn[Any, Parser[Any]](cs) match { + val cs0 = Impl.mergeCharIn[Any, Parser[Any]](flat) + val cs = (if (isStr) cs0.map(string(_)) else cs0).asInstanceOf[List[Parser[A]]] + Impl.mergeStrIn[A, Parser[A]](cs) match { case Nil => fail case p :: Nil => p case two => Impl.OneOf(two) } - - (if (isStr) string(res) else res).asInstanceOf[Parser[A]] } /** go through the list of parsers trying each as long as they are epsilon failures (don't @@ -1050,14 +1049,13 @@ object Parser { val isStr = flat0.forall(Impl.matchesString) val flat = if (isStr) flat0.map(Impl.unmap0) else flat0 - val cs = Impl.mergeCharIn[Any, Parser0[Any]](flat) - val res = Impl.mergeStrIn[Any, Parser0[Any]](cs) match { + val cs0 = Impl.mergeCharIn[Any, Parser0[Any]](flat) + val cs = (if (isStr) cs0.map(string0(_)) else cs0).asInstanceOf[List[Parser0[A]]] + Impl.mergeStrIn[A, Parser0[A]](cs) match { case Nil => fail case p :: Nil => p case two => Impl.OneOf0(two) } - - (if (isStr) string0(res) else res).asInstanceOf[Parser0[A]] } /** Parse the longest matching string between alternatives. The order of the strings does not @@ -1076,7 +1074,6 @@ object Parser { .StringIn( SortedSet(two: _*) ) // sadly scala 2.12 doesn't have the `SortedSet.from` constructor function - .string } /** Version of stringIn that allows the empty string @@ -1665,7 +1662,6 @@ object Parser { case f @ Impl.Fail() => f.widen case f @ Impl.FailWith(_) => f.widen case p: Impl.Str => p - case p: Impl.StringIn => p case p: Impl.IgnoreCase => p case notVoid => Impl.Void(notVoid) } @@ -1695,9 +1691,9 @@ object Parser { Impl.unmap(pa) match { case len @ Impl.Length(_) => len case strP @ Impl.Str(expect) => strP.as(expect) - case ci @ Impl.CharIn(min, bs, _) if BitSetUtil.isSingleton(bs) => + case ci @ Impl.SingleChar(c) => // we can allocate the returned string once here - val minStr = min.toChar.toString + val minStr = c.toString Impl.Map(ci, Impl.ConstFn(minStr)) case f @ Impl.Fail() => f.widen case f @ Impl.FailWith(_) => f.widen @@ -1788,12 +1784,12 @@ object Parser { if (b.equals(())) v.asInstanceOf[Parser[B]] else v match { - case Impl.Void(ci @ Impl.CharIn(min, bs, _)) => + case Impl.Void(ci @ Impl.SingleChar(c)) => // CharIn is common and cheap, no need to wrap // with Void since CharIn always returns the char // even when voided b match { - case bc: Char if BitSetUtil.isSingleton(bs) && (min.toChar == bc) => + case bc: Char if bc == c => ci.asInstanceOf[Parser[B]] case _ => Impl.Map(ci, Impl.ConstFn(b)) @@ -1972,13 +1968,28 @@ object Parser { case _ => false } + object SingleChar { + def unapply(p: Parser[Any]): Option[Char] = + p match { + case CharIn(min, bs, _) if BitSetUtil.isSingleton(bs) => Some(min.toChar) + case _ => None + } + } + + object DefiniteString { + def unapply(p: Parser0[Any]): Option[String] = + p match { + case Pure("") => Some("") + case Map(Str(e1), ConstFn(e2)) if e1 == e2 => Some(e1) + case Map(SingleChar(c), ConstFn(e: String)) if (e.length == 1) && (e.charAt(0) == c) => Some(e) + case _ => None + } + } // does this parser return the string it matches def matchesString(p: Parser0[Any]): Boolean = p match { - case StringP0(_) | StringP(_) | Pure("") | Length(_) | Fail() | FailWith(_) => true - case Map(Str(e1), ConstFn(e2)) => e1 == e2 - case Map(CharIn(min, bs, _), ConstFn(e)) if BitSetUtil.isSingleton(bs) => - e == min.toChar.toString + case StringP0(_) | StringP(_) | StringIn(_) | Length(_) | Fail() | FailWith(_) => true + case DefiniteString(_) => true case OneOf(ss) => ss.forall(matchesString) case OneOf0(ss) => ss.forall(matchesString) case WithContextP(_, p) => matchesString(p) @@ -2008,8 +2019,8 @@ object Parser { final def hasKnownResult[A](p: Parser0[A]): Option[A] = p match { case Pure(a) => Some(a) - case Impl.CharIn(min, bs, _) if BitSetUtil.isSingleton(bs) => - Some(min.toChar.asInstanceOf[A]) + case SingleChar(c) => + Some(c.asInstanceOf[A]) case Map0(_, fn) => // scala 3.0.2 seems to fail if we inline // this match above @@ -2064,18 +2075,15 @@ object Parser { case WithContextP0(_, p) => hasKnownResult(p) case Backtrack(p) => hasKnownResult(p) case Backtrack0(p) => hasKnownResult(p) - case Not(_) | Peek(_) | Void(_) | Void0(_) | StartParser | EndParser | Str(_) | StringIn( - _ - ) | IgnoreCase( - _ - ) => + case Not(_) | Peek(_) | Void(_) | Void0(_) | StartParser | EndParser | + Str(_) | IgnoreCase(_) => // these are always unit someUnit.asInstanceOf[Option[A]] case Rep0(_, _, _) | Rep(_, _, _, _) | FlatMap0(_, _) | FlatMap(_, _) | TailRecM(_, _) | TailRecM0(_, _) | Defer(_) | Defer0(_) | GetCaret | Index | OneOf(_) | OneOf0(_) | Length(_) | Fail() | FailWith(_) | CharIn(_, _, _) | AnyChar | StringP( _ - ) | StringP0(_) | Select(_, _) | Select0(_, _) => + ) | StringP0(_) | Select(_, _) | Select0(_, _) | StringIn(_)=> // these we don't know the value fundamentally or by construction None } @@ -2286,9 +2294,9 @@ object Parser { state.capture = false val init = state.offset pa.parseMut(state) - val str = state.str.substring(init, state.offset) state.capture = s0 - str + if (state.error eq null) state.str.substring(init, state.offset) + else null } case class StringP0[A](parser: Parser0[A]) extends Parser0[String] { @@ -2442,16 +2450,19 @@ object Parser { null.asInstanceOf[A] } - final def stringIn[A](radix: RadixNode, all: SortedSet[String], state: State): Unit = { + final def stringIn(radix: RadixNode, all: SortedSet[String], state: State): String = { val str = state.str val startOffset = state.offset - val lastMatch = radix.matchAt(str, startOffset) + val matched = radix.matchAtOrNull(str, startOffset) - if (lastMatch < 0) { + if (matched eq null) { state.error = Eval.later(Chain.one(Expectation.OneOfStr(startOffset, all.toList))) state.offset = startOffset + null } else { + val lastMatch = startOffset + matched.length state.offset = lastMatch + matched } } @@ -2469,13 +2480,13 @@ object Parser { override def parseMut(state: State): A = oneOf(ary, state) } - case class StringIn(sorted: SortedSet[String]) extends Parser[Unit] { + case class StringIn(sorted: SortedSet[String]) extends Parser[String] { require(sorted.size >= 2, s"expected more than two items, found: ${sorted.size}") require(!sorted.contains(""), "empty string is not allowed in alternatives") private[this] val tree = RadixNode.fromSortedStrings(NonEmptyList.fromListUnsafe(sorted.toList)) - override def parseMut(state: State): Unit = stringIn(tree, sorted, state) + override def parseMut(state: State): String = stringIn(tree, sorted, state) } final def prod[A, B](pa: Parser0[A], pb: Parser0[B], state: State): (A, B) = { @@ -2805,7 +2816,7 @@ object Parser { case AnyChar :: tail => // AnyChar is bigger than all subsequent CharIn: // and any direct prefix CharIns - val tail1 = tail.filterNot(_.isInstanceOf[CharIn]) + val tail1 = tail.filterNot { p => p.isInstanceOf[CharIn] || p.isInstanceOf[AnyChar.type] } (result :+ AnyChar.asInstanceOf[P0]) ++ Chain.fromSeq(tail1) case (ci: CharIn) :: tail => loop(tail, ci :: front, result) @@ -2830,44 +2841,71 @@ object Parser { * there are no prefixes of the right inside the left */ def mergeStrIn[A, P0 <: Parser0[A]](ps: List[P0]): List[P0] = { + val reset = Left(SortedSet.empty[String]) + @annotation.tailrec - def loop(ps: List[P0], front: SortedSet[String], result: Chain[P0]): Chain[P0] = { + def loop(ps: List[P0], front: Either[SortedSet[String], StringIn], result: Chain[P0]): Chain[P0] = { @inline - def res(front: SortedSet[String]): Chain[P0] = - if (front.isEmpty) Chain.nil - else if (front.size == 1) Chain.one(Str(front.head).asInstanceOf[P0]) - else Chain.one(StringIn(front).asInstanceOf[P0]) + def res(front: Either[SortedSet[String], StringIn]): Chain[P0] = + front match { + case Right(si) => Chain.one(si.asInstanceOf[P0]) + case Left(set) => + if (set.isEmpty) Chain.nil + else if (set.size == 1) Chain.one { + val p = set.head match { + case "" => emptyStringParser0 + case ne => Parser.string(ne).string + } + p.asInstanceOf[P0] + } + else Chain.one(StringIn(set).asInstanceOf[P0]) + } + val frontSet: SortedSet[String] = + front match { + case Right(si) => si.sorted + case Left(s) => s + } // returns if there is a strict prefix (equality does not count) def frontHasPrefixOf(s: String): Boolean = - front.exists { f => s.startsWith(f) && (f.length != s.length) } + frontSet.exists { f => s.startsWith(f) && (f.length != s.length) } ps match { case Nil => result ++ res(front) - case Str(s) :: tail => + case StringP(ci @ CharIn(_, _, _)) :: tail if frontSet.nonEmpty && !ci.allChars.exists { c => frontHasPrefixOf(c.toString) } => + // we can merge all these into the non-empty StringIn + loop(tail, Left(frontSet ++ ci.allChars.map(_.toString)), result) + case DefiniteString(s) :: tail if s.nonEmpty => + // we can't add empty string to `StringIn` if (frontHasPrefixOf(s)) { // there is an overlap, so we need to match what we have first, then come here - loop(tail, SortedSet(s), result ++ res(front)) + loop(tail, Left(SortedSet(s)), result ++ res(front)) } else { // there is no overlap in the tree, just merge it in: - loop(tail, front + s, result) + loop(tail, Left(frontSet + s), result) } - case StringIn(ss) :: tail => + case (si @ StringIn(ss)) :: tail => val (rights, lefts) = ss.partition(frontHasPrefixOf(_)) - val front1 = front | lefts if (rights.nonEmpty) { // there are some that can't be merged in - loop(tail, rights, result ++ res(front1)) + val front1 = + if (lefts.nonEmpty) Left(frontSet | lefts) + else front + + loop(tail, Left(rights), result ++ res(front1)) } else { // everything can be merged in - loop(tail, front1, result) + val newFront = + if (frontSet.isEmpty) Right(si) + else Left(frontSet | lefts) + loop(tail, newFront, result) } case h :: tail => - loop(tail, SortedSet.empty, (result ++ res(front)) :+ h) + loop(tail, reset, (result ++ res(front)) :+ h) } } - loop(ps, SortedSet.empty, Chain.nil).toList + loop(ps, reset, Chain.nil).toList } case object AnyChar extends Parser[Char] { @@ -2890,6 +2928,8 @@ object Parser { override def toString = s"CharIn($min, bitSet = ..., $ranges)" + def allChars: Iterable[Char] = BitSetUtil.union((min, bitSet) :: Nil) + def makeError(offset: Int): Chain[Expectation] = { var result = Chain.empty[Expectation] var aux = ranges.toList diff --git a/core/shared/src/main/scala/cats/parse/RadixNode.scala b/core/shared/src/main/scala/cats/parse/RadixNode.scala index 111b9f5a..444f9b69 100644 --- a/core/shared/src/main/scala/cats/parse/RadixNode.scala +++ b/core/shared/src/main/scala/cats/parse/RadixNode.scala @@ -55,6 +55,32 @@ private[parse] final class RadixNode( else (matched :: rest.filterNot(_ == matched).toList) } + + final def matchesWithPrefix(prefix: String): List[String] = + matchesWithPrefixLoop(prefix, 0, prefix.length) + + @tailrec + private def matchesWithPrefixLoop(prefix: String, start: Int, end: Int): List[String] = + if (start >= end) { + // this is the empty string, all the strings start with empty + allStrings + } + else { + // start < end, there is at least another character to match + val c = prefix.charAt(start) + val idx = c.toInt & bitMask + prefixes(idx) match { + case null => + Nil + case p => + val len = math.min(end - start, p.length) + if (prefix.regionMatches(start, p, 0, len)) { + children(idx).matchesWithPrefixLoop(prefix, start + len, end) + } + else Nil + } + } + /** If this matches, return the new offset, else return -1 * * @param str diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala b/core/shared/src/test/scala/cats/parse/ParserTest.scala index 4454b088..d070aafe 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -847,13 +847,15 @@ class ParserTest extends munit.ScalaCheckSuite { property("oneOf0 composes as expected") { forAll(ParserGen.gen0, ParserGen.gen0, Arbitrary.arbitrary[String]) { (genP1, genP2, str) => - assertEquals(genP1.fa.orElse(genP2.fa).parse(str), orElse(genP1.fa, genP2.fa, str)) + assertEquals(genP1.fa.orElse(genP2.fa).parse(str).leftMap(_.offsets), + orElse(genP1.fa, genP2.fa, str).leftMap(_.offsets)) } } property("oneOf composes as expected") { forAll(ParserGen.gen, ParserGen.gen, Arbitrary.arbitrary[String]) { (genP1, genP2, str) => - assertEquals(genP1.fa.orElse(genP2.fa).parse(str), orElse(genP1.fa, genP2.fa, str)) + assertEquals(genP1.fa.orElse(genP2.fa).parse(str).leftMap(_.offsets), + orElse(genP1.fa, genP2.fa, str).leftMap(_.offsets)) } } @@ -1438,8 +1440,8 @@ class ParserTest extends munit.ScalaCheckSuite { } } - property("p orElse p == p") { - forAll(ParserGen.gen, Arbitrary.arbitrary[String]) { (genP, str) => + property("p orElse p == p (0)") { + forAll(ParserGen.gen0, Arbitrary.arbitrary[String]) { (genP, str) => val res0 = genP.fa.parse(str) val res1 = genP.fa.orElse(genP.fa).parse(str) assertEquals(res1, res0) @@ -2426,7 +2428,7 @@ class ParserTest extends munit.ScalaCheckSuite { val left = Parser.oneOf0(as.map(_.fa.string)) val right = Parser.oneOf0[Any](as.map(_.fa)).string - assertEquals(left.parse(toParse), right.parse(toParse)) + assertEquals(left.parse(toParse).leftMap(_.offsets), right.parse(toParse).leftMap(_.offsets)) } } @@ -2436,7 +2438,7 @@ class ParserTest extends munit.ScalaCheckSuite { val left = Parser.oneOf(as.map(_.fa.string)) val right = Parser.oneOf[Any](as.map(_.fa)).string - assertEquals(left.parse(toParse), right.parse(toParse)) + assertEquals(left.parse(toParse).leftMap(_.offsets), right.parse(toParse).leftMap(_.offsets)) } } diff --git a/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala b/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala index 8b6669af..d25df970 100644 --- a/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala +++ b/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala @@ -22,7 +22,7 @@ package cats.parse import org.scalacheck.{Gen, Prop} -import org.scalacheck.Prop.forAll +import org.scalacheck.Prop.{forAll, forAllNoShrink} class RadixNodeTest extends munit.ScalaCheckSuite { val tests: Int = if (BitSetUtil.isScalaJs) 50 else 20000 @@ -193,4 +193,15 @@ class RadixNodeTest extends munit.ScalaCheckSuite { assertEquals(RadixNode.fromStrings(ss).allStrings.sorted, ss.distinct.sorted) } } + + property("RadixNode.matchesWithPrefix(p) matches allStrings.filter(_.startsWith(p))") { + forAll { (ss: List[String], head: String, tail: List[String]) => + val rn = RadixNode.fromStrings(ss) + forAllNoShrink(Gen.oneOf(head :: tail ::: ss)) { prefix => + assertEquals( + rn.matchesWithPrefix(prefix).sorted, + ss.filter(_.startsWith(prefix)).distinct.sorted) + } + } + } } From b4bf30dfb52e502e6fdad3285d201b82a80fede4 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Wed, 2 Feb 2022 16:10:28 -1000 Subject: [PATCH 02/15] refactor merging --- .../src/main/scala/cats/parse/Parser.scala | 278 ++++++++---------- .../src/main/scala/cats/parse/RadixNode.scala | 7 +- .../test/scala/cats/parse/ParserTest.scala | 31 +- .../test/scala/cats/parse/RadixNodeTest.scala | 3 +- 4 files changed, 144 insertions(+), 175 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index 16a94873..423f720e 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -27,6 +27,7 @@ import cats.data.{AndThen, Chain, NonEmptyList} import cats.implicits._ import scala.collection.immutable.SortedSet import scala.collection.mutable.ListBuffer +import scala.annotation.tailrec import java.util.Arrays /** Parser0[A] attempts to extract an `A` value from the given input, potentially moving its offset @@ -741,7 +742,7 @@ object Parser { } private def mergeInRange(irs: List[InRange]): List[InRange] = { - @annotation.tailrec + @tailrec def merge(rs: List[InRange], aux: Chain[InRange] = Chain.empty): Chain[InRange] = rs match { case x :: y :: rest => @@ -761,14 +762,14 @@ object Parser { Some(OneOfStr(ooss.head.offset, ssb.result().toList)) } - @annotation.tailrec + @tailrec private def stripContext(ex: Expectation): Expectation = ex match { case WithContext(_, inner) => stripContext(inner) case _ => ex } - @annotation.tailrec + @tailrec private def addContext(revCtx: List[String], ex: Expectation): Expectation = revCtx match { case Nil => ex @@ -990,30 +991,28 @@ object Parser { * Note: order matters here, since we don't backtrack by default. */ def oneOf[A](parsers: List[Parser[A]]): Parser[A] = { - @annotation.tailrec - def flatten(ls: List[Parser[A]], acc: ListBuffer[Parser[A]]): List[Parser[A]] = - ls match { - case Nil => acc.toList.distinct - case Impl.OneOf(ps) :: rest => - flatten(ps ::: rest, acc) - case Impl.Fail() :: rest => - flatten(rest, acc) - case notOneOf :: rest => - flatten(rest, acc += notOneOf) + @tailrec + def loop(ps: List[Parser[A]], acc: List[Parser[A]]): Parser[A] = + ps match { + case Nil => + acc match { + case Nil => Impl.Fail() + case one :: Nil => one + case many => Impl.OneOf(many.reverse) + } + case h :: Nil => loop(Nil, h :: acc) + case h1 :: (tail @ (h2 :: tail2)) => + Impl.merge(h1, h2) match { + case Some(NonEmptyList(h, Nil)) => loop(h :: tail2, acc) + case Some(NonEmptyList(h, t)) => loop(t ::: tail2, h :: acc) + case None => loop(tail, h1 :: acc) + } } - - val flat0 = flatten(parsers, new ListBuffer) - // we unmap if we can to make merging work better - val isStr = flat0.forall(Impl.matchesString) - val flat = if (isStr) flat0.map(Impl.unmap) else flat0 - - val cs0 = Impl.mergeCharIn[Any, Parser[Any]](flat) - val cs = (if (isStr) cs0.map(string(_)) else cs0).asInstanceOf[List[Parser[A]]] - Impl.mergeStrIn[A, Parser[A]](cs) match { - case Nil => fail - case p :: Nil => p - case two => Impl.OneOf(two) + val flattened = parsers.flatMap { + case Impl.OneOf(os) => os + case notOneOf => notOneOf :: Nil } + loop(flattened, Nil) } /** go through the list of parsers trying each as long as they are epsilon failures (don't @@ -1026,36 +1025,30 @@ object Parser { * Note: order matters here, since we don't backtrack by default. */ def oneOf0[A](ps: List[Parser0[A]]): Parser0[A] = { - @annotation.tailrec - def flatten(ls: List[Parser0[A]], acc: ListBuffer[Parser0[A]]): List[Parser0[A]] = - ls match { - case Nil => acc.toList.distinct - case Impl.OneOf0(ps) :: rest => - flatten(ps ::: rest, acc) - case Impl.OneOf(ps) :: rest => - flatten(ps ::: rest, acc) - case Impl.Fail() :: rest => - flatten(rest, acc) - case notOneOf :: rest => - if (Impl.alwaysSucceeds(notOneOf)) { - (acc += notOneOf).toList.distinct - } else { - flatten(rest, acc += notOneOf) + + @tailrec + def loop(ps: List[Parser0[A]], acc: List[Parser0[A]]): Parser0[A] = + ps match { + case Nil => + acc match { + case Nil => Impl.Fail() + case one :: Nil => one + case many => Impl.OneOf0(many.reverse) + } + case h :: Nil => loop(Nil, h :: acc) + case h1 :: (tail @ (h2 :: tail2)) => + Impl.merge0(h1, h2) match { + case Some(NonEmptyList(h, Nil)) => loop(h :: tail2, acc) + case Some(NonEmptyList(h, t)) => loop(t ::: tail2, h :: acc) + case None => loop(tail, h1 :: acc) } } - val flat0 = flatten(ps, new ListBuffer) - // we unmap if we can to make merging work better - val isStr = flat0.forall(Impl.matchesString) - val flat = if (isStr) flat0.map(Impl.unmap0) else flat0 - - val cs0 = Impl.mergeCharIn[Any, Parser0[Any]](flat) - val cs = (if (isStr) cs0.map(string0(_)) else cs0).asInstanceOf[List[Parser0[A]]] - Impl.mergeStrIn[A, Parser0[A]](cs) match { - case Nil => fail - case p :: Nil => p - case two => Impl.OneOf0(two) + val flattened = ps.flatMap { + case Impl.OneOf0(os) => os + case notOneOf => notOneOf :: Nil } + loop(flattened, Nil) } /** Parse the longest matching string between alternatives. The order of the strings does not @@ -1689,6 +1682,7 @@ object Parser { case str if Impl.matchesString(str) => str.asInstanceOf[Parser[String]] case _ => Impl.unmap(pa) match { + case si @ Impl.StringIn(_) => si case len @ Impl.Length(_) => len case strP @ Impl.Str(expect) => strP.as(expect) case ci @ Impl.SingleChar(c) => @@ -1951,7 +1945,7 @@ object Parser { final def doesBacktrackCheat(p: Parser0[Any]): Boolean = doesBacktrack(p) - @annotation.tailrec + @tailrec final def doesBacktrack(p: Parser0[Any]): Boolean = p match { case Backtrack0(_) | Backtrack(_) | AnyChar | CharIn(_, _, _) | Str(_) | IgnoreCase(_) | @@ -1981,7 +1975,8 @@ object Parser { p match { case Pure("") => Some("") case Map(Str(e1), ConstFn(e2)) if e1 == e2 => Some(e1) - case Map(SingleChar(c), ConstFn(e: String)) if (e.length == 1) && (e.charAt(0) == c) => Some(e) + case Map(SingleChar(c), ConstFn(e: String)) if (e.length == 1) && (e.charAt(0) == c) => + Some(e) case _ => None } } @@ -2075,15 +2070,16 @@ object Parser { case WithContextP0(_, p) => hasKnownResult(p) case Backtrack(p) => hasKnownResult(p) case Backtrack0(p) => hasKnownResult(p) - case Not(_) | Peek(_) | Void(_) | Void0(_) | StartParser | EndParser | - Str(_) | IgnoreCase(_) => + case Not(_) | Peek(_) | Void(_) | Void0(_) | StartParser | EndParser | Str(_) | IgnoreCase( + _ + ) => // these are always unit someUnit.asInstanceOf[Option[A]] case Rep0(_, _, _) | Rep(_, _, _, _) | FlatMap0(_, _) | FlatMap(_, _) | TailRecM(_, _) | TailRecM0(_, _) | Defer(_) | Defer0(_) | GetCaret | Index | OneOf(_) | OneOf0(_) | Length(_) | Fail() | FailWith(_) | CharIn(_, _, _) | AnyChar | StringP( _ - ) | StringP0(_) | Select(_, _) | Select0(_, _) | StringIn(_)=> + ) | StringP0(_) | Select(_, _) | Select0(_, _) | StringIn(_) => // these we don't know the value fundamentally or by construction None } @@ -2644,14 +2640,14 @@ object Parser { override def parseMut(state: State): B = Impl.tailRecM(p1, fn, state) } - @annotation.tailrec + @tailrec final def compute0[A](fn: () => Parser0[A]): Parser0[A] = fn() match { case Defer(f) => compute(f) case Defer0(f) => compute0(f) case notDefer0 => notDefer0 } - @annotation.tailrec + @tailrec final def compute[A](fn: () => Parser[A]): Parser[A] = fn() match { case Defer(f) => compute(f) @@ -2794,120 +2790,76 @@ object Parser { } } - /* - * Merge CharIn bitsets - */ - def mergeCharIn[A, P0 <: Parser0[A]](ps: List[P0]): List[P0] = { - @annotation.tailrec - def loop(ps: List[P0], front: List[CharIn], result: Chain[P0]): Chain[P0] = { - @inline - def frontRes: Chain[P0] = - front match { - case Nil => Chain.nil - case one :: Nil => Chain.one(one.asInstanceOf[P0]) - case many => - // we need to union - val minBs = many.iterator.map { case CharIn(m, bs, _) => (m, bs) } - Chain.one(Parser.charIn(BitSetUtil.union(minBs)).asInstanceOf[P0]) - } + private def oneONEL[A](a: A): Option[NonEmptyList[A]] = Some(NonEmptyList(a, Nil)) - ps match { - case Nil => result ++ frontRes - case AnyChar :: tail => - // AnyChar is bigger than all subsequent CharIn: - // and any direct prefix CharIns - val tail1 = tail.filterNot { p => p.isInstanceOf[CharIn] || p.isInstanceOf[AnyChar.type] } - (result :+ AnyChar.asInstanceOf[P0]) ++ Chain.fromSeq(tail1) - case (ci: CharIn) :: tail => - loop(tail, ci :: front, result) - case h :: tail => - // h is not an AnyChar or CharIn - // we make our prefix frontRes - // and resume working on the tail - loop(tail, Nil, (result ++ frontRes) :+ h) - } + def merge0[A](left: Parser0[A], right: Parser0[A]): Option[NonEmptyList[Parser0[A]]] = + (left, right) match { + case (l1: Parser[A], r1: Parser[A]) => merge(l1, r1) + case (_, _) if alwaysSucceeds(left) => oneONEL(left) + case (Fail(), _) => oneONEL(right) + case (_, Fail()) => oneONEL(left) + case _ => None } - - loop(ps, Nil, Chain.nil).toList - } - - /* - * Merge Str and StringIn - * - * the semantic issue is this: - * oneOf matches the first, left to right - * StringIn matches the longest. - * we can only merge into the left if - * there are no prefixes of the right inside the left - */ - def mergeStrIn[A, P0 <: Parser0[A]](ps: List[P0]): List[P0] = { - val reset = Left(SortedSet.empty[String]) - - @annotation.tailrec - def loop(ps: List[P0], front: Either[SortedSet[String], StringIn], result: Chain[P0]): Chain[P0] = { - @inline - def res(front: Either[SortedSet[String], StringIn]): Chain[P0] = - front match { - case Right(si) => Chain.one(si.asInstanceOf[P0]) - case Left(set) => - if (set.isEmpty) Chain.nil - else if (set.size == 1) Chain.one { - val p = set.head match { - case "" => emptyStringParser0 - case ne => Parser.string(ne).string - } - p.asInstanceOf[P0] - } - else Chain.one(StringIn(set).asInstanceOf[P0]) + def merge[A](left: Parser[A], right: Parser[A]): Option[NonEmptyList[Parser[A]]] = + (left, right) match { + case (Fail(), _) => oneONEL(right) + case (_, Fail()) => oneONEL(left) + case (CharIn(_, _, _), AnyChar) => oneONEL(AnyChar) + case (CharIn(m1, b1, _), CharIn(m2, b2, _)) => + oneONEL(Parser.charIn(BitSetUtil.union((m1, b1) :: (m2, b2) :: Nil))) + case (Void(l1), Void(r1)) => + merge(l1, r1).map(_.map(_.void)) + case (Str(l), Str(r)) => + // if l is a prefix of r, it matches first + // if not, then we can make a StringIn(_).void + if (r.startsWith(l)) oneONEL(left) + else oneONEL(StringIn(SortedSet(l, r)).void) + case (StringIn(ls), Map(Str(s1), ConstFn(s2))) if s1 == s2 => + if (ls.exists { l => s1.startsWith(l) && (l.length < s1.length) }) None + else oneONEL(StringIn(ls + s1)) + case (Map(Str(l), ConstFn(s2)), StringIn(rs)) if l == s2 => + // any string in rs that doesn't have a substring in ls can be moved + // over, since substrings would match first in oneOf but not StringIn + val (bad, good) = rs.partition { s => + s.startsWith(l) && (l.length < s.length) } - - val frontSet: SortedSet[String] = - front match { - case Right(si) => si.sorted - case Left(s) => s - } - // returns if there is a strict prefix (equality does not count) - def frontHasPrefixOf(s: String): Boolean = - frontSet.exists { f => s.startsWith(f) && (f.length != s.length) } - - ps match { - case Nil => result ++ res(front) - case StringP(ci @ CharIn(_, _, _)) :: tail if frontSet.nonEmpty && !ci.allChars.exists { c => frontHasPrefixOf(c.toString) } => - // we can merge all these into the non-empty StringIn - loop(tail, Left(frontSet ++ ci.allChars.map(_.toString)), result) - case DefiniteString(s) :: tail if s.nonEmpty => - // we can't add empty string to `StringIn` - if (frontHasPrefixOf(s)) { - // there is an overlap, so we need to match what we have first, then come here - loop(tail, Left(SortedSet(s)), result ++ res(front)) + if (good.isEmpty) None + else { + val newLeft = StringIn(good + l) + if (bad.isEmpty) oneONEL(newLeft) + else if (bad.size > 1) { + Some(NonEmptyList(newLeft, StringIn(bad) :: Nil)) } else { - // there is no overlap in the tree, just merge it in: - loop(tail, Left(frontSet + s), result) + Some(NonEmptyList(newLeft, Str(bad.head).string :: Nil)) } - case (si @ StringIn(ss)) :: tail => - val (rights, lefts) = ss.partition(frontHasPrefixOf(_)) - if (rights.nonEmpty) { - // there are some that can't be merged in - val front1 = - if (lefts.nonEmpty) Left(frontSet | lefts) - else front - - loop(tail, Left(rights), result ++ res(front1)) + } + case (StringIn(ls), StringIn(rs)) => + // any string in rs that doesn't have a substring in ls can be moved + // over, since substrings would match first in oneOf but not StringIn + val (bad, good) = rs.partition { s => + ls.exists { l => s.startsWith(l) && (l.length < s.length) } + } + if (good.isEmpty) None + else { + val newLeft = StringIn(ls | good) + if (bad.isEmpty) oneONEL(newLeft) + else if (bad.size > 1) { + Some(NonEmptyList(newLeft, StringIn(bad) :: Nil)) } else { - // everything can be merged in - val newFront = - if (frontSet.isEmpty) Right(si) - else Left(frontSet | lefts) - loop(tail, newFront, result) + Some(NonEmptyList(newLeft, Str(bad.head).string :: Nil)) } - case h :: tail => - loop(tail, reset, (result ++ res(front)) :+ h) - } + } + case (StringP(l1), StringP(r1)) => + merge(l1, r1).map(_.map(_.string)) + case (AnyChar, FailWith(_)) => + // preserve the failWith message + None + case (AnyChar, _) => + // a parser must consume to advance, and AnyChar will succeed + oneONEL(left) + case _ => None } - loop(ps, reset, Chain.nil).toList - } - case object AnyChar extends Parser[Char] { override def parseMut(state: State): Char = { val offset = state.offset diff --git a/core/shared/src/main/scala/cats/parse/RadixNode.scala b/core/shared/src/main/scala/cats/parse/RadixNode.scala index 444f9b69..ac9a9634 100644 --- a/core/shared/src/main/scala/cats/parse/RadixNode.scala +++ b/core/shared/src/main/scala/cats/parse/RadixNode.scala @@ -55,7 +55,6 @@ private[parse] final class RadixNode( else (matched :: rest.filterNot(_ == matched).toList) } - final def matchesWithPrefix(prefix: String): List[String] = matchesWithPrefixLoop(prefix, 0, prefix.length) @@ -64,8 +63,7 @@ private[parse] final class RadixNode( if (start >= end) { // this is the empty string, all the strings start with empty allStrings - } - else { + } else { // start < end, there is at least another character to match val c = prefix.charAt(start) val idx = c.toInt & bitMask @@ -76,8 +74,7 @@ private[parse] final class RadixNode( val len = math.min(end - start, p.length) if (prefix.regionMatches(start, p, 0, len)) { children(idx).matchesWithPrefixLoop(prefix, start + len, end) - } - else Nil + } else Nil } } diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala b/core/shared/src/test/scala/cats/parse/ParserTest.scala index d070aafe..090cf7ea 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -847,15 +847,19 @@ class ParserTest extends munit.ScalaCheckSuite { property("oneOf0 composes as expected") { forAll(ParserGen.gen0, ParserGen.gen0, Arbitrary.arbitrary[String]) { (genP1, genP2, str) => - assertEquals(genP1.fa.orElse(genP2.fa).parse(str).leftMap(_.offsets), - orElse(genP1.fa, genP2.fa, str).leftMap(_.offsets)) + assertEquals( + genP1.fa.orElse(genP2.fa).parse(str).leftMap(_.offsets), + orElse(genP1.fa, genP2.fa, str).leftMap(_.offsets) + ) } } property("oneOf composes as expected") { forAll(ParserGen.gen, ParserGen.gen, Arbitrary.arbitrary[String]) { (genP1, genP2, str) => - assertEquals(genP1.fa.orElse(genP2.fa).parse(str).leftMap(_.offsets), - orElse(genP1.fa, genP2.fa, str).leftMap(_.offsets)) + assertEquals( + genP1.fa.orElse(genP2.fa).parse(str).leftMap(_.offsets), + orElse(genP1.fa, genP2.fa, str).leftMap(_.offsets) + ) } } @@ -2428,7 +2432,10 @@ class ParserTest extends munit.ScalaCheckSuite { val left = Parser.oneOf0(as.map(_.fa.string)) val right = Parser.oneOf0[Any](as.map(_.fa)).string - assertEquals(left.parse(toParse).leftMap(_.offsets), right.parse(toParse).leftMap(_.offsets)) + assertEquals( + left.parse(toParse).leftMap(_.offsets), + right.parse(toParse).leftMap(_.offsets) + ) } } @@ -2438,7 +2445,10 @@ class ParserTest extends munit.ScalaCheckSuite { val left = Parser.oneOf(as.map(_.fa.string)) val right = Parser.oneOf[Any](as.map(_.fa)).string - assertEquals(left.parse(toParse).leftMap(_.offsets), right.parse(toParse).leftMap(_.offsets)) + assertEquals( + left.parse(toParse).leftMap(_.offsets), + right.parse(toParse).leftMap(_.offsets) + ) } } @@ -2601,4 +2611,13 @@ class ParserTest extends munit.ScalaCheckSuite { } } } + + property("stringIn(s).void.string == stringIn(s)") { + forAll { (ss0: List[String]) => + val ss = ss0.filter(_.nonEmpty) + val si = Parser.stringIn(ss) + + assertEquals(si.void.string, si) + } + } } diff --git a/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala b/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala index d25df970..595ecfb1 100644 --- a/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala +++ b/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala @@ -200,7 +200,8 @@ class RadixNodeTest extends munit.ScalaCheckSuite { forAllNoShrink(Gen.oneOf(head :: tail ::: ss)) { prefix => assertEquals( rn.matchesWithPrefix(prefix).sorted, - ss.filter(_.startsWith(prefix)).distinct.sorted) + ss.filter(_.startsWith(prefix)).distinct.sorted + ) } } } From 0d83c4f4c227ebb5fb0849f9d5070d6d3ffa7722 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Wed, 2 Feb 2022 16:27:33 -1000 Subject: [PATCH 03/15] use DefiniteString more --- .../src/main/scala/cats/parse/Parser.scala | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index 423f720e..da845752 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -2804,20 +2804,27 @@ object Parser { (left, right) match { case (Fail(), _) => oneONEL(right) case (_, Fail()) => oneONEL(left) + case (Void(l1), Void(r1)) => + merge(l1, r1).map(_.map(_.void)) + case (StringP(l1), StringP(r1)) => + merge(l1, r1).map(_.map(_.string)) case (CharIn(_, _, _), AnyChar) => oneONEL(AnyChar) case (CharIn(m1, b1, _), CharIn(m2, b2, _)) => oneONEL(Parser.charIn(BitSetUtil.union((m1, b1) :: (m2, b2) :: Nil))) - case (Void(l1), Void(r1)) => - merge(l1, r1).map(_.map(_.void)) case (Str(l), Str(r)) => // if l is a prefix of r, it matches first // if not, then we can make a StringIn(_).void if (r.startsWith(l)) oneONEL(left) else oneONEL(StringIn(SortedSet(l, r)).void) - case (StringIn(ls), Map(Str(s1), ConstFn(s2))) if s1 == s2 => + case (DefiniteString(l), DefiniteString(r)) => + // if l is a prefix of r, it matches first + // if not, then we can make a StringIn(_).void + if (r.startsWith(l)) oneONEL(left) + else oneONEL(StringIn(SortedSet(l, r)).asInstanceOf[Parser[A]]) + case (StringIn(ls), DefiniteString(s1)) => if (ls.exists { l => s1.startsWith(l) && (l.length < s1.length) }) None else oneONEL(StringIn(ls + s1)) - case (Map(Str(l), ConstFn(s2)), StringIn(rs)) if l == s2 => + case (DefiniteString(l), StringIn(rs)) => // any string in rs that doesn't have a substring in ls can be moved // over, since substrings would match first in oneOf but not StringIn val (bad, good) = rs.partition { s => @@ -2849,8 +2856,6 @@ object Parser { Some(NonEmptyList(newLeft, Str(bad.head).string :: Nil)) } } - case (StringP(l1), StringP(r1)) => - merge(l1, r1).map(_.map(_.string)) case (AnyChar, FailWith(_)) => // preserve the failWith message None From 7cdb219daf3f14abf6c7b268f7a2c11fc2a3bf08 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Thu, 3 Feb 2022 14:16:59 -1000 Subject: [PATCH 04/15] try to fix tests --- .../src/main/scala/cats/parse/Parser.scala | 102 +++++++++--------- .../test/scala/cats/parse/ParserTest.scala | 40 +++++-- 2 files changed, 82 insertions(+), 60 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index da845752..8568a4cb 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -1003,8 +1003,7 @@ object Parser { case h :: Nil => loop(Nil, h :: acc) case h1 :: (tail @ (h2 :: tail2)) => Impl.merge(h1, h2) match { - case Some(NonEmptyList(h, Nil)) => loop(h :: tail2, acc) - case Some(NonEmptyList(h, t)) => loop(t ::: tail2, h :: acc) + case Some(h) => loop(h :: tail2, acc) case None => loop(tail, h1 :: acc) } } @@ -1038,8 +1037,7 @@ object Parser { case h :: Nil => loop(Nil, h :: acc) case h1 :: (tail @ (h2 :: tail2)) => Impl.merge0(h1, h2) match { - case Some(NonEmptyList(h, Nil)) => loop(h :: tail2, acc) - case Some(NonEmptyList(h, t)) => loop(t ::: tail2, h :: acc) + case Some(h) => loop(h :: tail2, acc) case None => loop(tail, h1 :: acc) } } @@ -2790,78 +2788,78 @@ object Parser { } } - private def oneONEL[A](a: A): Option[NonEmptyList[A]] = Some(NonEmptyList(a, Nil)) - - def merge0[A](left: Parser0[A], right: Parser0[A]): Option[NonEmptyList[Parser0[A]]] = + def merge0[A](left: Parser0[A], right: Parser0[A]): Option[Parser0[A]] = (left, right) match { case (l1: Parser[A], r1: Parser[A]) => merge(l1, r1) - case (_, _) if alwaysSucceeds(left) => oneONEL(left) - case (Fail(), _) => oneONEL(right) - case (_, Fail()) => oneONEL(left) + case (_, _) if alwaysSucceeds(left) => Some(left) + case (Fail(), _) => Some(right) + case (_, Fail()) => Some(left) + case (left, OneOf0(h :: t)) => + merge0(left, h).map { h1 => OneOf0(h1 :: t) } + case (left, OneOf(h :: t)) => + merge0(left, h).map { + case h: Parser[A] => OneOf(h :: t) + case h0 => OneOf0(h0 :: t) + } + case (OneOf0(ls), right) => + merge0(ls.last, right).map { l1 => OneOf0(ls.init :+ l1) } + case (OneOf(ls), right) => + merge0(ls.last, right).map { + case l: Parser[A] => OneOf(ls.init :+ l) + case l => OneOf0(ls.init :+ l) + } case _ => None } - def merge[A](left: Parser[A], right: Parser[A]): Option[NonEmptyList[Parser[A]]] = + + def merge[A](left: Parser[A], right: Parser[A]): Option[Parser[A]] = (left, right) match { - case (Fail(), _) => oneONEL(right) - case (_, Fail()) => oneONEL(left) + case (Fail(), _) => Some(right) + case (_, Fail()) => Some(left) + case (left, OneOf(h :: t)) => + merge(left, h).map { h1 => OneOf(h1 :: t) } + case (OneOf(ls), right) => + merge(ls.last, right).map { l1 => OneOf(ls.init :+ l1) } case (Void(l1), Void(r1)) => - merge(l1, r1).map(_.map(_.void)) + merge(l1, r1).map(_.void) case (StringP(l1), StringP(r1)) => - merge(l1, r1).map(_.map(_.string)) - case (CharIn(_, _, _), AnyChar) => oneONEL(AnyChar) + merge(l1, r1).map(_.string) + case (CharIn(_, _, _), AnyChar) => Some(AnyChar) + case (AnyChar, CharIn(_, _, _)) => Some(AnyChar) case (CharIn(m1, b1, _), CharIn(m2, b2, _)) => - oneONEL(Parser.charIn(BitSetUtil.union((m1, b1) :: (m2, b2) :: Nil))) + Some(Parser.charIn(BitSetUtil.union((m1, b1) :: (m2, b2) :: Nil))) case (Str(l), Str(r)) => // if l is a prefix of r, it matches first // if not, then we can make a StringIn(_).void - if (r.startsWith(l)) oneONEL(left) - else oneONEL(StringIn(SortedSet(l, r)).void) + if (r.startsWith(l)) Some(left) + else Some(StringIn(SortedSet(l, r)).void) case (DefiniteString(l), DefiniteString(r)) => // if l is a prefix of r, it matches first // if not, then we can make a StringIn(_).void - if (r.startsWith(l)) oneONEL(left) - else oneONEL(StringIn(SortedSet(l, r)).asInstanceOf[Parser[A]]) + if (r.startsWith(l)) Some(left) + else Some(StringIn(SortedSet(l, r)).asInstanceOf[Parser[A]]) case (StringIn(ls), DefiniteString(s1)) => - if (ls.exists { l => s1.startsWith(l) && (l.length < s1.length) }) None - else oneONEL(StringIn(ls + s1)) + if (ls.exists { l => s1.startsWith(l) && (l.length <= s1.length) }) { + // if left didn't match, then s1 can't match + Some(left) + } else Some(StringIn(ls + s1)) case (DefiniteString(l), StringIn(rs)) => - // any string in rs that doesn't have a substring in ls can be moved - // over, since substrings would match first in oneOf but not StringIn - val (bad, good) = rs.partition { s => - s.startsWith(l) && (l.length < s.length) - } - if (good.isEmpty) None + // We know if we go to rs that l did + // not match so nothing in rs can have l as a prefix + val good = rs.filterNot(_.startsWith(l)) + if (good.isEmpty) Some(left) else { - val newLeft = StringIn(good + l) - if (bad.isEmpty) oneONEL(newLeft) - else if (bad.size > 1) { - Some(NonEmptyList(newLeft, StringIn(bad) :: Nil)) - } else { - Some(NonEmptyList(newLeft, Str(bad.head).string :: Nil)) - } + Some(StringIn(good + l)) } case (StringIn(ls), StringIn(rs)) => // any string in rs that doesn't have a substring in ls can be moved // over, since substrings would match first in oneOf but not StringIn - val (bad, good) = rs.partition { s => - ls.exists { l => s.startsWith(l) && (l.length < s.length) } + val canMatch = rs.filterNot { s => + ls.exists { l => s.startsWith(l) && (l.length <= s.length) } } - if (good.isEmpty) None + if (canMatch.isEmpty) Some(left) else { - val newLeft = StringIn(ls | good) - if (bad.isEmpty) oneONEL(newLeft) - else if (bad.size > 1) { - Some(NonEmptyList(newLeft, StringIn(bad) :: Nil)) - } else { - Some(NonEmptyList(newLeft, Str(bad.head).string :: Nil)) - } + Some(StringIn(ls | canMatch)) } - case (AnyChar, FailWith(_)) => - // preserve the failWith message - None - case (AnyChar, _) => - // a parser must consume to advance, and AnyChar will succeed - oneONEL(left) case _ => None } diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala b/core/shared/src/test/scala/cats/parse/ParserTest.scala index 090cf7ea..20ae362e 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -845,21 +845,45 @@ class ParserTest extends munit.ScalaCheckSuite { case right => right } + def oneOfLaw(left: Parser0[Any], right: Parser0[Any], str: String) = { + assertEquals( + left.orElse(right).parse(str).leftMap(_.offsets), + orElse(left, right, str).leftMap(_.offsets) + ) + } + property("oneOf0 composes as expected") { forAll(ParserGen.gen0, ParserGen.gen0, Arbitrary.arbitrary[String]) { (genP1, genP2, str) => - assertEquals( - genP1.fa.orElse(genP2.fa).parse(str).leftMap(_.offsets), - orElse(genP1.fa, genP2.fa, str).leftMap(_.offsets) - ) + oneOfLaw(genP1.fa, genP2.fa, str) } } property("oneOf composes as expected") { forAll(ParserGen.gen, ParserGen.gen, Arbitrary.arbitrary[String]) { (genP1, genP2, str) => - assertEquals( - genP1.fa.orElse(genP2.fa).parse(str).leftMap(_.offsets), - orElse(genP1.fa, genP2.fa, str).leftMap(_.offsets) - ) + oneOfLaw(genP1.fa, genP2.fa, str) + } + } + + property("check some specific oneOf compositions") { + val pairs: List[(Parser0[Any], Parser0[Any])] = + (Parser.string("foo").string, Parser.stringIn("foo" :: "bar" :: "foobar" :: Nil)) :: + (Parser.stringIn("foo" :: "quux" :: Nil), Parser.string("foobar").string) :: + (Parser.stringIn("foo" :: "quux" :: Nil), Parser.char('f').string) :: + (Parser.stringIn("foo" :: "quux" :: Nil), Parser.stringIn("foo" :: "quux" :: Nil)) :: + ( + Parser.stringIn("foo" :: "quux" :: "bar" :: Nil), + Parser.stringIn("foo" :: "quux" :: Nil) + ) :: + ( + Parser.stringIn("foo" :: "quux" :: Nil), + Parser.stringIn("foo" :: "quux" :: "bar" :: Nil) + ) :: + Nil + + forAll { (str: String) => + pairs.foreach { case (p1, p2) => + oneOfLaw(p1, p2, str) + } } } From b19f60d10ac675f36028c0225a312555e3564c14 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Thu, 3 Feb 2022 17:27:42 -1000 Subject: [PATCH 05/15] get 3.0.2 passing --- .../src/main/scala/cats/parse/Parser.scala | 120 ++++++++++++------ .../test/scala/cats/parse/ParserTest.scala | 2 +- 2 files changed, 84 insertions(+), 38 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index 8568a4cb..4cc30970 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -1007,11 +1007,7 @@ object Parser { case None => loop(tail, h1 :: acc) } } - val flattened = parsers.flatMap { - case Impl.OneOf(os) => os - case notOneOf => notOneOf :: Nil - } - loop(flattened, Nil) + loop(parsers, Nil) } /** go through the list of parsers trying each as long as they are epsilon failures (don't @@ -1041,12 +1037,7 @@ object Parser { case None => loop(tail, h1 :: acc) } } - - val flattened = ps.flatMap { - case Impl.OneOf0(os) => os - case notOneOf => notOneOf :: Nil - } - loop(flattened, Nil) + loop(ps, Nil) } /** Parse the longest matching string between alternatives. The order of the strings does not @@ -1570,9 +1561,10 @@ object Parser { */ def charWhere(fn: Char => Boolean): Parser[Char] = fn match { - case s: Set[Char] => - // Set extends function, but it is also iterable - charIn(s) + case s: Set[_] => + // Set extends function, so if the fn is a Set it has to be a Set[Char] + // but it is also iterable + charIn(s.asInstanceOf[Set[Char]]) case _ => charIn(Impl.allChars.filter(fn)) } @@ -1961,7 +1953,7 @@ object Parser { } object SingleChar { - def unapply(p: Parser[Any]): Option[Char] = + def unapply(p: Parser0[Any]): Option[Char] = p match { case CharIn(min, bs, _) if BitSetUtil.isSingleton(bs) => Some(min.toChar) case _ => None @@ -1972,9 +1964,12 @@ object Parser { def unapply(p: Parser0[Any]): Option[String] = p match { case Pure("") => Some("") - case Map(Str(e1), ConstFn(e2)) if e1 == e2 => Some(e1) - case Map(SingleChar(c), ConstFn(e: String)) if (e.length == 1) && (e.charAt(0) == c) => - Some(e) + case Map(left, ConstFn(res: String)) => + left match { + case Str(s0) if s0 == res => Some(s0) + case SingleChar(c) if (res.length == 1) && (res.charAt(0) == c) => Some(res) + case _ => None + } case _ => None } } @@ -2012,8 +2007,7 @@ object Parser { final def hasKnownResult[A](p: Parser0[A]): Option[A] = p match { case Pure(a) => Some(a) - case SingleChar(c) => - Some(c.asInstanceOf[A]) + case SingleChar(c) => Some(c.asInstanceOf[A]) case Map0(_, fn) => // scala 3.0.2 seems to fail if we inline // this match above @@ -2794,20 +2788,46 @@ object Parser { case (_, _) if alwaysSucceeds(left) => Some(left) case (Fail(), _) => Some(right) case (_, Fail()) => Some(left) - case (left, OneOf0(h :: t)) => - merge0(left, h).map { h1 => OneOf0(h1 :: t) } - case (left, OneOf(h :: t)) => - merge0(left, h).map { - case h: Parser[A] => OneOf(h :: t) - case h0 => OneOf0(h0 :: t) + case (OneOf0(_), OneOf(rs)) => + merge0(left, OneOf0(rs)) + case (OneOf(ls), OneOf0(_)) => + merge0(OneOf0(ls), right) + case (OneOf0(ls), OneOf0(rights @ (h :: t))) => + merge0(ls.last, h) match { + case None => + // just concat + Some(OneOf0(ls ::: rights)) + case Some(l1) => + val newLeft = OneOf0(ls.init :+ l1) + t match { + case rlast :: Nil => + merge0(newLeft, rlast) + case twoOrMore => + merge0(newLeft, OneOf0(twoOrMore)) + } } + case (left, OneOf0(rs @ (h :: t))) => + merge0(left, h) + .map { h1 => OneOf0(h1 :: t) } + .orElse(Some(OneOf0(left :: rs))) + case (left, OneOf(rs @ (h :: t))) => + merge0(left, h) + .map { + case h: Parser[A] => OneOf(h :: t) + case h0 => OneOf0(h0 :: t) + } + .orElse(Some(OneOf0(left :: rs))) case (OneOf0(ls), right) => - merge0(ls.last, right).map { l1 => OneOf0(ls.init :+ l1) } + merge0(ls.last, right) + .map { l1 => OneOf0(ls.init :+ l1) } + .orElse(Some(OneOf0(ls :+ right))) case (OneOf(ls), right) => - merge0(ls.last, right).map { - case l: Parser[A] => OneOf(ls.init :+ l) - case l => OneOf0(ls.init :+ l) - } + merge0(ls.last, right) + .map { + case l: Parser[A] => OneOf(ls.init :+ l) + case l => OneOf0(ls.init :+ l) + } + .orElse(Some(OneOf0(ls :+ right))) case _ => None } @@ -2815,10 +2835,28 @@ object Parser { (left, right) match { case (Fail(), _) => Some(right) case (_, Fail()) => Some(left) - case (left, OneOf(h :: t)) => - merge(left, h).map { h1 => OneOf(h1 :: t) } + case (OneOf(ls), OneOf(rights @ (h :: t))) => + merge(ls.last, h) match { + case None => + // just concat + Some(OneOf(ls ::: rights)) + case Some(l1) => + val newLeft = OneOf(ls.init :+ l1) + t match { + case rlast :: Nil => + merge(newLeft, rlast) + case twoOrMore => + merge(newLeft, OneOf(twoOrMore)) + } + } + case (left, OneOf(rs @ (h :: t))) => + merge(left, h) + .map { h1 => OneOf(h1 :: t) } + .orElse(Some(OneOf(left :: rs))) case (OneOf(ls), right) => - merge(ls.last, right).map { l1 => OneOf(ls.init :+ l1) } + merge(ls.last, right) + .map { l1 => OneOf(ls.init :+ l1) } + .orElse(Some(OneOf(ls :+ right))) case (Void(l1), Void(r1)) => merge(l1, r1).map(_.void) case (StringP(l1), StringP(r1)) => @@ -2836,7 +2874,17 @@ object Parser { // if l is a prefix of r, it matches first // if not, then we can make a StringIn(_).void if (r.startsWith(l)) Some(left) - else Some(StringIn(SortedSet(l, r)).asInstanceOf[Parser[A]]) + else + Some { + val res = + if (l.length == 1 && r.length == 1) { + charIn(l.head :: r.head :: Nil).string + } else { + StringIn(SortedSet(l, r)) + } + + res.asInstanceOf[Parser[A]] + } case (StringIn(ls), DefiniteString(s1)) => if (ls.exists { l => s1.startsWith(l) && (l.length <= s1.length) }) { // if left didn't match, then s1 can't match @@ -2883,8 +2931,6 @@ object Parser { override def toString = s"CharIn($min, bitSet = ..., $ranges)" - def allChars: Iterable[Char] = BitSetUtil.union((min, bitSet) :: Nil) - def makeError(offset: Int): Chain[Expectation] = { var result = Chain.empty[Expectation] var aux = ranges.toList diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala b/core/shared/src/test/scala/cats/parse/ParserTest.scala index 20ae362e..3cc043e7 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -71,7 +71,7 @@ object ParserGen { } implicit val cogenCaret: Cogen[Caret] = - Cogen { caret: Caret => + Cogen { (caret: Caret) => (caret.offset.toLong << 32) | (caret.col.toLong << 16) | (caret.line.toLong) } From 853e1a2513e3b6cd45d23a663cb338d08b9e8ba5 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Thu, 3 Feb 2022 17:42:54 -1000 Subject: [PATCH 06/15] fix mima, improve oneOf --- build.sbt | 27 ++++++++++++++---- .../src/main/scala/cats/parse/Parser.scala | 28 ++++++++++++++++--- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/build.sbt b/build.sbt index 5696119a..dab99733 100644 --- a/build.sbt +++ b/build.sbt @@ -135,18 +135,33 @@ lazy val core = crossProject(JSPlatform, JVMPlatform) if (isScala211) Set.empty else mimaPreviousArtifacts.value }, mimaBinaryIssueFilters ++= { + /* + * It is okay to filter anything in Impl or RadixNode which are private + */ if (tlIsScala3.value) List( - ProblemFilters.exclude[IncompatibleResultTypeProblem]("cats.parse.Parser#State.error"), - ProblemFilters.exclude[IncompatibleMethTypeProblem]("cats.parse.Parser#State.error_="), - ProblemFilters.exclude[IncompatibleMethTypeProblem]("cats.parse.RadixNode.this"), + ProblemFilters.exclude[DirectMissingMethodProblem]("cats.parse.Parser#Impl.mergeCharIn"), + ProblemFilters.exclude[DirectMissingMethodProblem]("cats.parse.Parser#Impl.mergeStrIn"), + ProblemFilters.exclude[DirectMissingMethodProblem]("cats.parse.RadixNode.children"), ProblemFilters.exclude[DirectMissingMethodProblem]("cats.parse.RadixNode.fsts"), ProblemFilters.exclude[DirectMissingMethodProblem]("cats.parse.RadixNode.prefixes"), - ProblemFilters.exclude[DirectMissingMethodProblem]("cats.parse.RadixNode.children"), ProblemFilters.exclude[DirectMissingMethodProblem]("cats.parse.RadixNode.word"), - ProblemFilters.exclude[FinalClassProblem]("cats.parse.RadixNode") + ProblemFilters.exclude[FinalClassProblem]("cats.parse.RadixNode"), + ProblemFilters.exclude[IncompatibleMethTypeProblem]("cats.parse.Parser#State.error_="), + ProblemFilters.exclude[IncompatibleMethTypeProblem]("cats.parse.RadixNode.this"), + ProblemFilters + .exclude[IncompatibleResultTypeProblem]("cats.parse.Parser#Impl#StringIn.parseMut"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("cats.parse.Parser#Impl.stringIn"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("cats.parse.Parser#State.error") + ) + else + List( + ProblemFilters.exclude[DirectMissingMethodProblem]("cats.parse.Parser#Impl.mergeCharIn"), + ProblemFilters.exclude[DirectMissingMethodProblem]("cats.parse.Parser#Impl.mergeStrIn"), + ProblemFilters + .exclude[IncompatibleResultTypeProblem]("cats.parse.Parser#Impl#StringIn.parseMut"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("cats.parse.Parser#Impl.stringIn") ) - else Nil } ) .jsSettings( diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index 4cc30970..ca2ea36d 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -995,10 +995,20 @@ object Parser { def loop(ps: List[Parser[A]], acc: List[Parser[A]]): Parser[A] = ps match { case Nil => - acc match { + /* + * we can still have inner oneof if the head items + * were not oneof and couldn't be merged + * but the last items did have oneof + */ + val flat = acc.reverse.flatMap { + case Impl.OneOf(ps) => ps + case one => one :: Nil + } + + flat match { case Nil => Impl.Fail() case one :: Nil => one - case many => Impl.OneOf(many.reverse) + case many => Impl.OneOf(many) } case h :: Nil => loop(Nil, h :: acc) case h1 :: (tail @ (h2 :: tail2)) => @@ -1025,10 +1035,20 @@ object Parser { def loop(ps: List[Parser0[A]], acc: List[Parser0[A]]): Parser0[A] = ps match { case Nil => - acc match { + /* + * we can still have inner oneof if the head items + * were not oneof and couldn't be merged + * but the last items did have oneof + */ + val flat = acc.reverse.flatMap { + case Impl.OneOf(ps) => ps + case Impl.OneOf0(ps) => ps + case one => one :: Nil + } + flat match { case Nil => Impl.Fail() case one :: Nil => one - case many => Impl.OneOf0(many.reverse) + case many => Impl.OneOf0(many) } case h :: Nil => loop(Nil, h :: acc) case h1 :: (tail @ (h2 :: tail2)) => From 9b0757d05be4c1f8bd9c0fa8c30798011148c3f6 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Sat, 5 Feb 2022 16:03:43 -1000 Subject: [PATCH 07/15] checkpoint --- .../src/main/scala/cats/parse/Parser.scala | 100 ++++++++++++++---- .../test/scala/cats/parse/ParserTest.scala | 22 +++- 2 files changed, 101 insertions(+), 21 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index ca2ea36d..121845fd 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -1029,8 +1029,9 @@ object Parser { * * Note: order matters here, since we don't backtrack by default. */ - def oneOf0[A](ps: List[Parser0[A]]): Parser0[A] = { - + def oneOf0[A](ps: List[Parser0[A]]): Parser0[A] = + if (ps.forall(_.isInstanceOf[Parser[_]])) oneOf(ps.asInstanceOf[List[Parser[A]]]) + else { @tailrec def loop(ps: List[Parser0[A]], acc: List[Parser0[A]]): Parser0[A] = ps match { @@ -1642,13 +1643,15 @@ object Parser { pa match { case v @ Impl.Void0(_) => v case p1: Parser[_] => void(p1) - case s if Impl.alwaysSucceeds(s) => unit + case s if Impl.alwaysSucceeds0(s) => unit case _ => Impl.unmap0(pa) match { case Impl.StartParser => Impl.StartParser case Impl.EndParser => Impl.EndParser case n @ Impl.Not(_) => n case p @ Impl.Peek(_) => p + case o @ Impl.OneOf0(ps) if ps.forall(Impl.voidReturnsSame(_)) => + o.asInstanceOf[Parser0[Unit]] case other => Impl.Void0(other) } } @@ -1666,6 +1669,8 @@ object Parser { case f @ Impl.FailWith(_) => f.widen case p: Impl.Str => p case p: Impl.IgnoreCase => p + case o @ Impl.OneOf(ps) if ps.forall(Impl.voidReturnsSame(_)) => + o.asInstanceOf[Parser[Unit]] case notVoid => Impl.Void(notVoid) } } @@ -1719,7 +1724,7 @@ object Parser { def peek(pa: Parser0[Any]): Parser0[Unit] = pa match { case peek @ Impl.Peek(_) => peek - case s if Impl.alwaysSucceeds(s) => unit + case s if Impl.alwaysSucceeds0(s) => unit case notPeek => // TODO: we can adjust Rep0/Rep to do minimal // work since we rewind after we are sure there is @@ -1751,7 +1756,7 @@ object Parser { def backtrack0[A](pa: Parser0[A]): Parser0[A] = pa match { case p1: Parser[A] => backtrack(p1) - case pa if Impl.doesBacktrack(pa) => pa + case pa if Impl.doesBacktrack(pa) | Impl.eventuallySucceeds(pa) => pa case nbt => Impl.Backtrack0(nbt) } @@ -1760,7 +1765,7 @@ object Parser { */ def backtrack[A](pa: Parser[A]): Parser[A] = pa match { - case pa if Impl.doesBacktrack(pa) => pa + case pa if Impl.doesBacktrack(pa) | Impl.eventuallySucceeds(pa) => pa case nbt => Impl.Backtrack(nbt) } @@ -1775,7 +1780,7 @@ object Parser { // If b is (), such as foo.as(()) // we can just return v if (b.equals(())) voided.asInstanceOf[Parser0[B]] - else if (Impl.alwaysSucceeds(voided)) pure(b) + else if (Impl.alwaysSucceeds0(voided)) pure(b) else Impl.Map0(voided, Impl.ConstFn(b)) } @@ -1969,6 +1974,8 @@ object Parser { case SoftProd(a, b) => doesBacktrackCheat(a) && doesBacktrack(b) case WithContextP(_, p) => doesBacktrack(p) case WithContextP0(_, p) => doesBacktrack(p) + case OneOf(ps) => ps.forall(doesBacktrackCheat(_)) + case OneOf0(ps) => ps.forall(doesBacktrackCheat(_)) case _ => false } @@ -2005,16 +2012,33 @@ object Parser { case _ => false } - // does this parser always succeed? + // does this parser always succeed without consuming input // note: a parser1 does not always succeed // and by construction, a oneOf0 never always succeeds - final def alwaysSucceeds(p: Parser0[Any]): Boolean = + final def alwaysSucceeds0(p: Parser0[Any]): Boolean = + p match { + case Index | GetCaret | Pure(_) => true + case Map0(p, _) => alwaysSucceeds0(p) + case SoftProd0(a, b) => alwaysSucceeds0(a) && alwaysSucceeds0(b) + case Prod0(a, b) => alwaysSucceeds0(a) && alwaysSucceeds0(b) + case WithContextP0(_, p) => alwaysSucceeds0(p) + // by construction we never build a Not(Fail()) since + // it would just be the same as unit + // case Not(Fail() | FailWith(_)) => true + case _ => false + } + + // does this parser always eventually succeed (maybe consuming input) + // note, Parser1 has to consume, but may get an empty string, so can't + // always succeed + final def eventuallySucceeds(p: Parser0[Any]): Boolean = p match { case Index | GetCaret | Pure(_) => true - case Map0(p, _) => alwaysSucceeds(p) - case SoftProd0(a, b) => alwaysSucceeds(a) && alwaysSucceeds(b) - case Prod0(a, b) => alwaysSucceeds(a) && alwaysSucceeds(b) - case WithContextP0(_, p) => alwaysSucceeds(p) + case Map0(p, _) => eventuallySucceeds(p) + case SoftProd0(a, b) => eventuallySucceeds(a) && eventuallySucceeds(b) + case Prod0(a, b) => eventuallySucceeds(a) && eventuallySucceeds(b) + case WithContextP0(_, p) => eventuallySucceeds(p) + case OneOf0(ps) => eventuallySucceeds(ps.last) // by construction we never build a Not(Fail()) since // it would just be the same as unit // case Not(Fail() | FailWith(_)) => true @@ -2088,14 +2112,23 @@ object Parser { // these are always unit someUnit.asInstanceOf[Option[A]] case Rep0(_, _, _) | Rep(_, _, _, _) | FlatMap0(_, _) | FlatMap(_, _) | TailRecM(_, _) | - TailRecM0(_, _) | Defer(_) | Defer0(_) | GetCaret | Index | OneOf(_) | OneOf0(_) | + TailRecM0(_, _) | Defer(_) | Defer0(_) | GetCaret | Index | Length(_) | Fail() | FailWith(_) | CharIn(_, _, _) | AnyChar | StringP( _ - ) | StringP0(_) | Select(_, _) | Select0(_, _) | StringIn(_) => + ) | OneOf(Nil) | OneOf0(Nil) | StringP0(_) | Select(_, _) | Select0(_, _) | StringIn(_) => // these we don't know the value fundamentally or by construction None } + /* + * If we call void on this does it return the same thing? + */ + def voidReturnsSame[A](p: Parser0[A]): Boolean = + p match{ + case Fail() | FailWith(_) => true + case _ => hasKnownResult(p) == someUnit + } + /** This removes any trailing map functions which can cause wasted allocations if we are later * going to void or return strings. This stops at StringP or VoidP since those are markers that * anything below has already been transformed @@ -2104,7 +2137,7 @@ object Parser { pa match { case p1: Parser[Any] => unmap(p1) case GetCaret | Index | Pure(_) => Parser.unit - case s if alwaysSucceeds(s) => Parser.unit + case s if alwaysSucceeds0(s) => Parser.unit case Map0(p, _) => // we discard any allocations done by fn unmap0(p) @@ -2805,12 +2838,22 @@ object Parser { def merge0[A](left: Parser0[A], right: Parser0[A]): Option[Parser0[A]] = (left, right) match { case (l1: Parser[A], r1: Parser[A]) => merge(l1, r1) - case (_, _) if alwaysSucceeds(left) => Some(left) + case (_, _) if eventuallySucceeds(left) => Some(left) case (Fail(), _) => Some(right) case (_, Fail()) => Some(left) + case (Void0(vl), Void0(vr)) => + merge0(vl, vr).map(_.void) + case (Void0(vl), right) if hasKnownResult(right) == someUnit => + merge0(vl, right).map(_.void) + case (Void(vl), right) if hasKnownResult(right) == someUnit => + merge0(vl, right).map(_.void) + case (left, Void0(vr)) if hasKnownResult(left) == someUnit => + merge0(left, vr).map(_.void) + case (left, Void(vr)) if hasKnownResult(left) == someUnit => + merge0(left, vr).map(_.void) case (OneOf0(_), OneOf(rs)) => merge0(left, OneOf0(rs)) - case (OneOf(ls), OneOf0(_)) => + case (OneOf(ls), OneOf0(_)) => merge0(OneOf0(ls), right) case (OneOf0(ls), OneOf0(rights @ (h :: t))) => merge0(ls.last, h) match { @@ -2877,8 +2920,8 @@ object Parser { merge(ls.last, right) .map { l1 => OneOf(ls.init :+ l1) } .orElse(Some(OneOf(ls :+ right))) - case (Void(l1), Void(r1)) => - merge(l1, r1).map(_.void) + case (Void(vl), Void(vr)) => + merge(vl, vr).map(_.void) case (StringP(l1), StringP(r1)) => merge(l1, r1).map(_.string) case (CharIn(_, _, _), AnyChar) => Some(AnyChar) @@ -2910,6 +2953,11 @@ object Parser { // if left didn't match, then s1 can't match Some(left) } else Some(StringIn(ls + s1)) + case (Void(StringIn(ls)), Str(s1)) => + if (ls.exists { l => s1.startsWith(l) && (l.length <= s1.length) }) { + // if left didn't match, then s1 can't match + Some(left) + } else Some(Void(StringIn(ls + s1))) case (DefiniteString(l), StringIn(rs)) => // We know if we go to rs that l did // not match so nothing in rs can have l as a prefix @@ -2918,6 +2966,14 @@ object Parser { else { Some(StringIn(good + l)) } + case (Str(l), Void(StringIn(rs))) => + // We know if we go to rs that l did + // not match so nothing in rs can have l as a prefix + val good = rs.filterNot(_.startsWith(l)) + if (good.isEmpty) Some(left) + else { + Some(Void(StringIn(good + l))) + } case (StringIn(ls), StringIn(rs)) => // any string in rs that doesn't have a substring in ls can be moved // over, since substrings would match first in oneOf but not StringIn @@ -2928,6 +2984,10 @@ object Parser { else { Some(StringIn(ls | canMatch)) } + case (Void(vl), right) if hasKnownResult(right) == someUnit => + merge(vl, right).map(_.void) + case (left, Void(vr)) if hasKnownResult(left) == someUnit => + merge(left, vr).map(_.void) case _ => None } diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala b/core/shared/src/test/scala/cats/parse/ParserTest.scala index 3cc043e7..9594c93d 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -600,7 +600,7 @@ class ParserTest extends munit.ScalaCheckSuite { import ParserGen.{arbParser0, arbParser, biasSmall} - val tests: Int = if (BitSetUtil.isScalaJs) 50 else 2000 + val tests: Int = if (BitSetUtil.isScalaJs) 50 else 200 override def scalaCheckTestParameters = super.scalaCheckTestParameters @@ -2644,4 +2644,24 @@ class ParserTest extends munit.ScalaCheckSuite { assertEquals(si.void.string, si) } } + + property("a | b is associative") { + def law(a: Parser0[Any], b: Parser0[Any], c: Parser0[Any]) = + assertEquals((a | b) | c, a | (b | c)) + val regressions: List[(Parser0[Any], Parser0[Any], Parser0[Any])] = + (Parser.char('c'), Parser.unit, Parser.char('b')) :: + (Parser.string("foo"), Parser.string("bar"), Parser.string("cow")) :: + (Parser.char('a'), Parser.char('b'), Parser.char('c')) :: + (Parser.end, Parser.string("foo"), Parser.char('c')) :: + Nil + + regressions.foreach { case (a, b, c) => law(a, b, c) } + + forAll(ParserGen.gen, ParserGen.gen, ParserGen.gen) { (a, b, c) => + law(a.fa, b.fa, c.fa) + } && + forAll(ParserGen.gen0, ParserGen.gen0, ParserGen.gen0) { (a, b, c) => + law(a.fa, b.fa, c.fa) + } + } } From 1ed7acddd86967d7746ab307e80d9771d8afd284 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Sun, 6 Feb 2022 20:58:22 -1000 Subject: [PATCH 08/15] get tests passing --- .../src/main/scala/cats/parse/Parser.scala | 455 ++++++++++++------ .../test/scala/cats/parse/ParserTest.scala | 190 ++++++-- 2 files changed, 447 insertions(+), 198 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index 121845fd..d85fce25 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -1008,13 +1008,36 @@ object Parser { flat match { case Nil => Impl.Fail() case one :: Nil => one - case many => Impl.OneOf(many) + case many => + many.traverse[Option, Parser[Any]] { + case Impl.StringP(p) => Some(p) + case _ => None + } match { + case Some(m0) => + Impl + .StringP(Impl.OneOf(m0)) + .asInstanceOf[Parser[A]] + case None => + many.traverse[Option, Parser[Any]] { + case Impl.Void(p) => Some(p) + case _ => None + } match { + case Some(m0) => + Impl + .Void(Impl.OneOf(m0)) + .asInstanceOf[Parser[A]] + case None => + Impl.OneOf(many) + } + } } case h :: Nil => loop(Nil, h :: acc) - case h1 :: (tail @ (h2 :: tail2)) => + case h1 :: (t1 @ (h2 :: tail2)) => Impl.merge(h1, h2) match { - case Some(h) => loop(h :: tail2, acc) - case None => loop(tail, h1 :: acc) + case Impl.OneOf(a :: b :: Nil) if (a eq h1) && (b eq h2) => + loop(t1, h1 :: acc) + case h => + loop(h :: tail2, acc) } } loop(parsers, Nil) @@ -1032,34 +1055,38 @@ object Parser { def oneOf0[A](ps: List[Parser0[A]]): Parser0[A] = if (ps.forall(_.isInstanceOf[Parser[_]])) oneOf(ps.asInstanceOf[List[Parser[A]]]) else { - @tailrec - def loop(ps: List[Parser0[A]], acc: List[Parser0[A]]): Parser0[A] = - ps match { - case Nil => - /* - * we can still have inner oneof if the head items - * were not oneof and couldn't be merged - * but the last items did have oneof - */ - val flat = acc.reverse.flatMap { - case Impl.OneOf(ps) => ps - case Impl.OneOf0(ps) => ps - case one => one :: Nil - } - flat match { - case Nil => Impl.Fail() - case one :: Nil => one - case many => Impl.OneOf0(many) - } - case h :: Nil => loop(Nil, h :: acc) - case h1 :: (tail @ (h2 :: tail2)) => - Impl.merge0(h1, h2) match { - case Some(h) => loop(h :: tail2, acc) - case None => loop(tail, h1 :: acc) - } - } - loop(ps, Nil) - } + @tailrec + def loop(ps: List[Parser0[A]], acc: List[Parser0[A]]): Parser0[A] = + ps match { + case Nil => + /* + * we can still have inner oneof if the head items + * were not oneof and couldn't be merged + * but the last items did have oneof + */ + val flat = acc.reverse.flatMap { + case Impl.OneOf(ps) => ps + case Impl.OneOf0(ps) => ps + case one => one :: Nil + } + flat match { + case Nil => Impl.Fail() + case one :: Nil => one + case many => Impl.OneOf0(many) + } + case h :: Nil => loop(Nil, h :: acc) + case h1 :: (t1 @ (h2 :: tail2)) => + Impl.merge0(h1, h2) match { + case Impl.OneOf0(a :: b :: Nil) if (a eq h1) && (b eq h2) => + loop(t1, h1 :: acc) + case Impl.OneOf(a :: b :: Nil) if (a eq h1) && (b eq h2) => + loop(t1, h1 :: acc) + case h => + loop(h :: tail2, acc) + } + } + loop(ps, Nil) + } /** Parse the longest matching string between alternatives. The order of the strings does not * matter. @@ -1641,19 +1668,13 @@ object Parser { */ def void0(pa: Parser0[Any]): Parser0[Unit] = pa match { - case v @ Impl.Void0(_) => v case p1: Parser[_] => void(p1) case s if Impl.alwaysSucceeds0(s) => unit + case v @ Impl.Void0(_) => v case _ => - Impl.unmap0(pa) match { - case Impl.StartParser => Impl.StartParser - case Impl.EndParser => Impl.EndParser - case n @ Impl.Not(_) => n - case p @ Impl.Peek(_) => p - case o @ Impl.OneOf0(ps) if ps.forall(Impl.voidReturnsSame(_)) => - o.asInstanceOf[Parser0[Unit]] - case other => Impl.Void0(other) - } + val unmapped = Impl.unmap0(pa) + if (Impl.isVoided(unmapped)) unmapped.asInstanceOf[Parser0[Unit]] + else Impl.Void0(unmapped) } /** discard the value in a Parser. This is an optimization because we remove trailing map @@ -1667,11 +1688,9 @@ object Parser { Impl.unmap(pa) match { case f @ Impl.Fail() => f.widen case f @ Impl.FailWith(_) => f.widen - case p: Impl.Str => p - case p: Impl.IgnoreCase => p - case o @ Impl.OneOf(ps) if ps.forall(Impl.voidReturnsSame(_)) => - o.asInstanceOf[Parser[Unit]] - case notVoid => Impl.Void(notVoid) + case notVoid => + if (Impl.isVoided(notVoid)) notVoid.asInstanceOf[Parser[Unit]] + else Impl.Void(notVoid) } } @@ -1756,7 +1775,8 @@ object Parser { def backtrack0[A](pa: Parser0[A]): Parser0[A] = pa match { case p1: Parser[A] => backtrack(p1) - case pa if Impl.doesBacktrack(pa) | Impl.eventuallySucceeds(pa) => pa + case pa if Impl.doesBacktrack(pa) => pa + case Impl.Void0(b) => Impl.Void0(Impl.Backtrack0(b)) case nbt => Impl.Backtrack0(nbt) } @@ -1765,7 +1785,8 @@ object Parser { */ def backtrack[A](pa: Parser[A]): Parser[A] = pa match { - case pa if Impl.doesBacktrack(pa) | Impl.eventuallySucceeds(pa) => pa + case pa if Impl.doesBacktrack(pa) => pa + case Impl.Void(b) => Impl.Void(Impl.Backtrack(b)) case nbt => Impl.Backtrack(nbt) } @@ -1803,6 +1824,8 @@ object Parser { case _ => Impl.Map(ci, Impl.ConstFn(b)) } + case f @ Impl.Fail() => f.widen + case f @ Impl.FailWith(_) => f.widen case voided => Impl.Map(voided, Impl.ConstFn(b)) } @@ -1811,12 +1834,19 @@ object Parser { /** Add a context string to Errors to aid debugging */ def withContext0[A](p0: Parser0[A], ctx: String): Parser0[A] = - Impl.WithContextP0(ctx, p0) + p0 match { + case Impl.Void0(p) => Impl.Void0(withContext0(p, ctx)).asInstanceOf[Parser0[A]] + case _ if Impl.alwaysSucceeds0(p0) => p0 + case _ => Impl.WithContextP0(ctx, p0) + } /** Add a context string to Errors to aid debugging */ def withContext[A](p: Parser[A], ctx: String): Parser[A] = - Impl.WithContextP(ctx, p) + p match { + case Impl.Void(p) => Impl.Void(withContext(p, ctx)) + case _ => Impl.WithContextP(ctx, p) + } implicit val catsInstancesParser : FlatMap[Parser] with Defer[Parser] with MonoidK[Parser] with FunctorFilter[Parser] = @@ -1972,10 +2002,12 @@ object Parser { case Map(p, _) => doesBacktrack(p) case SoftProd0(a, b) => doesBacktrackCheat(a) && doesBacktrack(b) case SoftProd(a, b) => doesBacktrackCheat(a) && doesBacktrack(b) - case WithContextP(_, p) => doesBacktrack(p) case WithContextP0(_, p) => doesBacktrack(p) - case OneOf(ps) => ps.forall(doesBacktrackCheat(_)) + case WithContextP(_, p) => doesBacktrack(p) case OneOf0(ps) => ps.forall(doesBacktrackCheat(_)) + case OneOf(ps) => ps.forall(doesBacktrackCheat(_)) + case Void0(p) => doesBacktrack(p) + case Void(p) => doesBacktrack(p) case _ => false } @@ -2112,23 +2144,42 @@ object Parser { // these are always unit someUnit.asInstanceOf[Option[A]] case Rep0(_, _, _) | Rep(_, _, _, _) | FlatMap0(_, _) | FlatMap(_, _) | TailRecM(_, _) | - TailRecM0(_, _) | Defer(_) | Defer0(_) | GetCaret | Index | - Length(_) | Fail() | FailWith(_) | CharIn(_, _, _) | AnyChar | StringP( + TailRecM0(_, _) | Defer(_) | Defer0(_) | GetCaret | Index | Length(_) | Fail() | + FailWith(_) | CharIn(_, _, _) | AnyChar | StringP( + _ + ) | OneOf(Nil) | OneOf0(Nil) | StringP0(_) | Select(_, _) | Select0(_, _) | StringIn( _ - ) | OneOf(Nil) | OneOf0(Nil) | StringP0(_) | Select(_, _) | Select0(_, _) | StringIn(_) => + ) => // these we don't know the value fundamentally or by construction None } - /* - * If we call void on this does it return the same thing? - */ - def voidReturnsSame[A](p: Parser0[A]): Boolean = - p match{ - case Fail() | FailWith(_) => true - case _ => hasKnownResult(p) == someUnit + /** return true if this is already non-allocating + * + * @param p + * the Parser to check + * @return + * true if this parser does not capture + */ + def doesNotAllocate(p: Parser0[Any]): Boolean = + p match { + case StartParser | EndParser | Void(_) | Void0(_) | IgnoreCase(_) | Str(_) | Fail() | + FailWith(_) | Pure(()) | Not(_) | Peek(_) => + true + case OneOf(ps) => ps.forall(doesNotAllocate(_)) + case OneOf0(ps) => ps.forall(doesNotAllocate(_)) + case WithContextP(_, p) => doesNotAllocate(p) + case WithContextP0(_, p) => doesNotAllocate(p) + case Backtrack(p) => doesNotAllocate(p) + case Backtrack0(p) => doesNotAllocate(p) + case Map(p, _) => doesNotAllocate(p) + case Map0(p, _) => doesNotAllocate(p) + case _ => false } + def isVoided(p: Parser0[Any]): Boolean = + doesNotAllocate(p) && (hasKnownResult(p) == someUnit) + /** This removes any trailing map functions which can cause wasted allocations if we are later * going to void or return strings. This stops at StringP or VoidP since those are markers that * anything below has already been transformed @@ -2138,9 +2189,18 @@ object Parser { case p1: Parser[Any] => unmap(p1) case GetCaret | Index | Pure(_) => Parser.unit case s if alwaysSucceeds0(s) => Parser.unit - case Map0(p, _) => - // we discard any allocations done by fn - unmap0(p) + case Map0(Void0(v), _) => + // this is common with as + v + case Map0(p, fn) => + fn match { + case ConstFn(_) => + // p must have already been unmapped + p + case _ => + // we discard any allocations done by fn + unmap0(p) + } case Select0(p, fn) => Select0(p, unmap0(fn)) case StringP0(s) => @@ -2159,7 +2219,11 @@ object Parser { // unmap0 may simplify enough // to remove the backtrack wrapper Parser.backtrack0(unmap0(p)) - case OneOf0(ps) => Parser.oneOf0(ps.map(unmap0)) + case OneOf0(ps) => + // Find the fixed point here + val next = Parser.oneOf0(ps.map(unmap0)) + if (next == pa) next + else unmap0(next) case Prod0(p1, p2) => unmap0(p1) match { case Prod0(p11, p12) => @@ -2217,9 +2281,16 @@ object Parser { */ def unmap(pa: Parser[Any]): Parser[Any] = pa match { - case Map(p, _) => - // we discard any allocations done by fn - unmap(p) + case Map(Void(v), _) => + // this is common with as + v + case Map(p, fn) => + fn match { + case ConstFn(_) => p + case _ => + // we discard any allocations done by fn + unmap(p) + } case Select(p, fn) => Select(p, unmap0(fn)) case StringP(s) => @@ -2232,7 +2303,10 @@ object Parser { // unmap may simplify enough // to remove the backtrack wrapper Parser.backtrack(unmap(p)) - case OneOf(ps) => Parser.oneOf(ps.map(unmap)) + case OneOf(ps) => + val next = Parser.oneOf(ps.map(unmap)) + if (next == pa) pa + else unmap(next) case Prod(p1, p2) => unmap0(p1) match { case Prod0(p11, p12) => @@ -2835,32 +2909,29 @@ object Parser { } } - def merge0[A](left: Parser0[A], right: Parser0[A]): Option[Parser0[A]] = + def allCharsIn(ci: CharIn): List[String] = + BitSetUtil + .union((ci.min, ci.bitSet) :: Nil) + .iterator + .map(_.toString) + .toList + + def merge0[A](left: Parser0[A], right: Parser0[A]): Parser0[A] = (left, right) match { case (l1: Parser[A], r1: Parser[A]) => merge(l1, r1) - case (_, _) if eventuallySucceeds(left) => Some(left) - case (Fail(), _) => Some(right) - case (_, Fail()) => Some(left) - case (Void0(vl), Void0(vr)) => - merge0(vl, vr).map(_.void) - case (Void0(vl), right) if hasKnownResult(right) == someUnit => - merge0(vl, right).map(_.void) - case (Void(vl), right) if hasKnownResult(right) == someUnit => - merge0(vl, right).map(_.void) - case (left, Void0(vr)) if hasKnownResult(left) == someUnit => - merge0(left, vr).map(_.void) - case (left, Void(vr)) if hasKnownResult(left) == someUnit => - merge0(left, vr).map(_.void) + case (_, _) if eventuallySucceeds(left) => left + case (Fail(), _) => right + case (_, Fail()) => left case (OneOf0(_), OneOf(rs)) => merge0(left, OneOf0(rs)) - case (OneOf(ls), OneOf0(_)) => + case (OneOf(ls), OneOf0(_)) => merge0(OneOf0(ls), right) case (OneOf0(ls), OneOf0(rights @ (h :: t))) => merge0(ls.last, h) match { - case None => + case OneOf(_) | OneOf0(_) => // just concat - Some(OneOf0(ls ::: rights)) - case Some(l1) => + OneOf0(ls ::: rights) + case l1 => val newLeft = OneOf0(ls.init :+ l1) t match { case rlast :: Nil => @@ -2870,40 +2941,58 @@ object Parser { } } case (left, OneOf0(rs @ (h :: t))) => - merge0(left, h) - .map { h1 => OneOf0(h1 :: t) } - .orElse(Some(OneOf0(left :: rs))) + merge0(left, h) match { + case OneOf(_) | OneOf0(_) => + OneOf0(left :: rs) + case h1 => + OneOf0(h1 :: t) + } case (left, OneOf(rs @ (h :: t))) => - merge0(left, h) - .map { - case h: Parser[A] => OneOf(h :: t) - case h0 => OneOf0(h0 :: t) - } - .orElse(Some(OneOf0(left :: rs))) + merge0(left, h) match { + case OneOf(_) | OneOf0(_) => + OneOf0(left :: rs) + case h1: Parser[A] => + OneOf(h1 :: t) + case h1 => + OneOf0(h1 :: t) + } case (OneOf0(ls), right) => - merge0(ls.last, right) - .map { l1 => OneOf0(ls.init :+ l1) } - .orElse(Some(OneOf0(ls :+ right))) + merge0(ls.last, right) match { + case OneOf(_) | OneOf0(_) => + OneOf0(ls :+ right) + case l1 => + OneOf0(ls.init :+ l1) + } case (OneOf(ls), right) => - merge0(ls.last, right) - .map { - case l: Parser[A] => OneOf(ls.init :+ l) - case l => OneOf0(ls.init :+ l) - } - .orElse(Some(OneOf0(ls :+ right))) - case _ => None + merge0(ls.last, right) match { + case OneOf(_) | OneOf0(_) => + OneOf0(ls :+ right) + case l: Parser[A] => OneOf(ls.init :+ l) + case l => OneOf0(ls.init :+ l) + } + case (Void0(vl), Void0(vr)) => + merge0(vl, vr).void + case (Void0(vl), right) if hasKnownResult(right) == someUnit => + merge0(vl, right).void + case (Void(vl), right) if hasKnownResult(right) == someUnit => + merge0(vl, right).void + case (left, Void0(vr)) if hasKnownResult(left) == someUnit => + merge0(left, vr).void + case (left, Void(vr)) if hasKnownResult(left) == someUnit => + merge0(left, vr).void + case _ => OneOf0(left :: right :: Nil) } - def merge[A](left: Parser[A], right: Parser[A]): Option[Parser[A]] = - (left, right) match { - case (Fail(), _) => Some(right) - case (_, Fail()) => Some(left) + def merge[A](left: Parser[A], right: Parser[A]): Parser[A] = { + val res = (left, right) match { + case (Fail(), _) => right + case (_, Fail()) => left case (OneOf(ls), OneOf(rights @ (h :: t))) => merge(ls.last, h) match { - case None => + case OneOf(_) => // just concat - Some(OneOf(ls ::: rights)) - case Some(l1) => + OneOf(ls ::: rights) + case l1 => val newLeft = OneOf(ls.init :+ l1) t match { case rlast :: Nil => @@ -2913,66 +3002,87 @@ object Parser { } } case (left, OneOf(rs @ (h :: t))) => - merge(left, h) - .map { h1 => OneOf(h1 :: t) } - .orElse(Some(OneOf(left :: rs))) + merge(left, h) match { + case OneOf(_) => + OneOf(left :: rs) + case h1 => + // maybe we can progess on t + if (t.lengthCompare(2) >= 0) + merge(h1, OneOf(t)) + else + merge(h1, t.head) + } case (OneOf(ls), right) => - merge(ls.last, right) - .map { l1 => OneOf(ls.init :+ l1) } - .orElse(Some(OneOf(ls :+ right))) - case (Void(vl), Void(vr)) => - merge(vl, vr).map(_.void) - case (StringP(l1), StringP(r1)) => - merge(l1, r1).map(_.string) - case (CharIn(_, _, _), AnyChar) => Some(AnyChar) - case (AnyChar, CharIn(_, _, _)) => Some(AnyChar) + merge(ls.last, right) match { + case OneOf(_) => + OneOf(ls :+ right) + case l1 => + val li = ls.init + if (li.lengthCompare(2) >= 0) + merge(OneOf(li), l1) + else + merge(li.head, l1) + } + case (CharIn(_, _, _), AnyChar) => AnyChar + case (AnyChar, CharIn(_, _, _) | Str(_) | StringIn(_)) => AnyChar case (CharIn(m1, b1, _), CharIn(m2, b2, _)) => - Some(Parser.charIn(BitSetUtil.union((m1, b1) :: (m2, b2) :: Nil))) + Parser.charIn(BitSetUtil.union((m1, b1) :: (m2, b2) :: Nil)) + case (Void(ci @ CharIn(_, _, _)), Str(_)) => + Parser.oneOf(allCharsIn(ci).map(Str(_)) ::: (right :: Nil)).asInstanceOf[Parser[A]] + case (StringP(ci @ CharIn(_, _, _)), DefiniteString(_) | StringIn(_)) => + // make sure we make progress... + val strs = StringIn(SortedSet(allCharsIn(ci): _*)) + merge(strs.asInstanceOf[Parser[A]], right) + case (Str(l), Void(ci @ CharIn(_, _, _))) => + Parser.oneOf(Str(l) :: allCharsIn(ci).map(Str(_))).asInstanceOf[Parser[A]] + case (DefiniteString(_) | StringIn(_), StringP(ci @ CharIn(_, _, _))) => + // make sure we make progress... + val strs = StringIn(SortedSet(allCharsIn(ci): _*)) + merge(left, strs.asInstanceOf[Parser[A]]) case (Str(l), Str(r)) => // if l is a prefix of r, it matches first // if not, then we can make a StringIn(_).void - if (r.startsWith(l)) Some(left) - else Some(StringIn(SortedSet(l, r)).void) + if (r.startsWith(l)) left + else Void(StringIn(SortedSet(l, r))) case (DefiniteString(l), DefiniteString(r)) => // if l is a prefix of r, it matches first // if not, then we can make a StringIn(_).void - if (r.startsWith(l)) Some(left) - else - Some { - val res = - if (l.length == 1 && r.length == 1) { - charIn(l.head :: r.head :: Nil).string - } else { - StringIn(SortedSet(l, r)) - } - - res.asInstanceOf[Parser[A]] - } + if (r.startsWith(l)) left + else { + val res = + if (l.length == 1 && r.length == 1) { + charIn(l.head :: r.head :: Nil).string + } else { + StringIn(SortedSet(l, r)) + } + + res.asInstanceOf[Parser[A]] + } case (StringIn(ls), DefiniteString(s1)) => if (ls.exists { l => s1.startsWith(l) && (l.length <= s1.length) }) { // if left didn't match, then s1 can't match - Some(left) - } else Some(StringIn(ls + s1)) + left + } else StringIn(ls + s1) case (Void(StringIn(ls)), Str(s1)) => if (ls.exists { l => s1.startsWith(l) && (l.length <= s1.length) }) { // if left didn't match, then s1 can't match - Some(left) - } else Some(Void(StringIn(ls + s1))) + left + } else Void(StringIn(ls + s1)) case (DefiniteString(l), StringIn(rs)) => // We know if we go to rs that l did // not match so nothing in rs can have l as a prefix val good = rs.filterNot(_.startsWith(l)) - if (good.isEmpty) Some(left) + if (good.isEmpty) left else { - Some(StringIn(good + l)) + StringIn(good + l) } case (Str(l), Void(StringIn(rs))) => // We know if we go to rs that l did // not match so nothing in rs can have l as a prefix val good = rs.filterNot(_.startsWith(l)) - if (good.isEmpty) Some(left) + if (good.isEmpty) left else { - Some(Void(StringIn(good + l))) + Void(StringIn(good + l)) } case (StringIn(ls), StringIn(rs)) => // any string in rs that doesn't have a substring in ls can be moved @@ -2980,16 +3090,45 @@ object Parser { val canMatch = rs.filterNot { s => ls.exists { l => s.startsWith(l) && (l.length <= s.length) } } - if (canMatch.isEmpty) Some(left) + if (canMatch.isEmpty) left else { - Some(StringIn(ls | canMatch)) + StringIn(ls | canMatch) + } + case (Void(StringIn(ls)), Void(ci @ CharIn(_, _, _))) => + val rs = SortedSet(allCharsIn(ci): _*) + // any string in rs that doesn't have a substring in ls can be moved + // over, since substrings would match first in oneOf but not StringIn + val canMatch = rs.filterNot { s => + ls.exists { l => s.startsWith(l) && (l.length <= s.length) } } + if (canMatch.isEmpty) left + else { + Void(StringIn(ls | canMatch)) + } + case (Void(ci @ CharIn(_, _, _)), Void(StringIn(rs))) => + val ls = SortedSet(allCharsIn(ci): _*) + // any string in rs that doesn't have a substring in ls can be moved + // over, since substrings would match first in oneOf but not StringIn + val canMatch = rs.filterNot { s => + ls.exists { l => s.startsWith(l) && (l.length <= s.length) } + } + if (canMatch.isEmpty) left + else { + Void(StringIn(ls | canMatch)) + } + case (Void(vl), Void(vr)) => + merge(vl, vr).void + case (StringP(l1), StringP(r1)) => + merge(l1, r1).string case (Void(vl), right) if hasKnownResult(right) == someUnit => - merge(vl, right).map(_.void) + merge(vl, right).void case (left, Void(vr)) if hasKnownResult(left) == someUnit => - merge(left, vr).map(_.void) - case _ => None + merge(left, vr).void + case _ => OneOf(left :: right :: Nil) } + // println(s"merge($left, $right) == $res") + res.asInstanceOf[Parser[A]] + } case object AnyChar extends Parser[Char] { override def parseMut(state: State): Char = { @@ -3049,7 +3188,10 @@ object Parser { case class Not(under: Parser0[Unit]) extends Parser0[Unit] { override def parseMut(state: State): Unit = { val offset = state.offset + val cap = state.capture + state.capture = false under.parseMut(state) + state.capture = cap if (state.error ne null) { // under failed, so we succeed state.error = null @@ -3080,7 +3222,10 @@ object Parser { case class Peek(under: Parser0[Unit]) extends Parser0[Unit] { override def parseMut(state: State): Unit = { val offset = state.offset + val cap = state.capture + state.capture = false under.parseMut(state) + state.capture = cap if (state.error eq null) { // under passed, so we succeed state.offset = offset diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala b/core/shared/src/test/scala/cats/parse/ParserTest.scala index 9594c93d..7dae2444 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -21,15 +21,15 @@ package cats.parse -import cats.{Eq, Id, FlatMap, Functor, Defer, MonoidK, Monad, Eval} import cats.arrow.FunctionK -import cats.data.NonEmptyList +import cats.data.{NonEmptyList, NonEmptyVector} +import cats.implicits._ +import cats.{Eq, Id, FlatMap, Functor, Defer, MonoidK, Monad, Eval} import org.scalacheck.Prop.forAll import org.scalacheck.{Arbitrary, Gen, Cogen} - -import cats.implicits._ import scala.util.Random -import cats.data.NonEmptyVector + +import Arbitrary.arbitrary sealed abstract class GenT[F[_]] { self => type A @@ -600,7 +600,7 @@ class ParserTest extends munit.ScalaCheckSuite { import ParserGen.{arbParser0, arbParser, biasSmall} - val tests: Int = if (BitSetUtil.isScalaJs) 50 else 200 + val tests: Int = if (BitSetUtil.isScalaJs) 50 else 2000 override def scalaCheckTestParameters = super.scalaCheckTestParameters @@ -612,6 +612,8 @@ class ParserTest extends munit.ScalaCheckSuite { // "bUQvEfxFZ73bxtVjauK8tJDrEKOFbxUfk6WrGiy3bkH=" // "PPsKExr4HRlyCXkMrC6Rki5u59V88vwSeVTiGWJFS3G=" // "Ova1uT18mkE4uTX4RdgQza6z70fxyv6micl4hIZvywP=" + // "YcGRsiTHa791rV5CIL4wYhWDofanqbYbvO418dbZnOK=" + // "6YoSspuNxqEoMosfi5J6wHgo4I4rD48Zg21XAnZtMcA=" def parseTest[A: Eq](p: Parser0[A], str: String, a: A) = p.parse(str) match { @@ -757,12 +759,23 @@ class ParserTest extends munit.ScalaCheckSuite { } } + test("string(x).void == string(x) and withContext") { + assertEquals(Parser.string("foo").void, Parser.string("foo")) + assertEquals( + Parser.string("foo").withContext("ctx").void, + Parser.string("foo").withContext("ctx") + ) + } + property("voided only changes the result") { forAll(ParserGen.gen0, Arbitrary.arbitrary[String]) { (genP, str) => - val r1 = genP.fa.parse(str) - val r2 = genP.fa.void.parse(str) - val r3 = FlatMap[Parser0].void(genP.fa).parse(str) - val r4 = genP.fa.as(()).parse(str) + def go[A](p: Parser0[A]) = + p.parse(str).leftMap(_.offsets) + + val r1 = go(genP.fa) + val r2 = go(genP.fa.void) + val r3 = go(FlatMap[Parser0].void(genP.fa)) + val r4 = go(genP.fa.as(())) assertEquals(r2, r1.map { case (off, _) => (off, ()) }) assertEquals(r2, r3) @@ -772,11 +785,14 @@ class ParserTest extends munit.ScalaCheckSuite { property("voided only changes the result Parser") { forAll(ParserGen.gen, Arbitrary.arbitrary[String]) { (genP, str) => - val r1 = genP.fa.parse(str) - val r2 = genP.fa.void.parse(str) - val r3 = FlatMap[Parser].void(genP.fa).parse(str) - val r4 = genP.fa.as(()).parse(str) - val r5 = ((genP.fa.void: Parser0[Unit]) <* Monad[Parser0].unit).parse(str) + def go[A](p: Parser0[A]) = + p.parse(str).leftMap(_.offsets) + + val r1 = go(genP.fa) + val r2 = go(genP.fa.void) + val r3 = go(FlatMap[Parser].void(genP.fa)) + val r4 = go(genP.fa.as(())) + val r5 = go((genP.fa.void: Parser0[Unit]) <* Monad[Parser0].unit) assertEquals(r2, r1.map { case (off, _) => (off, ()) }) assertEquals(r2, r3) @@ -804,7 +820,7 @@ class ParserTest extends munit.ScalaCheckSuite { val oneOf = Parser.oneOf0((genP1 ::: genP2).map(_.fa)) val oneOf2 = Parser.oneOf0(genP1.map(_.fa)).orElse(Parser.oneOf0(genP2.map(_.fa))) - assertEquals(oneOf.parse(str), oneOf2.parse(str)) + assertEquals(oneOf.parse(str).leftMap(_.offsets), oneOf2.parse(str).leftMap(_.offsets)) } } @@ -818,7 +834,7 @@ class ParserTest extends munit.ScalaCheckSuite { Parser.oneOf(genP2.map(_.fa)) ) - assertEquals(oneOf.parse(str), oneOf2.parse(str)) + assertEquals(oneOf.parse(str).leftMap(_.offsets), oneOf2.parse(str).leftMap(_.offsets)) } } @@ -893,7 +909,10 @@ class ParserTest extends munit.ScalaCheckSuite { leftp.orElse(p.fa) } - assertEquals(oneOfImpl.parse(str), Parser.oneOf0(genP1.map(_.fa)).parse(str)) + assertEquals( + oneOfImpl.parse(str).leftMap(_.offsets), + Parser.oneOf0(genP1.map(_.fa)).parse(str).leftMap(_.offsets) + ) } } @@ -901,7 +920,10 @@ class ParserTest extends munit.ScalaCheckSuite { forAll(Gen.listOf(ParserGen.gen), Arbitrary.arbitrary[String]) { (genP1, str) => val oneOfImpl = genP1.foldLeft(Parser.fail[Any]) { (leftp, p) => leftp.orElse(p.fa) } - assertEquals(oneOfImpl.parse(str), Parser.oneOf(genP1.map(_.fa)).parse(str)) + assertEquals( + oneOfImpl.parse(str).leftMap(_.offsets), + Parser.oneOf(genP1.map(_.fa)).parse(str).leftMap(_.offsets) + ) } } @@ -1378,35 +1400,40 @@ class ParserTest extends munit.ScalaCheckSuite { property("repSep0 with unit sep is the same as rep0") { + case class MinMax(min: Int, max: Int) val minMax = for { min <- biasSmall(0) max <- biasSmall(Integer.max(min, 1)) - } yield (min, max) + } yield MinMax(min, max) - forAll(ParserGen.gen, biasSmall(0), Arbitrary.arbitrary[String]) { (genP, min, str) => + forAll(ParserGen.gen, biasSmall(0), Arbitrary.arbitrary[String]) { (genP, min0, str) => + val min = min0 & Int.MaxValue // make sure it is positive + // repSep0 internally uses | which triggers rewriting optimizations val p1a = genP.fa.repSep0(min = min, sep = Parser.unit) val p1b = genP.fa.rep0(min = min) - assertEquals(p1a.parse(str), p1b.parse(str)) + assertEquals(p1a.parse(str).leftMap(_.offsets), p1b.parse(str).leftMap(_.offsets)) val min1 = if (min < 1) 1 else min val p2a = genP.fa.repSep(min = min1, sep = Parser.unit) val p2b = genP.fa.rep(min = min1) - assertEquals(p2a.parse(str), p2b.parse(str)) + assertEquals(p2a.parse(str).leftMap(_.offsets), p2b.parse(str).leftMap(_.offsets)) } && - forAll(ParserGen.gen, minMax, Arbitrary.arbitrary[String]) { case (genP, (min, max), str) => - val p1a = genP.fa.repSep0(min = min, max = max, sep = Parser.unit) - val p1b = genP.fa.rep0(min = min, max = max) + forAll(ParserGen.gen, minMax, Arbitrary.arbitrary[String]) { + case (genP, MinMax(min, max), str) => + // repSep0 internally uses | which triggers rewriting optimizations + val p1a = genP.fa.repSep0(min = min, max = max, sep = Parser.unit) + val p1b = genP.fa.rep0(min = min, max = max) - assertEquals(p1a.parse(str), p1b.parse(str)) + assertEquals(p1a.parse(str).leftMap(_.offsets), p1b.parse(str).leftMap(_.offsets)) - val min1 = if (min < 1) 1 else min - val p2a = genP.fa.repSep(min = min1, max = max, sep = Parser.unit) - val p2b = genP.fa.rep(min = min1, max = max) + val min1 = if (min < 1) 1 else min + val p2a = genP.fa.repSep(min = min1, max = max, sep = Parser.unit) + val p2b = genP.fa.rep(min = min1, max = max) - assertEquals(p2a.parse(str), p2b.parse(str)) + assertEquals(p2a.parse(str).leftMap(_.offsets), p2b.parse(str).leftMap(_.offsets)) } } @@ -2015,8 +2042,8 @@ class ParserTest extends munit.ScalaCheckSuite { val left = pa.repAs0[Unit](Accumulator0.unitAccumulator0) val right = pa.rep0.void - val leftRes = left.parse(str) - val rightRes = right.parse(str) + val leftRes = left.parse(str).leftMap(_.offsets) + val rightRes = right.parse(str).leftMap(_.offsets) assertEquals(leftRes, rightRes) } } @@ -2311,6 +2338,27 @@ class ParserTest extends munit.ScalaCheckSuite { } property("p.as(a).map(fn) == p.as(fn(a))") { + val regressions = + (Parser.defer(Parser.string("foo")).void.backtrack) :: + (Parser.defer(Parser.string("foo")).void.withContext("ctx").backtrack) :: + Nil + + regressions.foreach { p => + assertEquals(p.void.void, p.void) + assertEquals(p.void.as(1), p.as(1)) + assertEquals(p.as(1).void, p.void) + assertEquals(p.as(1).as(1), p.as(1)) + + val a = 42 + val fn = { x: Int => x + 1 } + assertEquals(p.as(a).map(fn), p.as(fn(a))) + } + assertEquals(Parser.string("foo").void, Parser.string("foo")) + assertEquals( + Parser.string("foo").withContext("bar").void, + Parser.string("foo").withContext("bar") + ) + forAll(ParserGen.gen, Gen.choose(0, 128), Gen.function1[Int, Int](Gen.choose(0, 128))) { (p, a, fn) => assertEquals(p.fa.as(a).map(fn), p.fa.as(fn(a))) @@ -2581,6 +2629,15 @@ class ParserTest extends munit.ScalaCheckSuite { } property("P.void is idempotent") { + val regressions = + ((Parser.string("aa").map(_ => 1) | Parser.string("bb").map(_ => 2)).withContext("ctx")) :: + Nil + + regressions.foreach { p => + val v = p.void + assertEquals(v.void, v) + } + forAll(ParserGen.gen) { p => val v1 = p.fa.void assertEquals(v1.void, v1) @@ -2645,23 +2702,70 @@ class ParserTest extends munit.ScalaCheckSuite { } } + property("test that some parsers unify with |") { + forAll { (s10: Set[String], s20: Set[String]) => + val s1 = s10.filter(_.length > 1) + val s2 = s20.filterNot { s => (s.length < 2) || s1.exists(s.startsWith(_)) } + assertEquals(Parser.stringIn(s1) | Parser.stringIn(s2), Parser.stringIn(s1 | s2)) + } && + forAll { (s1: Set[Char], s2: Set[Char]) => + assertEquals(Parser.charIn(s1) | Parser.charIn(s2), Parser.charIn(s1 | s2)) + } + } + property("a | b is associative") { - def law(a: Parser0[Any], b: Parser0[Any], c: Parser0[Any]) = + def strict(a: Parser0[Any], b: Parser0[Any], c: Parser0[Any]) = assertEquals((a | b) | c, a | (b | c)) + val regressions: List[(Parser0[Any], Parser0[Any], Parser0[Any])] = (Parser.char('c'), Parser.unit, Parser.char('b')) :: - (Parser.string("foo"), Parser.string("bar"), Parser.string("cow")) :: - (Parser.char('a'), Parser.char('b'), Parser.char('c')) :: - (Parser.end, Parser.string("foo"), Parser.char('c')) :: - Nil + (Parser.string("foo"), Parser.string("bar"), Parser.string("cow")) :: + ( + Parser.stringIn("foo" :: "bar" :: Nil), + Parser.string("x").string, + Parser.string("y").string + ) :: + ( + Parser.stringIn("foo" :: "bar" :: Nil), + Parser.char('x').string, + Parser.char('y').string + ) :: + (Parser.stringIn("foo" :: "bar" :: Nil).void, Parser.string("x"), Parser.string("y")) :: + (Parser.stringIn("foo" :: "bar" :: Nil).void, Parser.char('x'), Parser.char('y')) :: + (Parser.string("foo"), Parser.string("bar"), Parser.string("cow").withContext("ctx")) :: + // (Parser.string("foo"), Parser.string("bar"), Parser.unit.withContext("ctx")) :: + (Parser.char('a'), Parser.char('b'), Parser.char('c')) :: + // (Parser.end, Parser.string("foo"), Parser.char('c')) :: + (Parser.char('c'), Parser.string("foo"), Parser.ignoreCase("select")) :: + (Parser.string("aa"), Parser.string("bb"), Parser.char('c')) :: + (Parser.string("aa").string, Parser.string("bb").string, Parser.char('c').string) :: + ( + Parser.string("aa").string, + Parser.string("bb").string, + Parser.charIn('c' :: 'd' :: Nil).string + ) :: + (Parser.ignoreCase("select").string, Parser.char('a').string, Parser.char('b').string) :: + (Parser.anyChar.void, Parser.char('a'), Parser.string("foo")) :: + (Parser.anyChar, Parser.string("foo"), Parser.stringIn("bar" :: "baz" :: Nil)) :: + // (Parser.stringIn("foo" :: "bar" :: Nil).void, Parser.anyChar.void, Parser.charIn('a' :: 'b' :: Nil)) :: + Nil - regressions.foreach { case (a, b, c) => law(a, b, c) } + regressions.foreach { case (a, b, c) => strict(a, b, c) } + + def law(a: Parser0[Any], b: Parser0[Any], c: Parser0[Any], str: String) = + // We only compare the offsets of errors here because occassionally + // merges produce equivalent parsers, but different errors kinds + // since there is some overlap in how things can be expressed + assertEquals( + ((a | b) | c).parse(str).leftMap(_.offsets), + (a | (b | c)).parse(str).leftMap(_.offsets) + ) - forAll(ParserGen.gen, ParserGen.gen, ParserGen.gen) { (a, b, c) => - law(a.fa, b.fa, c.fa) + forAll(ParserGen.gen, ParserGen.gen, ParserGen.gen, arbitrary[String]) { (a, b, c, str) => + law(a.fa, b.fa, c.fa, str) } && - forAll(ParserGen.gen0, ParserGen.gen0, ParserGen.gen0) { (a, b, c) => - law(a.fa, b.fa, c.fa) + forAll(ParserGen.gen0, ParserGen.gen0, ParserGen.gen0, arbitrary[String]) { (a, b, c, str) => + law(a.fa, b.fa, c.fa, str) } } } From ac5f3aa5ec7a109fba82094078795d2c06a1478e Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Thu, 10 Feb 2022 19:03:36 -0800 Subject: [PATCH 09/15] more tests, format, mima --- .../src/main/scala/cats/parse/Parser.scala | 20 +++++------ .../test/scala/cats/parse/ParserTest.scala | 34 +++++++++++++++++-- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index d85fce25..b89679c3 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -1669,7 +1669,7 @@ object Parser { def void0(pa: Parser0[Any]): Parser0[Unit] = pa match { case p1: Parser[_] => void(p1) - case s if Impl.alwaysSucceeds0(s) => unit + case s if Impl.alwaysSucceeds(s) => unit case v @ Impl.Void0(_) => v case _ => val unmapped = Impl.unmap0(pa) @@ -1743,7 +1743,7 @@ object Parser { def peek(pa: Parser0[Any]): Parser0[Unit] = pa match { case peek @ Impl.Peek(_) => peek - case s if Impl.alwaysSucceeds0(s) => unit + case s if Impl.alwaysSucceeds(s) => unit case notPeek => // TODO: we can adjust Rep0/Rep to do minimal // work since we rewind after we are sure there is @@ -1801,7 +1801,7 @@ object Parser { // If b is (), such as foo.as(()) // we can just return v if (b.equals(())) voided.asInstanceOf[Parser0[B]] - else if (Impl.alwaysSucceeds0(voided)) pure(b) + else if (Impl.alwaysSucceeds(voided)) pure(b) else Impl.Map0(voided, Impl.ConstFn(b)) } @@ -1836,7 +1836,7 @@ object Parser { def withContext0[A](p0: Parser0[A], ctx: String): Parser0[A] = p0 match { case Impl.Void0(p) => Impl.Void0(withContext0(p, ctx)).asInstanceOf[Parser0[A]] - case _ if Impl.alwaysSucceeds0(p0) => p0 + case _ if Impl.alwaysSucceeds(p0) => p0 case _ => Impl.WithContextP0(ctx, p0) } @@ -2047,13 +2047,13 @@ object Parser { // does this parser always succeed without consuming input // note: a parser1 does not always succeed // and by construction, a oneOf0 never always succeeds - final def alwaysSucceeds0(p: Parser0[Any]): Boolean = + final def alwaysSucceeds(p: Parser0[Any]): Boolean = p match { case Index | GetCaret | Pure(_) => true - case Map0(p, _) => alwaysSucceeds0(p) - case SoftProd0(a, b) => alwaysSucceeds0(a) && alwaysSucceeds0(b) - case Prod0(a, b) => alwaysSucceeds0(a) && alwaysSucceeds0(b) - case WithContextP0(_, p) => alwaysSucceeds0(p) + case Map0(p, _) => alwaysSucceeds(p) + case SoftProd0(a, b) => alwaysSucceeds(a) && alwaysSucceeds(b) + case Prod0(a, b) => alwaysSucceeds(a) && alwaysSucceeds(b) + case WithContextP0(_, p) => alwaysSucceeds(p) // by construction we never build a Not(Fail()) since // it would just be the same as unit // case Not(Fail() | FailWith(_)) => true @@ -2188,7 +2188,7 @@ object Parser { pa match { case p1: Parser[Any] => unmap(p1) case GetCaret | Index | Pure(_) => Parser.unit - case s if alwaysSucceeds0(s) => Parser.unit + case s if alwaysSucceeds(s) => Parser.unit case Map0(Void0(v), _) => // this is common with as v diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala b/core/shared/src/test/scala/cats/parse/ParserTest.scala index 7dae2444..8246e6d3 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -2350,7 +2350,7 @@ class ParserTest extends munit.ScalaCheckSuite { assertEquals(p.as(1).as(1), p.as(1)) val a = 42 - val fn = { x: Int => x + 1 } + val fn = { (x: Int) => x + 1 } assertEquals(p.as(a).map(fn), p.as(fn(a))) } assertEquals(Parser.string("foo").void, Parser.string("foo")) @@ -2707,9 +2707,35 @@ class ParserTest extends munit.ScalaCheckSuite { val s1 = s10.filter(_.length > 1) val s2 = s20.filterNot { s => (s.length < 2) || s1.exists(s.startsWith(_)) } assertEquals(Parser.stringIn(s1) | Parser.stringIn(s2), Parser.stringIn(s1 | s2)) + assertEquals( + Parser.stringIn(s1).void | Parser.stringIn(s2).void, + Parser.stringIn(s1 | s2).void + ) } && forAll { (s1: Set[Char], s2: Set[Char]) => assertEquals(Parser.charIn(s1) | Parser.charIn(s2), Parser.charIn(s1 | s2)) + assertEquals(Parser.charIn(s1).void | Parser.charIn(s2).void, Parser.charIn(s1 | s2).void) + // assertEquals(Parser.charIn(s1).string | Parser.charIn(s2).string, Parser.charIn(s1 | s2).string) + } && + forAll { (s1: String, s2: String) => + if (!s2.startsWith(s1) && (s1.nonEmpty && s2.nonEmpty)) { + if ((s1.length > 1) || (s2.length > 1)) { + assertEquals( + Parser.stringIn(s1 :: s2 :: Nil).void, + Parser.string(s1) | Parser.string(s2) + ) + + assertEquals( + Parser.stringIn(s1 :: s2 :: Nil), + (Parser.string(s1) | Parser.string(s2)).string + ) + + assertEquals( + Parser.stringIn(s1 :: s2 :: Nil), + Parser.string(s1).string | Parser.string(s2).string + ) + } else () + } else () } } @@ -2747,7 +2773,11 @@ class ParserTest extends munit.ScalaCheckSuite { (Parser.ignoreCase("select").string, Parser.char('a').string, Parser.char('b').string) :: (Parser.anyChar.void, Parser.char('a'), Parser.string("foo")) :: (Parser.anyChar, Parser.string("foo"), Parser.stringIn("bar" :: "baz" :: Nil)) :: - // (Parser.stringIn("foo" :: "bar" :: Nil).void, Parser.anyChar.void, Parser.charIn('a' :: 'b' :: Nil)) :: + ( + Parser.stringIn("foo" :: "bar" :: Nil).void, + Parser.anyChar.void, + Parser.charIn('a' :: 'b' :: Nil) + ) :: Nil regressions.foreach { case (a, b, c) => strict(a, b, c) } From 294b4c837ddcec3602e1396eadd4652a8ecf4d96 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Thu, 10 Feb 2022 20:19:50 -0800 Subject: [PATCH 10/15] remove unused code --- .../src/main/scala/cats/parse/RadixNode.scala | 23 ------------------- .../test/scala/cats/parse/RadixNodeTest.scala | 14 +---------- 2 files changed, 1 insertion(+), 36 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/RadixNode.scala b/core/shared/src/main/scala/cats/parse/RadixNode.scala index ac9a9634..111b9f5a 100644 --- a/core/shared/src/main/scala/cats/parse/RadixNode.scala +++ b/core/shared/src/main/scala/cats/parse/RadixNode.scala @@ -55,29 +55,6 @@ private[parse] final class RadixNode( else (matched :: rest.filterNot(_ == matched).toList) } - final def matchesWithPrefix(prefix: String): List[String] = - matchesWithPrefixLoop(prefix, 0, prefix.length) - - @tailrec - private def matchesWithPrefixLoop(prefix: String, start: Int, end: Int): List[String] = - if (start >= end) { - // this is the empty string, all the strings start with empty - allStrings - } else { - // start < end, there is at least another character to match - val c = prefix.charAt(start) - val idx = c.toInt & bitMask - prefixes(idx) match { - case null => - Nil - case p => - val len = math.min(end - start, p.length) - if (prefix.regionMatches(start, p, 0, len)) { - children(idx).matchesWithPrefixLoop(prefix, start + len, end) - } else Nil - } - } - /** If this matches, return the new offset, else return -1 * * @param str diff --git a/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala b/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala index 595ecfb1..8b6669af 100644 --- a/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala +++ b/core/shared/src/test/scala/cats/parse/RadixNodeTest.scala @@ -22,7 +22,7 @@ package cats.parse import org.scalacheck.{Gen, Prop} -import org.scalacheck.Prop.{forAll, forAllNoShrink} +import org.scalacheck.Prop.forAll class RadixNodeTest extends munit.ScalaCheckSuite { val tests: Int = if (BitSetUtil.isScalaJs) 50 else 20000 @@ -193,16 +193,4 @@ class RadixNodeTest extends munit.ScalaCheckSuite { assertEquals(RadixNode.fromStrings(ss).allStrings.sorted, ss.distinct.sorted) } } - - property("RadixNode.matchesWithPrefix(p) matches allStrings.filter(_.startsWith(p))") { - forAll { (ss: List[String], head: String, tail: List[String]) => - val rn = RadixNode.fromStrings(ss) - forAllNoShrink(Gen.oneOf(head :: tail ::: ss)) { prefix => - assertEquals( - rn.matchesWithPrefix(prefix).sorted, - ss.filter(_.startsWith(prefix)).distinct.sorted - ) - } - } - } } From dfcf55cc7834e3b11af20eb9e1aa647388603185 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Thu, 10 Feb 2022 20:39:37 -0800 Subject: [PATCH 11/15] simplify isVoided --- .../src/main/scala/cats/parse/Parser.scala | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index 5fb1e405..ccdfc551 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -2161,32 +2161,33 @@ object Parser { None } - /** return true if this is already non-allocating + /** return true if this is already the same a void * * @param p * the Parser to check * @return * true if this parser does not capture */ - def doesNotAllocate(p: Parser0[Any]): Boolean = + def isVoided(p: Parser0[Any]): Boolean = p match { + case Pure(a) => a == () case StartParser | EndParser | Void(_) | Void0(_) | IgnoreCase(_) | Str(_) | Fail() | - FailWith(_) | Pure(()) | Not(_) | Peek(_) => + FailWith(_) | Not(_) | Peek(_) => true - case OneOf(ps) => ps.forall(doesNotAllocate(_)) - case OneOf0(ps) => ps.forall(doesNotAllocate(_)) - case WithContextP(_, p) => doesNotAllocate(p) - case WithContextP0(_, p) => doesNotAllocate(p) - case Backtrack(p) => doesNotAllocate(p) - case Backtrack0(p) => doesNotAllocate(p) - case Map(p, _) => doesNotAllocate(p) - case Map0(p, _) => doesNotAllocate(p) - case _ => false + case OneOf(ps) => ps.forall(isVoided(_)) + case OneOf0(ps) => ps.forall(isVoided(_)) + case WithContextP(_, p) => isVoided(p) + case WithContextP0(_, p) => isVoided(p) + case Backtrack(p) => isVoided(p) + case Backtrack0(p) => isVoided(p) + case Length(_) | StringP(_) | StringIn(_) | Prod(_, _) | SoftProd(_, _) | Map(_, _) | + Select(_, _) | FlatMap(_, _) | TailRecM(_, _) | Defer(_) | Rep(_, _, _, _) | AnyChar | + CharIn(_, _, _) | StringP0(_) | Index | GetCaret | Prod0(_, _) | SoftProd0(_, _) | + Map0(_, _) | Select0(_, _) | FlatMap0(_, _) | TailRecM0(_, _) | Defer0(_) | + Rep0(_, _, _) => + false } - def isVoided(p: Parser0[Any]): Boolean = - doesNotAllocate(p) && (hasKnownResult(p) == someUnit) - /** This removes any trailing map functions which can cause wasted allocations if we are later * going to void or return strings. This stops at StringP or VoidP since those are markers that * anything below has already been transformed @@ -2979,13 +2980,13 @@ object Parser { } case (Void0(vl), Void0(vr)) => merge0(vl, vr).void - case (Void0(vl), right) if hasKnownResult(right) == someUnit => + case (Void0(vl), right) if isVoided(right) => merge0(vl, right).void - case (Void(vl), right) if hasKnownResult(right) == someUnit => + case (Void(vl), right) if isVoided(right) => merge0(vl, right).void - case (left, Void0(vr)) if hasKnownResult(left) == someUnit => + case (left, Void0(vr)) if isVoided(left) => merge0(left, vr).void - case (left, Void(vr)) if hasKnownResult(left) == someUnit => + case (left, Void(vr)) if isVoided(left) => merge0(left, vr).void case _ => OneOf0(left :: right :: Nil) } @@ -3127,9 +3128,9 @@ object Parser { merge(vl, vr).void case (StringP(l1), StringP(r1)) => merge(l1, r1).string - case (Void(vl), right) if hasKnownResult(right) == someUnit => + case (Void(vl), right) if isVoided(right) => merge(vl, right).void - case (left, Void(vr)) if hasKnownResult(left) == someUnit => + case (left, Void(vr)) if isVoided(left) => merge(left, vr).void case _ => OneOf(left :: right :: Nil) } From 0b79ad34dee0e90fcdcd79fcaf013ff4e4d76af9 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Thu, 10 Feb 2022 20:53:49 -0800 Subject: [PATCH 12/15] cleanup --- .../src/main/scala/cats/parse/Parser.scala | 37 +++++-------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index ccdfc551..0b9388f2 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -2197,18 +2197,9 @@ object Parser { case p1: Parser[Any] => unmap(p1) case GetCaret | Index | Pure(_) => Parser.unit case s if alwaysSucceeds(s) => Parser.unit - case Map0(Void0(v), _) => - // this is common with as - v - case Map0(p, fn) => - fn match { - case ConstFn(_) => - // p must have already been unmapped - p - case _ => - // we discard any allocations done by fn - unmap0(p) - } + case Map0(p, _) => + // we discard any allocations done by fn + unmap0(p) case Select0(p, fn) => Select0(p, unmap0(fn)) case StringP0(s) => @@ -2230,7 +2221,7 @@ object Parser { case OneOf0(ps) => // Find the fixed point here val next = Parser.oneOf0(ps.map(unmap0)) - if (next == pa) next + if (next == pa) pa else unmap0(next) case Prod0(p1, p2) => unmap0(p1) match { @@ -2289,16 +2280,9 @@ object Parser { */ def unmap(pa: Parser[Any]): Parser[Any] = pa match { - case Map(Void(v), _) => - // this is common with as - v - case Map(p, fn) => - fn match { - case ConstFn(_) => p - case _ => - // we discard any allocations done by fn - unmap(p) - } + case Map(p, _) => + // we discard any allocations done by fn + unmap(p) case Select(p, fn) => Select(p, unmap0(fn)) case StringP(s) => @@ -2574,17 +2558,14 @@ object Parser { } final def stringIn(radix: RadixNode, all: SortedSet[String], state: State): String = { - val str = state.str val startOffset = state.offset - val matched = radix.matchAtOrNull(str, startOffset) + val matched = radix.matchAtOrNull(state.str, startOffset) if (matched eq null) { state.error = Eval.later(Chain.one(Expectation.OneOfStr(startOffset, all.toList))) - state.offset = startOffset null } else { - val lastMatch = startOffset + matched.length - state.offset = lastMatch + state.offset = startOffset + matched.length matched } } From 1f5d7f6962b333dd57c3f4c91d1b7d9e507e3eff Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Thu, 10 Feb 2022 21:00:53 -0800 Subject: [PATCH 13/15] add isUnit --- core/shared/src/main/scala/cats/parse/Parser.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index 0b9388f2..505fa333 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -1807,7 +1807,7 @@ object Parser { // void cannot make a Parser0 a Parser // If b is (), such as foo.as(()) // we can just return v - if (b.equals(())) voided.asInstanceOf[Parser0[B]] + if (Impl.isUnit(b)) voided.asInstanceOf[Parser0[B]] else if (Impl.alwaysSucceeds(voided)) pure(b) else Impl.Map0(voided, Impl.ConstFn(b)) } @@ -1818,7 +1818,7 @@ object Parser { val v = pa.void // If b is (), such as foo.as(()) // we can just return v - if (b.equals(())) v.asInstanceOf[Parser[B]] + if (Impl.isUnit(b)) v.asInstanceOf[Parser[B]] else v match { case Impl.Void(ci @ Impl.SingleChar(c)) => @@ -1969,6 +1969,9 @@ object Parser { val allChars = Char.MinValue to Char.MaxValue + def isUnit(a: Any): Boolean = + a.equals(()) + case class ConstFn[A](result: A) extends Function[Any, A] { def apply(any: Any) = result @@ -2170,7 +2173,7 @@ object Parser { */ def isVoided(p: Parser0[Any]): Boolean = p match { - case Pure(a) => a == () + case Pure(a) => isUnit(a) case StartParser | EndParser | Void(_) | Void0(_) | IgnoreCase(_) | Str(_) | Fail() | FailWith(_) | Not(_) | Peek(_) => true From e24d99f9ecf8b12b57c45c27735275a3310bab70 Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Sun, 13 Feb 2022 10:29:29 -1000 Subject: [PATCH 14/15] address review comments --- core/shared/src/main/scala/cats/parse/Parser.scala | 14 ++++++-------- .../src/test/scala/cats/parse/ParserTest.scala | 4 +++- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index 505fa333..1cef9c21 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -2045,8 +2045,9 @@ object Parser { // does this parser return the string it matches def matchesString(p: Parser0[Any]): Boolean = p match { - case StringP0(_) | StringP(_) | StringIn(_) | Length(_) | Fail() | FailWith(_) => true - case DefiniteString(_) => true + case StringP0(_) | StringP(_) | StringIn(_) | Length(_) | Fail() | FailWith(_) | + DefiniteString(_) => + true case OneOf(ss) => ss.forall(matchesString) case OneOf0(ss) => ss.forall(matchesString) case WithContextP(_, p) => matchesString(p) @@ -2164,7 +2165,7 @@ object Parser { None } - /** return true if this is already the same a void + /** return true if this is already the same as void * * @param p * the Parser to check @@ -2975,8 +2976,8 @@ object Parser { case _ => OneOf0(left :: right :: Nil) } - def merge[A](left: Parser[A], right: Parser[A]): Parser[A] = { - val res = (left, right) match { + def merge[A](left: Parser[A], right: Parser[A]): Parser[A] = + (left, right) match { case (Fail(), _) => right case (_, Fail()) => left case (OneOf(ls), OneOf(rights @ (h :: t))) => @@ -3118,9 +3119,6 @@ object Parser { merge(left, vr).void case _ => OneOf(left :: right :: Nil) } - // println(s"merge($left, $right) == $res") - res.asInstanceOf[Parser[A]] - } case object AnyChar extends Parser[Char] { override def parseMut(state: State): Char = { diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala b/core/shared/src/test/scala/cats/parse/ParserTest.scala index c4271c23..d0fcd1e8 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -2360,7 +2360,6 @@ class ParserTest extends munit.ScalaCheckSuite { Nil regressions.foreach { p => - assertEquals(p.void.void, p.void) assertEquals(p.void.as(1), p.as(1)) assertEquals(p.as(1).void, p.void) assertEquals(p.as(1).as(1), p.as(1)) @@ -2647,6 +2646,8 @@ class ParserTest extends munit.ScalaCheckSuite { property("P.void is idempotent") { val regressions = ((Parser.string("aa").map(_ => 1) | Parser.string("bb").map(_ => 2)).withContext("ctx")) :: + (Parser.defer(Parser.string("foo")).void.backtrack) :: + (Parser.defer(Parser.string("foo")).void.withContext("ctx").backtrack) :: Nil regressions.foreach { p => @@ -2731,6 +2732,7 @@ class ParserTest extends munit.ScalaCheckSuite { forAll { (s1: Set[Char], s2: Set[Char]) => assertEquals(Parser.charIn(s1) | Parser.charIn(s2), Parser.charIn(s1 | s2)) assertEquals(Parser.charIn(s1).void | Parser.charIn(s2).void, Parser.charIn(s1 | s2).void) + // TODO: make this law pass. Currently the left is StringIn, but the right is StringP(CharIn(_, _, _)) // assertEquals(Parser.charIn(s1).string | Parser.charIn(s2).string, Parser.charIn(s1 | s2).string) } && forAll { (s1: String, s2: String) => From 4590a69c68ec21002153d6f4a137d9d5e762d98a Mon Sep 17 00:00:00 2001 From: Patrick Oscar Boykin Date: Sun, 13 Feb 2022 11:07:13 -1000 Subject: [PATCH 15/15] add non-voided stringIn benchmarks --- .../scala/cats/parse/bench/StringInBench.scala | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/bench/src/main/scala/cats/parse/bench/StringInBench.scala b/bench/src/main/scala/cats/parse/bench/StringInBench.scala index f4307a26..0c6a73ce 100644 --- a/bench/src/main/scala/cats/parse/bench/StringInBench.scala +++ b/bench/src/main/scala/cats/parse/bench/StringInBench.scala @@ -39,7 +39,9 @@ private[parse] class StringInBenchmarks { var radixNode: RadixNode = _ - var stringIn: Parser[Unit] = _ + var stringInV: Parser[Unit] = _ + + var stringInS: Parser[String] = _ var oneOf: Parser[Unit] = _ @@ -60,13 +62,18 @@ private[parse] class StringInBenchmarks { } radixNode = RadixNode.fromStrings(stringsToMatch) - stringIn = Parser.stringIn(stringsToMatch).void + stringInS = Parser.stringIn(stringsToMatch) + stringInV = stringInS.void oneOf = Parser.oneOf(stringsToMatch.map(Parser.string(_))) } @Benchmark - def stringInParse(): Unit = - inputs.foreach(stringIn.parseAll(_)) + def stringInVParse(): Unit = + inputs.foreach(stringInV.parseAll(_)) + + @Benchmark + def stringInSParse(): Unit = + inputs.foreach(stringInS.parseAll(_)) @Benchmark def oneOfParse(): Unit =