diff --git a/crates/regex/src/literal.rs b/crates/regex/src/literal.rs index a45f749e2..a33da2a6c 100644 --- a/crates/regex/src/literal.rs +++ b/crates/regex/src/literal.rs @@ -430,6 +430,7 @@ impl Extractor { } seq1.union(seq2); assert!(seq1.len().map_or(true, |x| x <= self.limit_total)); + seq1.prefix = seq1.prefix && seq2.prefix; seq1 } @@ -586,10 +587,15 @@ impl TSeq { lits.iter().any(is_poisonous) } - /// Compare the two sequences and return the one that is believed to be best - /// according to a hodge podge of heuristics. + /// Compare the two sequences and return the one that is believed to be + /// best according to a hodge podge of heuristics. fn choose(self, other: TSeq) -> TSeq { - let (seq1, seq2) = (self, other); + let (mut seq1, mut seq2) = (self, other); + // Whichever one we pick, by virtue of picking one, we choose + // to not take the other. So we must consider the result inexact. + seq1.make_inexact(); + seq2.make_inexact(); + if !seq1.is_finite() { return seq2; } else if !seq2.is_finite() { @@ -681,7 +687,7 @@ mod tests { assert_eq!(e(r"foo"), seq([E("foo")])); assert_eq!(e(r"[a-z]foo[a-z]"), seq([I("foo")])); assert_eq!(e(r"[a-z](foo)(bar)[a-z]"), seq([I("foobar")])); - assert_eq!(e(r"[a-z]([a-z]foo)(bar[a-z])[a-z]"), seq([I("foobar")])); + assert_eq!(e(r"[a-z]([a-z]foo)(bar[a-z])[a-z]"), seq([I("foo")])); assert_eq!(e(r"[a-z]([a-z]foo)([a-z]foo)[a-z]"), seq([I("foo")])); assert_eq!(e(r"(\d{1,3}\.){3}\d{1,3}"), seq([I(".")])); assert_eq!(e(r"[a-z]([a-z]foo){3}[a-z]"), seq([I("foo")])); @@ -689,7 +695,7 @@ mod tests { assert_eq!(e(r"[a-z]([a-z]foo[a-z]){3}[a-z]"), seq([I("foo")])); assert_eq!( e(r"[a-z]([a-z]foo){3}(bar[a-z]){3}[a-z]"), - seq([I("foobar")]) + seq([I("foo")]) ); } @@ -935,14 +941,14 @@ mod tests { assert_eq!(Seq::infinite(), e(r"[A-Z]+")); assert_eq!(seq([I("1")]), e(r"1[A-Z]")); assert_eq!(seq([I("1")]), e(r"1[A-Z]2")); - assert_eq!(seq([E("123")]), e(r"[A-Z]+123")); + assert_eq!(seq([I("123")]), e(r"[A-Z]+123")); assert_eq!(seq([I("123")]), e(r"[A-Z]+123[A-Z]+")); assert_eq!(Seq::infinite(), e(r"1|[A-Z]|3")); assert_eq!(seq([E("1"), I("2"), E("3")]), e(r"1|2[A-Z]|3"),); assert_eq!(seq([E("1"), I("2"), E("3")]), e(r"1|[A-Z]2[A-Z]|3"),); - assert_eq!(seq([E("1"), E("2"), E("3")]), e(r"1|[A-Z]2|3"),); + assert_eq!(seq([E("1"), I("2"), E("3")]), e(r"1|[A-Z]2|3"),); assert_eq!(seq([E("1"), I("2"), E("4")]), e(r"1|2[A-Z]3|4"),); - assert_eq!(seq([E("2")]), e(r"(?:|1)[A-Z]2")); + assert_eq!(seq([I("2")]), e(r"(?:|1)[A-Z]2")); assert_eq!(inexact([I("a")]), e(r"a.z")); } @@ -1005,4 +1011,11 @@ mod tests { let s = e(r"foobarfoo|foo| |foofoo"); assert_eq!(Seq::infinite(), s); } + + // Regression test for: https://github.com/BurntSushi/ripgrep/issues/2884 + #[test] + fn case_insensitive_alternation() { + let s = e(r"(?i:e.x|ex)"); + assert_eq!(s, seq([I("X"), I("x")])); + } } diff --git a/tests/util.rs b/tests/util.rs index bac5f2e6d..07a1a783f 100644 --- a/tests/util.rs +++ b/tests/util.rs @@ -284,16 +284,7 @@ impl TestCommand { /// Runs and captures the stdout of the given command. pub fn stdout(&mut self) -> String { let o = self.output(); - let stdout = String::from_utf8_lossy(&o.stdout); - match stdout.parse() { - Ok(t) => t, - Err(err) => { - panic!( - "could not convert from string: {:?}\n\n{}", - err, stdout - ); - } - } + String::from_utf8_lossy(&o.stdout).into_owned() } /// Pipe `input` to a command, and collect the output. @@ -313,16 +304,7 @@ impl TestCommand { let output = self.expect_success(child.wait_with_output().unwrap()); worker.join().unwrap().unwrap(); - let stdout = String::from_utf8_lossy(&output.stdout); - match stdout.parse() { - Ok(t) => t, - Err(err) => { - panic!( - "could not convert from string: {:?}\n\n{}", - err, stdout - ); - } - } + String::from_utf8_lossy(&output.stdout).into_owned() } /// Gets the output of a command. If the command failed, then this panics.