diff --git a/grep-regex/src/matcher.rs b/grep-regex/src/matcher.rs index 7f30252a8..61af05186 100644 --- a/grep-regex/src/matcher.rs +++ b/grep-regex/src/matcher.rs @@ -71,10 +71,31 @@ impl RegexMatcherBuilder { &self, literals: &[B], ) -> Result { - let slices: Vec<_> = literals.iter().map(|s| s.as_ref()).collect(); - if !self.config.can_plain_aho_corasick() || literals.len() < 40 { + let mut has_escape = false; + let mut slices = vec![]; + for lit in literals { + slices.push(lit.as_ref()); + has_escape = has_escape || lit.as_ref().contains('\\'); + } + // Even when we have a fixed set of literals, we might still want to + // use the regex engine. Specifically, if any string has an escape + // in it, then we probably can't feed it to Aho-Corasick without + // removing the escape. Additionally, if there are any particular + // special match semantics we need to honor, then Aho-Corasick isn't + // enough. Finally, the regex engine can do really well with a small + // number of literals (at time of writing, this is changing soon), so + // we use it when there's a small set. + // + // Yes, this is one giant hack. Ideally, this entirely separate literal + // matcher that uses Aho-Corasick would be pushed down into the regex + // engine. 
+ if has_escape + || !self.config.can_plain_aho_corasick() + || literals.len() < 40 + { return self.build(&slices.join("|")); } + let matcher = MultiLiteralMatcher::new(&slices)?; let imp = RegexMatcherImpl::MultiLiteral(matcher); Ok(RegexMatcher { diff --git a/tests/regression.rs b/tests/regression.rs index 40a846541..88f2194d4 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -716,3 +716,13 @@ rgtest!(r1259_drop_last_byte_nonl, |dir: Dir, mut cmd: TestCommand| { cmd = dir.command(); eqnice!("fz\n", cmd.arg("-f").arg("patterns-nl").arg("test").stdout()); }); + +// Regression test: a patterns file holding 40 copies of the literal "1.208.0.0/12", passed via `-Ff patterns`, must still print the identical line from `corpus`. See: https://github.com/BurntSushi/ripgrep/issues/1334 +rgtest!(r1334_crazy_literals, |dir: Dir, mut cmd: TestCommand| { + dir.create("patterns", &"1.208.0.0/12\n".repeat(40)); + dir.create("corpus", "1.208.0.0/12\n"); + eqnice!( + "1.208.0.0/12\n", + cmd.arg("-Ff").arg("patterns").arg("corpus").stdout() + ); +});