From 5092855740b4625f61608440d509b98c6a266bd2 Mon Sep 17 00:00:00 2001 From: Ethan Pailes Date: Sat, 28 Apr 2018 08:36:48 -0400 Subject: [PATCH] Fix anchor bug in Match iteration The Match literal iterator would repeatedly look for matches in the remainder of the input after it found its first match regardless of whether or not the regex was anchored at the start. This patch adds logic to make sure that we don't keep looking for matches after the first match is returned for a start-anchored literal regex. --- src/exec.rs | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/src/exec.rs b/src/exec.rs index 95adae575e..1358fd99bb 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -622,8 +622,13 @@ impl<'c> ExecNoSync<'c> { } AnchoredStart => { let lits = &self.ro.nfa.prefixes; - lits.find_start(&text[start..]) - .map(|(s, e)| (start + s, start + e)) + if !self.ro.nfa.is_anchored_start + || (self.ro.nfa.is_anchored_start && start == 0) { + lits.find_start(&text[start..]) + .map(|(s, e)| (start + s, start + e)) + } else { + None + } } AnchoredEnd => { let lits = &self.ro.suffixes; @@ -1286,3 +1291,62 @@ impl ProgramCacheInner { } } } + +#[cfg(test)] +mod test { + #[test] + fn uppercut_s_backtracking_bytes_default_bytes_mismatch() { + use internal::ExecBuilder; + + let backtrack_bytes_re = ExecBuilder::new("^S") + .bounded_backtracking() + .only_utf8(false) + .build() + .map(|exec| exec.into_byte_regex()) + .map_err(|err| format!("{}", err)) + .unwrap(); + + let default_bytes_re = ExecBuilder::new("^S") + .only_utf8(false) + .build() + .map(|exec| exec.into_byte_regex()) + .map_err(|err| format!("{}", err)) + .unwrap(); + + let input = vec![83, 83]; + + let s1 = backtrack_bytes_re.split(&input); + let s2 = default_bytes_re.split(&input); + for (chunk1, chunk2) in s1.zip(s2) { + assert_eq!(chunk1, chunk2); + } + } + + #[test] + fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() { + use internal::ExecBuilder; + + let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)") + .bounded_backtracking() + .bytes(true) + .build() + .map(|exec| exec.into_regex()) + .map_err(|err| format!("{}", err)) + .unwrap(); + + let default_bytes_re = ExecBuilder::new(r"^(?u:\*)") + .bytes(true) + .build() + .map(|exec| exec.into_regex()) + .map_err(|err| format!("{}", err)) + .unwrap(); + + let input = "**"; + + let s1 = backtrack_bytes_re.split(input); + let s2 = default_bytes_re.split(input); + for (chunk1, chunk2) in s1.zip(s2) { + assert_eq!(chunk1, chunk2); + } + } +}