From 445c83431173fd04ad5b2782f97d39d17d2428c7 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 1 May 2016 13:47:07 -0400 Subject: [PATCH] Fix #186. This enables RegexSets to short-circuit when: 1. All patterns are anchored to the beginning of the input. 2. All patterns have either matched or will never match. We make this happen by checking whether all NFA states in a DFA state are match states, when a DFA match is observed. If all NFA states are match states, and since all match states are final states, we know that the current set of matches will never change. Since we don't care about reporting location information, we can quit. N.B. If no matches can be found, then the DFA will short circuit using its normal mechanism. --- src/dfa.rs | 13 +++++++++++++ src/prog.rs | 10 ++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/dfa.rs b/src/dfa.rs index 56b1bd880d..edf9e2f52d 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -581,6 +581,19 @@ impl<'a> Fsm<'a> { self.last_match_si = next_si; prev_si = next_si; + // This permits short-circuiting when matching a regex set. + // In particular, if this DFA state contains only match states, + // then it's impossible to extend the set of matches since + // match states are final. Therefore, we can quit. + if self.prog.matches.len() > 1 { + let state = self.state(next_si); + let just_matches = state.insts.iter() + .all(|&ip| self.prog[ip as usize].is_match()); + if just_matches { + return result; + } + } + // Another inner loop! If the DFA stays in this particular // match state, then we can rip through all of the input // very quickly, and only recording the match location once diff --git a/src/prog.rs b/src/prog.rs index 44e3228262..d8088e1510 100644 --- a/src/prog.rs +++ b/src/prog.rs @@ -289,6 +289,16 @@ pub enum Inst { Bytes(InstBytes), } +impl Inst { + /// Returns true if and only if this is a match instruction. + pub fn is_match(&self) -> bool { + match *self { + Inst::Match(_) => true, + _ => false, + } + } +} + /// Representation of the Save instruction. #[derive(Clone, Debug)] pub struct InstSave {