diff --git a/src/dfa.rs b/src/dfa.rs index 55abf9f0e5..22d141d9b9 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -1319,7 +1319,19 @@ impl<'a> Fsm<'a> { empty_flags: EmptyFlags, state_flags: StateFlags, ) -> Option { - let flagi = empty_flags.as_index(); + // Compute an index into our cache of start states based on the set + // of empty/state flags set at the current position in the input. We + // don't use every flag since not all flags matter. For example, since + // matches are delayed by one byte, start states can never be match + // states. + let flagi = { + (((empty_flags.start as u8) << 0) | + ((empty_flags.end as u8) << 1) | + ((empty_flags.start_line as u8) << 2) | + ((empty_flags.end_line as u8) << 3) | + ((state_flags.is_word() as u8) << 4)) + as usize + }; match self.cache.start_states[flagi] { STATE_UNKNOWN => {} STATE_DEAD => return Some(STATE_DEAD), @@ -1592,18 +1604,6 @@ impl Transitions { } } -impl EmptyFlags { - fn as_index(&self) -> usize { - (((self.start as u8) << 0) | - ((self.end as u8) << 1) | - ((self.start_line as u8) << 2) | - ((self.end_line as u8) << 3) | - ((self.word_boundary as u8) << 4) | - ((self.not_word_boundary as u8) << 5)) - as usize - } -} - impl StateFlags { fn is_match(&self) -> bool { self.0 & 0b0000000_1 > 0 diff --git a/tests/api.rs b/tests/api.rs index 275157bf07..e6c3a27a8d 100644 --- a/tests/api.rs +++ b/tests/api.rs @@ -234,3 +234,8 @@ expand!(expand9, r"(?P\w+)\s+(?P\d+)", "abc 123", " $b $a ", " 123 abc "); expand!(expand10, r"(?P\w+)\s+(?P\d+)", "abc 123", "$bz$az", ""); + +split!(split1, r"\s+", "a b\nc\td\n\t e", + &[t!("a"), t!("b"), t!("c"), t!("d"), t!("e")]); +split!(split2, r"\b", "a b c", + &[t!(""), t!("a"), t!(" "), t!("b"), t!(" "), t!("c")]); diff --git a/tests/macros.rs b/tests/macros.rs index c3b79e2221..f9e8912630 100644 --- a/tests/macros.rs +++ b/tests/macros.rs @@ -114,3 +114,14 @@ macro_rules! nomatset { } } } + +macro_rules! split { + ($name:ident, $re:expr, $text:expr, $expected:expr) => { + #[test] + fn $name() { + let re = regex!($re); + let splitted: Vec<_> = re.split(t!($text)).collect(); + assert_eq!($expected, &*splitted); + } + } +} diff --git a/tests/regression.rs b/tests/regression.rs index 913a6baefb..e694dd01b9 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -46,6 +46,14 @@ mat!(uni_case_lower_nocase_flag, u!(r"(?i)\p{Ll}+"), "ΛΘΓΔα", Some((0, 10)) // See: https://github.com/rust-lang-nursery/regex/issues/191 mat!(many_alternates, r"1|2|3|4|5|6|7|8|9|10|int", "int", Some((0, 3))); +// burntsushi was bad and didn't create an issue for this bug. mat!(anchored_prefix1, r"^a\S", "a ", None); mat!(anchored_prefix2, r"^a\S", "foo boo a ", None); mat!(anchored_prefix3, r"^-[a-z]", "r-f", None); + +// See: https://github.com/rust-lang-nursery/regex/issues/204 +split!(split_on_word_boundary, r"\b", r"Should this (work?)", + &[t!(""), t!("Should"), t!(" "), t!("this"), + t!(" ("), t!("work"), t!("?)")]); +matiter!(word_boundary_dfa, r"\b", "a b c", + (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); diff --git a/tests/word_boundary.rs b/tests/word_boundary.rs index 3c27eaddfa..7fe97a2974 100644 --- a/tests/word_boundary.rs +++ b/tests/word_boundary.rs @@ -40,6 +40,7 @@ matiter!(wb37, r"^^^^^\b.$$$$$", "x", (0, 1)); matiter!(wb38, r"^^^^^\b$$$$$", "x"); matiter!(wb39, r"^^^^^\b\b\b.\b\b\b$$$$$", "x", (0, 1)); matiter!(wb40, r"\b.+\b", "$$abc$$", (2, 5)); +matiter!(wb41, r"\b", "a b c", (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); matiter!(nb1, r"\Bfoo\B", "n foo xfoox that", (7, 10)); matiter!(nb2, r"a\B", "faoa x", (1, 2));