diff --git a/regex-automata/src/meta/wrappers.rs b/regex-automata/src/meta/wrappers.rs index 08110d9bb..6cb19ba0d 100644 --- a/regex-automata/src/meta/wrappers.rs +++ b/regex-automata/src/meta/wrappers.rs @@ -212,7 +212,10 @@ impl BoundedBacktrackerEngine { .configure(backtrack_config) .build_from_nfa(nfa.clone()) .map_err(BuildError::nfa)?; - debug!("BoundedBacktracker built"); + debug!( + "BoundedBacktracker built (max haystack length: {:?})", + engine.max_haystack_len() + ); Ok(Some(BoundedBacktrackerEngine(engine))) } #[cfg(not(feature = "nfa-backtrack"))] diff --git a/regex-automata/src/nfa/thompson/backtrack.rs b/regex-automata/src/nfa/thompson/backtrack.rs index eba037c1d..df99e456d 100644 --- a/regex-automata/src/nfa/thompson/backtrack.rs +++ b/regex-automata/src/nfa/thompson/backtrack.rs @@ -820,8 +820,11 @@ impl BoundedBacktracker { // bytes to the capacity in bits. let capacity = 8 * self.get_config().get_visited_capacity(); let blocks = div_ceil(capacity, Visited::BLOCK_SIZE); - let real_capacity = blocks * Visited::BLOCK_SIZE; - (real_capacity / self.nfa.states().len()) - 1 + let real_capacity = blocks.saturating_mul(Visited::BLOCK_SIZE); + // It's possible for `real_capacity` to be smaller than the number of + // NFA states for particularly large regexes, so we saturate towards + // zero. + (real_capacity / self.nfa.states().len()).saturating_sub(1) } } @@ -1882,3 +1885,24 @@ fn div_ceil(lhs: usize, rhs: usize) -> usize { (lhs / rhs) + 1 } } + +#[cfg(test)] +mod tests { + use super::*; + + // This is a regression test for the maximum haystack length computation. + // Previously, it assumed that the total capacity of the backtracker's + // bitset would always be greater than the number of NFA states. But there + // is of course no guarantee that this is true. This regression test + // ensures that not only does `max_haystack_len` not panic, but that it + // should return `0`. + #[cfg(feature = "syntax")] + #[test] + fn max_haystack_len_overflow() { + let re = BoundedBacktracker::builder() + .configure(BoundedBacktracker::config().visited_capacity(10)) + .build(r"[0-9A-Za-z]{100}") + .unwrap(); + assert_eq!(0, re.max_haystack_len()); + } +}