diff --git a/CHANGELOG.md b/CHANGELOG.md
index f2949720f..b9256786f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,15 @@
+1.4.6 (2021-04-22)
+==================
+This is a small patch release that fixes the compiler's size check on how much
+heap memory a regex uses. Previously, the compiler did not account for the
+heap usage of Unicode character classes. Now it does. It's possible that this
+may make some regexes fail to compile that previously did compile. If that
+happens, please file an issue.
+
+* [BUG OSS-fuzz#33579](https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579):
+  Some regexes can use more heap memory than one would expect.
+
+
 1.4.5 (2021-03-14)
 ==================
 This is a small patch release that fixes a regression in the size of a `Regex`
diff --git a/src/compile.rs b/src/compile.rs
index 892037846..9bbd464e0 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -38,6 +38,7 @@ pub struct Compiler {
     suffix_cache: SuffixCache,
     utf8_seqs: Option<Utf8Sequences>,
     byte_classes: ByteClassSet,
+    extra_inst_bytes: usize,
 }
 
 impl Compiler {
@@ -54,6 +55,7 @@ impl Compiler {
             suffix_cache: SuffixCache::new(1000),
             utf8_seqs: Some(Utf8Sequences::new('\x00', '\x00')),
             byte_classes: ByteClassSet::new(),
+            extra_inst_bytes: 0,
         }
     }
 
@@ -420,6 +422,8 @@ impl Compiler {
     }
 
     fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
+        use std::mem::size_of;
+
         assert!(!ranges.is_empty());
         if self.compiled.uses_bytes() {
             Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?))
@@ -429,6 +433,8 @@ impl Compiler {
             let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 {
                 self.push_hole(InstHole::Char { c: ranges[0].0 })
             } else {
+                self.extra_inst_bytes +=
+                    ranges.len() * (size_of::<char>() * 2);
                 self.push_hole(InstHole::Ranges { ranges: ranges })
             };
             Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 }))
@@ -795,7 +801,9 @@ impl Compiler {
     fn check_size(&self) -> result::Result<(), Error> {
         use std::mem::size_of;
 
-        if self.insts.len() * size_of::<Inst>() > self.size_limit {
+        let size =
+            self.extra_inst_bytes + (self.insts.len() * size_of::<Inst>());
+        if size > self.size_limit {
             Err(Error::CompiledTooBig(self.size_limit))
         } else {
             Ok(())
diff --git a/tests/regression_fuzz.rs b/tests/regression_fuzz.rs
index 5f92ed046..4e76704d2 100644
--- a/tests/regression_fuzz.rs
+++ b/tests/regression_fuzz.rs
@@ -17,3 +17,15 @@ fn fuzz1() {
 fn empty_any_errors_no_panic() {
     assert!(regex_new!(r"\P{any}").is_err());
 }
+
+// This tests that a very large regex errors during compilation instead of
+// using gratuitous amounts of memory. The specific problem is that the
+// compiler wasn't accounting for the memory used by Unicode character classes
+// correctly.
+//
+// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579
+#[test]
+fn big_regex_fails_to_compile() {
+    let pat = "[\u{0}\u{e}\u{2}\\w~~>[l\t\u{0}]p?<]{971158}";
+    assert!(regex_new!(pat).is_err());
+}