From 3d15101bb56a49670c8cabc785a40526ed962db9 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 6 Mar 2023 09:39:21 -0500 Subject: [PATCH] bug: fix CaptureLocations::get to handle invalid offsets The contract of this function says that any invalid group offset should result in a return value of None. In general, it worked fine, unless the offset was so big that some internal multiplication overflowed. That could in turn produce an incorrect result or a panic. So we fix that here with checked arithmetic. Fixes #738, Fixes #950 --- src/re_bytes.rs | 21 +++++++++++++++++++++ src/re_trait.rs | 2 +- src/re_unicode.rs | 21 +++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/re_bytes.rs b/src/re_bytes.rs index b8f9738e8..b86973d0b 100644 --- a/src/re_bytes.rs +++ b/src/re_bytes.rs @@ -896,6 +896,27 @@ impl<'r> FusedIterator for CaptureNames<'r> {} /// In order to build a value of this type, you'll need to call the /// `capture_locations` method on the `Regex` being used to execute the search. /// The value returned can then be reused in subsequent searches. +/// +/// # Example +/// +/// This example shows how to create and use `CaptureLocations` in a search. +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<last>\w+)").unwrap(); +/// let mut locs = re.capture_locations(); +/// let m = re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); +/// assert_eq!(0..17, m.range()); +/// assert_eq!(Some((0, 17)), locs.get(0)); +/// assert_eq!(Some((0, 5)), locs.get(1)); +/// assert_eq!(Some((6, 17)), locs.get(2)); +/// +/// // Asking for an invalid capture group always returns None. 
+/// assert_eq!(None, locs.get(3)); +/// assert_eq!(None, locs.get(34973498648)); +/// assert_eq!(None, locs.get(9944060567225171988)); +/// ``` #[derive(Clone, Debug)] pub struct CaptureLocations(re_trait::Locations); diff --git a/src/re_trait.rs b/src/re_trait.rs index d0c717df5..505810c84 100644 --- a/src/re_trait.rs +++ b/src/re_trait.rs @@ -20,7 +20,7 @@ impl Locations { /// not match anything. The positions returned are *always* byte indices /// with respect to the original string matched. pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - let (s, e) = (i * 2, i * 2 + 1); + let (s, e) = (i.checked_mul(2)?, i.checked_mul(2)?.checked_add(1)?); match (self.0.get(s), self.0.get(e)) { (Some(&Some(s)), Some(&Some(e))) => Some((s, e)), _ => None, diff --git a/src/re_unicode.rs b/src/re_unicode.rs index 0e7fc70a4..41bd8ac09 100644 --- a/src/re_unicode.rs +++ b/src/re_unicode.rs @@ -906,6 +906,27 @@ impl<'r, 't> FusedIterator for SplitN<'r, 't> {} /// In order to build a value of this type, you'll need to call the /// `capture_locations` method on the `Regex` being used to execute the search. /// The value returned can then be reused in subsequent searches. +/// +/// # Example +/// +/// This example shows how to create and use `CaptureLocations` in a search. +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<last>\w+)").unwrap(); +/// let mut locs = re.capture_locations(); +/// let m = re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); +/// assert_eq!(0..17, m.range()); +/// assert_eq!(Some((0, 17)), locs.get(0)); +/// assert_eq!(Some((0, 5)), locs.get(1)); +/// assert_eq!(Some((6, 17)), locs.get(2)); +/// +/// // Asking for an invalid capture group always returns None. +/// assert_eq!(None, locs.get(3)); +/// assert_eq!(None, locs.get(34973498648)); +/// assert_eq!(None, locs.get(9944060567225171988)); +/// ``` #[derive(Clone, Debug)] pub struct CaptureLocations(re_trait::Locations);