From ad2eb933aa5e383ec8488646b721650f4696c6f7 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 6 Mar 2023 10:41:20 -0500 Subject: [PATCH] doc: tweak docs for 'shortest_match' The name is somewhat unfortunate, but it's actually kind of difficult to capture the right semantics in the name. The key bit is that the function returns the offset at the point at which a match is known, and that point might vary depending on which internal regex engine was used. Fixes #747 --- src/re_bytes.rs | 9 ++++++++- src/re_unicode.rs | 17 ++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/re_bytes.rs b/src/re_bytes.rs index 64c09a725..7d488a95b 100644 --- a/src/re_bytes.rs +++ b/src/re_bytes.rs @@ -549,7 +549,14 @@ impl Regex { /// This method may have the same performance characteristics as /// `is_match`, except it provides an end location for a match. In /// particular, the location returned *may be shorter* than the proper end - /// of the leftmost-first match. + /// of the leftmost-first match that you would find via `Regex::find`. + /// + /// Note that it is not guaranteed that this routine finds the shortest or + /// "earliest" possible match. Instead, the main idea of this API is that + /// it returns the offset at the point at which the internal regex engine + /// has determined that a match has occurred. This may vary depending on + /// which internal regex engine is used, and thus, the offset itself may + /// change. /// /// # Example /// diff --git a/src/re_unicode.rs b/src/re_unicode.rs index bee365e8d..1e8bd0453 100644 --- a/src/re_unicode.rs +++ b/src/re_unicode.rs @@ -607,7 +607,14 @@ impl Regex { /// This method may have the same performance characteristics as /// `is_match`, except it provides an end location for a match. In /// particular, the location returned *may be shorter* than the proper end - /// of the leftmost-first match. + /// of the leftmost-first match that you would find via `Regex::find`. + /// + /// Note that it is not guaranteed that this routine finds the shortest or + /// "earliest" possible match. Instead, the main idea of this API is that + /// it returns the offset at the point at which the internal regex engine + /// has determined that a match has occurred. This may vary depending on + /// which internal regex engine is used, and thus, the offset itself may + /// change. /// /// # Example /// @@ -627,12 +634,12 @@ impl Regex { self.shortest_match_at(text, 0) } - /// Returns the same as shortest_match, but starts the search at the given - /// offset. + /// Returns the same as `shortest_match`, but starts the search at the + /// given offset. /// /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. + /// context into consideration. For example, the `\A` anchor can only match + /// when `start == 0`. pub fn shortest_match_at( &self, text: &str,