From 1b4cddcbfd61d05d42995cd38387e2faabe6156a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Sun, 15 Mar 2015 01:48:34 +0100 Subject: [PATCH 1/6] Implemented remaining string pattern iterators. - Added missing reverse versions of methods - Added [r]matches() - Generated the string pattern iterators with a macro - Added where bounds to the methods returning reverse iterators for better error messages. --- src/libcollections/str.rs | 320 ++++++++++++++++--- src/libcore/str/mod.rs | 639 +++++++++++++++++++++---------------- src/libcore/str/pattern.rs | 183 +++++++---- 3 files changed, 753 insertions(+), 389 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index c22b6fb9286d1..08af7879688aa 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -69,9 +69,12 @@ use vec::Vec; use slice::SliceConcatExt; pub use core::str::{FromStr, Utf8Error, Str}; -pub use core::str::{Lines, LinesAny, MatchIndices, CharRange}; -pub use core::str::{Split, SplitTerminator, SplitN}; -pub use core::str::{RSplit, RSplitN}; +pub use core::str::{Lines, LinesAny, CharRange}; +pub use core::str::{Split, RSplit}; +pub use core::str::{SplitN, RSplitN}; +pub use core::str::{SplitTerminator, RSplitTerminator}; +pub use core::str::{Matches, RMatches}; +pub use core::str::{MatchIndices, RMatchIndices}; pub use core::str::{from_utf8, Chars, CharIndices, Bytes}; pub use core::str::{from_utf8_unchecked, ParseBoolError}; pub use unicode::str::{Words, Graphemes, GraphemeIndices}; @@ -581,12 +584,22 @@ impl str { /// An iterator over substrings of `self`, separated by characters /// matched by a pattern. /// - /// The pattern can be a simple `&str`, or a closure that determines + /// The pattern can be a simple `&str`, `char`, or a closure that determines /// the split. + /// Additional libraries might provide more complex patterns like regular expressions. 
+ /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows a reverse search + /// and forward/reverse search yields the same elements. This is true for, eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rsplit()` can be used. /// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect(); @@ -594,6 +607,12 @@ impl str { /// /// let v: Vec<&str> = "".split('X').collect(); /// assert_eq!(v, [""]); + /// + /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); + /// assert_eq!(v, ["lion", "", "tiger", "leopard"]); + /// + /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect(); + /// assert_eq!(v, ["lion", "tiger", "leopard"]); /// ``` /// /// More complex patterns with a lambda: @@ -602,69 +621,92 @@ impl str { /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect(); /// assert_eq!(v, ["abc", "def", "ghi"]); /// - /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); - /// assert_eq!(v, ["lion", "", "tiger", "leopard"]); + /// let v: Vec<&str> = "lionXtigerXleopard".split(|c: char| c.is_uppercase()).collect(); + /// assert_eq!(v, ["lion", "tiger", "leopard"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { core_str::StrExt::split(&self[..], pat) } - /// An iterator over substrings of `self`, separated by characters matched - /// by a pattern, returning most `count` items. + /// An iterator over substrings of `self`, separated by characters + /// matched by a pattern and yielded in reverse order. /// - /// The pattern can be a simple `&str`, or a closure that determines + /// The pattern can be a simple `&str`, `char`, or a closure that determines /// the split. 
+ /// Additional libraries might provide more complex patterns like regular expressions. /// - /// The last element returned, if any, will contain the remainder of the - /// string. + /// # Iterator behavior /// - /// # Examples + /// The returned iterator requires that the pattern supports a reverse search, + /// and it will be double ended if a forward/reverse search yields the same elements. /// - /// Simple `&str` patterns: + /// For iterating from the front, `split()` can be used. /// - /// ``` - /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect(); - /// assert_eq!(v, ["Mary", "had a little lambda"]); + /// # Examples /// - /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect(); - /// assert_eq!(v, ["lion", "XtigerXleopard"]); + /// Simple patterns: /// - /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect(); - /// assert_eq!(v, ["abcXdef"]); + /// ```rust + /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect(); + /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]); /// - /// let v: Vec<&str> = "".splitn(1, 'X').collect(); + /// let v: Vec<&str> = "".rsplit('X').collect(); /// assert_eq!(v, [""]); + /// + /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect(); + /// assert_eq!(v, ["leopard", "tiger", "", "lion"]); + /// + /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect(); + /// assert_eq!(v, ["leopard", "tiger", "lion"]); /// ``` /// /// More complex patterns with a lambda: /// - /// ``` - /// let v: Vec<&str> = "abc1def2ghi".splitn(2, |c: char| c.is_numeric()).collect(); - /// assert_eq!(v, ["abc", "def2ghi"]); + /// ```rust + /// let v: Vec<&str> = "abc1def2ghi".rsplit(|c: char| c.is_numeric()).collect(); + /// assert_eq!(v, ["ghi", "def", "abc"]); + /// + /// let v: Vec<&str> = "lionXtigerXleopard".rsplit(|c: char| c.is_uppercase()).collect(); + /// assert_eq!(v, ["leopard", "tiger", "lion"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn 
splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { - core_str::StrExt::splitn(&self[..], count, pat) + pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rsplit(&self[..], pat) } /// An iterator over substrings of `self`, separated by characters /// matched by a pattern. /// + /// The pattern can be a simple `&str`, `char`, or a closure that determines + /// the split. + /// Additional libraries might provide more complex patterns like regular expressions. + /// /// Equivalent to `split`, except that the trailing substring is skipped if empty. /// - /// The pattern can be a simple `&str`, or a closure that determines - /// the split. + /// This method can be used for string data that is _terminated_, rather than + /// _separated_ by some string. + /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows a reverse search + /// and forward/reverse search yields the same elements. This is true for, eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rsplit_terminator()` can be used. /// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// let v: Vec<&str> = "A.B.".split_terminator('.').collect(); /// assert_eq!(v, ["A", "B"]); /// - /// let v: Vec<&str> = "A..B..".split_terminator('.').collect(); + /// let v: Vec<&str> = "A..B..".split_terminator(".").collect(); /// assert_eq!(v, ["A", "", "B", ""]); /// ``` /// @@ -679,32 +721,93 @@ impl str { core_str::StrExt::split_terminator(&self[..], pat) } - /// An iterator over substrings of `self`, separated by a pattern, - /// starting from the end of the string. + /// An iterator over substrings of `self`, separated by characters + /// matched by a pattern and yielded in reverse order. 
+ /// + /// The pattern can be a simple `&str`, `char`, or a closure that determines + /// the split. + /// Additional libraries might provide more complex patterns like regular expressions. + /// + /// Equivalent to `split`, except that the trailing substring is skipped if empty. + /// + /// This method can be used for string data that is _terminated_, rather than + /// _separated_ by some string. + /// + /// # Iterator behavior + /// + /// The returned iterator requires that the pattern supports a reverse search, + /// and it will be double ended if a forward/reverse search yields the same elements. + /// + /// For iterating from the front, `split_terminator()` can be used. /// /// # Examples /// /// Simple patterns: /// /// ``` - /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect(); - /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]); + /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect(); + /// assert_eq!(v, ["B", "A"]); /// - /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect(); - /// assert_eq!(v, ["leopard", "tiger", "lion"]); + /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect(); + /// assert_eq!(v, ["", "B", "", "A"]); /// ``` /// /// More complex patterns with a lambda: /// /// ``` - /// let v: Vec<&str> = "abc1def2ghi".rsplit(|c: char| c.is_numeric()).collect(); + /// let v: Vec<&str> = "abc1def2ghi3".rsplit_terminator(|c: char| c.is_numeric()).collect(); /// assert_eq!(v, ["ghi", "def", "abc"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + pub fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> where P::Searcher: ReverseSearcher<'a> { - core_str::StrExt::rsplit(&self[..], pat) + core_str::StrExt::rsplit_terminator(&self[..], pat) + } + + /// An iterator over substrings of `self`, separated by a pattern, + /// restricted to returning + /// at most `count` items. 
+ /// + /// The last element returned, if any, will contain the remainder of the + /// string. + /// The pattern can be a simple `&str`, `char`, or a closure that determines + /// the split. + /// Additional libraries might provide more complex patterns like regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will not be double ended, because it is not efficient to support. + /// + /// If the pattern allows a reverse search, `rsplitn()` can be used. + /// + /// # Examples + /// + /// Simple patterns: + /// + /// ``` + /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect(); + /// assert_eq!(v, ["Mary", "had", "a little lambda"]); + /// + /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect(); + /// assert_eq!(v, ["lion", "", "tigerXleopard"]); + /// + /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect(); + /// assert_eq!(v, ["abcXdef"]); + /// + /// let v: Vec<&str> = "".splitn(1, 'X').collect(); + /// assert_eq!(v, [""]); + /// ``` + /// + /// More complex patterns with a lambda: + /// + /// ``` + /// let v: Vec<&str> = "abc1def2ghi".splitn(2, |c: char| c.is_numeric()).collect(); + /// assert_eq!(v, ["abc", "def2ghi"]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { + core_str::StrExt::splitn(&self[..], count, pat) } /// An iterator over substrings of `self`, separated by a pattern, @@ -714,6 +817,16 @@ impl str { /// The last element returned, if any, will contain the remainder of the /// string. /// + /// The pattern can be a simple `&str`, `char`, or a closure that determines + /// the split. + /// Additional libraries might provide more complex patterns like regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will not be double ended, because it is not efficient to support. + /// + /// `splitn()` can be used for splitting from the front. 
+ /// /// # Examples /// /// Simple patterns: @@ -722,6 +835,9 @@ impl str { /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect(); /// assert_eq!(v, ["lamb", "little", "Mary had a"]); /// + /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect(); + /// assert_eq!(v, ["leopard", "tiger", "lionX"]); + /// /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect(); /// assert_eq!(v, ["leopard", "lion::tiger"]); /// ``` @@ -739,13 +855,87 @@ impl str { core_str::StrExt::rsplitn(&self[..], count, pat) } - /// An iterator over the start and end indices of the disjoint matches of a `&str` within + /// An iterator over the matches of a pattern within `self`. + /// + /// The pattern can be a simple `&str`, `char`, or a closure that determines + /// the split. + /// Additional libraries might provide more complex patterns like regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows a reverse search + /// and forward/reverse search yields the same elements. This is true for, eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rmatches()` can be used. + /// + /// # Examples + /// + /// ``` + /// # #![feature(collections)] + /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect(); + /// assert_eq!(v, ["abc", "abc", "abc"]); + /// + /// let v: Vec<&str> = "1abc2abc3".matches(|c: char| c.is_numeric()).collect(); + /// assert_eq!(v, ["1", "2", "3"]); + /// ``` + #[unstable(feature = "collections", + reason = "method got recently added")] + pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { + core_str::StrExt::matches(&self[..], pat) + } + + /// An iterator over the matches of a pattern within `self`, yielded in reverse order. + /// + /// The pattern can be a simple `&str`, `char`, or a closure that determines + /// the split. 
+ /// Additional libraries might provide more complex patterns like regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator requires that the pattern supports a reverse search, + /// and it will be double ended if a forward/reverse search yields the same elements. + /// + /// For iterating from the front, `matches()` can be used. + /// + /// # Examples + /// + /// ``` + /// # #![feature(collections)] + /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect(); + /// assert_eq!(v, ["abc", "abc", "abc"]); + /// + /// let v: Vec<&str> = "1abc2abc3".rmatches(|c: char| c.is_numeric()).collect(); + /// assert_eq!(v, ["3", "2", "1"]); + /// ``` + #[unstable(feature = "collections", + reason = "method got recently added")] + pub fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rmatches(&self[..], pat) + } + + /// An iterator over the start and end indices of the disjoint matches of a pattern within /// `self`. /// - /// That is, each returned value `(start, end)` satisfies `self.slice(start, end) == sep`. For - /// matches of `sep` within `self` that overlap, only the indices corresponding to the first + /// For matches of `pat` within `self` that overlap, only the indices corresponding to the first /// match are returned. /// + /// The pattern can be a simple `&str`, `char`, or a closure that determines + /// the split. + /// Additional libraries might provide more complex patterns like regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator will be double ended if the pattern allows a reverse search + /// and forward/reverse search yields the same elements. This is true for, eg, `char` but not + /// for `&str`. + /// + /// If the pattern allows a reverse search but its results might differ + /// from a forward search, `rmatch_indices()` can be used. 
+ /// /// # Examples /// /// ``` @@ -761,12 +951,52 @@ impl str { /// ``` #[unstable(feature = "collections", reason = "might have its iterator type changed")] - // NB: Right now MatchIndices yields `(usize, usize)`, - // but it would be more consistent and useful to return `(usize, &str)` + // NB: Right now MatchIndices yields `(usize, usize)`, but it would + // be more consistent with `matches` and `char_indices` to return `(usize, &str)` pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { core_str::StrExt::match_indices(&self[..], pat) } + /// An iterator over the start and end indices of the disjoint matches of a pattern within + /// `self`, yielded in reverse order. + /// + /// For matches of `pat` within `self` that overlap, only the indices corresponding to the last + /// match are returned. + /// + /// The pattern can be a simple `&str`, `char`, or a closure that determines + /// the split. + /// Additional libraries might provide more complex patterns like regular expressions. + /// + /// # Iterator behavior + /// + /// The returned iterator requires that the pattern supports a reverse search, + /// and it will be double ended if a forward/reverse search yields the same elements. + /// + /// For iterating from the front, `match_indices()` can be used. 
+ /// + /// # Examples + /// + /// ``` + /// # #![feature(collections)] + /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".rmatch_indices("abc").collect(); + /// assert_eq!(v, [(12,15), (6,9), (0,3)]); + /// + /// let v: Vec<(usize, usize)> = "1abcabc2".rmatch_indices("abc").collect(); + /// assert_eq!(v, [(4,7), (1,4)]); + /// + /// let v: Vec<(usize, usize)> = "ababa".rmatch_indices("aba").collect(); + /// assert_eq!(v, [(2, 5)]); // only the last `aba` + /// ``` + #[unstable(feature = "collections", + reason = "might have its iterator type changed")] + // NB: Right now RMatchIndices yields `(usize, usize)`, but it would + // be more consistent with `rmatches` and `char_indices` to return `(usize, &str)` + pub fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + core_str::StrExt::rmatch_indices(&self[..], pat) + } + /// An iterator over the lines of a string, separated by `\n`. /// /// This does not include the empty string after a trailing `\n`. diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index dbb365c4e2357..99284036fd221 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -39,96 +39,6 @@ pub use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchSt mod pattern; -macro_rules! 
delegate_iter { - (exact $te:ty : $ti:ty) => { - delegate_iter!{$te : $ti} - impl<'a> ExactSizeIterator for $ti { - #[inline] - fn len(&self) -> usize { - self.0.len() - } - } - }; - ($te:ty : $ti:ty) => { - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a> Iterator for $ti { - type Item = $te; - - #[inline] - fn next(&mut self) -> Option<$te> { - self.0.next() - } - #[inline] - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } - } - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a> DoubleEndedIterator for $ti { - #[inline] - fn next_back(&mut self) -> Option<$te> { - self.0.next_back() - } - } - }; - (pattern $te:ty : $ti:ty) => { - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a, P: Pattern<'a>> Iterator for $ti { - type Item = $te; - - #[inline] - fn next(&mut self) -> Option<$te> { - self.0.next() - } - #[inline] - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } - } - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a, P: Pattern<'a>> DoubleEndedIterator for $ti - where P::Searcher: DoubleEndedSearcher<'a> { - #[inline] - fn next_back(&mut self) -> Option<$te> { - self.0.next_back() - } - } - }; - (pattern forward $te:ty : $ti:ty) => { - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a, P: Pattern<'a>> Iterator for $ti - where P::Searcher: DoubleEndedSearcher<'a> { - type Item = $te; - - #[inline] - fn next(&mut self) -> Option<$te> { - self.0.next() - } - #[inline] - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } - } - }; - (pattern reverse $te:ty : $ti:ty) => { - #[stable(feature = "rust1", since = "1.0.0")] - impl<'a, P: Pattern<'a>> Iterator for $ti - where P::Searcher: ReverseSearcher<'a> - { - type Item = $te; - - #[inline] - fn next(&mut self) -> Option<$te> { - self.0.next() - } - #[inline] - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } - } - }; -} - /// A trait to abstract the idea of creating a new instance of a type from a /// string. 
#[stable(feature = "rust1", since = "1.0.0")] @@ -444,11 +354,9 @@ impl<'a> DoubleEndedIterator for CharIndices<'a> { #[stable(feature = "rust1", since = "1.0.0")] #[derive(Clone)] pub struct Bytes<'a>(Map<slice::Iter<'a, u8>, BytesDeref>); -delegate_iter!{exact u8 : Bytes<'a>} -/// A temporary fn new type that ensures that the `Bytes` iterator -/// is cloneable. -#[derive(Copy, Clone)] +/// A nameable, clonable fn type +#[derive(Clone)] struct BytesDeref; impl<'a> Fn<(&'a u8,)> for BytesDeref { @@ -474,58 +382,173 @@ impl<'a> FnOnce<(&'a u8,)> for BytesDeref { } } -/// An iterator over the substrings of a string, separated by `sep`. -struct CharSplits<'a, P: Pattern<'a>> { - /// The slice remaining to be iterated - start: usize, - end: usize, - matcher: P::Searcher, - /// Whether an empty string at the end is allowed - allow_trailing_empty: bool, - finished: bool, -} +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> Iterator for Bytes<'a> { + type Item = u8; -/// An iterator over the substrings of a string, separated by `sep`, -/// splitting at most `count` times. -struct CharSplitsN<'a, P: Pattern<'a>> { - iter: CharSplits<'a, P>, - /// The number of items remaining - count: usize, -} + #[inline] + fn next(&mut self) -> Option<u8> { + self.0.next() + } -/// An iterator over the substrings of a string, separated by a -/// pattern, in reverse order. -struct RCharSplits<'a, P: Pattern<'a>> { - /// The slice remaining to be iterated - start: usize, - end: usize, - matcher: P::Searcher, - /// Whether an empty string at the end of iteration is allowed - allow_final_empty: bool, - finished: bool, + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } } -/// An iterator over the substrings of a string, separated by a -/// pattern, splitting at most `count` times, in reverse order. 
-struct RCharSplitsN<'a, P: Pattern<'a>> { - iter: RCharSplits<'a, P>, - /// The number of splits remaining - count: usize, +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> DoubleEndedIterator for Bytes<'a> { + #[inline] + fn next_back(&mut self) -> Option<u8> { + self.0.next_back() + } } -/// An iterator over the lines of a string, separated by `\n`. #[stable(feature = "rust1", since = "1.0.0")] -pub struct Lines<'a> { - inner: CharSplits<'a, char>, +impl<'a> ExactSizeIterator for Bytes<'a> { + #[inline] + fn len(&self) -> usize { + self.0.len() + } } -/// An iterator over the lines of a string, separated by either `\n` or (`\r\n`). -#[stable(feature = "rust1", since = "1.0.0")] -pub struct LinesAny<'a> { - inner: Map<Lines<'a>, fn(&str) -> &str>, +/// This macro generates two public iterator structs +/// wrapping a private internal one that makes use of the `Pattern` API. +/// +/// For all patterns `P: Pattern<'a>` the following items will be +/// generated (generics omitted): +/// +/// struct $forward_iterator($internal_iterator); +/// struct $reverse_iterator($internal_iterator); +/// +/// impl Iterator for $forward_iterator +/// { /* internal ends up calling Searcher::next_match() */ } +/// +/// impl DoubleEndedIterator for $forward_iterator +/// where P::Searcher: DoubleEndedSearcher +/// { /* internal ends up calling Searcher::next_match_back() */ } +/// +/// impl Iterator for $reverse_iterator +/// where P::Searcher: ReverseSearcher +/// { /* internal ends up calling Searcher::next_match_back() */ } +/// +/// impl DoubleEndedIterator for $reverse_iterator +/// where P::Searcher: DoubleEndedSearcher +/// { /* internal ends up calling Searcher::next_match() */ } +/// +/// The internal one is defined outside the macro, and has almost the same +/// semantics as a DoubleEndedIterator by delegating to `pattern::Searcher` and +/// `pattern::ReverseSearcher` for both forward and reverse iteration. 
+/// +/// "Almost", because a `Searcher` and a `ReverseSearcher` for a given +/// `Pattern` might not return the same elements, so actually implementing +/// `DoubleEndedIterator` for it would be incorrect. +/// (See the docs in `str::pattern` for more details) +/// +/// However, the internal struct still represents a single ended iterator from +/// either end, and depending on pattern is also a valid double ended iterator, +/// so the two wrapper structs implement `Iterator` +/// and `DoubleEndedIterator` depending on the concrete pattern type, leading +/// to the complex impls seen above. +macro_rules! generate_pattern_iterators { + { + // Forward iterator + forward: + $(#[$forward_iterator_attribute:meta])* + struct $forward_iterator:ident; + + // Reverse iterator + reverse: + $(#[$reverse_iterator_attribute:meta])* + struct $reverse_iterator:ident; + + // Stability of all generated items + stability: + $(#[$common_stability_attribute:meta])* + + // Internal almost-iterator that is being delegated to + internal: + $internal_iterator:ident yielding ($iterty:ty); + + // Kind of delegation - either single ended or double ended + delegate $($t:tt)* + } => { + $(#[$forward_iterator_attribute])* + $(#[$common_stability_attribute])* + pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>); + + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> { + type Item = $iterty; + + #[inline] + fn next(&mut self) -> Option<$iterty> { + self.0.next() + } + } + + $(#[$reverse_iterator_attribute])* + $(#[$common_stability_attribute])* + pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>); + + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + type Item = $iterty; + + #[inline] + fn next(&mut self) -> Option<$iterty> { + self.0.next_back() + } + } + + generate_pattern_iterators!($($t)* with 
$(#[$common_stability_attribute])*, + $forward_iterator, + $reverse_iterator, $iterty); + }; + { + double ended; with $(#[$common_stability_attribute:meta])*, + $forward_iterator:ident, + $reverse_iterator:ident, $iterty:ty + } => { + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P> + where P::Searcher: DoubleEndedSearcher<'a> + { + #[inline] + fn next_back(&mut self) -> Option<$iterty> { + self.0.next_back() + } + } + + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P> + where P::Searcher: DoubleEndedSearcher<'a> + { + #[inline] + fn next_back(&mut self) -> Option<$iterty> { + self.0.next() + } + } + }; + { + single ended; with $(#[$common_stability_attribute:meta])*, + $forward_iterator:ident, + $reverse_iterator:ident, $iterty:ty + } => {} +} + +struct SplitInternal<'a, P: Pattern<'a>> { + start: usize, + end: usize, + matcher: P::Searcher, + allow_trailing_empty: bool, + finished: bool, } -impl<'a, P: Pattern<'a>> CharSplits<'a, P> { +impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { #[inline] fn get_end(&mut self) -> Option<&'a str> { if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) { @@ -538,11 +561,6 @@ impl<'a, P: Pattern<'a>> CharSplits<'a, P> { None } } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for CharSplits<'a, P> { - type Item = &'a str; #[inline] fn next(&mut self) -> Option<&'a str> { @@ -558,13 +576,11 @@ impl<'a, P: Pattern<'a>> Iterator for CharSplits<'a, P> { None => self.get_end(), } } -} -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> DoubleEndedIterator for CharSplits<'a, P> -where P::Searcher: DoubleEndedSearcher<'a> { #[inline] - fn next_back(&mut self) -> Option<&'a str> { + fn next_back(&mut self) -> Option<&'a str> + where P::Searcher: ReverseSearcher<'a> + { if self.finished { return None } if !self.allow_trailing_empty { 
@@ -590,10 +606,41 @@ where P::Searcher: DoubleEndedSearcher<'a> { } } -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for CharSplitsN<'a, P> { - type Item = &'a str; +generate_pattern_iterators! { + forward: + /// Return type of `str::split()` + struct Split; + reverse: + /// Return type of `str::rsplit()` + struct RSplit; + stability: + #[stable(feature = "rust1", since = "1.0.0")] + internal: + SplitInternal yielding (&'a str); + delegate double ended; +} + +generate_pattern_iterators! { + forward: + /// Return type of `str::split_terminator()` + struct SplitTerminator; + reverse: + /// Return type of `str::rsplit_terminator()` + struct RSplitTerminator; + stability: + #[stable(feature = "rust1", since = "1.0.0")] + internal: + SplitInternal yielding (&'a str); + delegate double ended; +} + +struct SplitNInternal<'a, P: Pattern<'a>> { + iter: SplitInternal<'a, P>, + /// The number of splits remaining + count: usize, +} +impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> { #[inline] fn next(&mut self) -> Option<&'a str> { match self.count { @@ -602,58 +649,151 @@ impl<'a, P: Pattern<'a>> Iterator for CharSplitsN<'a, P> { _ => { self.count -= 1; self.iter.next() } } } -} -impl<'a, P: Pattern<'a>> RCharSplits<'a, P> { #[inline] - fn get_remainder(&mut self) -> Option<&'a str> { - if !self.finished && (self.allow_final_empty || self.end - self.start > 0) { - self.finished = true; - unsafe { - let string = self.matcher.haystack().slice_unchecked(self.start, self.end); - Some(string) - } - } else { - None + fn next_back(&mut self) -> Option<&'a str> + where P::Searcher: ReverseSearcher<'a> + { + match self.count { + 0 => None, + 1 => { self.count = 0; self.iter.get_end() } + _ => { self.count -= 1; self.iter.next_back() } } } } +generate_pattern_iterators! 
{ + forward: + /// Return type of `str::splitn()` + struct SplitN; + reverse: + /// Return type of `str::rsplitn()` + struct RSplitN; + stability: + #[stable(feature = "rust1", since = "1.0.0")] + internal: + SplitNInternal yielding (&'a str); + delegate single ended; +} + +struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher); + +impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> { + #[inline] + fn next(&mut self) -> Option<(usize, usize)> { + self.0.next_match() + } + + #[inline] + fn next_back(&mut self) -> Option<(usize, usize)> + where P::Searcher: ReverseSearcher<'a> + { + self.0.next_match_back() + } +} + +generate_pattern_iterators! { + forward: + /// Return type of `str::match_indices()` + struct MatchIndices; + reverse: + /// Return type of `str::rmatch_indices()` + struct RMatchIndices; + stability: + #[unstable(feature = "core", + reason = "type may be removed or have its iterator impl changed")] + internal: + MatchIndicesInternal yielding ((usize, usize)); + delegate double ended; +} + +struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher); + +impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> { + #[inline] + fn next(&mut self) -> Option<&'a str> { + self.0.next_match().map(|(a, b)| unsafe { + // Indices are known to be on utf8 boundaries + self.0.haystack().slice_unchecked(a, b) + }) + } + + #[inline] + fn next_back(&mut self) -> Option<&'a str> + where P::Searcher: ReverseSearcher<'a> + { + self.0.next_match_back().map(|(a, b)| unsafe { + // Indices are known to be on utf8 boundaries + self.0.haystack().slice_unchecked(a, b) + }) + } +} + +generate_pattern_iterators! 
{ + forward: + /// Return type of `str::matches()` + struct Matches; + reverse: + /// Return type of `str::rmatches()` + struct RMatches; + stability: + #[unstable(feature = "core", reason = "type got recently added")] + internal: + MatchesInternal yielding (&'a str); + delegate double ended; +} + +/// Return type of `str::lines()` +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Lines<'a>(SplitTerminator<'a, char>); + #[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for RCharSplits<'a, P> - where P::Searcher: ReverseSearcher<'a> -{ +impl<'a> Iterator for Lines<'a> { type Item = &'a str; #[inline] fn next(&mut self) -> Option<&'a str> { - if self.finished { return None } + self.0.next() + } - let haystack = self.matcher.haystack(); - match self.matcher.next_match_back() { - Some((a, b)) => unsafe { - let elt = haystack.slice_unchecked(b, self.end); - self.end = a; - Some(elt) - }, - None => self.get_remainder(), - } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> DoubleEndedIterator for Lines<'a> { + #[inline] + fn next_back(&mut self) -> Option<&'a str> { + self.0.next_back() } } +/// Return type of `str::lines_any()` +#[stable(feature = "rust1", since = "1.0.0")] +pub struct LinesAny<'a>(Map, fn(&str) -> &str>); + #[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for RCharSplitsN<'a, P> - where P::Searcher: ReverseSearcher<'a> -{ +impl<'a> Iterator for LinesAny<'a> { type Item = &'a str; #[inline] fn next(&mut self) -> Option<&'a str> { - match self.count { - 0 => None, - 1 => { self.count -= 1; self.iter.get_remainder() } - _ => { self.count -= 1; self.iter.next() } - } + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> DoubleEndedIterator for LinesAny<'a> { + #[inline] + fn 
next_back(&mut self) -> Option<&'a str> { + self.0.next_back() } } @@ -939,22 +1079,6 @@ struct OldMatchIndices<'a, 'b> { searcher: OldSearcher } -// FIXME: #21637 Prevents a Clone impl -/// An iterator over the start and end indices of the matches of a -/// substring within a larger string -#[unstable(feature = "core", reason = "type may be removed")] -pub struct MatchIndices<'a, P: Pattern<'a>>(P::Searcher); - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, P: Pattern<'a>> Iterator for MatchIndices<'a, P> { - type Item = (usize, usize); - - #[inline] - fn next(&mut self) -> Option<(usize, usize)> { - self.0.next_match() - } -} - impl<'a, 'b> OldMatchIndices<'a, 'b> { #[inline] #[allow(dead_code)] @@ -1292,31 +1416,6 @@ impl<'a, S: ?Sized> Str for &'a S where S: Str { fn as_slice(&self) -> &str { Str::as_slice(*self) } } -/// Return type of `str::split` -#[stable(feature = "rust1", since = "1.0.0")] -pub struct Split<'a, P: Pattern<'a>>(CharSplits<'a, P>); -delegate_iter!{pattern &'a str : Split<'a, P>} - -/// Return type of `str::split_terminator` -#[stable(feature = "rust1", since = "1.0.0")] -pub struct SplitTerminator<'a, P: Pattern<'a>>(CharSplits<'a, P>); -delegate_iter!{pattern &'a str : SplitTerminator<'a, P>} - -/// Return type of `str::splitn` -#[stable(feature = "rust1", since = "1.0.0")] -pub struct SplitN<'a, P: Pattern<'a>>(CharSplitsN<'a, P>); -delegate_iter!{pattern forward &'a str : SplitN<'a, P>} - -/// Return type of `str::rsplit` -#[stable(feature = "rust1", since = "1.0.0")] -pub struct RSplit<'a, P: Pattern<'a>>(RCharSplits<'a, P>); -delegate_iter!{pattern reverse &'a str : RSplit<'a, P>} - -/// Return type of `str::rsplitn` -#[stable(feature = "rust1", since = "1.0.0")] -pub struct RSplitN<'a, P: Pattern<'a>>(RCharSplitsN<'a, P>); -delegate_iter!{pattern reverse &'a str : RSplitN<'a, P>} - /// Methods for string slices #[allow(missing_docs)] pub trait StrExt { @@ -1329,13 +1428,20 @@ pub trait StrExt { fn bytes<'a>(&'a self) -> 
Bytes<'a>; fn char_indices<'a>(&'a self) -> CharIndices<'a>; fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>; - fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>; - fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>; fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> where P::Searcher: ReverseSearcher<'a>; + fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>; fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> where P::Searcher: ReverseSearcher<'a>; + fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>; + fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> + where P::Searcher: ReverseSearcher<'a>; + fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>; + fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> + where P::Searcher: ReverseSearcher<'a>; fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>; + fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> + where P::Searcher: ReverseSearcher<'a>; fn lines<'a>(&'a self) -> Lines<'a>; fn lines_any<'a>(&'a self) -> LinesAny<'a>; fn char_len(&self) -> usize; @@ -1402,7 +1508,7 @@ impl StrExt for str { #[inline] fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { - Split(CharSplits { + Split(SplitInternal { start: 0, end: self.len(), matcher: pat.into_searcher(self), @@ -1411,55 +1517,72 @@ impl StrExt for str { }) } + #[inline] + fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + RSplit(self.split(pat).0) + } + #[inline] fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { - SplitN(CharSplitsN { + SplitN(SplitNInternal { iter: self.split(pat).0, count: count, }) } + #[inline] + fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: 
usize, pat: P) -> RSplitN<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + RSplitN(self.splitn(count, pat).0) + } + #[inline] fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> { - SplitTerminator(CharSplits { + SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 }) } #[inline] - fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> + fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> where P::Searcher: ReverseSearcher<'a> { - RSplit(RCharSplits { - start: 0, - end: self.len(), - matcher: pat.into_searcher(self), - allow_final_empty: true, - finished: false, - }) + RSplitTerminator(self.split_terminator(pat).0) } #[inline] - fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> + fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { + Matches(MatchesInternal(pat.into_searcher(self))) + } + + #[inline] + fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> where P::Searcher: ReverseSearcher<'a> { - RSplitN(RCharSplitsN { - iter: self.rsplit(pat).0, - count: count, - }) + RMatches(self.matches(pat).0) } #[inline] fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { - MatchIndices(pat.into_searcher(self)) + MatchIndices(MatchIndicesInternal(pat.into_searcher(self))) } + #[inline] + fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> + where P::Searcher: ReverseSearcher<'a> + { + RMatchIndices(self.match_indices(pat).0) + } #[inline] fn lines(&self) -> Lines { - Lines { inner: self.split_terminator('\n').0 } + Lines(self.split_terminator('\n')) } + #[inline] fn lines_any(&self) -> LinesAny { fn f(line: &str) -> &str { let l = line.len(); @@ -1468,7 +1591,7 @@ impl StrExt for str { } let f: fn(&str) -> &str = f; // coerce to fn pointer - LinesAny { inner: self.lines().map(f) } + LinesAny(self.lines().map(f)) } #[inline] @@ -1709,35 +1832,3 @@ 
impl<'a> Default for &'a str { #[stable(feature = "rust1", since = "1.0.0")] fn default() -> &'a str { "" } } - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> Iterator for Lines<'a> { - type Item = &'a str; - - #[inline] - fn next(&mut self) -> Option<&'a str> { self.inner.next() } - #[inline] - fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> DoubleEndedIterator for Lines<'a> { - #[inline] - fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> Iterator for LinesAny<'a> { - type Item = &'a str; - - #[inline] - fn next(&mut self) -> Option<&'a str> { self.inner.next() } - #[inline] - fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> DoubleEndedIterator for LinesAny<'a> { - #[inline] - fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() } -} diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index 922ab2c14a6b7..0c2a58f3ca7c3 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -8,6 +8,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +//! The string Pattern API. +//! +//! For more details, see the traits `Pattern`, `Searcher`, +//! `ReverseSearcher` and `DoubleEndedSearcher`. + use prelude::*; // Pattern @@ -223,7 +228,9 @@ pub unsafe trait ReverseSearcher<'a>: Searcher<'a> { /// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched. 
pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {} +///////////////////////////////////////////////////////////////////////////// // Impl for a CharEq wrapper +///////////////////////////////////////////////////////////////////////////// #[doc(hidden)] trait CharEq { @@ -261,6 +268,7 @@ impl<'a> CharEq for &'a [char] { struct CharEqPattern(C); +#[derive(Clone)] struct CharEqSearcher<'a, C: CharEq> { char_eq: C, haystack: &'a str, @@ -330,12 +338,15 @@ unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> { impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {} +///////////////////////////////////////////////////////////////////////////// // Impl for &str +///////////////////////////////////////////////////////////////////////////// // Todo: Optimize the naive implementation here +/// Associated type for `<&str as Pattern<'a>>::Searcher`. #[derive(Clone)] -struct StrSearcher<'a, 'b> { +pub struct StrSearcher<'a, 'b> { haystack: &'a str, needle: &'b str, start: usize, @@ -456,116 +467,148 @@ fn str_search_step(mut m: &mut StrSearcher, } } -macro_rules! char_eq_pattern_impl { - ($wrapper:ty, $wrapper_ident:ident) => { - fn into_searcher(self, haystack: &'a str) -> $wrapper { - $wrapper_ident(CharEqPattern(self).into_searcher(haystack)) +///////////////////////////////////////////////////////////////////////////// + +macro_rules! 
pattern_methods { + ($t:ty, $pmap:expr, $smap:expr) => { + // FIXME: #22463 + //type Searcher = $t; + + #[inline] + fn into_searcher(self, haystack: &'a str) -> $t { + $smap($pmap(self).into_searcher(haystack)) } + #[inline] fn is_contained_in(self, haystack: &'a str) -> bool { - CharEqPattern(self).is_contained_in(haystack) + $pmap(self).is_contained_in(haystack) } + #[inline] fn is_prefix_of(self, haystack: &'a str) -> bool { - CharEqPattern(self).is_prefix_of(haystack) + $pmap(self).is_prefix_of(haystack) } + #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool - where $wrapper: ReverseSearcher<'a> + where $t: ReverseSearcher<'a> { - CharEqPattern(self).is_suffix_of(haystack) + $pmap(self).is_suffix_of(haystack) } } } -// Pattern for char - -impl<'a> Pattern<'a> for char { - type Searcher = CharSearcher<'a>; - char_eq_pattern_impl!(CharSearcher<'a>, CharSearcher); +macro_rules! searcher_methods { + (forward) => { + #[inline] + fn haystack(&self) -> &'a str { + self.0.haystack() + } + #[inline] + fn next(&mut self) -> SearchStep { + self.0.next() + } + #[inline] + fn next_match(&mut self) -> Option<(usize, usize)> { + self.0.next_match() + } + #[inline] + fn next_reject(&mut self) -> Option<(usize, usize)> { + self.0.next_reject() + } + }; + (reverse) => { + #[inline] + fn next_back(&mut self) -> SearchStep { + self.0.next_back() + } + #[inline] + fn next_match_back(&mut self) -> Option<(usize, usize)> { + self.0.next_match_back() + } + #[inline] + fn next_reject_back(&mut self) -> Option<(usize, usize)> { + self.0.next_reject_back() + } + } } -pub struct CharSearcher<'a>(CharEqSearcher<'a, char>); +///////////////////////////////////////////////////////////////////////////// +// Impl for char +///////////////////////////////////////////////////////////////////////////// + +/// Associated type for `>::Searcher`. 
+#[derive(Clone)] +pub struct CharSearcher<'a>( as Pattern<'a>>::Searcher); unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { - #[inline] - fn haystack(&self) -> &'a str { self.0.haystack() } - #[inline] - fn next(&mut self) -> SearchStep { self.0.next() } + searcher_methods!(forward); } + unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { - #[inline] - fn next_back(&mut self) -> SearchStep { self.0.next_back() } + searcher_methods!(reverse); } -impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {} -// Pattern for &[char] +impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {} -impl<'a, 'b> Pattern<'a> for &'b [char] { - type Searcher = CharSliceSearcher<'a, 'b>; - char_eq_pattern_impl!(CharSliceSearcher<'a, 'b>, CharSliceSearcher); +/// Searches for chars that are equal to a given char +impl<'a> Pattern<'a> for char { + type Searcher = CharSearcher<'a>; + pattern_methods!(CharSearcher<'a>, CharEqPattern, CharSearcher); } -pub struct CharSliceSearcher<'a, 'b>(CharEqSearcher<'a, &'b [char]>); +///////////////////////////////////////////////////////////////////////////// +// Impl for &[char] +///////////////////////////////////////////////////////////////////////////// + +// Todo: Change / Remove due to ambiguity in meaning. + +/// Associated type for `<&[char] as Pattern<'a>>::Searcher`. 
+#[derive(Clone)] +pub struct CharSliceSearcher<'a, 'b>( as Pattern<'a>>::Searcher); unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> { - #[inline] - fn haystack(&self) -> &'a str { self.0.haystack() } - #[inline] - fn next(&mut self) -> SearchStep { self.0.next() } + searcher_methods!(forward); } + unsafe impl<'a, 'b> ReverseSearcher<'a> for CharSliceSearcher<'a, 'b> { - #[inline] - fn next_back(&mut self) -> SearchStep { self.0.next_back() } + searcher_methods!(reverse); } -impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {} -// Pattern for predicates +impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {} -impl<'a, F: FnMut(char) -> bool> Pattern<'a> for F { - type Searcher = CharPredSearcher<'a, F>; - char_eq_pattern_impl!(CharPredSearcher<'a, F>, CharPredSearcher); +/// Searches for chars that are equal to any of the chars in the array +impl<'a, 'b> Pattern<'a> for &'b [char] { + type Searcher = CharSliceSearcher<'a, 'b>; + pattern_methods!(CharSliceSearcher<'a, 'b>, CharEqPattern, CharSliceSearcher); } -pub struct CharPredSearcher<'a, F: FnMut(char) -> bool>(CharEqSearcher<'a, F>); +///////////////////////////////////////////////////////////////////////////// +// Impl for F: FnMut(char) -> bool +///////////////////////////////////////////////////////////////////////////// + +/// Associated type for `>::Searcher`. 
+#[derive(Clone)] +pub struct CharPredicateSearcher<'a, F>( as Pattern<'a>>::Searcher) + where F: FnMut(char) -> bool; -unsafe impl<'a, F> Searcher<'a> for CharPredSearcher<'a, F> +unsafe impl<'a, F> Searcher<'a> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool { - #[inline] - fn haystack(&self) -> &'a str { self.0.haystack() } - #[inline] - fn next(&mut self) -> SearchStep { self.0.next() } + searcher_methods!(forward); } -unsafe impl<'a, F> ReverseSearcher<'a> for CharPredSearcher<'a, F> + +unsafe impl<'a, F> ReverseSearcher<'a> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool { - #[inline] - fn next_back(&mut self) -> SearchStep { self.0.next_back() } + searcher_methods!(reverse); } -impl<'a, F> DoubleEndedSearcher<'a> for CharPredSearcher<'a, F> - where F: FnMut(char) -> bool -{} -// Pattern for &&str +impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F> + where F: FnMut(char) -> bool {} -impl<'a, 'b> Pattern<'a> for &'b &'b str { - type Searcher = <&'b str as Pattern<'a>>::Searcher; - #[inline] - fn into_searcher(self, haystack: &'a str) - -> <&'b str as Pattern<'a>>::Searcher { - (*self).into_searcher(haystack) - } - #[inline] - fn is_contained_in(self, haystack: &'a str) -> bool { - (*self).is_contained_in(haystack) - } - #[inline] - fn is_prefix_of(self, haystack: &'a str) -> bool { - (*self).is_prefix_of(haystack) - } - #[inline] - fn is_suffix_of(self, haystack: &'a str) -> bool { - (*self).is_suffix_of(haystack) - } +/// Searches for chars that match the given predicate +impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool { + type Searcher = CharPredicateSearcher<'a, F>; + pattern_methods!(CharPredicateSearcher<'a, F>, CharEqPattern, CharPredicateSearcher); } From c2bff14da1e1c5600b4d66a8324b4e9f522cb559 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Sun, 5 Apr 2015 18:52:14 +0200 Subject: [PATCH 2/6] Re-added Clone impls to all str iterators --- src/libcore/str/mod.rs | 91 
+++++++++++++++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 9 deletions(-) diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 99284036fd221..d31c1e274664d 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -413,6 +413,21 @@ impl<'a> ExactSizeIterator for Bytes<'a> { } } +/// This macro generates a Clone impl for string pattern API +/// wrapper types of the form X<'a, P> +macro_rules! derive_pattern_clone { + (clone $t:ident with |$s:ident| $e:expr) => { + impl<'a, P: Pattern<'a>> Clone for $t<'a, P> + where P::Searcher: Clone + { + fn clone(&self) -> Self { + let $s = self; + $e + } + } + } +} + /// This macro generates two public iterator structs /// wrapping an private internal one that makes use of the `Pattern` API. /// @@ -488,6 +503,15 @@ macro_rules! generate_pattern_iterators { } } + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P> + where P::Searcher: Clone + { + fn clone(&self) -> Self { + $forward_iterator(self.0.clone()) + } + } + $(#[$reverse_iterator_attribute])* $(#[$common_stability_attribute])* pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>); @@ -504,6 +528,15 @@ macro_rules! generate_pattern_iterators { } } + $(#[$common_stability_attribute])* + impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P> + where P::Searcher: Clone + { + fn clone(&self) -> Self { + $reverse_iterator(self.0.clone()) + } + } + generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*, $forward_iterator, $reverse_iterator, $iterty); @@ -540,6 +573,10 @@ macro_rules! generate_pattern_iterators { } => {} } +derive_pattern_clone!{ + clone SplitInternal + with |s| SplitInternal { matcher: s.matcher.clone(), ..*s } +} struct SplitInternal<'a, P: Pattern<'a>> { start: usize, end: usize, @@ -634,6 +671,10 @@ generate_pattern_iterators! 
{ delegate double ended; } +derive_pattern_clone!{ + clone SplitNInternal + with |s| SplitNInternal { iter: s.iter.clone(), ..*s } +} struct SplitNInternal<'a, P: Pattern<'a>> { iter: SplitInternal<'a, P>, /// The number of splits remaining @@ -676,6 +717,10 @@ generate_pattern_iterators! { delegate single ended; } +derive_pattern_clone!{ + clone MatchIndicesInternal + with |s| MatchIndicesInternal(s.0.clone()) +} struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher); impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> { @@ -707,6 +752,10 @@ generate_pattern_iterators! { delegate double ended; } +derive_pattern_clone!{ + clone MatchesInternal + with |s| MatchesInternal(s.0.clone()) +} struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher); impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> { @@ -745,6 +794,7 @@ generate_pattern_iterators! { /// Return type of `str::lines()` #[stable(feature = "rust1", since = "1.0.0")] +#[derive(Clone)] pub struct Lines<'a>(SplitTerminator<'a, char>); #[stable(feature = "rust1", since = "1.0.0")] @@ -772,7 +822,37 @@ impl<'a> DoubleEndedIterator for Lines<'a> { /// Return type of `str::lines_any()` #[stable(feature = "rust1", since = "1.0.0")] -pub struct LinesAny<'a>(Map, fn(&str) -> &str>); +#[derive(Clone)] +pub struct LinesAny<'a>(Map, LinesAnyMap>); + +/// A nameable, clonable fn type +#[derive(Clone)] +struct LinesAnyMap; + +impl<'a> Fn<(&'a str,)> for LinesAnyMap { + #[inline] + extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str { + let l = line.len(); + if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. 
l - 1] } + else { line } + } +} + +impl<'a> FnMut<(&'a str,)> for LinesAnyMap { + #[inline] + extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str { + Fn::call(&*self, (line,)) + } +} + +impl<'a> FnOnce<(&'a str,)> for LinesAnyMap { + type Output = &'a str; + + #[inline] + extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str { + Fn::call(&self, (line,)) + } +} #[stable(feature = "rust1", since = "1.0.0")] impl<'a> Iterator for LinesAny<'a> { @@ -1584,14 +1664,7 @@ impl StrExt for str { #[inline] fn lines_any(&self) -> LinesAny { - fn f(line: &str) -> &str { - let l = line.len(); - if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] } - else { line } - } - - let f: fn(&str) -> &str = f; // coerce to fn pointer - LinesAny(self.lines().map(f)) + LinesAny(self.lines().map(LinesAnyMap)) } #[inline] From 91d1aa71f6c4317d91bc04a53213b04f13b09c44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Sat, 4 Apr 2015 12:03:59 +0200 Subject: [PATCH 3/6] Format all str docs to 80 char line breaks --- src/libcollections/str.rs | 283 +++++++++++++++++++++++--------------- 1 file changed, 174 insertions(+), 109 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 08af7879688aa..7c562baa82b07 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -432,7 +432,8 @@ impl str { /// Replaces all occurrences of one string with another. /// - /// `replace` takes two arguments, a sub-`&str` to find in `self`, and a second `&str` to + /// `replace` takes two arguments, a sub-`&str` to find in `self`, and a + /// second `&str` to /// replace it with. If the original `&str` isn't found, no change occurs. /// /// # Examples @@ -584,14 +585,16 @@ impl str { /// An iterator over substrings of `self`, separated by characters /// matched by a pattern. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines - /// the split. 
- /// Additional libraries might provide more complex patterns like regular expressions. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// /// # Iterator behavior /// - /// The returned iterator will be double ended if the pattern allows a reverse search - /// and forward/reverse search yields the same elements. This is true for, eg, `char` but not + /// The returned iterator will be double ended if the pattern allows a + /// reverse search and forward/reverse search yields the same elements. + /// This is true for, eg, `char` but not /// for `&str`. /// /// If the pattern allows a reverse search but its results might differ @@ -615,13 +618,13 @@ impl str { /// assert_eq!(v, ["lion", "tiger", "leopard"]); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect(); /// assert_eq!(v, ["abc", "def", "ghi"]); /// - /// let v: Vec<&str> = "lionXtigerXleopard".split(|c: char| c.is_uppercase()).collect(); + /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect(); /// assert_eq!(v, ["lion", "tiger", "leopard"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] @@ -632,14 +635,17 @@ impl str { /// An iterator over substrings of `self`, separated by characters /// matched by a pattern and yielded in reverse order. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines - /// the split. - /// Additional libraries might provide more complex patterns like regular expressions. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. 
/// /// # Iterator behavior /// - /// The returned iterator requires that the pattern supports a reverse search, - /// and it will be double ended if a forward/reverse search yields the same elements. + /// The returned iterator requires that the pattern supports a + /// reverse search, + /// and it will be double ended if a forward/reverse search yields + /// the same elements. /// /// For iterating from the front, `split()` can be used. /// @@ -661,13 +667,13 @@ impl str { /// assert_eq!(v, ["leopard", "tiger", "lion"]); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ```rust /// let v: Vec<&str> = "abc1def2ghi".rsplit(|c: char| c.is_numeric()).collect(); /// assert_eq!(v, ["ghi", "def", "abc"]); /// - /// let v: Vec<&str> = "lionXtigerXleopard".rsplit(|c: char| c.is_uppercase()).collect(); + /// let v: Vec<&str> = "lionXtigerXleopard".rsplit(char::is_uppercase).collect(); /// assert_eq!(v, ["leopard", "tiger", "lion"]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] @@ -680,20 +686,23 @@ impl str { /// An iterator over substrings of `self`, separated by characters /// matched by a pattern. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines - /// the split. - /// Additional libraries might provide more complex patterns like regular expressions. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns + /// like regular expressions. /// - /// Equivalent to `split`, except that the trailing substring is skipped if empty. + /// Equivalent to `split`, except that the trailing substring + /// is skipped if empty. /// - /// This method can be used for string data that is _terminated_, rather than - /// _seperated_ by some string. + /// This method can be used for string data that is _terminated_, + /// rather than _seperated_ by a pattern. 
/// /// # Iterator behavior /// - /// The returned iterator will be double ended if the pattern allows a reverse search - /// and forward/reverse search yields the same elements. This is true for, eg, `char` but not - /// for `&str`. + /// The returned iterator will be double ended if the pattern allows a + /// reverse search + /// and forward/reverse search yields the same elements. This is true + /// for, eg, `char` but not for `&str`. /// /// If the pattern allows a reverse search but its results might differ /// from a forward search, `rsplit_terminator()` can be used. @@ -710,7 +719,7 @@ impl str { /// assert_eq!(v, ["A", "", "B", ""]); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let v: Vec<&str> = "abc1def2ghi3".split_terminator(|c: char| c.is_numeric()).collect(); @@ -724,19 +733,22 @@ impl str { /// An iterator over substrings of `self`, separated by characters /// matched by a pattern and yielded in reverse order. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines - /// the split. - /// Additional libraries might provide more complex patterns like regular expressions. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// - /// Equivalent to `split`, except that the trailing substring is skipped if empty. + /// Equivalent to `split`, except that the trailing substring is + /// skipped if empty. /// - /// This method can be used for string data that is _terminated_, rather than - /// _seperated_ by some string. + /// This method can be used for string data that is _terminated_, + /// rather than _separated_ by a pattern. /// /// # Iterator behavior /// - /// The returned iterator requires that the pattern supports a reverse search, - /// and it will be double ended if a forward/reverse search yields the same elements.
+ /// The returned iterator requires that the pattern supports a + /// reverse search, and it will be double ended if a forward/reverse + /// search yields the same elements. /// /// For iterating from the front, `split_terminator()` can be used. /// @@ -752,7 +764,7 @@ impl str { /// assert_eq!(v, ["", "B", "", "A"]); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let v: Vec<&str> = "abc1def2ghi3".rsplit_terminator(|c: char| c.is_numeric()).collect(); @@ -771,13 +783,15 @@ impl str { /// /// The last element returned, if any, will contain the remainder of the /// string. - /// The pattern can be a simple `&str`, `char`, or a closure that determines - /// the split. - /// Additional libraries might provide more complex patterns like regular expressions. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// /// # Iterator behavior /// - /// The returned iterator will not be double ended, because it is not efficient to support. + /// The returned iterator will not be double ended, because it is + /// not efficient to support. /// /// If the pattern allows a reverse search, `rsplitn()` can be used. /// @@ -799,7 +813,7 @@ impl str { /// assert_eq!(v, [""]); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let v: Vec<&str> = "abc1def2ghi".splitn(2, |c: char| c.is_numeric()).collect(); @@ -817,13 +831,15 @@ impl str { /// The last element returned, if any, will contain the remainder of the /// string. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines - /// the split. - /// Additional libraries might provide more complex patterns like regular expressions. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. 
+ /// Additional libraries might provide more complex patterns like + /// regular expressions. /// /// # Iterator behavior /// - /// The returned iterator will not be double ended, because it is not efficient to support. + /// The returned iterator will not be double ended, because it is not + /// efficient to support. /// /// `splitn()` can be used for splitting from the front. /// @@ -842,7 +858,7 @@ impl str { /// assert_eq!(v, ["leopard", "lion::tiger"]); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let v: Vec<&str> = "abc1def2ghi".rsplitn(2, |c: char| c.is_numeric()).collect(); @@ -857,14 +873,17 @@ impl str { /// An iterator over the matches of a pattern within `self`. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines - /// the split. - /// Additional libraries might provide more complex patterns like regular expressions. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// /// # Iterator behavior /// - /// The returned iterator will be double ended if the pattern allows a reverse search - /// and forward/reverse search yields the same elements. This is true for, eg, `char` but not + /// The returned iterator will be double ended if the pattern allows + /// a reverse search + /// and forward/reverse search yields the same elements. This is true + /// for, eg, `char` but not /// for `&str`. /// /// If the pattern allows a reverse search but its results might differ @@ -886,16 +905,20 @@ impl str { core_str::StrExt::matches(&self[..], pat) } - /// An iterator over the matches of a pattern within `self`, yielded in reverse order. + /// An iterator over the matches of a pattern within `self`, yielded in + /// reverse order. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines - /// the split. 
- /// Additional libraries might provide more complex patterns like regular expressions. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// /// # Iterator behavior /// - /// The returned iterator requires that the pattern supports a reverse search, - /// and it will be double ended if a forward/reverse search yields the same elements. + /// The returned iterator requires that the pattern supports a + /// reverse search, + /// and it will be double ended if a forward/reverse search yields + /// the same elements. /// /// For iterating from the front, `matches()` can be used. /// @@ -917,20 +940,25 @@ impl str { core_str::StrExt::rmatches(&self[..], pat) } - /// An iterator over the start and end indices of the disjoint matches of a pattern within - /// `self`. + /// An iterator over the start and end indices of the disjoint matches + /// of a pattern within `self`. /// - /// For matches of `pat` within `self` that overlap, only the indices corresponding to the first + /// For matches of `pat` within `self` that overlap, only the indices + /// corresponding to the first /// match are returned. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines /// the split. - /// Additional libraries might provide more complex patterns like regular expressions. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// /// # Iterator behavior /// - /// The returned iterator will be double ended if the pattern allows a reverse search - /// and forward/reverse search yields the same elements. This is true for, eg, `char` but not + /// The returned iterator will be double ended if the pattern allows a + /// reverse search + /// and forward/reverse search yields the same elements. 
This is true for, + /// eg, `char` but not /// for `&str`. /// /// If the pattern allows a reverse search but its results might differ @@ -941,10 +969,10 @@ impl str { /// ``` /// # #![feature(collections)] /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".match_indices("abc").collect(); - /// assert_eq!(v, [(0,3), (6,9), (12,15)]); + /// assert_eq!(v, [(0, 3), (6, 9), (12, 15)]); /// /// let v: Vec<(usize, usize)> = "1abcabc2".match_indices("abc").collect(); - /// assert_eq!(v, [(1,4), (4,7)]); + /// assert_eq!(v, [(1, 4), (4, 7)]); /// /// let v: Vec<(usize, usize)> = "ababa".match_indices("aba").collect(); /// assert_eq!(v, [(0, 3)]); // only the first `aba` @@ -957,20 +985,26 @@ impl str { core_str::StrExt::match_indices(&self[..], pat) } - /// An iterator over the start and end indices of the disjoint matches of a pattern within + /// An iterator over the start and end indices of the disjoint matches of + /// a pattern within /// `self`, yielded in reverse order. /// - /// For matches of `pat` within `self` that overlap, only the indices corresponding to the last + /// For matches of `pat` within `self` that overlap, only the indices + /// corresponding to the last /// match are returned. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines /// the split. - /// Additional libraries might provide more complex patterns like regular expressions. + /// Additional libraries might provide more complex patterns like + /// regular expressions. /// /// # Iterator behavior /// - /// The returned iterator requires that the pattern supports a reverse search, - /// and it will be double ended if a forward/reverse search yields the same elements. + /// The returned iterator requires that the pattern supports a + /// reverse search, + /// and it will be double ended if a forward/reverse search yields + /// the same elements. 
/// /// For iterating from the front, `match_indices()` can be used. /// @@ -979,10 +1013,10 @@ impl str { /// ``` /// # #![feature(collections)] /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".rmatch_indices("abc").collect(); - /// assert_eq!(v, [(12,15), (6,9), (0,3)]); + /// assert_eq!(v, [(12, 15), (6, 9), (0, 3)]); /// /// let v: Vec<(usize, usize)> = "1abcabc2".rmatch_indices("abc").collect(); - /// assert_eq!(v, [(4,7), (1,4)]); + /// assert_eq!(v, [(4, 7), (1, 4)]); /// /// let v: Vec<(usize, usize)> = "ababa".rmatch_indices("aba").collect(); /// assert_eq!(v, [(2, 5)]); // only the last `aba` @@ -1023,7 +1057,8 @@ impl str { core_str::StrExt::lines(&self[..]) } - /// An iterator over the lines of a string, separated by either `\n` or `\r\n`. + /// An iterator over the lines of a string, separated by either + /// `\n` or `\r\n`. /// /// As with `.lines()`, this does not include an empty trailing line. /// @@ -1085,7 +1120,8 @@ impl str { /// /// # Unsafety /// - /// Caller must check both UTF-8 character boundaries and the boundaries of the entire slice as + /// Caller must check both UTF-8 character boundaries and the boundaries + /// of the entire slice as /// well. /// /// # Examples @@ -1128,13 +1164,15 @@ impl str { core_str::StrExt::ends_with(&self[..], pat) } - /// Returns a string with all pre- and suffixes that match a pattern repeatedly removed. + /// Returns a string with all pre- and suffixes that match a pattern + /// repeatedly removed. /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// The pattern can be a simple `char`, or a closure that determines + /// the split. 
/// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar"); @@ -1143,7 +1181,7 @@ impl str { /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar"); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar"); @@ -1155,13 +1193,15 @@ impl str { core_str::StrExt::trim_matches(&self[..], pat) } - /// Returns a string with all prefixes that match a pattern repeatedly removed. + /// Returns a string with all prefixes that match a pattern + /// repeatedly removed. /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. /// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11"); @@ -1170,7 +1210,7 @@ impl str { /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12"); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123"); @@ -1180,13 +1220,15 @@ impl str { core_str::StrExt::trim_left_matches(&self[..], pat) } - /// Returns a string with all suffixes that match a pattern repeatedly removed. + /// Returns a string with all suffixes that match a pattern + /// repeatedly removed. /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the split. 
/// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar"); @@ -1194,7 +1236,7 @@ impl str { /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar"); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar"); @@ -1206,9 +1248,11 @@ impl str { core_str::StrExt::trim_right_matches(&self[..], pat) } - /// Check that `index`-th byte lies at the start and/or end of a UTF-8 code point sequence. + /// Check that `index`-th byte lies at the start and/or end of a + /// UTF-8 code point sequence. /// - /// The start and end of the string (when `index == self.len()`) are considered to be + /// The start and end of the string (when `index == self.len()`) are + /// considered to be /// boundaries. /// /// # Panics @@ -1251,7 +1295,8 @@ impl str { /// /// # Examples /// - /// This example manually iterates through the characters of a string; this should normally be + /// This example manually iterates through the characters of a string; + /// this should normally be /// done by `.chars()` or `.char_indices()`. /// /// ``` @@ -1302,7 +1347,8 @@ impl str { /// /// # Examples /// - /// This example manually iterates through the characters of a string; this should normally be + /// This example manually iterates through the characters of a string; + /// this should normally be /// done by `.chars().rev()` or `.char_indices()`. /// /// ``` @@ -1365,7 +1411,8 @@ impl str { core_str::StrExt::char_at(&self[..], i) } - /// Given a byte position, return the `char` at that position, counting from the end. + /// Given a byte position, return the `char` at that position, counting + /// from the end. 
/// /// # Panics /// @@ -1400,31 +1447,36 @@ impl str { core_str::StrExt::as_bytes(&self[..]) } - /// Returns the byte index of the first character of `self` that matches the pattern, if it + /// Returns the byte index of the first character of `self` that matches + /// the pattern, if it /// exists. /// /// Returns `None` if it doesn't exist. /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// The pattern can be a simple `&str`, `char`, or a closure that + /// determines the + /// split. /// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// let s = "Löwe 老虎 Léopard"; /// /// assert_eq!(s.find('L'), Some(0)); /// assert_eq!(s.find('é'), Some(14)); + /// assert_eq!(s.find("Léopard"), Some(13)); /// /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let s = "Löwe 老虎 Léopard"; /// /// assert_eq!(s.find(|c: char| c.is_whitespace()), Some(5)); + /// assert_eq!(s.find(char::is_lowercase), Some(1)); /// ``` /// /// Not finding the pattern: @@ -1440,16 +1492,18 @@ impl str { core_str::StrExt::find(&self[..], pat) } - /// Returns the byte index of the last character of `self` that matches the pattern, if it + /// Returns the byte index of the last character of `self` that + /// matches the pattern, if it /// exists. /// /// Returns `None` if it doesn't exist. /// - /// The pattern can be a simple `&str`, or a closure that determines the split. + /// The pattern can be a simple `&str`, `char`, + /// or a closure that determines the split. 
/// /// # Examples /// - /// Simple `&str` patterns: + /// Simple patterns: /// /// ``` /// let s = "Löwe 老虎 Léopard"; @@ -1458,12 +1512,13 @@ impl str { /// assert_eq!(s.rfind('é'), Some(14)); /// ``` /// - /// More complex patterns with a lambda: + /// More complex patterns with closures: /// /// ``` /// let s = "Löwe 老虎 Léopard"; /// /// assert_eq!(s.rfind(|c: char| c.is_whitespace()), Some(12)); + /// assert_eq!(s.rfind(char::is_lowercase), Some(20)); /// ``` /// /// Not finding the pattern: @@ -1483,7 +1538,8 @@ impl str { /// Retrieves the first character from a `&str` and returns it. /// - /// This does not allocate a new string; instead, it returns a slice that points one character + /// This does not allocate a new string; instead, it returns a slice that + /// points one character /// beyond the character that was shifted. /// /// If the slice does not contain any characters, None is returned instead. @@ -1511,7 +1567,8 @@ impl str { core_str::StrExt::slice_shift_char(&self[..]) } - /// Returns the byte offset of an inner slice relative to an enclosing outer slice. + /// Returns the byte offset of an inner slice relative to an enclosing + /// outer slice. /// /// # Panics /// @@ -1536,7 +1593,8 @@ impl str { /// Return an unsafe pointer to the `&str`'s buffer. /// - /// The caller must ensure that the string outlives this pointer, and that it is not + /// The caller must ensure that the string outlives this pointer, and + /// that it is not /// reallocated (e.g. by pushing to the string). /// /// # Examples @@ -1612,7 +1670,8 @@ impl str { /// /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries /// - /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*; + /// If `is_extended` is true, the iterator is over the + /// *extended grapheme clusters*; /// otherwise, the iterator is over the *legacy grapheme clusters*. 
/// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) /// recommends extended grapheme cluster boundaries for general processing. @@ -1637,7 +1696,8 @@ impl str { UnicodeStr::graphemes(&self[..], is_extended) } - /// Returns an iterator over the grapheme clusters of `self` and their byte offsets. See + /// Returns an iterator over the grapheme clusters of `self` and their + /// byte offsets. See /// `graphemes()` for more information. /// /// # Examples @@ -1657,7 +1717,8 @@ impl str { /// An iterator over the non-empty words of `self`. /// - /// A 'word' is a subsequence separated by any sequence of whitespace. Sequences of whitespace + /// A 'word' is a subsequence separated by any sequence of whitespace. + /// Sequences of whitespace /// are collapsed, so empty "words" are not included. /// /// # Examples @@ -1679,11 +1740,15 @@ impl str { /// /// Control characters have zero width. /// - /// `is_cjk` determines behavior for characters in the Ambiguous category: if `is_cjk` is - /// `true`, these are 2 columns wide; otherwise, they are 1. In CJK locales, `is_cjk` should be + /// `is_cjk` determines behavior for characters in the Ambiguous category: + /// if `is_cjk` is + /// `true`, these are 2 columns wide; otherwise, they are 1. + /// In CJK locales, `is_cjk` should be /// `true`, else it should be `false`. - /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) recommends that these - /// characters be treated as 1 column (i.e., `is_cjk = false`) if the locale is unknown. + /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) + /// recommends that these + /// characters be treated as 1 column (i.e., `is_cjk = false`) if the + /// locale is unknown. 
#[unstable(feature = "unicode", reason = "this functionality may only be provided by libunicode")] pub fn width(&self, is_cjk: bool) -> usize { From c04f22a667123b39f16452af6fa65c82b2f8c0a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Mon, 23 Mar 2015 14:21:42 +0100 Subject: [PATCH 4/6] Refactored core::str::pattern to become a user-facing module and hide away CharEq. --- src/libcollections/str.rs | 5 +++-- src/libcollections/string.rs | 2 +- src/libcore/str/mod.rs | 7 +++---- src/libcore/str/pattern.rs | 23 ++++++++++++++--------- src/libcoretest/str.rs | 8 ++++---- 5 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 7c562baa82b07..28ba7369d52a3 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -58,6 +58,8 @@ use core::iter::{Iterator, Extend}; use core::option::Option::{self, Some, None}; use core::result::Result; use core::str as core_str; +use core::str::pattern::Pattern; +use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use unicode::str::{UnicodeStr, Utf16Encoder}; use core::convert::AsRef; @@ -78,8 +80,7 @@ pub use core::str::{MatchIndices, RMatchIndices}; pub use core::str::{from_utf8, Chars, CharIndices, Bytes}; pub use core::str::{from_utf8_unchecked, ParseBoolError}; pub use unicode::str::{Words, Graphemes, GraphemeIndices}; -pub use core::str::Pattern; -pub use core::str::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep}; +pub use core::str::pattern; /* Section: Creating a string diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 7a7725320914f..178cf5fa3fed0 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -24,7 +24,7 @@ use core::mem; use core::ops::{self, Deref, Add, Index}; use core::ptr; use core::slice; -use core::str::Pattern; +use core::str::pattern::Pattern; use unicode::str as unicode_str; use unicode::str::Utf16Item; diff --git 
a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index d31c1e274664d..107a3376277aa 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -17,6 +17,8 @@ #![doc(primitive = "str")] use self::OldSearcher::{TwoWay, TwoWayLong}; +use self::pattern::Pattern; +use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use char::CharExt; use clone::Clone; @@ -34,10 +36,7 @@ use result::Result::{self, Ok, Err}; use slice::{self, SliceExt}; use usize; -pub use self::pattern::Pattern; -pub use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep}; - -mod pattern; +pub mod pattern; /// A trait to abstract the idea of creating a new instance of a type from a /// string. diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index 0c2a58f3ca7c3..ef10fe5e707b7 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -471,29 +471,28 @@ fn str_search_step(mut m: &mut StrSearcher, macro_rules! pattern_methods { ($t:ty, $pmap:expr, $smap:expr) => { - // FIXME: #22463 - //type Searcher = $t; + type Searcher = $t; #[inline] fn into_searcher(self, haystack: &'a str) -> $t { - $smap($pmap(self).into_searcher(haystack)) + ($smap)(($pmap)(self).into_searcher(haystack)) } #[inline] fn is_contained_in(self, haystack: &'a str) -> bool { - $pmap(self).is_contained_in(haystack) + ($pmap)(self).is_contained_in(haystack) } #[inline] fn is_prefix_of(self, haystack: &'a str) -> bool { - $pmap(self).is_prefix_of(haystack) + ($pmap)(self).is_prefix_of(haystack) } #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool where $t: ReverseSearcher<'a> { - $pmap(self).is_suffix_of(haystack) + ($pmap)(self).is_suffix_of(haystack) } } } @@ -553,7 +552,6 @@ impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {} /// Searches for chars that are equal to a given char impl<'a> Pattern<'a> for char { - type Searcher = CharSearcher<'a>; pattern_methods!(CharSearcher<'a>, CharEqPattern, CharSearcher); } @@ -579,7 +577,6 @@ 
impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {} /// Searches for chars that are equal to any of the chars in the array impl<'a, 'b> Pattern<'a> for &'b [char] { - type Searcher = CharSliceSearcher<'a, 'b>; pattern_methods!(CharSliceSearcher<'a, 'b>, CharEqPattern, CharSliceSearcher); } @@ -609,6 +606,14 @@ impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F> /// Searches for chars that match the given predicate impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool { - type Searcher = CharPredicateSearcher<'a, F>; pattern_methods!(CharPredicateSearcher<'a, F>, CharEqPattern, CharPredicateSearcher); } + +///////////////////////////////////////////////////////////////////////////// +// Impl for &&str +///////////////////////////////////////////////////////////////////////////// + +/// Delegates to the `&str` impl. +impl<'a, 'b> Pattern<'a> for &'b &'b str { + pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s); +} diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs index 5fce527d9798d..e6d6a32e3eca9 100644 --- a/src/libcoretest/str.rs +++ b/src/libcoretest/str.rs @@ -185,14 +185,14 @@ fn trim_ws() { } mod pattern { - use std::str::Pattern; - use std::str::{Searcher, ReverseSearcher}; - use std::str::SearchStep::{self, Match, Reject, Done}; + use std::str::pattern::Pattern; + use std::str::pattern::{Searcher, ReverseSearcher}; + use std::str::pattern::SearchStep::{self, Match, Reject, Done}; macro_rules! 
make_test { ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => { mod $name { - use std::str::SearchStep::{Match, Reject}; + use std::str::pattern::SearchStep::{Match, Reject}; use super::{cmp_search_to_vec}; #[test] fn fwd() { From c29559d28acb34884769c884703c2c5de3397d2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Wed, 1 Apr 2015 22:45:00 +0200 Subject: [PATCH 5/6] Moved coretest::str tests into collectiontest::str --- src/libcollectionstest/str.rs | 376 +++++++++++++++++++++++++++++++++- src/libcoretest/str.rs | 376 +--------------------------------- 2 files changed, 376 insertions(+), 376 deletions(-) diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 495a961fa360e..0d5b4a14dbf72 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -1,4 +1,4 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. 
// @@ -1506,6 +1506,278 @@ fn test_str_from_utf8() { assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort)); } +#[test] +fn test_pattern_deref_forward() { + let data = "aabcdaa"; + assert!(data.contains("bcd")); + assert!(data.contains(&"bcd")); + assert!(data.contains(&"bcd".to_string())); +} + +#[test] +fn test_empty_match_indices() { + let data = "aä中!"; + let vec: Vec<_> = data.match_indices("").collect(); + assert_eq!(vec, [(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]); +} + +#[test] +fn test_bool_from_str() { + assert_eq!("true".parse().ok(), Some(true)); + assert_eq!("false".parse().ok(), Some(false)); + assert_eq!("not even a boolean".parse::<bool>().ok(), None); +} + +fn check_contains_all_substrings(s: &str) { + assert!(s.contains("")); + for i in 0..s.len() { + for j in i+1..s.len() + 1 { + assert!(s.contains(&s[i..j])); + } + } +} + +#[test] +fn strslice_issue_16589() { + assert!("bananas".contains("nana")); + + // prior to the fix for #16589, x.contains("abcdabcd") returned false + // test all substrings for good measure + check_contains_all_substrings("012345678901234567890123456789bcdabcdabcd"); +} + +#[test] +fn strslice_issue_16878() { + assert!(!"1234567ah012345678901ah".contains("hah")); + assert!(!"00abc01234567890123456789abc".contains("bcabc")); +} + + +#[test] +fn test_strslice_contains() { + let x = "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'"; + check_contains_all_substrings(x); +} + +#[test] +fn test_rsplitn_char_iterator() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let mut split: Vec<&str> = data.rsplitn(4, ' ').collect(); + split.reverse(); + assert_eq!(split, ["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]); + + let mut split: Vec<&str> = data.rsplitn(4, |c: char| c == ' ').collect(); + split.reverse(); + assert_eq!(split, ["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]); + + // Unicode + let mut split: Vec<&str> = data.rsplitn(4, 'ä').collect(); + split.reverse(); + assert_eq!(split, ["\nMäry 
häd ", " little l", "mb\nLittle l", "mb\n"]); + + let mut split: Vec<&str> = data.rsplitn(4, |c: char| c == 'ä').collect(); + split.reverse(); + assert_eq!(split, ["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]); +} + +#[test] +fn test_split_char_iterator() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split(' ').collect(); + assert_eq!( split, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + let mut rsplit: Vec<&str> = data.split(' ').rev().collect(); + rsplit.reverse(); + assert_eq!(rsplit, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + let split: Vec<&str> = data.split(|c: char| c == ' ').collect(); + assert_eq!( split, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect(); + rsplit.reverse(); + assert_eq!(rsplit, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); + + // Unicode + let split: Vec<&str> = data.split('ä').collect(); + assert_eq!( split, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); + + let mut rsplit: Vec<&str> = data.split('ä').rev().collect(); + rsplit.reverse(); + assert_eq!(rsplit, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); + + let split: Vec<&str> = data.split(|c: char| c == 'ä').collect(); + assert_eq!( split, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); + + let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect(); + rsplit.reverse(); + assert_eq!(rsplit, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); +} + +#[test] +fn test_rev_split_char_iterator_no_trailing() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let mut split: Vec<&str> = data.split('\n').rev().collect(); + split.reverse(); + assert_eq!(split, ["", "Märy häd ä little lämb", "Little lämb", ""]); + + let mut split: Vec<&str> = data.split_terminator('\n').rev().collect(); + split.reverse(); + assert_eq!(split, 
["", "Märy häd ä little lämb", "Little lämb"]); +} + +#[test] +fn test_utf16_code_units() { + use unicode::str::Utf16Encoder; + assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(), + [0xE9, 0xD83D, 0xDCA9]) +} + +#[test] +fn starts_with_in_unicode() { + assert!(!"├── Cargo.toml".starts_with("# ")); +} + +#[test] +fn starts_short_long() { + assert!(!"".starts_with("##")); + assert!(!"##".starts_with("####")); + assert!("####".starts_with("##")); + assert!(!"##ä".starts_with("####")); + assert!("####ä".starts_with("##")); + assert!(!"##".starts_with("####ä")); + assert!("##ä##".starts_with("##ä")); + + assert!("".starts_with("")); + assert!("ä".starts_with("")); + assert!("#ä".starts_with("")); + assert!("##ä".starts_with("")); + assert!("ä###".starts_with("")); + assert!("#ä##".starts_with("")); + assert!("##ä#".starts_with("")); +} + +#[test] +fn contains_weird_cases() { + assert!("* \t".contains(' ')); + assert!(!"* \t".contains('?')); + assert!(!"* \t".contains('\u{1F4A9}')); +} + +#[test] +fn trim_ws() { + assert_eq!(" \t a \t ".trim_left_matches(|c: char| c.is_whitespace()), + "a \t "); + assert_eq!(" \t a \t ".trim_right_matches(|c: char| c.is_whitespace()), + " \t a"); + assert_eq!(" \t a \t ".trim_matches(|c: char| c.is_whitespace()), + "a"); + assert_eq!(" \t \t ".trim_left_matches(|c: char| c.is_whitespace()), + ""); + assert_eq!(" \t \t ".trim_right_matches(|c: char| c.is_whitespace()), + ""); + assert_eq!(" \t \t ".trim_matches(|c: char| c.is_whitespace()), + ""); +} + +mod pattern { + use std::str::pattern::Pattern; + use std::str::pattern::{Searcher, ReverseSearcher}; + use std::str::pattern::SearchStep::{self, Match, Reject, Done}; + + macro_rules! 
make_test { + ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => { + mod $name { + use std::str::pattern::SearchStep::{Match, Reject}; + use super::{cmp_search_to_vec}; + #[test] + fn fwd() { + cmp_search_to_vec(false, $p, $h, vec![$($e),*]); + } + #[test] + fn bwd() { + cmp_search_to_vec(true, $p, $h, vec![$($e),*]); + } + } + } + } + + fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str, + right: Vec<SearchStep>) + where P::Searcher: ReverseSearcher<'a> + { + let mut searcher = pat.into_searcher(haystack); + let mut v = vec![]; + loop { + match if !rev {searcher.next()} else {searcher.next_back()} { + Match(a, b) => v.push(Match(a, b)), + Reject(a, b) => v.push(Reject(a, b)), + Done => break, + } + } + if rev { + v.reverse(); + } + assert_eq!(v, right); + } + + make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [ + Reject(0, 1), + Match (1, 3), + Reject(3, 4), + Match (4, 6), + Reject(6, 7), + ]); + make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [ + Match(0, 0), + Match(1, 1), + Match(2, 2), + Match(3, 3), + Match(4, 4), + Match(5, 5), + Match(6, 6), + Match(7, 7), + ]); + make_test!(str_searcher_mulibyte_haystack, " ", "├──", [ + Reject(0, 3), + Reject(3, 6), + Reject(6, 9), + ]); + make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [ + Match(0, 0), + Match(3, 3), + Match(6, 6), + Match(9, 9), + ]); + make_test!(str_searcher_empty_needle_empty_haystack, "", "", [ + Match(0, 0), + ]); + make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [ + ]); + make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [ + Reject(0, 1), + Match (1, 2), + Match (2, 3), + Reject(3, 4), + Match (4, 5), + Match (5, 6), + Reject(6, 7), + ]); + make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [ + Reject(0, 3), + Reject(3, 6), + Reject(6, 9), + ]); + make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [ + Reject(0, 1), + Reject(1, 2), + Reject(2, 3), + ]); + +} + mod bench { use test::{Bencher, 
black_box}; @@ -1693,4 +1965,106 @@ malesuada sollicitudin quam eu fermentum."; assert!(haystack.contains(needle)); }) } + + macro_rules! make_test_inner { + ($s:ident, $code:expr, $name:ident, $str:expr) => { + #[bench] + fn $name(bencher: &mut Bencher) { + let mut $s = $str; + black_box(&mut $s); + bencher.iter(|| $code); + } + } + } + + macro_rules! make_test { + ($name:ident, $s:ident, $code:expr) => { + mod $name { + use test::Bencher; + use test::black_box; + + // Short strings: 65 bytes each + make_test_inner!($s, $code, short_ascii, + "Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!"); + make_test_inner!($s, $code, short_mixed, + "ศไทย中华Việt Nam; Mary had a little lamb, Little lam!"); + make_test_inner!($s, $code, short_pile_of_poo, + "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!"); + make_test_inner!($s, $code, long_lorem_ipsum,"\ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \ +ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \ +eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \ +sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \ +tempus vel, gravida nec quam. + +In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \ +sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \ +diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \ +lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \ +eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \ +interdum. Curabitur ut nisi justo. + +Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \ +mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. 
Quisque malesuada \ +lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \ +est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \ +felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \ +ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \ +feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \ +Aliquam sit amet placerat lorem. + +Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \ +mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \ +Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \ +lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \ +suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \ +cursus accumsan. + +Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \ +feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \ +vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \ +leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. 
Nulla \ +malesuada sollicitudin quam eu fermentum!"); + } + } + } + + make_test!(chars_count, s, s.chars().count()); + + make_test!(contains_bang_str, s, s.contains("!")); + make_test!(contains_bang_char, s, s.contains('!')); + + make_test!(match_indices_a_str, s, s.match_indices("a").count()); + + make_test!(split_a_str, s, s.split("a").count()); + + make_test!(trim_ascii_char, s, { + use std::ascii::AsciiExt; + s.trim_matches(|c: char| c.is_ascii()) + }); + make_test!(trim_left_ascii_char, s, { + use std::ascii::AsciiExt; + s.trim_left_matches(|c: char| c.is_ascii()) + }); + make_test!(trim_right_ascii_char, s, { + use std::ascii::AsciiExt; + s.trim_right_matches(|c: char| c.is_ascii()) + }); + + make_test!(find_underscore_char, s, s.find('_')); + make_test!(rfind_underscore_char, s, s.rfind('_')); + make_test!(find_underscore_str, s, s.find("_")); + + make_test!(find_zzz_char, s, s.find('\u{1F4A4}')); + make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}')); + make_test!(find_zzz_str, s, s.find("\u{1F4A4}")); + + make_test!(split_space_char, s, s.split(' ').count()); + make_test!(split_terminator_space_char, s, s.split_terminator(' ').count()); + + make_test!(splitn_space_char, s, s.splitn(10, ' ').count()); + make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count()); + + make_test!(split_space_str, s, s.split(" ").count()); + make_test!(split_ad_str, s, s.split("ad").count()); } diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs index e6d6a32e3eca9..b7d9ba4463d98 100644 --- a/src/libcoretest/str.rs +++ b/src/libcoretest/str.rs @@ -8,378 +8,4 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-#[test] -fn test_pattern_deref_forward() { - let data = "aabcdaa"; - assert!(data.contains("bcd")); - assert!(data.contains(&"bcd")); - assert!(data.contains(&"bcd".to_string())); -} - -#[test] -fn test_empty_match_indices() { - let data = "aä中!"; - let vec: Vec<_> = data.match_indices("").collect(); - assert_eq!(vec, [(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]); -} - -#[test] -fn test_bool_from_str() { - assert_eq!("true".parse().ok(), Some(true)); - assert_eq!("false".parse().ok(), Some(false)); - assert_eq!("not even a boolean".parse::<bool>().ok(), None); -} - -fn check_contains_all_substrings(s: &str) { - assert!(s.contains("")); - for i in 0..s.len() { - for j in i+1..s.len() + 1 { - assert!(s.contains(&s[i..j])); - } - } -} - -#[test] -fn strslice_issue_16589() { - assert!("bananas".contains("nana")); - - // prior to the fix for #16589, x.contains("abcdabcd") returned false - // test all substrings for good measure - check_contains_all_substrings("012345678901234567890123456789bcdabcdabcd"); -} - -#[test] -fn strslice_issue_16878() { - assert!(!"1234567ah012345678901ah".contains("hah")); - assert!(!"00abc01234567890123456789abc".contains("bcabc")); -} - - -#[test] -fn test_strslice_contains() { - let x = "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'"; - check_contains_all_substrings(x); -} - -#[test] -fn test_rsplitn_char_iterator() { - let data = "\nMäry häd ä little lämb\nLittle lämb\n"; - - let mut split: Vec<&str> = data.rsplitn(4, ' ').collect(); - split.reverse(); - assert_eq!(split, ["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]); - - let mut split: Vec<&str> = data.rsplitn(4, |c: char| c == ' ').collect(); - split.reverse(); - assert_eq!(split, ["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]); - - // Unicode - let mut split: Vec<&str> = data.rsplitn(4, 'ä').collect(); - split.reverse(); - assert_eq!(split, ["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]); - - let mut split: Vec<&str> = data.rsplitn(4, |c: char| c == 
'ä').collect(); - split.reverse(); - assert_eq!(split, ["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]); -} - -#[test] -fn test_split_char_iterator() { - let data = "\nMäry häd ä little lämb\nLittle lämb\n"; - - let split: Vec<&str> = data.split(' ').collect(); - assert_eq!( split, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); - - let mut rsplit: Vec<&str> = data.split(' ').rev().collect(); - rsplit.reverse(); - assert_eq!(rsplit, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); - - let split: Vec<&str> = data.split(|c: char| c == ' ').collect(); - assert_eq!( split, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); - - let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect(); - rsplit.reverse(); - assert_eq!(rsplit, ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]); - - // Unicode - let split: Vec<&str> = data.split('ä').collect(); - assert_eq!( split, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); - - let mut rsplit: Vec<&str> = data.split('ä').rev().collect(); - rsplit.reverse(); - assert_eq!(rsplit, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); - - let split: Vec<&str> = data.split(|c: char| c == 'ä').collect(); - assert_eq!( split, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); - - let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect(); - rsplit.reverse(); - assert_eq!(rsplit, ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]); -} - -#[test] -fn test_rev_split_char_iterator_no_trailing() { - let data = "\nMäry häd ä little lämb\nLittle lämb\n"; - - let mut split: Vec<&str> = data.split('\n').rev().collect(); - split.reverse(); - assert_eq!(split, ["", "Märy häd ä little lämb", "Little lämb", ""]); - - let mut split: Vec<&str> = data.split_terminator('\n').rev().collect(); - split.reverse(); - assert_eq!(split, ["", "Märy häd ä little lämb", "Little lämb"]); -} - -#[test] -fn test_utf16_code_units() { - use 
unicode::str::Utf16Encoder; - assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(), - [0xE9, 0xD83D, 0xDCA9]) -} - -#[test] -fn starts_with_in_unicode() { - assert!(!"├── Cargo.toml".starts_with("# ")); -} - -#[test] -fn starts_short_long() { - assert!(!"".starts_with("##")); - assert!(!"##".starts_with("####")); - assert!("####".starts_with("##")); - assert!(!"##ä".starts_with("####")); - assert!("####ä".starts_with("##")); - assert!(!"##".starts_with("####ä")); - assert!("##ä##".starts_with("##ä")); - - assert!("".starts_with("")); - assert!("ä".starts_with("")); - assert!("#ä".starts_with("")); - assert!("##ä".starts_with("")); - assert!("ä###".starts_with("")); - assert!("#ä##".starts_with("")); - assert!("##ä#".starts_with("")); -} - -#[test] -fn contains_weird_cases() { - assert!("* \t".contains(' ')); - assert!(!"* \t".contains('?')); - assert!(!"* \t".contains('\u{1F4A9}')); -} - -#[test] -fn trim_ws() { - assert_eq!(" \t a \t ".trim_left_matches(|c: char| c.is_whitespace()), - "a \t "); - assert_eq!(" \t a \t ".trim_right_matches(|c: char| c.is_whitespace()), - " \t a"); - assert_eq!(" \t a \t ".trim_matches(|c: char| c.is_whitespace()), - "a"); - assert_eq!(" \t \t ".trim_left_matches(|c: char| c.is_whitespace()), - ""); - assert_eq!(" \t \t ".trim_right_matches(|c: char| c.is_whitespace()), - ""); - assert_eq!(" \t \t ".trim_matches(|c: char| c.is_whitespace()), - ""); -} - -mod pattern { - use std::str::pattern::Pattern; - use std::str::pattern::{Searcher, ReverseSearcher}; - use std::str::pattern::SearchStep::{self, Match, Reject, Done}; - - macro_rules! 
make_test { - ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => { - mod $name { - use std::str::pattern::SearchStep::{Match, Reject}; - use super::{cmp_search_to_vec}; - #[test] - fn fwd() { - cmp_search_to_vec(false, $p, $h, vec![$($e),*]); - } - #[test] - fn bwd() { - cmp_search_to_vec(true, $p, $h, vec![$($e),*]); - } - } - } - } - - fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str, - right: Vec<SearchStep>) - where P::Searcher: ReverseSearcher<'a> - { - let mut searcher = pat.into_searcher(haystack); - let mut v = vec![]; - loop { - match if !rev {searcher.next()} else {searcher.next_back()} { - Match(a, b) => v.push(Match(a, b)), - Reject(a, b) => v.push(Reject(a, b)), - Done => break, - } - } - if rev { - v.reverse(); - } - assert_eq!(v, right); - } - - make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [ - Reject(0, 1), - Match (1, 3), - Reject(3, 4), - Match (4, 6), - Reject(6, 7), - ]); - make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [ - Match(0, 0), - Match(1, 1), - Match(2, 2), - Match(3, 3), - Match(4, 4), - Match(5, 5), - Match(6, 6), - Match(7, 7), - ]); - make_test!(str_searcher_mulibyte_haystack, " ", "├──", [ - Reject(0, 3), - Reject(3, 6), - Reject(6, 9), - ]); - make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [ - Match(0, 0), - Match(3, 3), - Match(6, 6), - Match(9, 9), - ]); - make_test!(str_searcher_empty_needle_empty_haystack, "", "", [ - Match(0, 0), - ]); - make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [ - ]); - make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [ - Reject(0, 1), - Match (1, 2), - Match (2, 3), - Reject(3, 4), - Match (4, 5), - Match (5, 6), - Reject(6, 7), - ]); - make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [ - Reject(0, 3), - Reject(3, 6), - Reject(6, 9), - ]); - make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [ - Reject(0, 1), - Reject(1, 2), - Reject(2, 3), - ]); - -} - -mod bench { - macro_rules! 
make_test_inner { - ($s:ident, $code:expr, $name:ident, $str:expr) => { - #[bench] - fn $name(bencher: &mut Bencher) { - let mut $s = $str; - black_box(&mut $s); - bencher.iter(|| $code); - } - } - } - - macro_rules! make_test { - ($name:ident, $s:ident, $code:expr) => { - mod $name { - use test::Bencher; - use test::black_box; - - // Short strings: 65 bytes each - make_test_inner!($s, $code, short_ascii, - "Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!"); - make_test_inner!($s, $code, short_mixed, - "ศไทย中华Việt Nam; Mary had a little lamb, Little lam!"); - make_test_inner!($s, $code, short_pile_of_poo, - "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!"); - make_test_inner!($s, $code, long_lorem_ipsum,"\ -Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \ -ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \ -eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \ -sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \ -tempus vel, gravida nec quam. - -In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \ -sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \ -diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \ -lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \ -eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \ -interdum. Curabitur ut nisi justo. - -Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \ -mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \ -lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. 
Praesent egestas \ -est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \ -felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \ -ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \ -feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \ -Aliquam sit amet placerat lorem. - -Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \ -mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \ -Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \ -lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \ -suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \ -cursus accumsan. - -Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \ -feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \ -vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \ -leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. 
Nulla \ -malesuada sollicitudin quam eu fermentum!"); - } - } - } - - make_test!(chars_count, s, s.chars().count()); - - make_test!(contains_bang_str, s, s.contains("!")); - make_test!(contains_bang_char, s, s.contains('!')); - - make_test!(match_indices_a_str, s, s.match_indices("a").count()); - - make_test!(split_a_str, s, s.split("a").count()); - - make_test!(trim_ascii_char, s, { - use std::ascii::AsciiExt; - s.trim_matches(|c: char| c.is_ascii()) - }); - make_test!(trim_left_ascii_char, s, { - use std::ascii::AsciiExt; - s.trim_left_matches(|c: char| c.is_ascii()) - }); - make_test!(trim_right_ascii_char, s, { - use std::ascii::AsciiExt; - s.trim_right_matches(|c: char| c.is_ascii()) - }); - - make_test!(find_underscore_char, s, s.find('_')); - make_test!(rfind_underscore_char, s, s.rfind('_')); - make_test!(find_underscore_str, s, s.find("_")); - - make_test!(find_zzz_char, s, s.find('\u{1F4A4}')); - make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}')); - make_test!(find_zzz_str, s, s.find("\u{1F4A4}")); - - make_test!(split_space_char, s, s.split(' ').count()); - make_test!(split_terminator_space_char, s, s.split_terminator(' ').count()); - - make_test!(splitn_space_char, s, s.splitn(10, ' ').count()); - make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count()); - - make_test!(split_space_str, s, s.split(" ").count()); - make_test!(split_ad_str, s, s.split("ad").count()); -} +// All `str` tests live in libcollectionstest::str From fbba28e246950b06a322947af0152dda5f0444c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Thu, 2 Apr 2015 01:12:58 +0200 Subject: [PATCH 6/6] Added smoke tests for new methods. 
Fixed bug in existing StrSearcher impl --- src/libcollectionstest/str.rs | 149 +++++++++++++++++++++++++++++++--- src/libcore/str/pattern.rs | 33 +++++--- 2 files changed, 160 insertions(+), 22 deletions(-) diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 0d5b4a14dbf72..bc07c9b65a588 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -1720,6 +1720,31 @@ mod pattern { if rev { v.reverse(); } + + let mut first_index = 0; + let mut err = None; + + for (i, e) in right.iter().enumerate() { + match *e { + Match(a, b) | Reject(a, b) + if a <= b && a == first_index => { + first_index = b; + } + _ => { + err = Some(i); + break; + } + } + } + + if let Some(err) = err { + panic!("Input skipped range at {}", err); + } + + if first_index != haystack.len() { + panic!("Did not cover whole input"); + } + assert_eq!(v, right); } @@ -1731,14 +1756,21 @@ mod pattern { Reject(6, 7), ]); make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [ - Match(0, 0), - Match(1, 1), - Match(2, 2), - Match(3, 3), - Match(4, 4), - Match(5, 5), - Match(6, 6), - Match(7, 7), + Match (0, 0), + Reject(0, 1), + Match (1, 1), + Reject(1, 2), + Match (2, 2), + Reject(2, 3), + Match (3, 3), + Reject(3, 4), + Match (4, 4), + Reject(4, 5), + Match (5, 5), + Reject(5, 6), + Match (6, 6), + Reject(6, 7), + Match (7, 7), ]); make_test!(str_searcher_mulibyte_haystack, " ", "├──", [ Reject(0, 3), @@ -1746,10 +1778,13 @@ mod pattern { Reject(6, 9), ]); make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [ - Match(0, 0), - Match(3, 3), - Match(6, 6), - Match(9, 9), + Match (0, 0), + Reject(0, 3), + Match (3, 3), + Reject(3, 6), + Match (6, 6), + Reject(6, 9), + Match (9, 9), ]); make_test!(str_searcher_empty_needle_empty_haystack, "", "", [ Match(0, 0), @@ -1778,6 +1813,96 @@ mod pattern { } +macro_rules! 
generate_iterator_test { + { + $name:ident { + $( + ($($arg:expr),*) -> [$($t:tt)*]; + )* + } + with $fwd:expr, $bwd:expr; + } => { + #[test] + fn $name() { + $( + { + let res = vec![$($t)*]; + + let fwd_vec: Vec<_> = ($fwd)($($arg),*).collect(); + assert_eq!(fwd_vec, res); + + let mut bwd_vec: Vec<_> = ($bwd)($($arg),*).collect(); + bwd_vec.reverse(); + assert_eq!(bwd_vec, res); + } + )* + } + }; + { + $name:ident { + $( + ($($arg:expr),*) -> [$($t:tt)*]; + )* + } + with $fwd:expr; + } => { + #[test] + fn $name() { + $( + { + let res = vec![$($t)*]; + + let fwd_vec: Vec<_> = ($fwd)($($arg),*).collect(); + assert_eq!(fwd_vec, res); + } + )* + } + } +} + +generate_iterator_test! { + double_ended_split { + ("foo.bar.baz", '.') -> ["foo", "bar", "baz"]; + ("foo::bar::baz", "::") -> ["foo", "bar", "baz"]; + } + with str::split, str::rsplit; +} + +generate_iterator_test! { + double_ended_split_terminator { + ("foo;bar;baz;", ';') -> ["foo", "bar", "baz"]; + } + with str::split_terminator, str::rsplit_terminator; +} + +generate_iterator_test! { + double_ended_matches { + ("a1b2c3", char::is_numeric) -> ["1", "2", "3"]; + } + with str::matches, str::rmatches; +} + +generate_iterator_test! { + double_ended_match_indices { + ("a1b2c3", char::is_numeric) -> [(1, 2), (3, 4), (5, 6)]; + } + with str::match_indices, str::rmatch_indices; +} + +generate_iterator_test! { + not_double_ended_splitn { + ("foo::bar::baz", 2, "::") -> ["foo", "bar::baz"]; + } + with str::splitn; +} + +generate_iterator_test! 
{ + not_double_ended_rsplitn { + ("foo::bar::baz", 2, "::") -> ["baz", "foo::bar"]; + } + with str::rsplitn; +} + mod bench { use test::{Bencher, black_box}; diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index ef10fe5e707b7..9f701e1b03181 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -351,7 +351,14 @@ pub struct StrSearcher<'a, 'b> { needle: &'b str, start: usize, end: usize, - done: bool, + state: State, +} + +#[derive(Clone, PartialEq)] +enum State { Done, NotDone, Reject(usize, usize) } +impl State { + #[inline] fn done(&self) -> bool { *self == State::Done } + #[inline] fn take(&mut self) -> State { ::mem::replace(self, State::NotDone) } } /// Non-allocating substring search. @@ -368,7 +375,7 @@ impl<'a, 'b> Pattern<'a> for &'b str { needle: self, start: 0, end: haystack.len(), - done: false, + state: State::NotDone, } } } @@ -385,8 +392,9 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> { |m: &mut StrSearcher| { // Forward step for empty needle let current_start = m.start; - if !m.done { + if !m.state.done() { m.start = m.haystack.char_range_at(current_start).next; + m.state = State::Reject(current_start, m.start); } SearchStep::Match(current_start, current_start) }, @@ -415,8 +423,9 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> { |m: &mut StrSearcher| { // Backward step for empty needle let current_end = m.end; - if !m.done { + if !m.state.done() { m.end = m.haystack.char_range_at_reverse(current_end).next; + m.state = State::Reject(m.end, current_end); } SearchStep::Match(current_end, current_end) }, @@ -446,23 +455,27 @@ fn str_search_step(mut m: &mut StrSearcher, where F: FnOnce(&mut StrSearcher) -> SearchStep, G: FnOnce(&mut StrSearcher) -> SearchStep { - if m.done { + if m.state.done() { SearchStep::Done } else if m.needle.len() == 0 && m.start <= m.end { // Case for needle == "" - if m.start == m.end { - m.done = true; + if let State::Reject(a, b) = m.state.take() { + 
SearchStep::Reject(a, b) + } else { + if m.start == m.end { + m.state = State::Done; + } + empty_needle_step(&mut m) } - empty_needle_step(&mut m) } else if m.start + m.needle.len() <= m.end { // Case for needle != "" nonempty_needle_step(&mut m) } else if m.start < m.end { // Remaining slice shorter than needle, reject it - m.done = true; + m.state = State::Done; SearchStep::Reject(m.start, m.end) } else { - m.done = true; + m.state = State::Done; SearchStep::Done } }