diff --git a/CHANGELOG.md b/CHANGELOG.md index 9063a22a3c..328695a7f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +Unreleased +================== + +* [BUG #627](https://github.com/rust-lang/regex/issues/627): + Corrects `/-/.split("a-")` to return `["a", ""]` correctly instead of `["a"]` + (where `/-/` is a placeholder for code that generates a regex). +* [BUG #521](https://github.com/rust-lang/regex/issues/521): + Corrects `/-/.splitn("a", 2)` to return `["a"]` correctly instead of + `["a", ""]` (where `/-/` is a placeholder for code that generates a regex). + 1.3.1 (2019-09-04) ================== This is a maintenance release with no changes in order to try to work-around diff --git a/src/re_bytes.rs b/src/re_bytes.rs index 2e38c10ca8..f8140f2d75 100644 --- a/src/re_bytes.rs +++ b/src/re_bytes.rs @@ -726,11 +726,11 @@ impl<'r, 't> Iterator for Split<'r, 't> { let text = self.finder.0.text(); match self.finder.next() { None => { - if self.last >= text.len() { + if self.last > text.len() { None } else { let s = &text[self.last..]; - self.last = text.len(); + self.last = text.len() + 1; // Next call will return None Some(s) } } @@ -761,12 +761,19 @@ impl<'r, 't> Iterator for SplitN<'r, 't> { if self.n == 0 { return None; } + self.n -= 1; - if self.n == 0 { - let text = self.splits.finder.0.text(); - Some(&text[self.splits.last..]) + if self.n > 0 { + return self.splits.next() + } + + let text = self.splits.finder.0.text(); + if self.splits.last > text.len() { + // We've already returned all substrings. 
+ None } else { - self.splits.next() + // self.n == 0, so future calls will return None immediately + Some(&text[self.splits.last..]) } } } diff --git a/src/re_unicode.rs b/src/re_unicode.rs index 81aac15260..a787aaf4a6 100644 --- a/src/re_unicode.rs +++ b/src/re_unicode.rs @@ -766,11 +766,11 @@ impl<'r, 't> Iterator for Split<'r, 't> { let text = self.finder.0.text(); match self.finder.next() { None => { - if self.last >= text.len() { + if self.last > text.len() { None } else { let s = &text[self.last..]; - self.last = text.len(); + self.last = text.len() + 1; // Next call will return None Some(s) } } @@ -801,12 +801,19 @@ impl<'r, 't> Iterator for SplitN<'r, 't> { if self.n == 0 { return None; } + self.n -= 1; - if self.n == 0 { - let text = self.splits.finder.0.text(); - Some(&text[self.splits.last..]) + if self.n > 0 { + return self.splits.next() + } + + let text = self.splits.finder.0.text(); + if self.splits.last > text.len() { + // We've already returned all substrings. + None } else { - self.splits.next() + // self.n == 0, so future calls will return None immediately + Some(&text[self.splits.last..]) } } } diff --git a/tests/api.rs b/tests/api.rs index ff136217e1..0d4962cc9f 100644 --- a/tests/api.rs +++ b/tests/api.rs @@ -205,6 +205,18 @@ split!( split2, r"(?-u)\b", "a b c", - &[t!(""), t!("a"), t!(" "), t!("b"), t!(" "), t!("c")] + &[t!(""), t!("a"), t!(" "), t!("b"), t!(" "), t!("c"), t!("")] ); -split!(split3, r"a$", "a", &[t!("")]); +split!(split3, r"a$", "a", &[t!(""), t!("")]); +split!(split_none, r"-", r"a", &[t!("a")]); +split!(split_trailing_blank, r"-", r"a-", &[t!("a"), t!("")]); +split!(split_trailing_blanks, r"-", r"a--", &[t!("a"), t!(""), t!("")]); +split!(split_empty, r"-", r"", &[t!("")]); + +splitn!(splitn_below_limit, r"-", r"a", 2, &[t!("a")]); +splitn!(splitn_at_limit, r"-", r"a-b", 2, &[t!("a"), t!("b")]); +splitn!(splitn_above_limit, r"-", r"a-b-c", 2, &[t!("a"), t!("b-c")]); +splitn!(splitn_zero_limit, r"-", r"a-b", 0, 
empty_vec!()); +splitn!(splitn_trailing_blank, r"-", r"a-", 2, &[t!("a"), t!("")]); +splitn!(splitn_trailing_separator, r"-", r"a--", 2, &[t!("a"), t!("-")]); +splitn!(splitn_empty, r"-", r"", 1, &[t!("")]); diff --git a/tests/macros.rs b/tests/macros.rs index 3c4b888b20..e70e9489fd 100644 --- a/tests/macros.rs +++ b/tests/macros.rs @@ -147,3 +147,14 @@ macro_rules! split { } } } + +macro_rules! splitn { + ($name:ident, $re:expr, $text:expr, $limit:expr, $expected:expr) => { + #[test] + fn $name() { + let re = regex!($re); + let splitted: Vec<_> = re.splitn(t!($text), $limit).collect(); + assert_eq!($expected, &*splitted); + } + } +} diff --git a/tests/macros_bytes.rs b/tests/macros_bytes.rs index 7605d69b21..03c370d698 100644 --- a/tests/macros_bytes.rs +++ b/tests/macros_bytes.rs @@ -3,6 +3,7 @@ macro_rules! text { ($text:expr) => { $text.as_bytes() } } macro_rules! t { ($re:expr) => { text!($re) } } macro_rules! match_text { ($text:expr) => { $text.as_bytes() } } macro_rules! use_ { ($($path: tt)*) => { use regex::bytes::$($path)*; } } +macro_rules! empty_vec { () => { <Vec<&[u8]>>::new() } } macro_rules! bytes { ($text:expr) => { $text } } diff --git a/tests/macros_str.rs b/tests/macros_str.rs index fda5814b8c..9b996b33b9 100644 --- a/tests/macros_str.rs +++ b/tests/macros_str.rs @@ -3,6 +3,7 @@ macro_rules! text { ($text:expr) => { $text } } macro_rules! t { ($text:expr) => { text!($text) } } macro_rules! match_text { ($text:expr) => { $text.as_str() } } macro_rules! use_ { ($($path: tt)*) => { use regex::$($path)*; } } +macro_rules! empty_vec { () => { <Vec<&str>>::new() } } macro_rules! no_expand { ($text:expr) => {{