Skip to content

Commit

Permalink
Remove the submatch iterators.
Browse files Browse the repository at this point in the history
All use cases can be replaced with Regex::capture_names.
  • Loading branch information
BurntSushi committed Aug 5, 2016
1 parent 3e36cff commit aba9c28
Show file tree
Hide file tree
Showing 7 changed files with 18 additions and 225 deletions.
10 changes: 5 additions & 5 deletions regex-capi/src/rure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,10 @@ ffi_fn! {
) -> bool {
let re = unsafe { &*re };
let haystack = unsafe { slice::from_raw_parts(haystack, len) };
re.find_at(haystack, start).map(|(s, e)| unsafe {
re.find_at(haystack, start).map(|m| unsafe {
if !match_info.is_null() {
(*match_info).start = s;
(*match_info).end = e;
(*match_info).start = m.start();
(*match_info).end = m.end();
}
}).is_some()
}
Expand Down Expand Up @@ -258,7 +258,7 @@ ffi_fn! {
}
let (s, e) = match re.find_at(text, it.last_end) {
None => return false,
Some((s, e)) => (s, e),
Some(m) => (m.start(), m.end()),
};
if s == e {
// This is an empty match. To ensure we make progress, start
Expand Down Expand Up @@ -300,7 +300,7 @@ ffi_fn! {
}
let (s, e) = match re.read_captures_at(slots, text, it.last_end) {
None => return false,
Some((s, e)) => (s, e),
Some(m) => (m.start(), m.end()),
};
if s == e {
// This is an empty match. To ensure we make progress, start
Expand Down
6 changes: 3 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,9 +460,9 @@ extern crate utf8_ranges;
pub use error::Error;
pub use re_builder::unicode::*;
pub use re_set::unicode::*;
pub use re_trait::{Locations, SubCapturesPosIter};
pub use re_trait::Locations;
pub use re_unicode::{
Regex, Captures, SubCapturesIter, SubCapturesNamedIter,
Regex, Captures,
CaptureNamesIter, CapturesIter, FindIter,
Replacer, NoExpand, SplitsIter, SplitsNIter,
quote,
Expand Down Expand Up @@ -558,7 +558,7 @@ pub mod bytes {
pub use re_builder::bytes::*;
pub use re_bytes::*;
pub use re_set::bytes::*;
pub use re_trait::{Locations, SubCapturesPosIter};
pub use re_trait::Locations;
}

mod backtrack;
Expand Down
3 changes: 2 additions & 1 deletion src/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> {
SearchStep::Done
}
}
Some((s, e)) => {
Some(m) => {
let (s, e) = (m.start(), m.end());
if s == self.last_step_end {
self.last_step_end = e;
SearchStep::Match(s, e)
Expand Down
71 changes: 2 additions & 69 deletions src/re_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

use std::borrow::Cow;
use std::collections::HashMap;
use std::collections::hash_map;
use std::fmt;
use std::ops::Index;
use std::str::FromStr;
Expand All @@ -22,7 +21,7 @@ use exec::{Exec, ExecNoSync};
use expand::expand_bytes;
use error::Error;
use re_builder::bytes::RegexBuilder;
use re_trait::{self, RegularExpression, Locations, SubCapturesPosIter};
use re_trait::{self, RegularExpression, Locations};

/// Match represents a single match of a regex in a haystack.
///
Expand Down Expand Up @@ -790,29 +789,6 @@ impl<'t> Captures<'t> {
self.named_groups.get(name).and_then(|&i| self.get(i))
}

/// Creates an iterator of all the capture groups in order of appearance
/// in the regular expression.
pub fn iter<'c>(&'c self) -> SubCapturesIter<'c, 't> {
SubCapturesIter { idx: 0, caps: self }
}

/// Creates an iterator of all the capture group positions in order of
/// appearance in the regular expression. Positions are byte indices
/// in terms of the original string matched.
pub fn iter_pos(&self) -> SubCapturesPosIter {
self.locs.iter()
}

/// Creates an iterator of all named groups as a tuple with the group
/// name and the value. The iterator returns these values in arbitrary
/// order.
pub fn iter_named<'c>(&'c self) -> SubCapturesNamedIter<'c, 't> {
SubCapturesNamedIter {
caps: self,
names: self.named_groups.iter()
}
}

/// Expands all instances of `$name` in `text` to the corresponding capture
/// group `name`, and writes them to the `dst` buffer given.
///
Expand Down Expand Up @@ -873,7 +849,7 @@ impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
let slot_to_name: HashMap<&usize, &String> =
self.0.named_groups.iter().map(|(a, b)| (b, a)).collect();
let mut map = f.debug_map();
for (slot, m) in self.0.iter_pos().enumerate() {
for (slot, m) in self.0.locs.iter().enumerate() {
let m = m.map(|(s, e)| escape_bytes(&self.0.text[s..e]));
if let Some(ref name) = slot_to_name.get(&slot) {
map.entry(&name, &m);
Expand Down Expand Up @@ -926,49 +902,6 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
}
}

/// An iterator over capture groups for a particular match of a regular
/// expression.
///
/// `'c` is the lifetime of the captures and `'t` is the lifetime of the
/// matched text.
pub struct SubCapturesIter<'c, 't: 'c> {
idx: usize,
caps: &'c Captures<'t>,
}

impl<'c, 't> Iterator for SubCapturesIter<'c, 't> {
type Item = Option<&'t [u8]>;

fn next(&mut self) -> Option<Option<&'t [u8]>> {
if self.idx < self.caps.len() {
self.idx += 1;
Some(self.caps.get(self.idx - 1).map(|m| m.as_bytes()))
} else {
None
}
}
}

/// An Iterator over named capture groups as a tuple with the group name and
/// the value.
///
/// `'c` is the lifetime of the captures and `'t` is the lifetime of the
/// matched text.
pub struct SubCapturesNamedIter<'c, 't: 'c> {
caps: &'c Captures<'t>,
names: hash_map::Iter<'c, String, usize>,
}

impl<'c, 't> Iterator for SubCapturesNamedIter<'c, 't> {
type Item = (&'c str, Option<&'t [u8]>);

fn next(&mut self) -> Option<(&'c str, Option<&'t [u8]>)> {
self.names.next().map(|(name, &pos)| {
(&**name, self.caps.get(pos).map(|m| m.as_bytes()))
})
}
}

/// Replacer describes types that can be used to replace matches in a byte
/// string.
///
Expand Down
68 changes: 2 additions & 66 deletions src/re_unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use exec::{Exec, ExecNoSyncStr};
use expand::expand_str;
use re_builder::unicode::RegexBuilder;
use re_plugin::Plugin;
use re_trait::{self, RegularExpression, Locations, SubCapturesPosIter};
use re_trait::{self, RegularExpression, Locations};

/// Escapes all regular expression meta characters in `text`.
///
Expand Down Expand Up @@ -927,29 +927,6 @@ impl<'t> Captures<'t> {
self.named_groups.pos(name).and_then(|i| self.get(i))
}

/// Creates an iterator of all the capture groups in order of appearance
/// in the regular expression.
pub fn iter<'c>(&'c self) -> SubCapturesIter<'c, 't> {
SubCapturesIter { idx: 0, caps: self, }
}

/// Creates an iterator of all the capture group positions in order of
/// appearance in the regular expression. Positions are byte indices
/// in terms of the original string matched.
pub fn iter_pos(&self) -> SubCapturesPosIter {
self.locs.iter()
}

/// Creates an iterator of all named groups as a tuple with the group
/// name and the value. The iterator returns these values in arbitrary
/// order.
pub fn iter_named<'c>(&'c self) -> SubCapturesNamedIter<'c, 't> {
SubCapturesNamedIter {
caps: self,
names: self.named_groups.iter()
}
}

/// Expands all instances of `$name` in `text` to the corresponding capture
/// group `name`, and writes them to the `dst` buffer given.
///
Expand Down Expand Up @@ -995,7 +972,7 @@ impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
let slot_to_name: HashMap<usize, &str> =
self.0.named_groups.iter().map(|(a, b)| (b, a)).collect();
let mut map = f.debug_map();
for (slot, m) in self.0.iter_pos().enumerate() {
for (slot, m) in self.0.locs.iter().enumerate() {
let m = m.map(|(s, e)| &self.0.text[s..e]);
if let Some(ref name) = slot_to_name.get(&slot) {
map.entry(&name, &m);
Expand Down Expand Up @@ -1048,47 +1025,6 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
}
}

/// An iterator over capture groups for a particular match of a regular
/// expression.
///
/// `'c` is the lifetime of the captures.
pub struct SubCapturesIter<'c, 't: 'c> {
idx: usize,
caps: &'c Captures<'t>,
}

impl<'c, 't> Iterator for SubCapturesIter<'c, 't> {
type Item = Option<&'t str>;

fn next(&mut self) -> Option<Option<&'t str>> {
if self.idx < self.caps.len() {
self.idx += 1;
Some(self.caps.get(self.idx - 1).map(|m| m.as_str()))
} else {
None
}
}
}

/// An Iterator over named capture groups as a tuple with the group
/// name and the value.
///
/// `'c` is the lifetime of the captures.
pub struct SubCapturesNamedIter<'c, 't: 'c> {
caps: &'c Captures<'t>,
names: NamedGroupsIter<'c>,
}

impl<'c, 't> Iterator for SubCapturesNamedIter<'c, 't> {
type Item = (&'c str, Option<&'t str>);

fn next(&mut self) -> Option<(&'c str, Option<&'t str>)> {
self.names.next().map(|(name, pos)| {
(name, self.caps.get(pos).map(|m| m.as_str()))
})
}
}

/// An iterator that yields all non-overlapping capture groups matching a
/// particular regular expression.
///
Expand Down
80 changes: 0 additions & 80 deletions tests/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,86 +140,6 @@ fn capture_misc() {
assert_eq!(t!("c"), match_text!(cap.name("b").unwrap()));
}

#[test]
fn capture_iter() {
let re = regex!(r"(.)(?P<a>.)(.)(?P<b>.)");
let cap = re.captures(t!("abcd")).unwrap();
assert_eq!(5, cap.len());

let expected = vec![
t!("abcd"), t!("a"), t!("b"), t!("c"), t!("d"),
].into_iter().map(Some).collect::<Vec<_>>();
let got = cap.iter().collect::<Vec<_>>();
assert_eq!(expected, got);
}

#[test]
fn capture_iter_missing() {
let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)");
let cap = re.captures(t!("abc")).unwrap();
assert_eq!(5, cap.len());

let expected = vec![
Some(t!("abc")), Some(t!("a")), None, Some(t!("b")), Some(t!("c")),
];
let got = cap.iter().collect::<Vec<_>>();
assert_eq!(expected, got);
}

#[test]
fn capture_iter_pos() {
let re = regex!(r"(.)(?P<a>.)(.)(?P<b>.)");
let cap = re.captures(t!("abcd")).unwrap();

let expected = vec![
(0, 4), (0, 1), (1, 2), (2, 3), (3, 4),
].into_iter().map(Some).collect::<Vec<_>>();
let got = cap.iter_pos().collect::<Vec<_>>();
assert_eq!(expected, got);
}

#[test]
fn capture_iter_pos_missing() {
let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)");
let cap = re.captures(t!("abc")).unwrap();

let expected = vec![
Some((0, 3)), Some((0, 1)), None, Some((1, 2)), Some((2, 3)),
];
let got = cap.iter_pos().collect::<Vec<_>>();
assert_eq!(expected, got);
}

#[test]
fn capture_iter_named() {
let re = regex!(r"(.)(?P<a>.)(.)(?P<b>.)");
let cap = re.captures(t!("abcd")).unwrap();

let expected1 = vec![
("a", Some(t!("b"))), ("b", Some(t!("d"))),
];
let expected2 = vec![
("b", Some(t!("d"))), ("a", Some(t!("b"))),
];
let got = cap.iter_named().collect::<Vec<_>>();
assert!(got == expected1 || got == expected2);
}

#[test]
fn capture_iter_named_missing() {
let re = regex!(r"(.)(?P<a>.)?(.)(?P<b>.)");
let cap = re.captures(t!("abc")).unwrap();

let expected1 = vec![
("a", None), ("b", Some(t!("c"))),
];
let expected2 = vec![
("b", Some(t!("c"))), ("a", None),
];
let got = cap.iter_named().collect::<Vec<_>>();
assert!(got == expected1 || got == expected2);
}

expand!(expand1, r"(?P<foo>\w+)", "abc", "$foo", "abc");
expand!(expand2, r"(?P<foo>\w+)", "abc", "$0", "abc");
expand!(expand3, r"(?P<foo>\w+)", "abc", "$1", "abc");
Expand Down
5 changes: 4 additions & 1 deletion tests/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ macro_rules! mat(
Some(c) => {
assert!(r.is_match(text));
assert!(r.shortest_match(text).is_some());
c.iter_pos().collect()
r.capture_names()
.enumerate()
.map(|(i, _)| c.get(i).map(|m| (m.start(), m.end())))
.collect()
}
None => vec![None],
};
Expand Down

0 comments on commit aba9c28

Please sign in to comment.