Skip to content

Commit

Permalink
Merge pull request #222 from rust-lang-nursery/misc-perf
Browse files Browse the repository at this point in the history
Minor perf improvements and code touch ups.
  • Loading branch information
BurntSushi committed May 1, 2016
2 parents 2952952 + 7ea9642 commit 090655b
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 101 deletions.
12 changes: 12 additions & 0 deletions bench/src/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,23 @@ pub use ffi::tcl::Regex;
// Due to macro scoping rules, this definition only applies for the modules
// defined below. Effectively, it allows us to use the same tests for both
// native and dynamic regexes.
//
// Usage: regex!(pattern)
//
// Default definition, compiled only when neither the `re-rust-bytes` nor the
// `re-rust-plugin` feature is enabled. It expands to `::Regex::new(pattern)`
// (whatever `Regex` is visible at the crate root) and unwraps the result, so
// a pattern that fails to build panics at the call site.
#[cfg(not(feature = "re-rust-bytes"))]
#[cfg(not(feature = "re-rust-plugin"))]
macro_rules! regex {
($re:expr) => { ::Regex::new($re).unwrap() }
}

// Usage: regex!(pattern)
//
// Definition used when the `re-rust-bytes` feature is enabled (and
// `re-rust-plugin` is not). The regex is built through
// `regex::bytes::RegexBuilder` with `unicode(true)` set, then unwrapped, so a
// pattern that fails to compile panics at the call site.
#[cfg(feature = "re-rust-bytes")]
#[cfg(not(feature = "re-rust-plugin"))]
macro_rules! regex {
($re:expr) => {{
// Always enable the Unicode flag for byte-based regexes.
// Really, this should have been enabled by default. *sigh*
use regex::bytes::RegexBuilder;
RegexBuilder::new($re).unicode(true).compile().unwrap()
}}
}

// Usage: text!(haystack)
//
// Builds a ::Text from an owned string.
Expand Down
19 changes: 0 additions & 19 deletions bench/src/sherlock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,32 +101,17 @@ sherlock!(everything_greedy_nl, r"(?s).*", 1);
// How fast can we match every letter? This also defeats any clever prefix
// tricks.
//
// Each letter benchmark is declared twice behind mutually exclusive `cfg`s:
// the `re-rust-bytes` build spells the pattern with an explicit `(?u)` flag,
// every other build uses the bare pattern. None of them are compiled when the
// `re-tcl` feature is enabled.
// NOTE(review): the trailing number is presumably the expected match count --
// confirm against the `sherlock!` macro definition.
#[cfg(not(feature = "re-tcl"))]
#[cfg(not(feature = "re-rust-bytes"))]
sherlock!(letters, r"\p{L}", 447160);
#[cfg(not(feature = "re-tcl"))]
#[cfg(feature = "re-rust-bytes")]
sherlock!(letters, r"(?u)\p{L}", 447160);

// Uppercase letters only (`\p{Lu}`).
#[cfg(not(feature = "re-tcl"))]
#[cfg(not(feature = "re-rust-bytes"))]
sherlock!(letters_upper, r"\p{Lu}", 14180);
#[cfg(not(feature = "re-tcl"))]
#[cfg(feature = "re-rust-bytes")]
sherlock!(letters_upper, r"(?u)\p{Lu}", 14180);

// Lowercase letters only (`\p{Ll}`).
#[cfg(not(feature = "re-tcl"))]
#[cfg(not(feature = "re-rust-bytes"))]
sherlock!(letters_lower, r"\p{Ll}", 432980);
#[cfg(not(feature = "re-tcl"))]
#[cfg(feature = "re-rust-bytes")]
sherlock!(letters_lower, r"(?u)\p{Ll}", 432980);

// Similarly, for words.
//
// Three variants: the default pattern, the `re-rust-bytes` pattern with an
// explicit `(?u)` flag, and an `re-re2` variant that uses the same pattern as
// the default but a different number (109222 instead of 109214).
#[cfg(not(feature = "re-rust-bytes"))]
#[cfg(not(feature = "re-re2"))]
sherlock!(words, r"\w+", 109214);
#[cfg(feature = "re-rust-bytes")]
sherlock!(words, r"(?u)\w+", 109214);
#[cfg(feature = "re-re2")]
sherlock!(words, r"\w+", 109222); // hmm, why does RE2 diverge here?

Expand Down Expand Up @@ -195,8 +180,4 @@ sherlock!(ing_suffix, r"[a-zA-Z]+ing", 2824);
//
// Onig does surprisingly well on this benchmark and yet does quite poorly on
// the ing_suffix benchmark. That one has me stumped.
//
// Interestingly, this is slower in the rust-bytes benchmark, presumably
// because scanning for one of the bytes in the Unicode *unaware* `\s` ends
// up being slower than avoiding the prefix scan at all.
sherlock!(ing_suffix_limited_space, r"\s[a-zA-Z]{0,12}ing\s", 2081);
97 changes: 51 additions & 46 deletions regex-syntax/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,7 @@ impl Parser {
'{' => try!(self.parse_counted_repeat()),
'[' => match self.maybe_parse_ascii() {
None => try!(self.parse_class()),
Some(cls) => {
Build::Expr(if self.flags.unicode {
Expr::Class(cls)
} else {
Expr::ClassBytes(cls.to_byte_class())
})
}
Some(cls) => Build::Expr(Expr::Class(cls)),
},
'^' => {
if self.flags.multi {
Expand Down Expand Up @@ -224,11 +218,7 @@ impl Parser {
}
'd'|'s'|'w'|'D'|'S'|'W' => {
self.bump();
Ok(Build::Expr(if self.flags.unicode {
Expr::Class(self.parse_perl_class(c))
} else {
Expr::ClassBytes(self.parse_perl_class(c).to_byte_class())
}))
Ok(Build::Expr(Expr::Class(self.parse_perl_class(c))))
}
c => Err(self.err(ErrorKind::UnrecognizedEscape(c))),
}
Expand Down Expand Up @@ -1328,16 +1318,28 @@ mod tests {
ByteClass::new(ranges)
}

fn asciid() -> ByteClass {
ascii_class("digit").unwrap().to_byte_class()
fn asciid() -> CharClass {
ascii_class("digit").unwrap()
}

fn asciis() -> CharClass {
ascii_class("space").unwrap()
}

fn asciiw() -> CharClass {
ascii_class("word").unwrap()
}

fn asciis() -> ByteClass {
ascii_class("space").unwrap().to_byte_class()
fn asciid_bytes() -> ByteClass {
asciid().to_byte_class()
}

fn asciiw() -> ByteClass {
ascii_class("word").unwrap().to_byte_class()
fn asciis_bytes() -> ByteClass {
asciis().to_byte_class()
}

fn asciiw_bytes() -> ByteClass {
asciiw().to_byte_class()
}

#[test]
Expand Down Expand Up @@ -1905,79 +1907,79 @@ mod tests {
#[test]
fn escape_perl_d() {
    // `\d` on its own parses to the Perl digit class.
    assert_eq!(p(r"\d"), Expr::Class(class(PERLD)));
    // Under `(?-u)` a bare `\d` is still an `Expr::Class` (the ASCII `digit`
    // class), not an `Expr::ClassBytes`.
    assert_eq!(pb(r"(?-u)\d"), Expr::Class(asciid()));
}

#[test]
fn escape_perl_s() {
    // `\s` on its own parses to the Perl space class.
    assert_eq!(p(r"\s"), Expr::Class(class(PERLS)));
    // Under `(?-u)` a bare `\s` is still an `Expr::Class` (the ASCII `space`
    // class), not an `Expr::ClassBytes`.
    assert_eq!(pb(r"(?-u)\s"), Expr::Class(asciis()));
}

#[test]
fn escape_perl_w() {
    // `\w` on its own parses to the Perl word class.
    assert_eq!(p(r"\w"), Expr::Class(class(PERLW)));
    // Under `(?-u)` a bare `\w` is still an `Expr::Class` (the ASCII `word`
    // class), not an `Expr::ClassBytes`.
    assert_eq!(pb(r"(?-u)\w"), Expr::Class(asciiw()));
}

#[test]
fn escape_perl_d_negate() {
    // `\D` is the negation of the Perl digit class.
    assert_eq!(p(r"\D"), Expr::Class(class(PERLD).negate()));
    // Under `(?-u)` it is the negated ASCII `digit` class, still wrapped in
    // `Expr::Class`.
    assert_eq!(pb(r"(?-u)\D"), Expr::Class(asciid().negate()));
}

#[test]
fn escape_perl_s_negate() {
    // `\S` is the negation of the Perl space class.
    assert_eq!(p(r"\S"), Expr::Class(class(PERLS).negate()));
    // Under `(?-u)` it is the negated ASCII `space` class, still wrapped in
    // `Expr::Class`.
    assert_eq!(pb(r"(?-u)\S"), Expr::Class(asciis().negate()));
}

#[test]
fn escape_perl_w_negate() {
    // `\W` is the negation of the Perl word class.
    assert_eq!(p(r"\W"), Expr::Class(class(PERLW).negate()));
    // Under `(?-u)` it is the negated ASCII `word` class, still wrapped in
    // `Expr::Class`.
    assert_eq!(pb(r"(?-u)\W"), Expr::Class(asciiw().negate()));
}

#[test]
fn escape_perl_d_case_fold() {
    // With `(?i)` the Perl digit class is case folded.
    assert_eq!(p(r"(?i)\d"), Expr::Class(class(PERLD).case_fold()));
    // With `(?i-u)` the case-folded ASCII `digit` class is expected, still as
    // an `Expr::Class`.
    assert_eq!(pb(r"(?i-u)\d"), Expr::Class(asciid().case_fold()));
}

#[test]
fn escape_perl_s_case_fold() {
    // With `(?i)` the Perl space class is case folded.
    assert_eq!(p(r"(?i)\s"), Expr::Class(class(PERLS).case_fold()));
    // With `(?i-u)` the case-folded ASCII `space` class is expected, still as
    // an `Expr::Class`.
    assert_eq!(pb(r"(?i-u)\s"), Expr::Class(asciis().case_fold()));
}

#[test]
fn escape_perl_w_case_fold() {
    // With `(?i)` the Perl word class is case folded.
    assert_eq!(p(r"(?i)\w"), Expr::Class(class(PERLW).case_fold()));
    // With `(?i-u)` the case-folded ASCII `word` class is expected, still as
    // an `Expr::Class`.
    assert_eq!(pb(r"(?i-u)\w"), Expr::Class(asciiw().case_fold()));
}

#[test]
fn escape_perl_d_case_fold_negate() {
    // Case folding is applied before negation.
    assert_eq!(p(r"(?i)\D"),
               Expr::Class(class(PERLD).case_fold().negate()));
    // Same shape for `(?i-u)`, built from the ASCII `digit` class and still
    // wrapped in `Expr::Class`.
    let cls = asciid().case_fold().negate();
    assert_eq!(pb(r"(?i-u)\D"), Expr::Class(cls));
}

#[test]
fn escape_perl_s_case_fold_negate() {
    // Case folding is applied before negation.
    assert_eq!(p(r"(?i)\S"),
               Expr::Class(class(PERLS).case_fold().negate()));
    // Same shape for `(?i-u)`, built from the ASCII `space` class and still
    // wrapped in `Expr::Class`.
    let cls = asciis().case_fold().negate();
    assert_eq!(pb(r"(?i-u)\S"), Expr::Class(cls));
}

#[test]
fn escape_perl_w_case_fold_negate() {
    // Case folding is applied before negation.
    assert_eq!(p(r"(?i)\W"),
               Expr::Class(class(PERLW).case_fold().negate()));
    // Same shape for `(?i-u)`, built from the ASCII `word` class and still
    // wrapped in `Expr::Class`.
    let cls = asciiw().case_fold().negate();
    assert_eq!(pb(r"(?i-u)\W"), Expr::Class(cls));
}

#[test]
Expand Down Expand Up @@ -2039,11 +2041,11 @@ mod tests {
assert_eq!(p(r"[^\w]"), Expr::Class(class(PERLW).negate()));
assert_eq!(p(r"[^\s]"), Expr::Class(class(PERLS).negate()));

let bytes = asciid().negate();
let bytes = asciid_bytes().negate();
assert_eq!(pb(r"(?-u)[^\d]"), Expr::ClassBytes(bytes));
let bytes = asciiw().negate();
let bytes = asciiw_bytes().negate();
assert_eq!(pb(r"(?-u)[^\w]"), Expr::ClassBytes(bytes));
let bytes = asciis().negate();
let bytes = asciis_bytes().negate();
assert_eq!(pb(r"(?-u)[^\s]"), Expr::ClassBytes(bytes));
}

Expand All @@ -2053,17 +2055,18 @@ mod tests {
assert_eq!(p(r"[^\W]"), Expr::Class(class(PERLW)));
assert_eq!(p(r"[^\S]"), Expr::Class(class(PERLS)));

assert_eq!(pb(r"(?-u)[^\D]"), Expr::ClassBytes(asciid()));
assert_eq!(pb(r"(?-u)[^\W]"), Expr::ClassBytes(asciiw()));
assert_eq!(pb(r"(?-u)[^\S]"), Expr::ClassBytes(asciis()));
assert_eq!(pb(r"(?-u)[^\D]"), Expr::ClassBytes(asciid_bytes()));
assert_eq!(pb(r"(?-u)[^\W]"), Expr::ClassBytes(asciiw_bytes()));
assert_eq!(pb(r"(?-u)[^\S]"), Expr::ClassBytes(asciis_bytes()));
}

#[test]
fn class_singleton_class_casei() {
    // A bracketed class holding a single class, under `(?i)`, is that class
    // case folded.
    assert_eq!(p(r"(?i)[\d]"), Expr::Class(class(PERLD).case_fold()));
    assert_eq!(p(r"(?i)[\p{Yi}]"), Expr::Class(class(YI).case_fold()));

    // Unlike a bare `\d`, the bracketed form under `(?i-u)` is expected to be
    // a byte class, so the `ByteClass` helper is used here.
    assert_eq!(pb(r"(?i-u)[\d]"),
               Expr::ClassBytes(asciid_bytes().case_fold()));
}

#[test]
Expand All @@ -2075,11 +2078,11 @@ mod tests {
assert_eq!(p(r"(?i)[^\s]"),
Expr::Class(class(PERLS).case_fold().negate()));

let bytes = asciid().case_fold().negate();
let bytes = asciid_bytes().case_fold().negate();
assert_eq!(pb(r"(?i-u)[^\d]"), Expr::ClassBytes(bytes));
let bytes = asciiw().case_fold().negate();
let bytes = asciiw_bytes().case_fold().negate();
assert_eq!(pb(r"(?i-u)[^\w]"), Expr::ClassBytes(bytes));
let bytes = asciis().case_fold().negate();
let bytes = asciis_bytes().case_fold().negate();
assert_eq!(pb(r"(?i-u)[^\s]"), Expr::ClassBytes(bytes));
}

Expand All @@ -2089,9 +2092,12 @@ mod tests {
assert_eq!(p(r"(?i)[^\W]"), Expr::Class(class(PERLW).case_fold()));
assert_eq!(p(r"(?i)[^\S]"), Expr::Class(class(PERLS).case_fold()));

assert_eq!(pb(r"(?i-u)[^\D]"), Expr::ClassBytes(asciid().case_fold()));
assert_eq!(pb(r"(?i-u)[^\W]"), Expr::ClassBytes(asciiw().case_fold()));
assert_eq!(pb(r"(?i-u)[^\S]"), Expr::ClassBytes(asciis().case_fold()));
assert_eq!(pb(r"(?i-u)[^\D]"),
Expr::ClassBytes(asciid_bytes().case_fold()));
assert_eq!(pb(r"(?i-u)[^\W]"),
Expr::ClassBytes(asciiw_bytes().case_fold()));
assert_eq!(pb(r"(?i-u)[^\S]"),
Expr::ClassBytes(asciis_bytes().case_fold()));
}

#[test]
Expand Down Expand Up @@ -2184,8 +2190,7 @@ mod tests {
assert_eq!(p("[:upper:]"), Expr::Class(class(UPPER)));
assert_eq!(p("[[:upper:]]"), Expr::Class(class(UPPER)));

assert_eq!(pb("(?-u)[:upper:]"),
Expr::ClassBytes(class(UPPER).to_byte_class()));
assert_eq!(pb("(?-u)[:upper:]"), Expr::Class(class(UPPER)));
assert_eq!(pb("(?-u)[[:upper:]]"),
Expr::ClassBytes(class(UPPER).to_byte_class()));
}
Expand Down Expand Up @@ -2233,7 +2238,7 @@ mod tests {
Expr::Class(class(UPPER).case_fold()));

assert_eq!(pb("(?i-u)[:upper:]"),
Expr::ClassBytes(class(UPPER).to_byte_class().case_fold()));
Expr::Class(class(UPPER).case_fold()));
assert_eq!(pb("(?i-u)[[:upper:]]"),
Expr::ClassBytes(class(UPPER).to_byte_class().case_fold()));
}
Expand Down
Loading

0 comments on commit 090655b

Please sign in to comment.