Skip to content

Commit

Permalink
progress
Browse files Browse the repository at this point in the history
  • Loading branch information
BurntSushi committed Oct 4, 2022
1 parent 159a63c commit 4075d39
Show file tree
Hide file tree
Showing 47 changed files with 171 additions and 170 deletions.
22 changes: 11 additions & 11 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "regex"
name = "regex-old"
version = "1.6.0" #:version
authors = ["The Rust Project Developers"]
license = "MIT OR Apache-2.0"
Expand Down Expand Up @@ -32,7 +32,7 @@ doctest = false
# Features are documented in the "Crate features" section of the crate docs:
# https://docs.rs/regex/*/#crate-features
[features]
default = ["std", "perf", "unicode", "regex-syntax/default"]
default = ["std", "perf", "unicode", "regex-syntax-old/default"]

# ECOSYSTEM FEATURES

Expand Down Expand Up @@ -75,22 +75,22 @@ unicode = [
"unicode-perl",
"unicode-script",
"unicode-segment",
"regex-syntax/unicode",
"regex-syntax-old/unicode",
]
# Enables use of the `Age` property, e.g., `\p{Age:3.0}`.
unicode-age = ["regex-syntax/unicode-age"]
unicode-age = ["regex-syntax-old/unicode-age"]
# Enables use of a smattering of boolean properties, e.g., `\p{Emoji}`.
unicode-bool = ["regex-syntax/unicode-bool"]
unicode-bool = ["regex-syntax-old/unicode-bool"]
# Enables Unicode-aware case insensitive matching, e.g., `(?i)β`.
unicode-case = ["regex-syntax/unicode-case"]
unicode-case = ["regex-syntax-old/unicode-case"]
# Enables Unicode general categories, e.g., `\p{Letter}` or `\pL`.
unicode-gencat = ["regex-syntax/unicode-gencat"]
unicode-gencat = ["regex-syntax-old/unicode-gencat"]
# Enables Unicode-aware Perl classes corresponding to `\w`, `\s` and `\d`.
unicode-perl = ["regex-syntax/unicode-perl"]
unicode-perl = ["regex-syntax-old/unicode-perl"]
# Enables Unicode scripts and script extensions, e.g., `\p{Greek}`.
unicode-script = ["regex-syntax/unicode-script"]
unicode-script = ["regex-syntax-old/unicode-script"]
# Enables Unicode segmentation properties, e.g., `\p{gcb=Extend}`.
unicode-segment = ["regex-syntax/unicode-segment"]
unicode-segment = ["regex-syntax-old/unicode-segment"]


# UNSTABLE FEATURES (requires Rust nightly)
Expand All @@ -115,7 +115,7 @@ version = "2.4.0"
optional = true

# For parsing regular expressions.
[dependencies.regex-syntax]
[dependencies.regex-syntax-old]
path = "regex-syntax"
version = "0.6.27"
default-features = false
Expand Down
4 changes: 2 additions & 2 deletions bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ libc = "0.2"
onig = { version = "3", optional = true }
libpcre-sys = { version = "0.2", optional = true }
memmap = "0.6.2"
regex = { version = "1", path = ".." }
regex-syntax = { version = "0.6", path = "../regex-syntax" }
regex-old = { version = "1", path = ".." }
regex-syntax-old = { version = "0.6", path = "../regex-syntax" }
serde = { version = "1", features = ["derive"] }
cfg-if = "0.1"

Expand Down
4 changes: 2 additions & 2 deletions bench/src/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ cfg_if! {
} else if #[cfg(feature = "re-onig")] {
pub use ffi::onig::Regex;
} else if #[cfg(any(feature = "re-rust"))] {
pub use regex::{Regex, RegexSet};
pub use regex_old::{Regex, RegexSet};
} else if #[cfg(feature = "re-rust-bytes")] {
pub use regex::bytes::{Regex, RegexSet};
pub use regex_old::bytes::{Regex, RegexSet};
} else if #[cfg(feature = "re-re2")] {
pub use ffi::re2::Regex;
} else if #[cfg(feature = "re-pcre2")] {
Expand Down
4 changes: 2 additions & 2 deletions bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,14 @@ fn count_re2(pat: &str, haystack: &str) -> usize {
nada!("re-rust", count_rust);
#[cfg(feature = "re-rust")]
fn count_rust(pat: &str, haystack: &str) -> usize {
use regex::Regex;
use regex_old::Regex;
Regex::new(pat).unwrap().find_iter(haystack).count()
}

nada!("re-rust-bytes", count_rust_bytes);
#[cfg(feature = "re-rust-bytes")]
fn count_rust_bytes(pat: &str, haystack: &str) -> usize {
use regex::bytes::Regex;
use regex_old::bytes::Regex;
Regex::new(pat).unwrap().find_iter(haystack.as_bytes()).count()
}

Expand Down
4 changes: 2 additions & 2 deletions bench/src/rust_compile.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use regex_syntax::Parser;
use regex_syntax_old::Parser;
use test::Bencher;

use regex::internal::Compiler;
use regex_old::internal::Compiler;

#[bench]
fn compile_simple(b: &mut Bencher) {
Expand Down
2 changes: 1 addition & 1 deletion bench/src/rust_parse.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use regex_syntax::Expr;
use regex_syntax_old::Expr;
use test::Bencher;

#[bench]
Expand Down
2 changes: 1 addition & 1 deletion examples/shootout-regex-dna-bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use std::thread;

macro_rules! regex {
($re:expr) => {
::regex::bytes::Regex::new($re).unwrap()
::regex_old::bytes::Regex::new($re).unwrap()
};
}

Expand Down
2 changes: 1 addition & 1 deletion examples/shootout-regex-dna-cheat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::thread;

macro_rules! regex {
($re:expr) => {
::regex::Regex::new($re).unwrap()
::regex_old::Regex::new($re).unwrap()
};
}

Expand Down
2 changes: 1 addition & 1 deletion examples/shootout-regex-dna-replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::io::{self, Read};

macro_rules! regex {
($re:expr) => {{
use regex::internal::ExecBuilder;
use regex_old::internal::ExecBuilder;
ExecBuilder::new($re).build().unwrap().into_regex()
}};
}
Expand Down
2 changes: 1 addition & 1 deletion examples/shootout-regex-dna-single-cheat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::io::{self, Read};

macro_rules! regex {
($re:expr) => {
::regex::Regex::new($re).unwrap()
::regex_old::Regex::new($re).unwrap()
};
}

Expand Down
2 changes: 1 addition & 1 deletion examples/shootout-regex-dna-single.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::io::{self, Read};

macro_rules! regex {
($re:expr) => {
::regex::Regex::new($re).unwrap()
::regex_old::Regex::new($re).unwrap()
};
}

Expand Down
2 changes: 1 addition & 1 deletion examples/shootout-regex-dna.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use std::thread;

macro_rules! regex {
($re:expr) => {
::regex::Regex::new($re).unwrap()
::regex_old::Regex::new($re).unwrap()
};
}

Expand Down
2 changes: 1 addition & 1 deletion fuzz/fuzz_targets/fuzz_regex_match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ fuzz_target!(|data: &[u8]| {
let char_index = data.char_indices().nth(split_off_point);
if let Some((char_index, _)) = char_index {
let (pattern, input) = data.split_at(char_index);
if let Ok(re) = regex::Regex::new(pattern) {
if let Ok(re) = regex_old::Regex::new(pattern) {
re.is_match(input);
}
}
Expand Down
2 changes: 1 addition & 1 deletion regex-capi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ crate-type = ["staticlib", "cdylib"]

[dependencies]
libc = "0.2"
regex = { version = "1", path = ".." }
regex-old = { version = "1", path = ".." }
2 changes: 1 addition & 1 deletion regex-capi/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub struct Error {
pub enum ErrorKind {
None,
Str(str::Utf8Error),
Regex(regex::Error),
Regex(regex_old::Error),
Nul(ffi::NulError),
}

Expand Down
4 changes: 2 additions & 2 deletions regex-capi/src/rure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::slice;
use std::str;

use libc::{c_char, size_t};
use regex::bytes;
use regex_old::bytes;

use crate::error::{Error, ErrorKind};

Expand Down Expand Up @@ -609,7 +609,7 @@ fn rure_escape(
return ptr::null();
},
};
let esc_pat = regex::escape(str_pat);
let esc_pat = regex_old::escape(str_pat);
let c_esc_pat = match CString::new(esc_pat) {
Ok(val) => val,
Err(err) => unsafe {
Expand Down
4 changes: 2 additions & 2 deletions regex-debug/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ edition = "2018"

[dependencies]
docopt = "1"
regex = { version = "1.1", path = ".." }
regex-syntax = { version = "0.6", path = "../regex-syntax" }
regex-old = { version = "1.1", path = ".." }
regex-syntax-old = { version = "0.6", path = "../regex-syntax" }
serde = { version = "1", features = ["derive"] }
24 changes: 12 additions & 12 deletions regex-debug/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ use std::process;
use std::result;

use docopt::Docopt;
use regex::internal::{Compiler, LiteralSearcher};
use regex_syntax::hir::literal::Literals;
use regex_syntax::hir::Hir;
use regex_old::internal::{Compiler, LiteralSearcher};
use regex_syntax_old::hir::literal::Literals;
use regex_syntax_old::hir::Hir;

const USAGE: &'static str = "
Usage:
Expand Down Expand Up @@ -120,7 +120,7 @@ fn run(args: &Args) -> Result<()> {
}

fn cmd_ast(args: &Args) -> Result<()> {
use regex_syntax::ast::parse::Parser;
use regex_syntax_old::ast::parse::Parser;

let mut parser = Parser::new();
let ast = parser.parse(&args.arg_pattern)?;
Expand All @@ -129,7 +129,7 @@ fn cmd_ast(args: &Args) -> Result<()> {
}

fn cmd_hir(args: &Args) -> Result<()> {
use regex_syntax::ParserBuilder;
use regex_syntax_old::ParserBuilder;

let mut parser = ParserBuilder::new().allow_invalid_utf8(false).build();
let hir = parser.parse(&args.arg_pattern)?;
Expand Down Expand Up @@ -218,9 +218,9 @@ fn cmd_compile(args: &Args) -> Result<()> {
}

fn cmd_utf8_ranges(args: &Args) -> Result<()> {
use regex_syntax::hir::{self, HirKind};
use regex_syntax::utf8::Utf8Sequences;
use regex_syntax::ParserBuilder;
use regex_syntax_old::hir::{self, HirKind};
use regex_syntax_old::utf8::Utf8Sequences;
use regex_syntax_old::ParserBuilder;

let hir = ParserBuilder::new()
.build()
Expand Down Expand Up @@ -251,9 +251,9 @@ fn cmd_utf8_ranges(args: &Args) -> Result<()> {
}

fn cmd_utf8_ranges_rev(args: &Args) -> Result<()> {
use regex_syntax::hir::{self, HirKind};
use regex_syntax::utf8::Utf8Sequences;
use regex_syntax::ParserBuilder;
use regex_syntax_old::hir::{self, HirKind};
use regex_syntax_old::utf8::Utf8Sequences;
use regex_syntax_old::ParserBuilder;

let hir = ParserBuilder::new()
.build()
Expand Down Expand Up @@ -327,7 +327,7 @@ impl Args {
}

fn parse(re: &str) -> Result<Hir> {
use regex_syntax::ParserBuilder;
use regex_syntax_old::ParserBuilder;
ParserBuilder::new()
.allow_invalid_utf8(true)
.build()
Expand Down
2 changes: 1 addition & 1 deletion regex-syntax/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "regex-syntax"
name = "regex-syntax-old"
version = "0.6.27" #:version
authors = ["The Rust Project Developers"]
license = "MIT OR Apache-2.0"
Expand Down
2 changes: 1 addition & 1 deletion regex-syntax/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

extern crate test;

use regex_syntax::Parser;
use regex_syntax_old::Parser;
use test::Bencher;

#[bench]
Expand Down
4 changes: 2 additions & 2 deletions regex-syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ convert your pattern to an `Ast` and then convert the `Ast` to an `Hir`.
This example shows how to parse a pattern string into its HIR:
```
use regex_syntax::Parser;
use regex_syntax::hir::{self, Hir};
use regex_syntax_old::Parser;
use regex_syntax_old::hir::{self, Hir};
let hir = Parser::new().parse("a|b").unwrap();
assert_eq!(hir, Hir::alternation(vec![
Expand Down
2 changes: 1 addition & 1 deletion regex-syntax/src/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ impl fmt::Debug for Utf8Range {
/// scalar values.
///
/// ```rust
/// use regex_syntax::utf8::{Utf8Sequences, Utf8Sequence};
/// use regex_syntax_old::utf8::{Utf8Sequences, Utf8Sequence};
///
/// fn matches(seqs: &[Utf8Sequence], bytes: &[u8]) -> bool {
/// for range in seqs {
Expand Down
10 changes: 5 additions & 5 deletions src/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ use std::iter;
use std::result;
use std::sync::Arc;

use regex_syntax::hir::{self, Hir};
use regex_syntax::is_word_byte;
use regex_syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
use regex_syntax_old::hir::{self, Hir};
use regex_syntax_old::is_word_byte;
use regex_syntax_old::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};

use crate::prog::{
EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges,
Expand Down Expand Up @@ -267,7 +267,7 @@ impl Compiler {
/// instruction, and so no patch.entry value makes sense.
fn c(&mut self, expr: &Hir) -> ResultOrEmpty {
use crate::prog;
use regex_syntax::hir::HirKind::*;
use regex_syntax_old::hir::HirKind::*;

self.check_size()?;
match *expr.kind() {
Expand Down Expand Up @@ -587,7 +587,7 @@ impl Compiler {
}

fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty {
use regex_syntax::hir::RepetitionKind::*;
use regex_syntax_old::hir::RepetitionKind::*;
match rep.kind {
ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy),
Expand Down
8 changes: 4 additions & 4 deletions src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ use std::sync::Arc;

#[cfg(feature = "perf-literal")]
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use regex_syntax::hir::literal::Literals;
use regex_syntax::hir::Hir;
use regex_syntax::ParserBuilder;
use regex_syntax_old::hir::literal::Literals;
use regex_syntax_old::hir::Hir;
use regex_syntax_old::ParserBuilder;

use crate::backtrack;
use crate::compile::Compiler;
Expand Down Expand Up @@ -1547,7 +1547,7 @@ impl ProgramCacheInner {
/// literals, and if so, returns them. Otherwise, this returns None.
#[cfg(feature = "perf-literal")]
fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
use regex_syntax::hir::{HirKind, Literal};
use regex_syntax_old::hir::{HirKind, Literal};

// This is pretty hacky, but basically, if `is_alternation_literal` is
// true, then we can make several assumptions about the structure of our
Expand Down
7 changes: 5 additions & 2 deletions src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,15 +377,18 @@ impl Char {
// available. However, our compiler ensures that if a Unicode word
// boundary is used, then the data must also be available. If it isn't,
// then the compiler returns an error.
char::from_u32(self.0).map_or(false, regex_syntax::is_word_character)
char::from_u32(self.0)
.map_or(false, regex_syntax_old::is_word_character)
}

/// Returns true iff the byte is a word byte.
///
/// If the byte is absent, then false is returned.
pub fn is_word_byte(self) -> bool {
match char::from_u32(self.0) {
Some(c) if c <= '\u{7F}' => regex_syntax::is_word_byte(c as u8),
Some(c) if c <= '\u{7F}' => {
regex_syntax_old::is_word_byte(c as u8)
}
None | Some(_) => false,
}
}
Expand Down
Loading

0 comments on commit 4075d39

Please sign in to comment.