Skip to content

Commit

Permalink
regex: support perf-cache
Browse files Browse the repository at this point in the history
This makes the thread_local (and by consequence, lazy_static) crates
optional by providing a naive caching mechanism when perf-cache is
disabled. This is achieved by defining a common API and implementing it
via both approaches.

The one tricky bit here is to ensure our naive version implements the
same auto-traits as the fast version. Since we just use a plain mutex,
it impls RefUnwindSafe, but thread_local does not. So we forcefully
remove the RefUnwindSafe impl from our safe variant.

We should be able to implement RefUnwindSafe in both cases, but
this likely requires some mechanism for clearing the regex cache
automatically if a panic occurs anywhere during search. But that's a
more invasive change and is part of  #576.
  • Loading branch information
BurntSushi committed Sep 3, 2019
1 parent 096a5ea commit 2c2a3ee
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 21 deletions.
100 changes: 100 additions & 0 deletions src/cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// This module defines a common API for caching internal runtime state.
// The `thread_local` crate provides an extremely optimized version of this.
// However, if the perf-cache feature is disabled, then we drop the
// thread_local dependency and instead use a pretty naive caching mechanism
// with a mutex.
//
// Strictly speaking, the CachedGuard isn't necessary for the much more
// flexible thread_local API, but implementing thread_local's API doesn't
// seem possible in purely safe code.

pub use self::imp::{Cached, CachedGuard};

#[cfg(feature = "perf-cache")]
mod imp {
use thread_local::CachedThreadLocal;

#[derive(Debug)]
pub struct Cached<T: Send>(CachedThreadLocal<T>);

#[derive(Debug)]
pub struct CachedGuard<'a, T: 'a>(&'a T);

impl<T: Send> Cached<T> {
pub fn new() -> Cached<T> {
Cached(CachedThreadLocal::new())
}

pub fn get_or(&self, create: impl FnOnce() -> T) -> CachedGuard<T> {
CachedGuard(self.0.get_or(|| Box::new(create())))
}
}

impl<'a, T: Send> CachedGuard<'a, T> {
pub fn value(&self) -> &T {
self.0
}
}
}

#[cfg(not(feature = "perf-cache"))]
mod imp {
use std::marker::PhantomData;
use std::panic::UnwindSafe;
use std::sync::Mutex;

#[derive(Debug)]
pub struct Cached<T: Send> {
stack: Mutex<Vec<T>>,
/// When perf-cache is enabled, the thread_local crate is used, and
/// its CachedThreadLocal impls Send, Sync and UnwindSafe, but NOT
/// RefUnwindSafe. However, a Mutex impls RefUnwindSafe. So in order
/// to keep the APIs consistent regardless of whether perf-cache is
/// enabled, we force this type to NOT impl RefUnwindSafe too.
///
/// Ideally, we should always impl RefUnwindSafe, but it seems a little
/// tricky to do that right now.
///
/// See also: https://github.com/rust-lang/regex/issues/576
_phantom: PhantomData<Box<Send + Sync + UnwindSafe>>,
}

#[derive(Debug)]
pub struct CachedGuard<'a, T: Send> {
cache: &'a Cached<T>,
value: Option<T>,
}

impl<T: Send> Cached<T> {
pub fn new() -> Cached<T> {
Cached { stack: Mutex::new(vec![]), _phantom: PhantomData }
}

pub fn get_or(&self, create: impl FnOnce() -> T) -> CachedGuard<T> {
let mut stack = self.stack.lock().unwrap();
match stack.pop() {
None => CachedGuard { cache: self, value: Some(create()) },
Some(value) => CachedGuard { cache: self, value: Some(value) },
}
}

fn put(&self, value: T) {
let mut stack = self.stack.lock().unwrap();
stack.push(value);
}
}

impl<'a, T: Send> CachedGuard<'a, T> {
pub fn value(&self) -> &T {
self.value.as_ref().unwrap()
}
}

impl<'a, T: Send> Drop for CachedGuard<'a, T> {
fn drop(&mut self) {
if let Some(value) = self.value.take() {
self.cache.put(value);
}
}
}
}
41 changes: 20 additions & 21 deletions src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use syntax::hir::literal::Literals;
use syntax::hir::Hir;
use syntax::ParserBuilder;
use thread_local::CachedThreadLocal;

use backtrack;
use cache::{Cached, CachedGuard};
use compile::Compiler;
use dfa;
use error::Error;
Expand All @@ -32,7 +32,7 @@ pub struct Exec {
/// All read only state.
ro: Arc<ExecReadOnly>,
/// Caches for the various matching engines.
cache: CachedThreadLocal<ProgramCache>,
cache: Cached<ProgramCache>,
}

/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
Expand All @@ -43,7 +43,7 @@ pub struct ExecNoSync<'c> {
/// All read only state.
ro: &'c Arc<ExecReadOnly>,
/// Caches for the various matching engines.
cache: &'c ProgramCache,
cache: CachedGuard<'c, ProgramCache>,
}

/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
Expand Down Expand Up @@ -291,7 +291,7 @@ impl ExecBuilder {
ac: None,
match_type: MatchType::Nothing,
});
return Ok(Exec { ro: ro, cache: CachedThreadLocal::new() });
return Ok(Exec { ro: ro, cache: Cached::new() });
}
let parsed = self.parse()?;
let mut nfa = Compiler::new()
Expand Down Expand Up @@ -347,7 +347,7 @@ impl ExecBuilder {
ro.match_type = ro.choose_match_type(self.match_type);

let ro = Arc::new(ro);
Ok(Exec { ro: ro, cache: CachedThreadLocal::new() })
Ok(Exec { ro: ro, cache: Cached::new() })
}
}

Expand Down Expand Up @@ -423,7 +423,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
MatchType::DfaAnchoredReverse => {
match dfa::Fsm::reverse(
&self.ro.dfa_reverse,
self.cache,
self.cache.value(),
true,
&text[start..],
text.len(),
Expand Down Expand Up @@ -471,7 +471,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
MatchType::DfaAnchoredReverse => {
match dfa::Fsm::reverse(
&self.ro.dfa_reverse,
self.cache,
self.cache.value(),
true,
&text[start..],
text.len(),
Expand Down Expand Up @@ -691,7 +691,7 @@ impl<'c> ExecNoSync<'c> {
use dfa::Result::*;
let end = match dfa::Fsm::forward(
&self.ro.dfa,
self.cache,
self.cache.value(),
false,
text,
start,
Expand All @@ -704,7 +704,7 @@ impl<'c> ExecNoSync<'c> {
// Now run the DFA in reverse to find the start of the match.
match dfa::Fsm::reverse(
&self.ro.dfa_reverse,
self.cache,
self.cache.value(),
false,
&text[start..],
end - start,
Expand All @@ -730,7 +730,7 @@ impl<'c> ExecNoSync<'c> {
use dfa::Result::*;
match dfa::Fsm::reverse(
&self.ro.dfa_reverse,
self.cache,
self.cache.value(),
false,
&text[start..],
text.len() - start,
Expand All @@ -744,7 +744,7 @@ impl<'c> ExecNoSync<'c> {
/// Finds the end of the shortest match using only the DFA.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result<usize> {
dfa::Fsm::forward(&self.ro.dfa, self.cache, true, text, start)
dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start)
}

/// Finds the end of the shortest match using only the DFA by scanning for
Expand Down Expand Up @@ -796,7 +796,7 @@ impl<'c> ExecNoSync<'c> {
end = last_literal + lcs.len();
match dfa::Fsm::reverse(
&self.ro.dfa_reverse,
self.cache,
self.cache.value(),
false,
&text[start..end],
end - start,
Expand Down Expand Up @@ -841,7 +841,7 @@ impl<'c> ExecNoSync<'c> {
// leftmost-first match.)
match dfa::Fsm::forward(
&self.ro.dfa,
self.cache,
self.cache.value(),
false,
text,
match_start,
Expand Down Expand Up @@ -1007,7 +1007,7 @@ impl<'c> ExecNoSync<'c> {
if self.ro.nfa.uses_bytes() {
pikevm::Fsm::exec(
&self.ro.nfa,
self.cache,
self.cache.value(),
matches,
slots,
quit_after_match,
Expand All @@ -1018,7 +1018,7 @@ impl<'c> ExecNoSync<'c> {
} else {
pikevm::Fsm::exec(
&self.ro.nfa,
self.cache,
self.cache.value(),
matches,
slots,
quit_after_match,
Expand All @@ -1041,7 +1041,7 @@ impl<'c> ExecNoSync<'c> {
if self.ro.nfa.uses_bytes() {
backtrack::Bounded::exec(
&self.ro.nfa,
self.cache,
self.cache.value(),
matches,
slots,
ByteInput::new(text, self.ro.nfa.only_utf8),
Expand All @@ -1051,7 +1051,7 @@ impl<'c> ExecNoSync<'c> {
} else {
backtrack::Bounded::exec(
&self.ro.nfa,
self.cache,
self.cache.value(),
matches,
slots,
CharInput::new(text),
Expand Down Expand Up @@ -1087,7 +1087,7 @@ impl<'c> ExecNoSync<'c> {
Dfa | DfaAnchoredReverse | DfaSuffix | DfaMany => {
match dfa::Fsm::forward_many(
&self.ro.dfa,
self.cache,
self.cache.value(),
matches,
text,
start,
Expand Down Expand Up @@ -1145,8 +1145,7 @@ impl Exec {
/// Get a searcher that isn't Sync.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub fn searcher(&self) -> ExecNoSync {
let create =
|| Box::new(RefCell::new(ProgramCacheInner::new(&self.ro)));
let create = || RefCell::new(ProgramCacheInner::new(&self.ro));
ExecNoSync {
ro: &self.ro, // a clone is too expensive here! (and not needed)
cache: self.cache.get_or(create),
Expand Down Expand Up @@ -1201,7 +1200,7 @@ impl Exec {

impl Clone for Exec {
fn clone(&self) -> Exec {
Exec { ro: self.ro.clone(), cache: CachedThreadLocal::new() }
Exec { ro: self.ro.clone(), cache: Cached::new() }
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,7 @@ compile_error!("`std` feature is currently required to build this crate");

extern crate aho_corasick;
extern crate memchr;
#[cfg(feature = "perf-cache")]
extern crate thread_local;
#[cfg(test)]
#[macro_use]
Expand Down Expand Up @@ -744,6 +745,7 @@ pub mod bytes {
}

mod backtrack;
mod cache;
mod compile;
mod dfa;
mod error;
Expand Down

0 comments on commit 2c2a3ee

Please sign in to comment.