diff --git a/src/cache.rs b/src/cache.rs
new file mode 100644
index 0000000000..fc0510a5d1
--- /dev/null
+++ b/src/cache.rs
@@ -0,0 +1,100 @@
+// This module defines a common API for caching internal runtime state.
+// The `thread_local` crate provides an extremely optimized version of this.
+// However, if the perf-cache feature is disabled, then we drop the
+// thread_local dependency and instead use a pretty naive caching mechanism
+// with a mutex.
+//
+// Strictly speaking, the CachedGuard isn't necessary for the much more
+// flexible thread_local API, but implementing thread_local's API doesn't
+// seem possible in purely safe code.
+
+pub use self::imp::{Cached, CachedGuard};
+
+#[cfg(feature = "perf-cache")]
+mod imp {
+    use thread_local::CachedThreadLocal;
+
+    #[derive(Debug)]
+    pub struct Cached<T: Send>(CachedThreadLocal<T>);
+
+    #[derive(Debug)]
+    pub struct CachedGuard<'a, T: 'a>(&'a T);
+
+    impl<T: Send> Cached<T> {
+        pub fn new() -> Cached<T> {
+            Cached(CachedThreadLocal::new())
+        }
+
+        pub fn get_or(&self, create: impl FnOnce() -> T) -> CachedGuard<T> {
+            CachedGuard(self.0.get_or(|| Box::new(create())))
+        }
+    }
+
+    impl<'a, T: Send> CachedGuard<'a, T> {
+        pub fn value(&self) -> &T {
+            self.0
+        }
+    }
+}
+
+#[cfg(not(feature = "perf-cache"))]
+mod imp {
+    use std::marker::PhantomData;
+    use std::panic::UnwindSafe;
+    use std::sync::Mutex;
+
+    #[derive(Debug)]
+    pub struct Cached<T: Send> {
+        stack: Mutex<Vec<T>>,
+        /// When perf-cache is enabled, the thread_local crate is used, and
+        /// its CachedThreadLocal impls Send, Sync and UnwindSafe, but NOT
+        /// RefUnwindSafe. However, a Mutex impls RefUnwindSafe. So in order
+        /// to keep the APIs consistent regardless of whether perf-cache is
+        /// enabled, we force this type to NOT impl RefUnwindSafe too.
+        ///
+        /// Ideally, we should always impl RefUnwindSafe, but it seems a little
+        /// tricky to do that right now.
+        ///
+        /// See also: https://github.com/rust-lang/regex/issues/576
+        _phantom: PhantomData<Box<dyn Send + Sync + UnwindSafe>>,
+    }
+
+    #[derive(Debug)]
+    pub struct CachedGuard<'a, T: Send> {
+        cache: &'a Cached<T>,
+        value: Option<T>,
+    }
+
+    impl<T: Send> Cached<T> {
+        pub fn new() -> Cached<T> {
+            Cached { stack: Mutex::new(vec![]), _phantom: PhantomData }
+        }
+
+        pub fn get_or(&self, create: impl FnOnce() -> T) -> CachedGuard<T> {
+            let mut stack = self.stack.lock().unwrap();
+            match stack.pop() {
+                None => CachedGuard { cache: self, value: Some(create()) },
+                Some(value) => CachedGuard { cache: self, value: Some(value) },
+            }
+        }
+
+        fn put(&self, value: T) {
+            let mut stack = self.stack.lock().unwrap();
+            stack.push(value);
+        }
+    }
+
+    impl<'a, T: Send> CachedGuard<'a, T> {
+        pub fn value(&self) -> &T {
+            self.value.as_ref().unwrap()
+        }
+    }
+
+    impl<'a, T: Send> Drop for CachedGuard<'a, T> {
+        fn drop(&mut self) {
+            if let Some(value) = self.value.take() {
+                self.cache.put(value);
+            }
+        }
+    }
+}
diff --git a/src/exec.rs b/src/exec.rs
index f5d5a0accb..6df2ad0268 100644
--- a/src/exec.rs
+++ b/src/exec.rs
@@ -6,9 +6,9 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
 use syntax::hir::literal::Literals;
 use syntax::hir::Hir;
 use syntax::ParserBuilder;
-use thread_local::CachedThreadLocal;
 
 use backtrack;
+use cache::{Cached, CachedGuard};
 use compile::Compiler;
 use dfa;
 use error::Error;
@@ -32,7 +32,7 @@ pub struct Exec {
     /// All read only state.
     ro: Arc<ExecReadOnly>,
     /// Caches for the various matching engines.
-    cache: CachedThreadLocal<ProgramCache>,
+    cache: Cached<ProgramCache>,
 }
 
 /// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
@@ -43,7 +43,7 @@ pub struct ExecNoSync<'c> {
     /// All read only state.
     ro: &'c Arc<ExecReadOnly>,
     /// Caches for the various matching engines.
-    cache: &'c ProgramCache,
+    cache: CachedGuard<'c, ProgramCache>,
 }
 
 /// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
@@ -291,7 +291,7 @@ impl ExecBuilder {
                 ac: None,
                 match_type: MatchType::Nothing,
             });
-            return Ok(Exec { ro: ro, cache: CachedThreadLocal::new() });
+            return Ok(Exec { ro: ro, cache: Cached::new() });
         }
         let parsed = self.parse()?;
         let mut nfa = Compiler::new()
@@ -347,7 +347,7 @@ impl ExecBuilder {
         ro.match_type = ro.choose_match_type(self.match_type);
 
         let ro = Arc::new(ro);
-        Ok(Exec { ro: ro, cache: CachedThreadLocal::new() })
+        Ok(Exec { ro: ro, cache: Cached::new() })
     }
 }
 
@@ -423,7 +423,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
             MatchType::DfaAnchoredReverse => {
                 match dfa::Fsm::reverse(
                     &self.ro.dfa_reverse,
-                    self.cache,
+                    self.cache.value(),
                     true,
                     &text[start..],
                     text.len(),
@@ -471,7 +471,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
             MatchType::DfaAnchoredReverse => {
                 match dfa::Fsm::reverse(
                     &self.ro.dfa_reverse,
-                    self.cache,
+                    self.cache.value(),
                     true,
                     &text[start..],
                     text.len(),
@@ -691,7 +691,7 @@ impl<'c> ExecNoSync<'c> {
         use dfa::Result::*;
         let end = match dfa::Fsm::forward(
             &self.ro.dfa,
-            self.cache,
+            self.cache.value(),
             false,
             text,
             start,
@@ -704,7 +704,7 @@ impl<'c> ExecNoSync<'c> {
         // Now run the DFA in reverse to find the start of the match.
         match dfa::Fsm::reverse(
             &self.ro.dfa_reverse,
-            self.cache,
+            self.cache.value(),
             false,
             &text[start..],
             end - start,
@@ -730,7 +730,7 @@ impl<'c> ExecNoSync<'c> {
         use dfa::Result::*;
         match dfa::Fsm::reverse(
             &self.ro.dfa_reverse,
-            self.cache,
+            self.cache.value(),
             false,
             &text[start..],
             text.len() - start,
@@ -744,7 +744,7 @@ impl<'c> ExecNoSync<'c> {
     /// Finds the end of the shortest match using only the DFA.
     #[cfg_attr(feature = "perf-inline", inline(always))]
     fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result<usize> {
-        dfa::Fsm::forward(&self.ro.dfa, self.cache, true, text, start)
+        dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start)
     }
 
     /// Finds the end of the shortest match using only the DFA by scanning for
@@ -796,7 +796,7 @@ impl<'c> ExecNoSync<'c> {
             end = last_literal + lcs.len();
             match dfa::Fsm::reverse(
                 &self.ro.dfa_reverse,
-                self.cache,
+                self.cache.value(),
                 false,
                 &text[start..end],
                 end - start,
@@ -841,7 +841,7 @@ impl<'c> ExecNoSync<'c> {
         // leftmost-first match.)
         match dfa::Fsm::forward(
             &self.ro.dfa,
-            self.cache,
+            self.cache.value(),
             false,
             text,
             match_start,
@@ -1007,7 +1007,7 @@ impl<'c> ExecNoSync<'c> {
         if self.ro.nfa.uses_bytes() {
             pikevm::Fsm::exec(
                 &self.ro.nfa,
-                self.cache,
+                self.cache.value(),
                 matches,
                 slots,
                 quit_after_match,
@@ -1018,7 +1018,7 @@ impl<'c> ExecNoSync<'c> {
         } else {
             pikevm::Fsm::exec(
                 &self.ro.nfa,
-                self.cache,
+                self.cache.value(),
                 matches,
                 slots,
                 quit_after_match,
@@ -1041,7 +1041,7 @@ impl<'c> ExecNoSync<'c> {
         if self.ro.nfa.uses_bytes() {
             backtrack::Bounded::exec(
                 &self.ro.nfa,
-                self.cache,
+                self.cache.value(),
                 matches,
                 slots,
                 ByteInput::new(text, self.ro.nfa.only_utf8),
@@ -1051,7 +1051,7 @@ impl<'c> ExecNoSync<'c> {
         } else {
             backtrack::Bounded::exec(
                 &self.ro.nfa,
-                self.cache,
+                self.cache.value(),
                 matches,
                 slots,
                 CharInput::new(text),
@@ -1087,7 +1087,7 @@ impl<'c> ExecNoSync<'c> {
             Dfa | DfaAnchoredReverse | DfaSuffix | DfaMany => {
                 match dfa::Fsm::forward_many(
                     &self.ro.dfa,
-                    self.cache,
+                    self.cache.value(),
                     matches,
                     text,
                     start,
@@ -1145,8 +1145,7 @@ impl Exec {
     /// Get a searcher that isn't Sync.
     #[cfg_attr(feature = "perf-inline", inline(always))]
     pub fn searcher(&self) -> ExecNoSync {
-        let create =
-            || Box::new(RefCell::new(ProgramCacheInner::new(&self.ro)));
+        let create = || RefCell::new(ProgramCacheInner::new(&self.ro));
         ExecNoSync {
             ro: &self.ro, // a clone is too expensive here! (and not needed)
             cache: self.cache.get_or(create),
@@ -1201,7 +1200,7 @@ impl Exec {
 
 impl Clone for Exec {
     fn clone(&self) -> Exec {
-        Exec { ro: self.ro.clone(), cache: CachedThreadLocal::new() }
+        Exec { ro: self.ro.clone(), cache: Cached::new() }
     }
 }
 
diff --git a/src/lib.rs b/src/lib.rs
index ec9ce0c6df..8bc62ec967 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -621,6 +621,7 @@ compile_error!("`std` feature is currently required to build this crate");
 
 extern crate aho_corasick;
 extern crate memchr;
+#[cfg(feature = "perf-cache")]
 extern crate thread_local;
 #[cfg(test)]
 #[macro_use]
@@ -744,6 +745,7 @@ pub mod bytes {
 }
 
 mod backtrack;
+mod cache;
 mod compile;
 mod dfa;
 mod error;