Skip to content

Commit

Permalink
Add wasm simd support
Browse files Browse the repository at this point in the history
This commit adds wasm simd acceleration support to the `memmem` module,
using the freshly-stabilized support from rust-lang/rust#86204.
This mostly just cribs off the generic simd support for 128-bit types
built for sse, copying bits and pieces of code here and there. Some
refactoring happened internally to help reduce duplication where
possible.

I ran some initial benchmarks with the `memmem/krate/*` regex and a
hacked-up single-threaded version of criterion. Some [initial
comparisons][compare] using Wasmtime as a runtime do show a lot of
improvements, but there are also some slowdowns.

[compare]: https://gist.github.com/alexcrichton/6a72e682e7b6d505ade605359fbe3f2d
  • Loading branch information
alexcrichton committed Dec 22, 2021
1 parent 8e1da98 commit e53a4c5
Show file tree
Hide file tree
Showing 9 changed files with 287 additions and 113 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
- stable
- stable-32
- stable-mips
- wasm
- beta
- nightly
- macos
Expand Down Expand Up @@ -62,6 +63,10 @@ jobs:
- build: win-gnu
os: windows-2019
rust: stable-x86_64-gnu
- build: wasm
os: ubuntu-18.04
rust: stable-x86_64-gnu
wasm: true
steps:
- name: Checkout repository
uses: actions/checkout@v1
Expand All @@ -81,6 +86,16 @@ jobs:
cargo install --git https://github.com/rust-embedded/cross
echo "CARGO=cross" >> $GITHUB_ENV
echo "TARGET=--target ${{ matrix.target }}" >> $GITHUB_ENV
- name: Download Wasmtime
if: matrix.wasm
run: |
rustup target add wasm32-wasi
echo "CARGO_BUILD_TARGET=wasm32-wasi" >> $GITHUB_ENV
echo "RUSTFLAGS=-Ctarget-feature=+simd128" >> $GITHUB_ENV
curl -LO https://github.com/bytecodealliance/wasmtime/releases/download/v0.32.0/wasmtime-v0.32.0-x86_64-linux.tar.xz
tar xvf wasmtime-v0.32.0-x86_64-linux.tar.xz
echo `pwd`/wasmtime-v0.32.0-x86_64-linux >> $GITHUB_PATH
echo "CARGO_TARGET_WASM32_WASI_RUNNER=wasmtime run --enable-simd --" >> $GITHUB_ENV
- name: Show command used for Cargo
run: |
echo "cargo command is: ${{ env.CARGO }}"
Expand Down
35 changes: 25 additions & 10 deletions bench/src/memmem/imp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -640,44 +640,47 @@ pub(crate) mod sliceslice {
}

pub(crate) fn prebuilt(_: &str) -> impl Fn(&str) -> bool + 'static {
unimplemented!("sliceslice only runs on x86")
if true {
unimplemented!("sliceslice only runs on x86")
}
|_| false
}

pub(crate) fn oneshotiter<'a>(
haystack: &'a str,
needle: &'a str,
_haystack: &'a str,
_needle: &'a str,
) -> impl Iterator<Item = usize> + 'static {
std::iter::from_fn(move || {
unimplemented!("sliceslice only runs on x86")
})
}

pub(crate) fn prebuiltiter(needle: &str) -> super::super::NoIter {
pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
unimplemented!("sliceslice only runs on x86")
}
}

pub(crate) mod rev {
pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
unimplemented!("sliceslice does not support reverse searches")
}

pub(crate) fn prebuilt(
needle: &str,
_needle: &str,
) -> impl Fn(&str) -> bool + 'static {
|_| unimplemented!("sliceslice does not support reverse searches")
}

pub(crate) fn oneshotiter(
haystack: &str,
needle: &str,
_haystack: &str,
_needle: &str,
) -> impl Iterator<Item = usize> + 'static {
std::iter::from_fn(move || {
unimplemented!("sliceslice does not support reverse searches")
})
}

pub(crate) fn prebuiltiter(needle: &str) -> super::super::NoIter {
pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
unimplemented!("sliceslice does not support reverse searches")
}
}
Expand All @@ -693,9 +696,21 @@ pub(crate) mod libc {
}

pub(crate) mod fwd {
#[cfg(target_arch = "wasm32")]
extern "C" {
fn memmem(
haystack: *const libc::c_void,
haystack_len: usize,
needle: *const libc::c_void,
needle_len: usize,
) -> *const libc::c_void;
}
#[cfg(not(target_arch = "wasm32"))]
use libc::memmem;

fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
let p = unsafe {
libc::memmem(
memmem(
haystack.as_ptr() as *const libc::c_void,
haystack.len(),
needle.as_ptr() as *const libc::c_void,
Expand Down
28 changes: 21 additions & 7 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,29 @@ fn main() {
// is not a problem. In that case, the fastest option will be chosen at
// runtime.
fn enable_simd_optimizations() {
if is_env_set("CARGO_CFG_MEMCHR_DISABLE_AUTO_SIMD")
|| !target_has_feature("sse2")
{
if is_env_set("CARGO_CFG_MEMCHR_DISABLE_AUTO_SIMD") {
return;
}
println!("cargo:rustc-cfg=memchr_runtime_simd");
println!("cargo:rustc-cfg=memchr_runtime_sse2");
println!("cargo:rustc-cfg=memchr_runtime_sse42");
println!("cargo:rustc-cfg=memchr_runtime_avx");
let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
match &arch[..] {
"x86_64" => {
if !target_has_feature("sse2") {
return;
}
println!("cargo:rustc-cfg=memchr_runtime_simd");
println!("cargo:rustc-cfg=memchr_runtime_sse2");
println!("cargo:rustc-cfg=memchr_runtime_sse42");
println!("cargo:rustc-cfg=memchr_runtime_avx");
}
"wasm32" | "wasm64" => {
if !target_has_feature("simd128") {
return;
}
println!("cargo:rustc-cfg=memchr_runtime_simd");
println!("cargo:rustc-cfg=memchr_runtime_wasm128");
}
_ => {}
}
}

// This adds a `memchr_libc` cfg if and only if libc can be used, if no other
Expand Down
85 changes: 37 additions & 48 deletions src/memmem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,16 +146,17 @@ macro_rules! define_memmem_simple_tests {
}

mod byte_frequencies;
#[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
#[cfg(memchr_runtime_simd)]
mod genericsimd;
mod prefilter;
mod rabinkarp;
mod rarebytes;
mod twoway;
mod util;
// SIMD is only supported on x86_64 currently.
#[cfg(target_arch = "x86_64")]
#[cfg(memchr_runtime_simd)]
mod vector;
#[cfg(all(memchr_runtime_wasm128))]
mod wasm;
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
mod x86;

Expand Down Expand Up @@ -773,47 +774,47 @@ enum SearcherKind {
TwoWay(twoway::Forward),
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
GenericSIMD128(x86::sse::Forward),
#[cfg(memchr_runtime_wasm128)]
GenericSIMD128(wasm::Forward),
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
GenericSIMD256(x86::avx::Forward),
}

impl<'n> Searcher<'n> {
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> {
use self::SearcherKind::*;

let ninfo = NeedleInfo::new(needle);
let prefn =
prefilter::forward(&config.prefilter, &ninfo.rarebytes, needle);
let kind = if needle.len() == 0 {
Empty
} else if needle.len() == 1 {
OneByte(needle[0])
} else if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) {
GenericSIMD256(fwd)
} else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) {
GenericSIMD128(fwd)
} else {
TwoWay(twoway::Forward::new(needle))
let mk = |kind: SearcherKind| {
let prefn = prefilter::forward(
&config.prefilter,
&ninfo.rarebytes,
needle,
);
Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
};
Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
}

#[cfg(not(all(not(miri), target_arch = "x86_64", memchr_runtime_simd)))]
fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> {
use self::SearcherKind::*;
if needle.len() == 0 {
return mk(Empty);
}
if needle.len() == 1 {
return mk(OneByte(needle[0]));
}
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
{
if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) {
return mk(GenericSIMD256(fwd));
} else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) {
return mk(GenericSIMD128(fwd));
}
}
#[cfg(all(target_arch = "wasm32", memchr_runtime_simd))]
{
if let Some(fwd) = wasm::Forward::new(&ninfo, needle) {
return mk(GenericSIMD128(fwd));
}
}

let ninfo = NeedleInfo::new(needle);
let prefn =
prefilter::forward(&config.prefilter, &ninfo.rarebytes, needle);
let kind = if needle.len() == 0 {
Empty
} else if needle.len() == 1 {
OneByte(needle[0])
} else {
TwoWay(twoway::Forward::new(needle))
};
Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
mk(TwoWay(twoway::Forward::new(needle)))
}

/// Return a fresh prefilter state that can be used with this searcher.
Expand Down Expand Up @@ -844,11 +845,7 @@ impl<'n> Searcher<'n> {
Empty => Empty,
OneByte(b) => OneByte(b),
TwoWay(tw) => TwoWay(tw),
#[cfg(all(
not(miri),
target_arch = "x86_64",
memchr_runtime_simd
))]
#[cfg(all(not(miri), memchr_runtime_simd))]
GenericSIMD128(gs) => GenericSIMD128(gs),
#[cfg(all(
not(miri),
Expand All @@ -873,11 +870,7 @@ impl<'n> Searcher<'n> {
Empty => Empty,
OneByte(b) => OneByte(b),
TwoWay(tw) => TwoWay(tw),
#[cfg(all(
not(miri),
target_arch = "x86_64",
memchr_runtime_simd
))]
#[cfg(all(not(miri), memchr_runtime_simd))]
GenericSIMD128(gs) => GenericSIMD128(gs),
#[cfg(all(
not(miri),
Expand Down Expand Up @@ -921,11 +914,7 @@ impl<'n> Searcher<'n> {
self.find_tw(tw, state, haystack, needle)
}
}
#[cfg(all(
not(miri),
target_arch = "x86_64",
memchr_runtime_simd
))]
#[cfg(all(not(miri), memchr_runtime_simd))]
GenericSIMD128(ref gs) => {
// The SIMD matcher can't handle particularly short haystacks,
// so we fall back to RK in these cases.
Expand Down
Loading

0 comments on commit e53a4c5

Please sign in to comment.