Skip to content

Commit

Permalink
Support WebAssembly Relaxed SIMD
Browse files Browse the repository at this point in the history
This makes use of the [WebAssembly Relaxed
SIMD](https://github.com/WebAssembly/relaxed-simd) instructions, which
trade off consistent results across architectures in certain edge cases
for better performance. These differing edge-case behaviors already
exist in the native equivalents that are used in the SIMD
implementation, so this does not regress correctness.
  • Loading branch information
CryZe committed Jul 14, 2024
1 parent 7845695 commit 1c0fd06
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 7 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ jobs:
RUSTFLAGS: -Ctarget-feature=+simd128,+bulk-memory,+nontrapping-fptoint,+sign-ext
run: cargo test --target wasm32-wasi

- name: Run tests with Relaxed SIMD
env:
RUSTFLAGS: -Ctarget-feature=+simd128,+relaxed-simd,+bulk-memory,+nontrapping-fptoint,+sign-ext
run: cargo test --target wasm32-wasi

aarch64:
runs-on: ubuntu-20.04
steps:
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ default = ["std", "simd", "png-format"]
std = ["tiny-skia-path/std"]
no-std-float = ["tiny-skia-path/no-std-float"]

# Enables SIMD instructions on x86 (from SSE up to AVX2), WebAssembly (SIMD128)
# and AArch64 (Neon).
# Enables SIMD instructions on x86 (from SSE up to AVX2), WebAssembly (SIMD128,
# Relaxed SIMD) and AArch64 (Neon).
# Has no effect on other targets. Present mainly for testing.
simd = []

Expand Down
10 changes: 10 additions & 0 deletions src/wide/f32x4_t.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ impl f32x4 {
cfg_if::cfg_if! {
if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
Self(unsafe { _mm_max_ps(self.0, rhs.0) })
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
Self(f32x4_relaxed_max(self.0, rhs.0))
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
Self(f32x4_pmax(self.0, rhs.0))
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
Expand All @@ -111,6 +113,8 @@ impl f32x4 {
cfg_if::cfg_if! {
if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
Self(unsafe { _mm_min_ps(self.0, rhs.0) })
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
Self(f32x4_relaxed_min(self.0, rhs.0))
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
Self(f32x4_pmin(self.0, rhs.0))
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
Expand Down Expand Up @@ -245,6 +249,8 @@ impl f32x4 {
cfg_if::cfg_if! {
if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
Self(unsafe { _mm_blendv_ps(f.0, t.0, self.0) })
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
Self(i32x4_relaxed_laneselect(t.0, f.0, self.0))
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
Self(v128_bitselect(t.0, f.0, self.0))
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
Expand Down Expand Up @@ -302,6 +308,8 @@ impl f32x4 {
cfg_if::cfg_if! {
if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
i32x4(unsafe { _mm_cvtps_epi32(self.0) })
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
i32x4(i32x4_relaxed_trunc_f32x4(self.round().0))
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
i32x4(i32x4_trunc_sat_f32x4(self.round().0))
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
Expand All @@ -325,6 +333,8 @@ impl f32x4 {
cfg_if::cfg_if! {
if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
i32x4(unsafe { _mm_cvttps_epi32(self.0) })
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
i32x4(i32x4_relaxed_trunc_f32x4(self.0))
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
i32x4(i32x4_trunc_sat_f32x4(self.0))
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
Expand Down
16 changes: 11 additions & 5 deletions src/wide/f32x8_t.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,17 @@ impl f32x8 {
}

pub fn floor(self) -> Self {
let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8());
roundtrip
- roundtrip
.cmp_gt(self)
.blend(f32x8::splat(1.0), f32x8::default())
cfg_if::cfg_if! {
if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
Self(self.0.floor(), self.1.floor())
} else {
let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8());
roundtrip
- roundtrip
.cmp_gt(self)
.blend(f32x8::splat(1.0), f32x8::default())
}
}
}

pub fn fract(self) -> Self {
Expand Down
2 changes: 2 additions & 0 deletions src/wide/i32x4_t.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ impl i32x4 {
cfg_if::cfg_if! {
if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
Self(unsafe { _mm_blendv_epi8(f.0, t.0, self.0) })
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
Self(i32x4_relaxed_laneselect(t.0, f.0, self.0))
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
Self(v128_bitselect(t.0, f.0, self.0))
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
Expand Down

0 comments on commit 1c0fd06

Please sign in to comment.