-
-
Notifications
You must be signed in to change notification settings - Fork 684
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1807 from odin-lang/simd-dev
Generic #simd type and intrinsics
- Loading branch information
Showing
43 changed files
with
5,432 additions
and
364 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
package simd | ||
|
||
import "core:builtin" | ||
import "core:intrinsics" | ||
|
||
// 128-bit vector aliases | ||
// Naming scheme: <element type>x<lane count>, e.g. u8x16 is #simd[16]u8. | ||
u8x16 :: #simd[16]u8 | ||
i8x16 :: #simd[16]i8 | ||
u16x8 :: #simd[8]u16 | ||
i16x8 :: #simd[8]i16 | ||
u32x4 :: #simd[4]u32 | ||
i32x4 :: #simd[4]i32 | ||
u64x2 :: #simd[2]u64 | ||
i64x2 :: #simd[2]i64 | ||
f32x4 :: #simd[4]f32 | ||
f64x2 :: #simd[2]f64 | ||
|
||
// Boolean-lane aliases (bool and the sized b8/b16/b32/b64 types), same naming scheme. | ||
boolx16 :: #simd[16]bool | ||
b8x16 :: #simd[16]b8 | ||
b16x8 :: #simd[8]b16 | ||
b32x4 :: #simd[4]b32 | ||
b64x2 :: #simd[2]b64 | ||
|
||
// 256-bit vector aliases | ||
// Naming scheme: <element type>x<lane count>, e.g. u8x32 is #simd[32]u8. | ||
u8x32 :: #simd[32]u8 | ||
i8x32 :: #simd[32]i8 | ||
u16x16 :: #simd[16]u16 | ||
i16x16 :: #simd[16]i16 | ||
u32x8 :: #simd[8]u32 | ||
i32x8 :: #simd[8]i32 | ||
u64x4 :: #simd[4]u64 | ||
i64x4 :: #simd[4]i64 | ||
f32x8 :: #simd[8]f32 | ||
f64x4 :: #simd[4]f64 | ||
|
||
// Boolean-lane aliases (bool and the sized b8/b16/b32/b64 types), same naming scheme. | ||
boolx32 :: #simd[32]bool | ||
b8x32 :: #simd[32]b8 | ||
b16x16 :: #simd[16]b16 | ||
b32x8 :: #simd[8]b32 | ||
b64x4 :: #simd[4]b64 | ||
|
||
// 512-bit vector aliases | ||
// Naming scheme: <element type>x<lane count>, e.g. u8x64 is #simd[64]u8. | ||
u8x64 :: #simd[64]u8 | ||
i8x64 :: #simd[64]i8 | ||
u16x32 :: #simd[32]u16 | ||
i16x32 :: #simd[32]i16 | ||
u32x16 :: #simd[16]u32 | ||
i32x16 :: #simd[16]i32 | ||
u64x8 :: #simd[8]u64 | ||
i64x8 :: #simd[8]i64 | ||
f32x16 :: #simd[16]f32 | ||
f64x8 :: #simd[8]f64 | ||
|
||
// Boolean-lane aliases (bool and the sized b8/b16/b32/b64 types), same naming scheme. | ||
boolx64 :: #simd[64]bool | ||
b8x64 :: #simd[64]b8 | ||
b16x32 :: #simd[32]b16 | ||
b32x16 :: #simd[16]b32 | ||
b64x8 :: #simd[8]b64 | ||
|
||
|
||
// Short names for the `intrinsics.simd_*` procedures; each declaration below is a plain alias. | ||
add :: intrinsics.simd_add | ||
sub :: intrinsics.simd_sub | ||
mul :: intrinsics.simd_mul | ||
div :: intrinsics.simd_div | ||
rem :: intrinsics.simd_rem // integers only | ||
|
||
// Keeps Odin's Behaviour | ||
// (x << y) if y <= mask else 0 | ||
// NOTE(review): `mask` is not defined in this file; presumably the largest valid shift amount for the element type - confirm. | ||
shl :: intrinsics.simd_shl | ||
shr :: intrinsics.simd_shr | ||
|
||
// Similar to C's Behaviour | ||
// x << (y & mask) | ||
// The shift amount is masked rather than producing 0 when it is out of range. | ||
shl_masked :: intrinsics.simd_shl_masked | ||
shr_masked :: intrinsics.simd_shr_masked | ||
|
||
// Saturation Arithmetic | ||
add_sat :: intrinsics.simd_add_sat | ||
sub_sat :: intrinsics.simd_sub_sat | ||
|
||
// Aliases for the simd_and / simd_or / simd_xor / simd_and_not intrinsics. | ||
and :: intrinsics.simd_and | ||
or :: intrinsics.simd_or | ||
xor :: intrinsics.simd_xor | ||
and_not :: intrinsics.simd_and_not | ||
|
||
neg :: intrinsics.simd_neg | ||
|
||
abs :: intrinsics.simd_abs | ||
|
||
// Note: these names shadow the builtin min/max/clamp inside this package. | ||
min :: intrinsics.simd_min | ||
max :: intrinsics.simd_max | ||
clamp :: intrinsics.simd_clamp | ||
|
||
// Lane comparison aliases for the `intrinsics.simd_lanes_*` procedures. | ||
// Return an unsigned integer of the same size as the input type | ||
// NOT A BOOLEAN | ||
// element-wise: | ||
// false => 0x00...00 | ||
// true => 0xff...ff | ||
lanes_eq :: intrinsics.simd_lanes_eq | ||
lanes_ne :: intrinsics.simd_lanes_ne | ||
lanes_lt :: intrinsics.simd_lanes_lt | ||
lanes_le :: intrinsics.simd_lanes_le | ||
lanes_gt :: intrinsics.simd_lanes_gt | ||
lanes_ge :: intrinsics.simd_lanes_ge | ||
|
||
// Single-lane access; the commented signatures show the expected shape of each intrinsic. | ||
// extract :: proc(a: #simd[N]T, idx: uint) -> T | ||
extract :: intrinsics.simd_extract | ||
// replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T | ||
replace :: intrinsics.simd_replace | ||
|
||
// Aliases for the `intrinsics.simd_reduce_*` procedures. | ||
// NOTE(review): presumably each collapses all lanes of a vector into one scalar - confirm against the intrinsics docs. | ||
reduce_add_ordered :: intrinsics.simd_reduce_add_ordered | ||
reduce_mul_ordered :: intrinsics.simd_reduce_mul_ordered | ||
reduce_min :: intrinsics.simd_reduce_min | ||
reduce_max :: intrinsics.simd_reduce_max | ||
reduce_and :: intrinsics.simd_reduce_and | ||
reduce_or :: intrinsics.simd_reduce_or | ||
reduce_xor :: intrinsics.simd_reduce_xor | ||
|
||
// Unlike the other aliases here, swizzle comes from `builtin`, not `intrinsics`. | ||
// swizzle :: proc(a: #simd[N]T, indices: ..int) -> #simd[len(indices)]T | ||
swizzle :: builtin.swizzle | ||
|
||
// shuffle :: proc(a, b: #simd[N]T, indices: #simd[max 2*N]u32) -> #simd[len(indices)]T | ||
shuffle :: intrinsics.simd_shuffle | ||
|
||
// select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T | ||
select :: intrinsics.simd_select | ||
|
||
|
||
// Note: sqrt aliases `intrinsics.sqrt` (no `simd_` prefix), unlike the rounding aliases below. | ||
sqrt :: intrinsics.sqrt | ||
ceil :: intrinsics.simd_ceil | ||
floor :: intrinsics.simd_floor | ||
trunc :: intrinsics.simd_trunc | ||
nearest :: intrinsics.simd_nearest | ||
|
||
// Used in this file by copysign to obtain a representation that supports `&`, `&~` and `|`. | ||
to_bits :: intrinsics.simd_to_bits | ||
|
||
lanes_reverse :: intrinsics.simd_lanes_reverse | ||
|
||
lanes_rotate_left :: intrinsics.simd_lanes_rotate_left | ||
lanes_rotate_right :: intrinsics.simd_lanes_rotate_right | ||
|
||
// These alias the general (non-`simd_`-prefixed) bit-counting intrinsics. | ||
count_ones :: intrinsics.count_ones | ||
count_zeros :: intrinsics.count_zeros | ||
count_trailing_zeros :: intrinsics.count_trailing_zeros | ||
count_leading_zeros :: intrinsics.count_leading_zeros | ||
reverse_bits :: intrinsics.reverse_bits | ||
|
||
// `fma` is a second, shorter name for the same intrinsic as `fused_mul_add`. | ||
fused_mul_add :: intrinsics.fused_mul_add | ||
fma :: intrinsics.fused_mul_add | ||
|
||
// Reinterprets a pointer to a SIMD vector as a pointer to a fixed-size array
// with the same lane count and element type. Only the pointer is converted;
// no data is copied, so the result refers to the same memory as `v`.
to_array_ptr :: #force_inline proc "contextless" (v: ^#simd[$LANES]$E) -> ^[LANES]E {
	array_ptr := cast(^[LANES]E)v
	return array_ptr
}
// Returns the lanes of SIMD vector `v` as a fixed-size array of the same
// length and element type, via a bit-for-bit reinterpretation (transmute).
to_array :: #force_inline proc "contextless" (v: #simd[$LANES]$E) -> [LANES]E {
	lanes := transmute([LANES]E)v
	return lanes
}
// Builds a SIMD vector from fixed-size array `v`, keeping the array's length
// as the lane count and its element type as the lane type. The conversion is
// a bit-for-bit reinterpretation (transmute).
from_array :: #force_inline proc "contextless" (v: $A/[$LANES]$E) -> #simd[LANES]E {
	vec := transmute(#simd[LANES]E)v
	return vec
}
|
||
// Builds a SIMD vector of type `T` from the first LANES elements of `slice`.
//
// Inputs:
// - T:     the #simd vector type to produce; fixes LANES and the element type E
// - slice: source elements; must hold at least LANES of them
//
// Asserts that `len(slice) >= LANES`. Elements past the first LANES are ignored.
from_slice :: proc($T: typeid/#simd[$LANES]$E, slice: []E) -> T {
	assert(len(slice) >= LANES, "slice length must be at least the number of lanes")
	array: [LANES]E
	// `copy` transfers min(len(dst), len(src)) elements, i.e. exactly LANES once
	// the assertion holds, and never reads past the end of `slice`.
	copy(array[:], slice)
	return transmute(T)array
}
|
||
// Inverts every bit of each lane of integer vector `v` by XOR-ing it with a
// vector built from `~E(0)` (all bits set). Restricted to integer lane types.
bit_not :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_integer(E) {
	all_ones := T(~E(0))
	return xor(v, all_ones)
}
|
||
// Combines the bits of `v` outside the sign mask with the bits of `sign`
// inside it, where the sign mask is the bit pattern of -0.0.
// Restricted to floating-point lane types.
copysign :: #force_inline proc "contextless" (v, sign: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
	// The bit pattern of -0.0 serves as the sign mask.
	sign_mask := to_bits(T(-0.0))
	magnitude_bits := to_bits(v) &~ sign_mask
	sign_bits := to_bits(sign) & sign_mask
	return transmute(T)(sign_bits|magnitude_bits)
}
|
||
// For each lane of `v`: a lane that compares unequal to itself (NaN) is passed
// through unchanged; every other lane becomes 1 carrying the sign of that lane.
// Restricted to floating-point lane types.
signum :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
	signed_one := copysign(T(1), v)
	// v != v is true only for NaN lanes.
	nan_lanes := lanes_ne(v, v)
	return select(nan_lanes, v, signed_one)
}
|
||
// Returns the reciprocal of `v`, computed as `T(1) / v`.
// Restricted to floating-point lane types.
recip :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
	one := T(1)
	return one / v
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
//+build i386, amd64 | ||
package simd_x86 | ||
|
||
import "core:intrinsics" | ||
|
||
// Returns the result of `intrinsics.count_leading_zeros` for the 32-bit value `x`.
// Declared with the "lzcnt" target feature enabled.
@(require_results, enable_target_feature="lzcnt")
_lzcnt_u32 :: #force_inline proc "c" (x: u32) -> u32 {
	leading := intrinsics.count_leading_zeros(x)
	return leading
}
// Returns the result of `intrinsics.count_ones` for the 32-bit value `x`,
// converted to i32. Declared with the "popcnt" target feature enabled.
@(require_results, enable_target_feature="popcnt")
_popcnt32 :: #force_inline proc "c" (x: u32) -> i32 {
	ones := intrinsics.count_ones(x)
	return cast(i32)ones
}
|
||
// 64-bit variants, declared only when the target architecture is amd64.
when ODIN_ARCH == .amd64 {
	// Returns the result of `intrinsics.count_leading_zeros` for the 64-bit value `x`.
	@(require_results, enable_target_feature="lzcnt")
	_lzcnt_u64 :: #force_inline proc "c" (x: u64) -> u64 {
		leading := intrinsics.count_leading_zeros(x)
		return leading
	}

	// Returns the result of `intrinsics.count_ones` for the 64-bit value `x`, converted to i32.
	@(require_results, enable_target_feature="popcnt")
	_popcnt64 :: #force_inline proc "c" (x: u64) -> i32 {
		ones := intrinsics.count_ones(x)
		return cast(i32)ones
	}
}
Oops, something went wrong.