Skip to content

Commit

Permalink
feat(simd): add clampn4_f32, sum4_f32
Browse files Browse the repository at this point in the history
  • Loading branch information
postspectacular committed Oct 20, 2019
1 parent 80f1a0e commit 0e0dfde
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 1 deletion.
4 changes: 3 additions & 1 deletion packages/simd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ This project is part of the
## About

[WebAssembly SIMD](https://github.com/WebAssembly/simd) vector
operations for batch processing, written in
operations for array/batch processing, written in
[AssemblyScript](https://docs.assemblyscript.org/).

## Available functions
Expand All @@ -36,6 +36,7 @@ for sources:
- `add4_f32`
- `addn4_f32`
- `clamp4_f32`
- `clampn4_f32`
- `div4_f32` (*)
- `divn4_f32` (*)
- `dot2_f32_aos` (2x vec2 per iteration)
Expand All @@ -57,6 +58,7 @@ for sources:
- `sqrt4_f32` (*)
- `sub4_f32`
- `subn4_f32`
- `sum4_f32`

(*) Missing native implementation, waiting on...

Expand Down
22 changes: 22 additions & 0 deletions packages/simd/assembly/clamp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,25 @@ export function clamp4_f32(
}
return res;
}

/**
 * Clamps `num` 4-lane f32 vectors against uniform scalar bounds.
 *
 * Each vector is loaded from the current `a` address, every lane is
 * computed as `min(max(x, b), c)` and the result is stored at the
 * current `out` address. After each vector both addresses advance by
 * their stride shifted left by 2 (i.e. multiplied by 4; presumably
 * strides are given in f32 elements and converted to bytes here).
 *
 * @param out - address the first result vector is written to
 * @param a - address the first input vector is read from
 * @param b - lower bound, applied to all lanes
 * @param c - upper bound, applied to all lanes
 * @param num - number of vectors to process
 * @param so - output stride (scaled by 4 before use)
 * @param sa - input stride (scaled by 4 before use)
 * @returns the original `out` address
 */
export function clampn4_f32(
    out: usize,
    a: usize,
    b: f32,
    c: f32,
    num: usize,
    so: usize,
    sa: usize
): usize {
    // Broadcast the scalar bounds once, outside the loop.
    const lo = f32x4.splat(b);
    const hi = f32x4.splat(c);
    // Scaled strides used to advance the two cursors.
    const dstStep = so << 2;
    const srcStep = sa << 2;
    let dst = out;
    let src = a;
    while (num > 0) {
        const clamped = f32x4.min(f32x4.max(v128.load(src), lo), hi);
        v128.store(dst, clamped);
        dst += dstStep;
        src += srcStep;
        num--;
    }
    return out;
}
1 change: 1 addition & 0 deletions packages/simd/assembly/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ export * from "./normalize";

export * from "./sub";
export * from "./subn";
export * from "./sum";
10 changes: 10 additions & 0 deletions packages/simd/assembly/sum.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/**
 * Sums all lanes of `num` 4-lane f32 vectors into a single scalar.
 *
 * Vectors are loaded starting at address `a`; after each load the
 * address advances by `sa` shifted left by 2 (i.e. multiplied by 4;
 * presumably `sa` is given in f32 elements and converted to bytes).
 * Lanes are accumulated independently and only combined at the end.
 *
 * @param a - address the first vector is read from
 * @param num - number of vectors to accumulate
 * @param sa - input stride (scaled by 4 before use)
 * @returns sum of every lane of every visited vector (0 if `num` is 0)
 */
export function sum4_f32(a: usize, num: usize, sa: usize): f64 {
    const step = sa << 2;
    let total = f32x4.splat(0);
    let src = a;
    while (num > 0) {
        total = f32x4.add(total, v128.load(src));
        src += step;
        num--;
    }
    // Horizontal reduction: swap the two halves and add, so that
    // lane 0 holds t0 + t2 and lane 1 holds t1 + t3.
    const swapped = v128.shuffle<f32>(total, total, 2, 3, 0, 1);
    const folded = f32x4.add(total, swapped);
    return f32x4.extract_lane(folded, 0) + f32x4.extract_lane(folded, 1);
}
5 changes: 5 additions & 0 deletions packages/simd/src/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ export interface SIMD {
// prettier-ignore
clamp4_f32(out: number, a: number, b: number, c: number, num: number, so: number, sa: number, sb: number, sc: number): number;

// prettier-ignore
clampn4_f32(out: number, a: number, b: number, c: number, num: number, so: number, sa: number): number;

// prettier-ignore
div4_f32(out: number, a: number, b: number, num: number, so: number, sa: number, sb: number): number;

Expand Down Expand Up @@ -159,6 +162,8 @@ export interface SIMD {
// prettier-ignore
subn4_f32(out: number, a: number, n: number, num: number, so: number, sa: number): number;

sum4_f32(a: number, num: number, sa: number): number;

/**
* WASM memory instance given to `init()`.
*/
Expand Down

0 comments on commit 0e0dfde

Please sign in to comment.