Skip to content

Commit

Permalink
feat(simd): update & enable swizzle4_32_aos()
Browse files Browse the repository at this point in the history
- fix swizzle impl (byte lane order)
- add/update tests
  • Loading branch information
postspectacular committed Jul 16, 2020
1 parent c53e0ab commit ae1ad77
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 96 deletions.
8 changes: 1 addition & 7 deletions packages/simd/assembly/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@ export * from "./abs";
export * from "./add";
export * from "./addn";
export * from "./clamp";

// TODO waiting for native impl
export * from "./div";
export * from "./divn";

export * from "./dot";
export * from "./madd";
export * from "./maddn";
Expand All @@ -23,11 +20,8 @@ export * from "./muln";
export * from "./mulv";
export * from "./neg";
export * from "./normalize";

// TODO waiting for native impl
export * from "./sqrt";

export * from "./sub";
export * from "./subn";
export * from "./sum";
// export * from "./swizzle";
export * from "./swizzle";
42 changes: 35 additions & 7 deletions packages/simd/assembly/swizzle.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,38 @@ export function set_lane_f32(
return res;
}

export function swizzle4_32(
/**
* Swaps, reorders or replicates vector components in an AOS f32/u32 vec4
* buffer. The `x`,`y`,`z`,`w` args are source lane IDs (each 0-3) selecting
* which input component is written to the 1st, 2nd, 3rd and 4th output
* component respectively.
*
* @example
* ```ts
* simd.f32.set([10, 20, 30, 40], 0)
* simd.swizzle4_32_aos(
* 16, // dest ptr
* 0, // src ptr
* 3, 0, 1, 2, // lane IDs
* 1, // num vectors
* 4, // output stride (in f32/u32)
* 4 // input stride
* )
*
* simd.f32.slice(4, 8)
* // [40, 10, 20, 30]
* ```
*
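* @param out - destination address (in bytes)
* @param a - source address (in bytes)
* @param x - source lane ID (0-3) for the 1st output component
* @param y - source lane ID (0-3) for the 2nd output component
* @param z - source lane ID (0-3) for the 3rd output component
* @param w - source lane ID (0-3) for the 4th output component
* @param num - number of vec4 items to process
* @param so - output stride (in f32/u32 elements, not bytes)
* @param sa - input stride (in f32/u32 elements, not bytes)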
*/
export function swizzle4_32_aos(
out: usize,
a: usize,
x: u32,
Expand All @@ -46,18 +77,15 @@ export function swizzle4_32(
so <<= 2;
sa <<= 2;
// create swizzle pattern from xyzw
// each lane: id * 0x04040404 + 0x00010203
// TODO verify order
// each lane: id * 0x04040404 + 0x03020100
let mask = i64x2.replace_lane(
i64x2.splat(<u64>y * 0x0404040400000000 + <u64>x * 0x0000000004040404),
1,
<u64>w * 0x0404040400000000 + <u64>z * 0x0000000004040404
);
mask = i64x2.add(mask, i32x4.splat(0x00010203));
v128.store(out, mask);

mask = i64x2.add(mask, i32x4.splat(0x03020100));
for (; num-- > 0; ) {
// v128.store(out, v128.swizzle(v128.load(a), mask));
v128.store(out, v128.swizzle(v128.load(a), mask));
out += so;
a += sa;
}
Expand Down
3 changes: 3 additions & 0 deletions packages/simd/src/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ export interface SIMD {

sum4_f32(a: number, num: number, sa: number): number;

// prettier-ignore
swizzle4_32_aos(out: number, a: number, x: number, y: number, z: number, w: number, num: number, so: number, sa: number): number;

/**
* WASM memory instance given to `init()`.
*/
Expand Down
Loading

0 comments on commit ae1ad77

Please sign in to comment.