cranelift-interpreter: Implement a bunch of SIMD arithmetic ops (#5991)
* cranelift: Add function name to tests

* cranelift: Move simd-ineg tests to separate file

* cranelift: Move `avg_round` tests to separate file

* cranelift: Move SIMD `fmin`/`fmax` tests to separate files

* cranelift-interpreter: Implement a bunch of SIMD arithmetic ops

Most of these ops are straightforward to adapt to be polymorphic over vector types; see the sketch after this list.

* cranelift: Move shift tests from `simd-arithmetic.clif` into shift files
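
The bullet about making ops polymorphic refers to reusing one interpreter implementation for both scalar and vector values by applying the scalar operation to each lane. A minimal sketch of that idea, assuming a toy value type; `Value` and `binary_polymorphic` are illustrative names, not the cranelift-interpreter API:

// Hypothetical illustration of lane-wise polymorphism, not Cranelift code.
#[derive(Debug, Clone, PartialEq)]
enum Value {
    Scalar(i64),
    Vector(Vec<i64>),
}

/// Apply a scalar binary op either directly or lane-wise, so one implementation
/// serves both the scalar and the SIMD form of an instruction.
fn binary_polymorphic(a: &Value, b: &Value, op: impl Fn(i64, i64) -> i64) -> Option<Value> {
    match (a, b) {
        (Value::Scalar(x), Value::Scalar(y)) => Some(Value::Scalar(op(*x, *y))),
        (Value::Vector(xs), Value::Vector(ys)) if xs.len() == ys.len() => Some(Value::Vector(
            xs.iter().zip(ys).map(|(x, y)| op(*x, *y)).collect(),
        )),
        _ => None, // mismatched shapes
    }
}

fn main() {
    // Lane-wise isub, matching the `%isub_i32x4` run line in the diff below.
    let a = Value::Vector(vec![1, 1, 1, 1]);
    let b = Value::Vector(vec![1, 2, 3, 4]);
    let r = binary_polymorphic(&a, &b, |x, y| x.wrapping_sub(y));
    assert_eq!(r, Some(Value::Vector(vec![0, -1, -2, -3])));
}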
afonso360 authored Mar 16, 2023
1 parent 5ae8575 commit 07136ae
Showing 11 changed files with 277 additions and 240 deletions.
154 changes: 3 additions & 151 deletions cranelift/filetests/filetests/runtests/simd-arithmetic.clif
@@ -1,5 +1,4 @@
; the interpreter does not currently support some of these instructions
; such as `avg_round` on SIMD values.
test interpret
test run
target aarch64
target s390x
@@ -30,12 +29,6 @@ block0(v0: i32x4, v1: i32x4):
}
; run: %isub_i32x4([1 1 1 1], [1 2 3 4]) == [0 -1 -2 -3]

function %ineg_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = ineg v0
return v1
}
; run: %ineg_i32x4([1 1 1 1]) == [-1 -1 -1 -1]

function %imul_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
@@ -105,7 +98,7 @@ block0:
v9 = vall_true v8
return v9
}
; run
; run: %add_sub_f32x4() == 1

function %mul_div_f32x4() -> i8 {
block0:
@@ -123,7 +116,7 @@ block0:
v9 = vall_true v8
return v9
}
; run
; run: %mul_div_f32x4() == 1

function %sqrt_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
@@ -132,28 +125,6 @@ block0(v0: f64x2):
}
; run: %sqrt_f64x2([0x9.0 0x1.0]) == [0x3.0 0x1.0]

function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmax v0, v1
return v2
}
; This operation exhibits non-deterministic behaviour for some input NaN values;
; refer to the simd-arithmetic-nondeterministic*.clif files for the respective tests.
; run: %fmax_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [+0x0.0 0x1.0]
; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN]
; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]

function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmin v0, v1
return v2
}
; This operation exhibits non-deterministic behaviour for some input NaN values;
; refer to the simd-arithmetic-nondeterministic*.clif files for the respective tests.
; run: %fmin_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [-0x0.0 -0x1.0]
; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]

function %fneg_f64x2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = fneg v0
@@ -175,128 +146,9 @@ block0(v0: f32x4):
}
; run: %fabs_f32x4([0x0.0 -0x1.0 0x2.0 -0x3.0]) == [0x0.0 0x1.0 0x2.0 0x3.0]

function %average_rounding_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i8x16([0 0 0 1 42 19 -1 0xff 5 0 0 0 1 42 19 -1], [0 1 2 4 42 18 -1 0 10 0 1 2 4 42 18 -1]) == [0 1 1 3 42 19 -1 0x80 8 0 1 1 3 42 19 -1]

function %average_rounding_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i16x8([0 0 0 1 42 19 -1 0xffff], [0 1 2 4 42 18 -1 0]) == [0 1 1 3 42 19 -1 0x8000]

function %iabs(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iabs v0
return v1
}
; run: %iabs([-42 -1 0 1]) == [42 1 0 1]

function %i8x16_shl_imm(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i32 2
v2 = ishl v0, v1
return v2
}
; run: %i8x16_shl_imm([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0 0 0]

function %i16x8_shl_imm(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i32 4
v2 = ishl v0, v1
return v2
}
; run: %i16x8_shl_imm([0x0001 0x0002 0x0004 0x0008 0x0010 0x0020 0x0040 0x0080]) == [0x0010 0x0020 0x0040 0x0080 0x0100 0x0200 0x0400 0x0800]
; run: %i16x8_shl_imm([0x0100 0x0200 0x0400 0x0800 0x1000 0x2000 0x4000 0x8000]) == [0x1000 0x2000 0x4000 0x8000 0 0 0 0]

function %i32x4_shl_imm(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 4
v2 = ishl v0, v1
return v2
}
; run: %i32x4_shl_imm([0x00000001 0x00000002 0x00000004 0x00000008]) == [0x00000010 0x00000020 0x00000040 0x00000080]
; run: %i32x4_shl_imm([0x10000000 0x00010000 0xf0000000 0x02000000]) == [0 0x00100000 0 0x20000000]

function %i64x2_shl_imm(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i32 32
v2 = ishl v0, v1
return v2
}
; run: %i64x2_shl_imm([0x1 0xf]) == [0x100000000 0xf00000000]
; run: %i64x2_shl_imm([0x100000000 0]) == [0 0]

function %i8x16_sshr_imm(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i32 2
v2 = sshr v0, v1
return v2
}
; run: %i8x16_shl_imm([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0 0 0x01 0x02 0x04 0x08 0x10 0xe0 0 0 0 0 0 0 0 0]

function %i16x8_sshr_imm(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i32 4
v2 = sshr v0, v1
return v2
}
; run: %i16x8_sshr_imm([0x0001 0x0002 0x0004 0x0008 0x0010 0x0020 0x0040 0x0080]) == [0 0 0 0 0x1 0x2 0x4 0x8]
; run: %i16x8_sshr_imm([-1 -2 -4 -8 -16 16 0x8000 0x80f3]) == [-1 -1 -1 -1 -1 1 0xf800 0xf80f]

function %i32x4_sshr_imm(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 4
v2 = sshr v0, v1
return v2
}
; run: %i32x4_sshr_imm([1 0xfc 0x80000000 0xf83f3000]) == [0 0xf 0xf8000000 0xff83f300]

function %i64x2_sshr_imm(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i32 32
v2 = sshr v0, v1
return v2
}
; run: %i64x2_sshr_imm([0x1 0xf]) == [0 0]
; run: %i64x2_sshr_imm([0x100000000 0]) == [1 0]
; run: %i64x2_sshr_imm([-1 -1]) == [-1 -1]

function %i8x16_ushr_imm(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i32 2
v2 = ushr v0, v1
return v2
}
; run: %i8x16_shl_imm([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0 0 0x01 0x02 0x04 0x08 0x10 0x20 0 0 0 0 0 0 0 0]

function %i16x8_ushr_imm(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i32 4
v2 = ushr v0, v1
return v2
}
; run: %i16x8_ushr_imm([0x0001 0x0002 0x0004 0x0008 0x0010 0x0020 0x0040 0x0080]) == [0 0 0 0 0x1 0x2 0x4 0x8]
; run: %i16x8_ushr_imm([-1 -2 -4 -8 -16 16 0x8000 0x80f3]) == [0x0fff 0x0fff 0x0fff 0x0fff 0x0fff 1 0x0800 0x080f]

function %i32x4_ushr_imm(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 4
v2 = ushr v0, v1
return v2
}
; run: %i32x4_ushr_imm([1 0xfc 0x80000000 0xf83f3000]) == [0 0xf 0x08000000 0x0f83f300]

function %i64x2_ushr_imm(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i32 32
v2 = ushr v0, v1
return v2
}
; run: %i64x2_ushr_imm([0x1 0xf]) == [0 0]
; run: %i64x2_ushr_imm([0x100000000 0]) == [1 0]
; run: %i64x2_ushr_imm([-1 -1]) == [0xffffffff 0xffffffff]
52 changes: 52 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-avg-round-small.clif
@@ -0,0 +1,52 @@
; the interpreter does not currently support SIMD `avg_round`.
test run
target aarch64
; x86_64 and s390x do not currently support 64-bit vectors, or
; `avg_round` on `i64x2` values.
; x86_64 also does not currently support `avg_round.i32x4`.

function %average_rounding_i8x8(i8x8, i8x8) -> i8x8 {
block0(v0: i8x8, v1: i8x8):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i8x8([0 0 0 1 42 19 -1 0xff], [0 1 2 4 42 18 -1 0]) == [0 1 1 3 42 19 -1 0x80]

function %average_rounding_i16x4(i16x4, i16x4) -> i16x4 {
block0(v0: i16x4, v1: i16x4):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i16x4([0 0 0 1], [0 1 2 4]) == [0 1 1 3]
; run: %average_rounding_i16x4([42 19 -1 0xffff], [42 18 -1 0]) == [42 19 -1 0x8000]

function %average_rounding_i32x2(i32x2, i32x2) -> i32x2 {
block0(v0: i32x2, v1: i32x2):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i32x2([0 0], [0 1]) == [0 1]
; run: %average_rounding_i32x2([0 1], [2 4]) == [1 3]
; run: %average_rounding_i32x2([42 19], [42 18]) == [42 19]
; run: %average_rounding_i32x2([-1 0xffffffff], [-1 0]) == [-1 0x80000000]
; run: %average_rounding_i32x2([0xffffffff 0xfffffffd], [10 0xffffffff]) == [0x80000005 0xfffffffe]

function %average_rounding_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i32x4([0 0 0 0xffffffff], [0 1 2 0]) == [0 1 1 0x80000000]
; run: %average_rounding_i32x4([1 42 19 -1], [4 42 18 -1]) == [3 42 19 -1]

function %average_rounding_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i64x2([0 0], [0 1]) == [0 1]
; run: %average_rounding_i64x2([0 1], [2 4]) == [1 3]
; run: %average_rounding_i64x2([42 19], [42 18]) == [42 19]
; run: %average_rounding_i64x2([-1 0xffffffffffffffff], [-1 0]) == [-1 0x8000000000000000]
; run: %average_rounding_i64x2([0xffffffffffffffff 0xfffffffffffffffd], [10 0xffffffffffffffff]) == [0x8000000000000005 0xfffffffffffffffe]
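
The `avg_round` run lines above all follow a single rule: an unsigned, lane-wise average rounded up, i.e. (a + b + 1) >> 1, computed without intermediate overflow. A minimal sketch under that assumption; `avg_round_u8` and `avg_round_i8x8` are illustrative helpers, not Cranelift code:

// Unsigned rounding average for one 8-bit lane; widen so a + b + 1 cannot overflow.
fn avg_round_u8(a: u8, b: u8) -> u8 {
    ((a as u16 + b as u16 + 1) >> 1) as u8
}

// Lane-wise application over an i8x8-style vector.
fn avg_round_i8x8(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let mut out = [0u8; 8];
    for i in 0..8 {
        out[i] = avg_round_u8(a[i], b[i]);
    }
    out
}

fn main() {
    // Mirrors `%average_rounding_i8x8([... -1 0xff], [... -1 0]) == [... -1 0x80]` above.
    assert_eq!(avg_round_u8(0xff, 0x00), 0x80);
    assert_eq!(avg_round_u8(0x00, 0x01), 0x01);
    assert_eq!(
        avg_round_i8x8([0, 0, 0, 1, 42, 19, 0xff, 0xff], [0, 1, 2, 4, 42, 18, 0xff, 0]),
        [0, 1, 1, 3, 42, 19, 0xff, 0x80]
    );
}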

51 changes: 10 additions & 41 deletions cranelift/filetests/filetests/runtests/simd-avg-round.clif
@@ -1,51 +1,20 @@
; the interpreter does not currently support SIMD `avg_round`.
test run
target aarch64
; x86_64 and s390x do not currently support 64-bit vectors, or
; `avg_round` on `i64x2` values.
; x86_64 also does not currently support `avg_round.i32x4`.
target s390x
set enable_simd
target x86_64
target x86_64 skylake

function %average_rounding_i8x8(i8x8, i8x8) -> i8x8 {
block0(v0: i8x8, v1: i8x8):
function %average_rounding_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i8x8([0 0 0 1 42 19 -1 0xff], [0 1 2 4 42 18 -1 0]) == [0 1 1 3 42 19 -1 0x80]
; run: %average_rounding_i8x16([0 0 0 1 42 19 -1 0xff 5 0 0 0 1 42 19 -1], [0 1 2 4 42 18 -1 0 10 0 1 2 4 42 18 -1]) == [0 1 1 3 42 19 -1 0x80 8 0 1 1 3 42 19 -1]

function %average_rounding_i16x4(i16x4, i16x4) -> i16x4 {
block0(v0: i16x4, v1: i16x4):
function %average_rounding_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i16x4([0 0 0 1], [0 1 2 4]) == [0 1 1 3]
; run: %average_rounding_i16x4([42 19 -1 0xffff], [42 18 -1 0]) == [42 19 -1 0x8000]

function %average_rounding_i32x2(i32x2, i32x2) -> i32x2 {
block0(v0: i32x2, v1: i32x2):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i32x2([0 0], [0 1]) == [0 1]
; run: %average_rounding_i32x2([0 1], [2 4]) == [1 3]
; run: %average_rounding_i32x2([42 19], [42 18]) == [42 19]
; run: %average_rounding_i32x2([-1 0xffffffff], [-1 0]) == [-1 0x80000000]
; run: %average_rounding_i32x2([0xffffffff 0xfffffffd], [10 0xffffffff]) == [0x80000005 0xfffffffe]

function %average_rounding_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i32x4([0 0 0 0xffffffff], [0 1 2 0]) == [0 1 1 0x80000000]
; run: %average_rounding_i32x4([1 42 19 -1], [4 42 18 -1]) == [3 42 19 -1]

function %average_rounding_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = avg_round v0, v1
return v2
}
; run: %average_rounding_i64x2([0 0], [0 1]) == [0 1]
; run: %average_rounding_i64x2([0 1], [2 4]) == [1 3]
; run: %average_rounding_i64x2([42 19], [42 18]) == [42 19]
; run: %average_rounding_i64x2([-1 0xffffffffffffffff], [-1 0]) == [-1 0x8000000000000000]
; run: %average_rounding_i64x2([0xffffffffffffffff 0xfffffffffffffffd], [10 0xffffffffffffffff]) == [0x8000000000000005 0xfffffffffffffffe]
; run: %average_rounding_i16x8([0 0 0 1 42 19 -1 0xffff], [0 1 2 4 42 18 -1 0]) == [0 1 1 3 42 19 -1 0x8000]
28 changes: 28 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-fmax-fmin.clif
@@ -0,0 +1,28 @@
test run
target aarch64
target s390x
set enable_simd
target x86_64
target x86_64 skylake

function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmax v0, v1
return v2
}
; This operation exhibits non-deterministic behaviour for some input NaN values;
; refer to the simd-fmax-fmin-nondeterministic*.clif files for the respective tests.
; run: %fmax_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [+0x0.0 0x1.0]
; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN]
; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]

function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
block0(v0: f64x2, v1: f64x2):
v2 = fmin v0, v1
return v2
}
; This operation exhibits non-deterministic behaviour for some input NaN values;
; refer to the simd-fmax-fmin-nondeterministic*.clif files for the respective tests.
; run: %fmin_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [-0x0.0 -0x1.0]
; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
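
The deterministic run lines above are consistent with a lane-wise `fmax`/`fmin` that propagates a NaN operand and orders -0.0 below +0.0 (the NaN cases whose results are non-deterministic live in the separate files referenced in the comments). A minimal sketch of `fmax` under that assumption; `fmax_lane` is an illustrative helper, not the interpreter's actual implementation, and `fmin` would mirror it:

// One f64 lane of `fmax` under the assumptions stated above.
fn fmax_lane(a: f64, b: f64) -> f64 {
    if a.is_nan() {
        return a; // propagate the NaN operand
    }
    if b.is_nan() {
        return b;
    }
    // +0.0 and -0.0 compare equal, so pick the sign explicitly.
    if a == 0.0 && b == 0.0 {
        return if a.is_sign_positive() || b.is_sign_positive() { 0.0 } else { -0.0 };
    }
    if a > b { a } else { b }
}

fn main() {
    // Mirrors `%fmax_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [+0x0.0 0x1.0]` above.
    assert!(fmax_lane(-0.0, 0.0).is_sign_positive());
    assert_eq!(fmax_lane(-1.0, 1.0), 1.0);
    // Mirrors the NaN run lines: a NaN lane stays NaN.
    assert!(fmax_lane(f64::NAN, 0.0).is_nan());
}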
13 changes: 13 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-ineg.clif
@@ -0,0 +1,13 @@
test run
target aarch64
target s390x
set enable_simd
target x86_64
target x86_64 skylake

function %ineg_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = ineg v0
return v1
}
; run: %ineg_i32x4([1 1 1 1]) == [-1 -1 -1 -1]
36 changes: 36 additions & 0 deletions cranelift/filetests/filetests/runtests/simd-ishl.clif
@@ -44,3 +44,39 @@ block0(v0: i64x2):
return v2
}
; run: %ishl_imm_i64x2([1 0]) == [2 0]


function %i8x16_shl_const(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i32 2
v2 = ishl v0, v1
return v2
}
; run: %i8x16_shl_const([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0 0 0]

function %i16x8_shl_const(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i32 4
v2 = ishl v0, v1
return v2
}
; run: %i16x8_shl_const([0x0001 0x0002 0x0004 0x0008 0x0010 0x0020 0x0040 0x0080]) == [0x0010 0x0020 0x0040 0x0080 0x0100 0x0200 0x0400 0x0800]
; run: %i16x8_shl_const([0x0100 0x0200 0x0400 0x0800 0x1000 0x2000 0x4000 0x8000]) == [0x1000 0x2000 0x4000 0x8000 0 0 0 0]

function %i32x4_shl_const(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 4
v2 = ishl v0, v1
return v2
}
; run: %i32x4_shl_const([0x00000001 0x00000002 0x00000004 0x00000008]) == [0x00000010 0x00000020 0x00000040 0x00000080]
; run: %i32x4_shl_const([0x10000000 0x00010000 0xf0000000 0x02000000]) == [0 0x00100000 0 0x20000000]

function %i64x2_shl_const(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i32 32
v2 = ishl v0, v1
return v2
}
; run: %i64x2_shl_const([0x1 0xf]) == [0x100000000 0xf00000000]
; run: %i64x2_shl_const([0x100000000 0]) == [0 0]
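
The shift-by-constant run lines above all shift every lane by the same scalar amount, and bits shifted past the lane boundary are dropped. A minimal sketch of that behaviour under that assumption; `ishl_i16x8` is an illustrative helper, not Cranelift code:

// Shift each 16-bit lane left by the same amount; overflowing bits fall off the lane.
fn ishl_i16x8(v: [u16; 8], amt: u32) -> [u16; 8] {
    v.map(|lane| lane.wrapping_shl(amt))
}

fn main() {
    // Mirrors `%i16x8_shl_const([0x0100 .. 0x8000]) == [0x1000 .. 0x8000 0 0 0 0]` above.
    let input = [0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000];
    assert_eq!(
        ishl_i16x8(input, 4),
        [0x1000, 0x2000, 0x4000, 0x8000, 0, 0, 0, 0]
    );
}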