riscv64: Add vadd.vx
afonso360 committed May 16, 2023
1 parent 84621b1 commit 6a5e40e
Showing 5 changed files with 217 additions and 12 deletions.
6 changes: 4 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst/vector.rs
@@ -250,7 +250,7 @@ impl VecAluOpRRR {
pub fn funct6(&self) -> u32 {
// See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc
match self {
- VecAluOpRRR::VaddVV => 0b000000,
+ VecAluOpRRR::VaddVV | VecAluOpRRR::VaddVX => 0b000000,
VecAluOpRRR::VsubVV | VecAluOpRRR::VsubVX => 0b000010,
VecAluOpRRR::VrsubVX => 0b000011,
VecAluOpRRR::VmulVV => 0b100101,
@@ -272,7 +272,9 @@ impl VecAluOpRRR {
VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => {
VecOpCategory::OPMVV
}
- VecAluOpRRR::VsubVX | VecAluOpRRR::VrsubVX => VecOpCategory::OPIVX,
+ VecAluOpRRR::VaddVX | VecAluOpRRR::VsubVX | VecAluOpRRR::VrsubVX => {
+ VecOpCategory::OPIVX
+ }
}
}
}
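The two hunks above are all the encoding information `vadd.vx` needs: funct6 is 0b000000 (shared with `vadd.vv`) and the opcode category is OPIVX. As a minimal standalone sketch (not Cranelift's actual emission code), the 32-bit instruction word can be assembled from those fields using the riscv-v-spec layout funct6 | vm | vs2 | rs1 | funct3 | vd | opcode; the OPIVX funct3 value (0b100) and the OP-V major opcode (0b1010111) are taken from that spec, and the result matches the `vadd.vx v5,v1,a0` word that shows up in the filetest disassembly further down.

// Illustrative sketch only, not backend code: assemble a `vadd.vx` instruction word.
fn encode_vadd_vx(vd: u32, vs2: u32, rs1: u32) -> u32 {
    let funct6 = 0b000000u32;  // VaddVX shares funct6 0b000000 with VaddVV (see above)
    let vm = 1u32;             // unmasked
    let funct3 = 0b100u32;     // OPIVX: vector-scalar operation with an x-register operand
    let opcode = 0b1010111u32; // OP-V major opcode
    (funct6 << 26) | (vm << 25) | (vs2 << 20) | (rs1 << 15) | (funct3 << 12) | (vd << 7) | opcode
}

fn main() {
    // `vadd.vx v5,v1,a0` appears below as `.byte 0xd7, 0x42, 0x15, 0x02`,
    // i.e. 0x021542d7 when read little-endian.
    assert_eq!(encode_vadd_vx(5, 1, 10), 0x021542d7);
}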
6 changes: 6 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
@@ -94,6 +94,7 @@
(VxorVV)

;; Vector-Scalar Opcodes
+ (VaddVX)
(VsubVX)
(VrsubVX)
))
@@ -189,6 +190,11 @@
(rule (rv_vadd_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VaddVV) vs2 vs1 vstate))

+ ;; Helper for emitting the `vadd.vx` instruction.
+ (decl rv_vadd_vx (Reg Reg VState) Reg)
+ (rule (rv_vadd_vx vs2 vs1 vstate)
+ (vec_alu_rrr (VecAluOpRRR.VaddVX) vs2 vs1 vstate))

;; Helper for emitting the `vadd.vi` instruction.
(decl rv_vadd_vi (Reg Imm5 VState) Reg)
(rule (rv_vadd_vi vs2 imm vstate)
10 changes: 8 additions & 2 deletions cranelift/codegen/src/isa/riscv64/lower.isle
@@ -110,10 +110,16 @@
(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (iadd x y)))
(rv_vadd_vv x y ty))

- (rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (replicated_imm5 y))))
+ (rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (splat y))))
+ (rv_vadd_vx x y ty))

+ (rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd (splat x) y)))
+ (rv_vadd_vx y x ty))

+ (rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (replicated_imm5 y))))
(rv_vadd_vi x y ty))

- (rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd (replicated_imm5 x) y)))
+ (rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd (replicated_imm5 x) y)))
(rv_vadd_vi y x ty))

;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;
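The rule numbers above are ISLE priorities, and higher priorities are tried first: an `iadd` against a `replicated_imm5` still lowers to `vadd.vi` (rules 11 and 12), an `iadd` against a `splat` of a scalar lowers to the new `vadd.vx` (rules 9 and 10, with the operands commuted when the splat is on the left), and anything else falls back to `vadd.vv` (rule 8). A rough model of that selection order (a sketch only; the enum and function below are illustrative, not Cranelift APIs):

// Illustrative only: models which form the iadd rules above would pick.
#[derive(Debug, PartialEq)]
enum VaddForm {
    Vi, // one operand is a replicated 5-bit immediate -> vadd.vi (rules 11/12)
    Vx, // one operand is a splatted scalar register   -> vadd.vx (rules 9/10)
    Vv, // plain vector-vector add                     -> vadd.vv (rule 8)
}

fn pick_form(splat_imm5: bool, splat_reg: bool) -> VaddForm {
    // Higher-priority rules match first, so the immediate form wins over the
    // register form, which in turn wins over the generic vector-vector form.
    if splat_imm5 {
        VaddForm::Vi
    } else if splat_reg {
        VaddForm::Vx
    } else {
        VaddForm::Vv
    }
}

fn main() {
    assert_eq!(pick_form(true, true), VaddForm::Vi);   // splat of an imm5 constant
    assert_eq!(pick_form(false, true), VaddForm::Vx);  // splat of a register value
    assert_eq!(pick_form(false, false), VaddForm::Vv); // no splat on either side
}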
158 changes: 158 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif
@@ -331,3 +331,161 @@ block0(v0: i64x2):
; addi sp, sp, 0x10
; ret

function %iadd_splat_i8x16(i8x16, i8) -> i8x16 {
block0(v0: i8x16, v1: i8):
v2 = splat.i8x16 v1
v3 = iadd v0, v2
return v3
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma)
; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0xd7, 0x42, 0x15, 0x02
; .byte 0xa7, 0x82, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iadd_splat_i16x8(i16x8, i16) -> i16x8 {
block0(v0: i16x8, v1: i16):
v2 = splat.i16x8 v1
v3 = iadd v0, v2
return v3
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma)
; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0xd7, 0x42, 0x15, 0x02
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0xa7, 0x82, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iadd_splat_i32x4(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = splat.i32x4 v1
v3 = iadd v0, v2
return v3
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0xd7, 0x42, 0x15, 0x02
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0xa7, 0x82, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

function %iadd_splat_i64x2(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = splat.i64x2 v1
v3 = iadd v2, v0
return v3
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma)
; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0xd7, 0x42, 0x15, 0x02
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0xa7, 0x82, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

49 changes: 41 additions & 8 deletions cranelift/filetests/filetests/runtests/simd-iadd-splat.clif
@@ -8,38 +8,71 @@ target x86_64
target x86_64 skylake
target riscv64 has_v

- function %iadd_splat_i8x16(i8x16) -> i8x16 {
+ function %iadd_splat_const_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i8 5
v2 = splat.i8x16 v1
v3 = iadd v0, v2
return v3
}
- ; run: %iadd_splat_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21]
+ ; run: %iadd_splat_const_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21]

- function %iadd_splat_i16x8(i16x8) -> i16x8 {
+ function %iadd_splat_const_i16x8(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i16 -16
v2 = splat.i16x8 v1
v3 = iadd v0, v2
return v3
}
- ; run: %iadd_splat_i16x8([1 2 3 4 5 6 7 8]) == [-15 -14 -13 -12 -11 -10 -9 -8]
+ ; run: %iadd_splat_const_i16x8([1 2 3 4 5 6 7 8]) == [-15 -14 -13 -12 -11 -10 -9 -8]

- function %iadd_splat_i32x4(i32x4) -> i32x4 {
+ function %iadd_splat_const_i32x4(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 15
v2 = splat.i32x4 v1
v3 = iadd v0, v2
return v3
}
- ; run: %iadd_splat_i32x4([1 2 3 4]) == [16 17 18 19]
+ ; run: %iadd_splat_const_i32x4([1 2 3 4]) == [16 17 18 19]

- function %iadd_splat_i64x2(i64x2) -> i64x2 {
+ function %iadd_splat_const_i64x2(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i64 -5
v2 = splat.i64x2 v1
v3 = iadd v2, v0
return v3
}
- ; run: %iadd_splat_i64x2([1 2]) == [-4 -3]
+ ; run: %iadd_splat_const_i64x2([1 2]) == [-4 -3]


function %iadd_splat_i8x16(i8x16, i8) -> i8x16 {
block0(v0: i8x16, v1: i8):
v2 = splat.i8x16 v1
v3 = iadd v0, v2
return v3
}
; run: %iadd_splat_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], -15) == [-14 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1]

function %iadd_splat_i16x8(i16x8, i16) -> i16x8 {
block0(v0: i16x8, v1: i16):
v2 = splat.i16x8 v1
v3 = iadd v0, v2
return v3
}
; run: %iadd_splat_i16x8([1 2 3 4 5 6 7 8], -10) == [-9 -8 -7 -6 -5 -4 -3 -2]

function %iadd_splat_i32x4(i32x4, i32) -> i32x4 {
block0(v0: i32x4, v1: i32):
v2 = splat.i32x4 v1
v3 = iadd v0, v2
return v3
}
; run: %iadd_splat_i32x4([1 2 3 4], 22) == [23 24 25 26]

function %iadd_splat_i64x2(i64x2, i64) -> i64x2 {
block0(v0: i64x2, v1: i64):
v2 = splat.i64x2 v1
v3 = iadd v2, v0
return v3
}
; run: %iadd_splat_i64x2([1 2], 10) == [11 12]
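The new scalar-operand runtests above pin the expected results to a lane-wise wrapping add of the x-register value into every lane. A small model of that behaviour for the e8 case, checked against the first run line of the new tests (a sketch only; `vadd_vx_i8` is an illustrative name, not code from the repository):

// Lane-wise wrapping add of a scalar into every i8 lane, mirroring what the
// `vadd.vx` lowering computes for e8 elements.
fn vadd_vx_i8(vs2: [i8; 16], rs1: i8) -> [i8; 16] {
    let mut out = [0i8; 16];
    for (o, &v) in out.iter_mut().zip(vs2.iter()) {
        *o = v.wrapping_add(rs1);
    }
    out
}

fn main() {
    // Matches: %iadd_splat_i8x16([1 2 .. 16], -15) == [-14 -13 .. 0 1]
    let input: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let expected: [i8; 16] = [-14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1];
    assert_eq!(vadd_vx_i8(input, -15), expected);
}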
