diff --git a/build.rs b/build.rs index a4409b07e4bf..fef51a2e8941 100644 --- a/build.rs +++ b/build.rs @@ -227,7 +227,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "simd_i16x8_extadd_pairwise_i8x16", "simd_i16x8_extmul_i8x16", "simd_i16x8_q15mulr_sat_s", - "simd_i16x8_sat_arith", "simd_i32x4_arith2", "simd_i32x4_cmp", "simd_i32x4_dot_i16x8", @@ -240,7 +239,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { "simd_i64x2_extmul_i32x4", "simd_i8x16_arith2", "simd_i8x16_cmp", - "simd_i8x16_sat_arith", "simd_int_to_int_extend", "simd_lane", "simd_load", diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index fae3d66a4698..49fea572c027 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -278,11 +278,20 @@ impl VecAluOpRRR { VecAluOpRRR::VandVV | VecAluOpRRR::VandVX => 0b001001, VecAluOpRRR::VorVV | VecAluOpRRR::VorVX => 0b001010, VecAluOpRRR::VxorVV | VecAluOpRRR::VxorVX => 0b001011, + VecAluOpRRR::VminuVV | VecAluOpRRR::VminuVX => 0b000100, + VecAluOpRRR::VminVV | VecAluOpRRR::VminVX => 0b000101, + VecAluOpRRR::VmaxuVV | VecAluOpRRR::VmaxuVX => 0b000110, + VecAluOpRRR::VmaxVV | VecAluOpRRR::VmaxVX => 0b000111, VecAluOpRRR::VslidedownVX => 0b001111, VecAluOpRRR::VfrsubVF => 0b100111, VecAluOpRRR::VmergeVVM | VecAluOpRRR::VmergeVXM | VecAluOpRRR::VfmergeVFM => 0b010111, - VecAluOpRRR::VfdivVV | VecAluOpRRR::VfdivVF => 0b100000, - VecAluOpRRR::VfrdivVF => 0b100001, + VecAluOpRRR::VfdivVV + | VecAluOpRRR::VfdivVF + | VecAluOpRRR::VsadduVV + | VecAluOpRRR::VsadduVX => 0b100000, + VecAluOpRRR::VfrdivVF | VecAluOpRRR::VsaddVV | VecAluOpRRR::VsaddVX => 0b100001, + VecAluOpRRR::VssubuVV | VecAluOpRRR::VssubuVX => 0b100010, + VecAluOpRRR::VssubVV | VecAluOpRRR::VssubVX => 0b100011, VecAluOpRRR::VfsgnjnVV => 0b001001, } } @@ -290,20 +299,36 @@ impl VecAluOpRRR { pub fn category(&self) -> VecOpCategory { match self { VecAluOpRRR::VaddVV + | VecAluOpRRR::VsaddVV + | VecAluOpRRR::VsadduVV | VecAluOpRRR::VsubVV + | VecAluOpRRR::VssubVV + | VecAluOpRRR::VssubuVV | VecAluOpRRR::VandVV | VecAluOpRRR::VorVV | VecAluOpRRR::VxorVV + | VecAluOpRRR::VminuVV + | VecAluOpRRR::VminVV + | VecAluOpRRR::VmaxuVV + | VecAluOpRRR::VmaxVV | VecAluOpRRR::VmergeVVM => VecOpCategory::OPIVV, VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => { VecOpCategory::OPMVV } VecAluOpRRR::VaddVX + | VecAluOpRRR::VsaddVX + | VecAluOpRRR::VsadduVX | VecAluOpRRR::VsubVX + | VecAluOpRRR::VssubVX + | VecAluOpRRR::VssubuVX | VecAluOpRRR::VrsubVX | VecAluOpRRR::VandVX | VecAluOpRRR::VorVX | VecAluOpRRR::VxorVX + | VecAluOpRRR::VminuVX + | VecAluOpRRR::VminVX + | VecAluOpRRR::VmaxuVX + | VecAluOpRRR::VmaxVX | VecAluOpRRR::VslidedownVX | VecAluOpRRR::VmergeVXM => VecOpCategory::OPIVX, VecAluOpRRR::VfaddVV @@ -365,6 +390,8 @@ impl VecAluOpRRImm5 { VecAluOpRRImm5::VxorVI => 0b001011, VecAluOpRRImm5::VslidedownVI => 0b001111, VecAluOpRRImm5::VmergeVIM => 0b010111, + VecAluOpRRImm5::VsadduVI => 0b100000, + VecAluOpRRImm5::VsaddVI => 0b100001, } } @@ -376,7 +403,9 @@ impl VecAluOpRRImm5 { | VecAluOpRRImm5::VorVI | VecAluOpRRImm5::VxorVI | VecAluOpRRImm5::VslidedownVI - | VecAluOpRRImm5::VmergeVIM => VecOpCategory::OPIVI, + | VecAluOpRRImm5::VmergeVIM + | VecAluOpRRImm5::VsadduVI + | VecAluOpRRImm5::VsaddVI => VecOpCategory::OPIVI, } } @@ -388,7 +417,9 @@ impl VecAluOpRRImm5 { | VecAluOpRRImm5::VandVI | VecAluOpRRImm5::VorVI | VecAluOpRRImm5::VxorVI - | 
VecAluOpRRImm5::VmergeVIM => false, + | VecAluOpRRImm5::VmergeVIM + | VecAluOpRRImm5::VsadduVI + | VecAluOpRRImm5::VsaddVI => false, } } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 0a6d8dc599b6..68af32830543 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -91,13 +91,21 @@ (type VecAluOpRRR (enum ;; Vector-Vector Opcodes (VaddVV) + (VsaddVV) + (VsadduVV) (VsubVV) + (VssubVV) + (VssubuVV) (VmulVV) (VmulhVV) (VmulhuVV) (VandVV) (VorVV) (VxorVV) + (VmaxVV) + (VmaxuVV) + (VminVV) + (VminuVV) (VfaddVV) (VfsubVV) (VfmulVV) @@ -107,11 +115,19 @@ ;; Vector-Scalar Opcodes (VaddVX) + (VsaddVX) + (VsadduVX) (VsubVX) (VrsubVX) + (VssubVX) + (VssubuVX) (VandVX) (VorVX) (VxorVX) + (VmaxVX) + (VmaxuVX) + (VminVX) + (VminuVX) (VslidedownVX) (VfaddVF) (VfsubVF) @@ -127,6 +143,8 @@ (type VecAluOpRRImm5 (enum ;; Regular VI Opcodes (VaddVI) + (VsaddVI) + (VsadduVI) (VrsubVI) (VandVI) (VorVI) @@ -280,6 +298,36 @@ (rule (rv_vadd_vi vs2 imm mask vstate) (vec_alu_rr_imm5 (VecAluOpRRImm5.VaddVI) vs2 imm mask vstate)) +;; Helper for emitting the `vsadd.vv` instruction. +(decl rv_vsadd_vv (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vsadd_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsaddVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsadd.vx` instruction. +(decl rv_vsadd_vx (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vsadd_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsaddVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsadd.vi` instruction. +(decl rv_vsadd_vi (Reg Imm5 VecOpMasking VState) Reg) +(rule (rv_vsadd_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VsaddVI) vs2 imm mask vstate)) + +;; Helper for emitting the `vsaddu.vv` instruction. +(decl rv_vsaddu_vv (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vsaddu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsadduVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsaddu.vx` instruction. +(decl rv_vsaddu_vx (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vsaddu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VsadduVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vsaddu.vi` instruction. +(decl rv_vsaddu_vi (Reg Imm5 VecOpMasking VState) Reg) +(rule (rv_vsaddu_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VsadduVI) vs2 imm mask vstate)) + ;; Helper for emitting the `vsub.vv` instruction. (decl rv_vsub_vv (Reg Reg VecOpMasking VState) Reg) (rule (rv_vsub_vv vs2 vs1 mask vstate) @@ -295,6 +343,26 @@ (rule (rv_vrsub_vx vs2 vs1 mask vstate) (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 vs1 mask vstate)) +;; Helper for emitting the `vssub.vv` instruction. +(decl rv_vssub_vv (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vssub_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssub.vx` instruction. +(decl rv_vssub_vx (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vssub_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssubu.vv` instruction. +(decl rv_vssubu_vv (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vssubu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vssubu.vx` instruction. 
+(decl rv_vssubu_vx (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vssubu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VssubuVX) vs2 vs1 mask vstate)) + ;; Helper for emitting the `vneg.v` pseudo-instruction. (decl rv_vneg_v (Reg VecOpMasking VState) Reg) (rule (rv_vneg_v vs2 mask vstate) @@ -372,6 +440,46 @@ (if-let neg1 (imm5_from_i8 -1)) (rv_vxor_vi vs2 neg1 mask vstate)) +;; Helper for emitting the `vmax.vv` instruction. +(decl rv_vmax_vv (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vmax_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmax.vx` instruction. +(decl rv_vmax_vx (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vmax_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmin.vv` instruction. +(decl rv_vmin_vv (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vmin_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmin.vx` instruction. +(decl rv_vmin_vx (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vmin_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmaxu.vv` instruction. +(decl rv_vmaxu_vv (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vmaxu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vmaxu.vx` instruction. +(decl rv_vmaxu_vx (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vmaxu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VmaxuVX) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vminu.vv` instruction. +(decl rv_vminu_vv (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vminu_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminuVV) vs2 vs1 mask vstate)) + +;; Helper for emitting the `vminu.vx` instruction. +(decl rv_vminu_vx (Reg Reg VecOpMasking VState) Reg) +(rule (rv_vminu_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VminuVX) vs2 vs1 mask vstate)) + ;; Helper for emitting the `vfadd.vv` instruction. 
(decl rv_vfadd_vv (Reg Reg VecOpMasking VState) Reg) (rule (rv_vfadd_vv vs2 vs1 mask vstate) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index fa5e35803190..4e3280673cd1 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -884,25 +884,64 @@ (t2 Reg y)) (value_regs t1 t2))) + ;;;;; Rules for `smax`;;;;;;;;; -(rule - (lower (has_type ty (smax x y))) + +(rule 0 (lower (has_type (ty_int ty) (smax x y))) (gen_int_select ty (IntSelectOP.Smax) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smax x y))) + (rv_vmax_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smax x (splat y)))) + (rv_vmax_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smax (splat x) y))) + (rv_vmax_vx y x (unmasked) ty)) + ;;;;; Rules for `smin`;;;;;;;;; -(rule - (lower (has_type ty (smin x y))) + +(rule 0 (lower (has_type (ty_int ty) (smin x y))) (gen_int_select ty (IntSelectOP.Smin) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smin x y))) + (rv_vmin_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (smin x (splat y)))) + (rv_vmin_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (smin (splat x) y))) + (rv_vmin_vx y x (unmasked) ty)) + ;;;;; Rules for `umax`;;;;;;;;; -(rule - (lower (has_type ty (umax x y))) + +(rule 0 (lower (has_type (ty_int ty) (umax x y))) (gen_int_select ty (IntSelectOP.Umax) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umax x y))) + (rv_vmaxu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umax x (splat y)))) + (rv_vmaxu_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umax (splat x) y))) + (rv_vmaxu_vx y x (unmasked) ty)) + ;;;;; Rules for `umin`;;;;;;;;; -(rule - (lower (has_type ty (umin x y))) + +(rule 0 (lower (has_type (ty_int ty) (umin x y))) (gen_int_select ty (IntSelectOP.Umin) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umin x y))) + (rv_vminu_vv x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (umin x (splat y)))) + (rv_vminu_vx x y (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (umin (splat x) y))) + (rv_vminu_vx y x (unmasked) ty)) + + ;;;;; Rules for `debugtrap`;;;;;;;;; (rule (lower (debugtrap)) @@ -1178,3 +1217,53 @@ ;; similar in its splat rules. ;; TODO: Look through bitcasts when splatting out registers. We can use ;; `vmv.v.x` in a `(splat.f32x4 (bitcast.f32 val))`. And vice versa for integers. 
+ +;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x y))) + (rv_vsaddu_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x (splat y)))) + (rv_vsaddu_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat (splat x) y))) + (rv_vsaddu_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat x (replicated_imm5 y)))) + (rv_vsaddu_vi x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (uadd_sat (replicated_imm5 x) y))) + (rv_vsaddu_vi y x (unmasked) ty)) + +;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x y))) + (rv_vsadd_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x (splat y)))) + (rv_vsadd_vx x y (unmasked) ty)) + +(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat (splat x) y))) + (rv_vsadd_vx y x (unmasked) ty)) + +(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat x (replicated_imm5 y)))) + (rv_vsadd_vi x y (unmasked) ty)) + +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (sadd_sat (replicated_imm5 x) y))) + (rv_vsadd_vi y x (unmasked) ty)) + +;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (usub_sat x y))) + (rv_vssubu_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (usub_sat x (splat y)))) + (rv_vssubu_vx x y (unmasked) ty)) + +;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 0 (lower (has_type (ty_vec_fits_in_register ty) (ssub_sat x y))) + (rv_vssub_vv x y (unmasked) ty)) + +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ssub_sat x (splat y)))) + (rv_vssub_vx x y (unmasked) ty)) diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-saddsat.clif b/cranelift/filetests/filetests/isa/riscv64/simd-saddsat.clif new file mode 100644 index 000000000000..897b45444703 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-saddsat.clif @@ -0,0 +1,491 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %sadd_sat_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = sadd_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vv v6,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x83, 0x11, 0x86 +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = sadd_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vv v6,v1,v3 #avl=8, #vtype=(e16, 
m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x83, 0x11, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = sadd_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x83, 0x11, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = sadd_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x83, 0x11, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_const_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = sadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vi v4,v1,5 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0xb2, 0x12, 0x86 +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_const_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = sadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vi v4,v1,-16 #avl=8, 
#vtype=(e16, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x32, 0x18, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_const_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = sadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vi v4,v1,15 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0xb2, 0x17, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_const_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = sadd_sat v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vi v4,v1,-5 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0xb2, 0x1d, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = sadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0xd7, 0x42, 0x15, 0x86 +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = sadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; 
sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x15, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = sadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %sadd_sat_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = sadd_sat v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsadd.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x86 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-smax.clif b/cranelift/filetests/filetests/isa/riscv64/simd-smax.clif new file mode 100644 index 000000000000..b8d2ca48a53e --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-smax.clif @@ -0,0 +1,499 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %smax_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = smax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmax.vv v6,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x83, 0x11, 0x1e +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = smax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmax.vv 
v6,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x83, 0x11, 0x1e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = smax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmax.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x83, 0x11, 0x1e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = smax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmax.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x83, 0x11, 0x1e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_const_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = smax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,5 +; vmax.vx v5,v1,a2 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 5 +; .byte 0xd7, 0x42, 0x16, 0x1e +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_const_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = smax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,-16 
+; vmax.vx v5,v1,a2 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, -0x10 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x16, 0x1e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_const_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = smax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,15 +; vmax.vx v5,v1,a2 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 0xf +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x16, 0x1e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_const_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = smax v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,-5 +; vmax.vx v5,v1,a2 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, -5 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x16, 0x1e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = smax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmax.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0xd7, 0x42, 0x15, 0x1e +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = smax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmax.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; 
+; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x15, 0x1e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = smax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmax.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x1e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smax_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = smax v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmax.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x1e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-smin.clif b/cranelift/filetests/filetests/isa/riscv64/simd-smin.clif new file mode 100644 index 000000000000..53392810b29f --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-smin.clif @@ -0,0 +1,499 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %smin_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = smin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmin.vv v6,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x83, 0x11, 0x16 +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = smin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; 
vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmin.vv v6,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x83, 0x11, 0x16 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = smin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmin.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x83, 0x11, 0x16 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = smin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmin.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x83, 0x11, 0x16 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_const_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = smin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,5 +; vmin.vx v5,v1,a2 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 5 +; .byte 0xd7, 0x42, 0x16, 0x16 +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_const_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = smin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; 
vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,-16 +; vmin.vx v5,v1,a2 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, -0x10 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x16, 0x16 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_const_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = smin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,15 +; vmin.vx v5,v1,a2 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 0xf +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x16, 0x16 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_const_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = smin v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,-5 +; vmin.vx v5,v1,a2 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, -5 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x16, 0x16 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = smin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmin.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0xd7, 0x42, 0x15, 0x16 +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = smin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmin.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, 
ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x15, 0x16 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = smin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmin.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x16 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %smin_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = smin v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmin.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x16 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-ssubsat.clif b/cranelift/filetests/filetests/isa/riscv64/simd-ssubsat.clif new file mode 100644 index 000000000000..9352666d6f38 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-ssubsat.clif @@ -0,0 +1,501 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %ssub_sat_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = ssub_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssub.vv v6,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x83, 0x11, 0x8e +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = ssub_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd 
fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssub.vv v6,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x83, 0x11, 0x8e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = ssub_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssub.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x83, 0x11, 0x8e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = ssub_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssub.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x83, 0x11, 0x8e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_const_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = ssub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,5 +; vssub.vx v5,v1,a2 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 5 +; .byte 0xd7, 0x42, 0x16, 0x8e +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_const_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 
v1 + v3 = ssub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,-16 +; vssub.vx v5,v1,a2 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, -0x10 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x16, 0x8e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_const_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = ssub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,15 +; vssub.vx v5,v1,a2 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 0xf +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x16, 0x8e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_const_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = ssub_sat v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v5,-5 #avl=2, #vtype=(e64, m1, ta, ma) +; vssub.vv v5,v5,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0xb2, 0x0d, 0x5e +; .byte 0xd7, 0x82, 0x50, 0x8e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = ssub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssub.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0xd7, 0x42, 0x15, 0x8e +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = ssub_sat v0, v2 + return v3 +} + +; VCode: +; add 
sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssub.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x15, 0x8e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = ssub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssub.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x8e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ssub_sat_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = ssub_sat v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v6,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vssub.vv v6,v6,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x43, 0x05, 0x5e +; .byte 0x57, 0x83, 0x60, 0x8e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x83, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-uaddsat.clif b/cranelift/filetests/filetests/isa/riscv64/simd-uaddsat.clif new file mode 100644 index 000000000000..f69f99c1d869 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-uaddsat.clif @@ -0,0 +1,491 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %uadd_sat_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = uadd_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vv v6,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 
0x81, 0x0f, 0x02 +; .byte 0x57, 0x83, 0x11, 0x82 +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %uadd_sat_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = uadd_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vv v6,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x83, 0x11, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %uadd_sat_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = uadd_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x83, 0x11, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %uadd_sat_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = uadd_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x83, 0x11, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %uadd_sat_const_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = uadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vi v4,v1,5 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi 
t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0xb2, 0x12, 0x82 +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %uadd_sat_const_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = uadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vi v4,v1,-16 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x32, 0x18, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %uadd_sat_const_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = uadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vi v4,v1,15 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0xb2, 0x17, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %uadd_sat_const_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = uadd_sat v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vi v4,v1,-5 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0xb2, 0x1d, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %uadd_sat_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = uadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0xd7, 0x42, 0x15, 0x82 +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function 
%uadd_sat_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = uadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x15, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %uadd_sat_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = uadd_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %uadd_sat_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = uadd_sat v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsaddu.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x82 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-umax.clif b/cranelift/filetests/filetests/isa/riscv64/simd-umax.clif new file mode 100644 index 000000000000..6091c07405e1 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-umax.clif @@ -0,0 +1,499 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %umax_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = umax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmaxu.vv v6,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 
0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x83, 0x11, 0x1a +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = umax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmaxu.vv v6,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x83, 0x11, 0x1a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = umax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmaxu.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x83, 0x11, 0x1a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = umax v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmaxu.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x83, 0x11, 0x1a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_const_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = umax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,5 +; vmaxu.vx v5,v1,a2 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 
0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 5 +; .byte 0xd7, 0x42, 0x16, 0x1a +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_const_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = umax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,-16 +; vmaxu.vx v5,v1,a2 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, -0x10 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x16, 0x1a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_const_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = umax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,15 +; vmaxu.vx v5,v1,a2 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 0xf +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x16, 0x1a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_const_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = umax v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,-5 +; vmaxu.vx v5,v1,a2 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, -5 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x16, 0x1a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = umax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmaxu.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0xd7, 0x42, 0x15, 0x1a +; 
.byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = umax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmaxu.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x15, 0x1a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = umax v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmaxu.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x1a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umax_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = umax v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmaxu.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x1a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-umin.clif b/cranelift/filetests/filetests/isa/riscv64/simd-umin.clif new file mode 100644 index 000000000000..55369ea4fff6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-umin.clif @@ -0,0 +1,499 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %umin_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = umin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vminu.vv v6,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; 
offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x83, 0x11, 0x12 +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = umin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vminu.vv v6,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x83, 0x11, 0x12 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = umin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vminu.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x83, 0x11, 0x12 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = umin v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vminu.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x83, 0x11, 0x12 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_const_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = umin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,5 +; vminu.vx v5,v1,a2 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd 
ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 5 +; .byte 0xd7, 0x42, 0x16, 0x12 +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_const_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = umin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,-16 +; vminu.vx v5,v1,a2 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, -0x10 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x16, 0x12 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_const_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = umin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,15 +; vminu.vx v5,v1,a2 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 0xf +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x16, 0x12 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_const_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = umin v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,-5 +; vminu.vx v5,v1,a2 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, -5 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x16, 0x12 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = umin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vminu.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, 
s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0xd7, 0x42, 0x15, 0x12 +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = umin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vminu.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x15, 0x12 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = umin v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vminu.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x12 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %umin_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = umin v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vminu.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x12 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-usubsat.clif b/cranelift/filetests/filetests/isa/riscv64/simd-usubsat.clif new file mode 100644 index 000000000000..637d830ef38e --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-usubsat.clif @@ -0,0 +1,501 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %usub_sat_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = usub_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssubu.vv v6,v1,v3 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 
0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x83, 0x11, 0x8a +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = usub_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssubu.vv v6,v1,v3 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x83, 0x11, 0x8a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = usub_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssubu.vv v6,v1,v3 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x83, 0x11, 0x8a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = usub_sat v0, v1 + return v2 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssubu.vv v6,v1,v3 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi t6, s0, 0x20 +; .byte 0x87, 0x81, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x83, 0x11, 0x8a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x03, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_const_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = usub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,5 +; vssubu.vx v5,v1,a2 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, 
ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 5 +; .byte 0xd7, 0x42, 0x16, 0x8a +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_const_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = usub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,-16 +; vssubu.vx v5,v1,a2 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, -0x10 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x16, 0x8a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_const_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = usub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; li a2,15 +; vssubu.vx v5,v1,a2 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; addi a2, zero, 0xf +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x16, 0x8a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_const_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = usub_sat v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.i v5,-5 #avl=2, #vtype=(e64, m1, ta, ma) +; vssubu.vv v5,v5,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0xb2, 0x0d, 0x5e +; .byte 0xd7, 0x82, 0x50, 0x8a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = usub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssubu.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld 
fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0xd7, 0x42, 0x15, 0x8a +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = usub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssubu.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x15, 0x8a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = usub_sat v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vssubu.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x8a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %usub_sat_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = usub_sat v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vmv.v.x v6,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vssubu.vv v6,v6,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v6,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x43, 0x05, 0x5e +; .byte 0x57, 0x83, 0x60, 0x8a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x83, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/i128-min-max.clif b/cranelift/filetests/filetests/runtests/i128-min-max.clif new file mode 100644 index 000000000000..29d0626a59cd --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-min-max.clif @@ -0,0 +1,41 @@ +test interpret +test run +target riscv64 +target s390x + + +function %smin_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = smin v0, v1 + return v2 +} +; run: %smin_i128(1, 0) == 0 +; run: %smin_i128(-1, 2) == -1 +; run: %smin_i128(1, 5) == 1 + 
+function %umin_i128(i128, i128) -> i128 {
+block0(v0: i128, v1: i128):
+    v2 = umin v0, v1
+    return v2
+}
+; run: %umin_i128(1, 0) == 0
+; run: %umin_i128(-1, 2) == 2
+; run: %umin_i128(1, 5) == 1
+
+function %smax_i128(i128, i128) -> i128 {
+block0(v0: i128, v1: i128):
+    v2 = smax v0, v1
+    return v2
+}
+; run: %smax_i128(1, 0) == 1
+; run: %smax_i128(-1, 2) == 2
+; run: %smax_i128(1, 5) == 5
+
+function %umax_i128(i128, i128) -> i128 {
+block0(v0: i128, v1: i128):
+    v2 = umax v0, v1
+    return v2
+}
+; run: %umax_i128(1, 0) == 1
+; run: %umax_i128(-1, 2) == -1
+; run: %umax_i128(1, 5) == 5
diff --git a/cranelift/filetests/filetests/runtests/integer-minmax.clif b/cranelift/filetests/filetests/runtests/integer-minmax.clif
index d030e20c441d..d00ac4e86d48 100644
--- a/cranelift/filetests/filetests/runtests/integer-minmax.clif
+++ b/cranelift/filetests/filetests/runtests/integer-minmax.clif
@@ -1,7 +1,7 @@
 test interpret
 test run
 target aarch64
-; target s390x
+target s390x
 target x86_64
 target riscv64
 
diff --git a/cranelift/filetests/filetests/runtests/simd-min-max.clif b/cranelift/filetests/filetests/runtests/simd-min-max.clif
index d63baec4ed96..95e304a17798 100644
--- a/cranelift/filetests/filetests/runtests/simd-min-max.clif
+++ b/cranelift/filetests/filetests/runtests/simd-min-max.clif
@@ -8,6 +8,7 @@ target x86_64 sse41
 target x86_64 sse42
 target x86_64 sse42 has_avx
 target s390x
+target riscv64 has_v
 
 function %smin_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-saddsat-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-saddsat-aarch64.clif
index 5b3cacaa874d..81ed12620619 100644
--- a/cranelift/filetests/filetests/runtests/simd-saddsat-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-saddsat-aarch64.clif
@@ -1,6 +1,7 @@
 test interpret
 test run
 target aarch64
+target riscv64 has_v
 
 function %saddsat_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
diff --git a/cranelift/filetests/filetests/runtests/simd-saddsat.clif b/cranelift/filetests/filetests/runtests/simd-saddsat.clif
index b6d15611b285..70b29aa90dc2 100644
--- a/cranelift/filetests/filetests/runtests/simd-saddsat.clif
+++ b/cranelift/filetests/filetests/runtests/simd-saddsat.clif
@@ -5,6 +5,7 @@ target s390x
 set enable_simd
 target x86_64 has_sse3 has_ssse3 has_sse41
 target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
+target riscv64 has_v
 
 function %saddsat_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-ssubsat-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-ssubsat-aarch64.clif
index 7292e78f89a2..6fbd734ab385 100644
--- a/cranelift/filetests/filetests/runtests/simd-ssubsat-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-ssubsat-aarch64.clif
@@ -1,6 +1,7 @@
 test interpret
 test run
 target aarch64
+target riscv64 has_v
 
 function %ssubsat_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
diff --git a/cranelift/filetests/filetests/runtests/simd-ssubsat.clif b/cranelift/filetests/filetests/runtests/simd-ssubsat.clif
index 4797f9b83499..6486d5b97539 100644
--- a/cranelift/filetests/filetests/runtests/simd-ssubsat.clif
+++ b/cranelift/filetests/filetests/runtests/simd-ssubsat.clif
@@ -5,6 +5,7 @@ target s390x
 set enable_simd
 target x86_64 has_sse3 has_ssse3 has_sse41
 target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
+target riscv64 has_v
 
 function %ssubsat_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-uaddsat-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-uaddsat-aarch64.clif
index 2342aed7e7bb..3b78cc6acf8c 100644
--- a/cranelift/filetests/filetests/runtests/simd-uaddsat-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-uaddsat-aarch64.clif
@@ -1,6 +1,7 @@
 test interpret
 test run
 target aarch64
+target riscv64 has_v
 ; i32x4 vectors aren't supported in `uadd_sat` outside AArch64 at the moment
 function %uaddsat_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
diff --git a/cranelift/filetests/filetests/runtests/simd-uaddsat.clif b/cranelift/filetests/filetests/runtests/simd-uaddsat.clif
index eeac7521078c..9ac228be0c84 100644
--- a/cranelift/filetests/filetests/runtests/simd-uaddsat.clif
+++ b/cranelift/filetests/filetests/runtests/simd-uaddsat.clif
@@ -5,6 +5,7 @@ target s390x
 set enable_simd
 target x86_64 has_sse3 has_ssse3 has_sse41
 target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
+target riscv64 has_v
 
 function %uaddsat_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-usubsat-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-usubsat-aarch64.clif
index 4f331a5366b2..8b5328e5ee7f 100644
--- a/cranelift/filetests/filetests/runtests/simd-usubsat-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-usubsat-aarch64.clif
@@ -1,6 +1,7 @@
 test interpret
 test run
 target aarch64
+target riscv64 has_v
 ; i32x4 vectors aren't supported in `usub_sat` outside AArch64 at the moment
 function %usubsat_i32x4(i32x4, i32x4) -> i32x4 {
 block0(v0: i32x4, v1: i32x4):
diff --git a/cranelift/filetests/filetests/runtests/simd-usubsat.clif b/cranelift/filetests/filetests/runtests/simd-usubsat.clif
index ccaddb050fa2..d363d2a0b1f0 100644
--- a/cranelift/filetests/filetests/runtests/simd-usubsat.clif
+++ b/cranelift/filetests/filetests/runtests/simd-usubsat.clif
@@ -5,6 +5,7 @@ target s390x
 set enable_simd
 target x86_64 has_sse3 has_ssse3 has_sse41
 target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
+target riscv64 has_v
 
 function %usubsat_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):