From e51be74ae5ed12bb7284000067303bfbd03a10f3 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 6 May 2023 09:02:55 +0100 Subject: [PATCH 1/8] riscv64: Rename VecAluOpRRR Arms --- .../codegen/src/isa/riscv64/inst/vector.rs | 34 ++++++++++--------- .../codegen/src/isa/riscv64/inst_vector.isle | 32 ++++++++--------- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index c6029fb2fdeb..e1c9b41a102f 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -246,26 +246,28 @@ impl VecAluOpRRR { } pub fn funct3(&self) -> u32 { match self { - VecAluOpRRR::Vadd - | VecAluOpRRR::Vsub - | VecAluOpRRR::Vand - | VecAluOpRRR::Vor - | VecAluOpRRR::Vxor => VecOpCategory::OPIVV, - VecAluOpRRR::Vmul | VecAluOpRRR::Vmulh | VecAluOpRRR::Vmulhu => VecOpCategory::OPMVV, + VecAluOpRRR::VaddVV + | VecAluOpRRR::VsubVV + | VecAluOpRRR::VandVV + | VecAluOpRRR::VorVV + | VecAluOpRRR::VxorVV => VecOpCategory::OPIVV, + VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => { + VecOpCategory::OPMVV + } } .encode() } pub fn funct6(&self) -> u32 { // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc match self { - VecAluOpRRR::Vadd => 0b000000, - VecAluOpRRR::Vsub => 0b000010, - VecAluOpRRR::Vmul => 0b100101, - VecAluOpRRR::Vmulh => 0b100111, - VecAluOpRRR::Vmulhu => 0b100100, - VecAluOpRRR::Vand => 0b001001, - VecAluOpRRR::Vor => 0b001010, - VecAluOpRRR::Vxor => 0b001011, + VecAluOpRRR::VaddVV => 0b000000, + VecAluOpRRR::VsubVV => 0b000010, + VecAluOpRRR::VmulVV => 0b100101, + VecAluOpRRR::VmulhVV => 0b100111, + VecAluOpRRR::VmulhuVV => 0b100100, + VecAluOpRRR::VandVV => 0b001001, + VecAluOpRRR::VorVV => 0b001010, + VecAluOpRRR::VxorVV => 0b001011, } } } @@ -274,8 +276,8 @@ impl fmt::Display for VecAluOpRRR { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut s = format!("{self:?}"); s.make_ascii_lowercase(); - s.push_str(".vv"); - f.write_str(&s) + let (opcode, category) = s.split_at(s.len() - 2); + f.write_str(&format!("{}.{}", opcode, category)) } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 01a60f80abfa..a91eaecec844 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -83,14 +83,14 @@ ;; Register to Register ALU Ops (type VecAluOpRRR (enum - (Vadd) - (Vsub) - (Vmul) - (Vmulh) - (Vmulhu) - (Vand) - (Vor) - (Vxor) + (VaddVV) + (VsubVV) + (VmulVV) + (VmulhVV) + (VmulhuVV) + (VandVV) + (VorVV) + (VxorVV) )) ;; Register-Imm ALU Ops @@ -181,7 +181,7 @@ ;; Helper for emitting the `vadd.vv` instruction. (decl rv_vadd_vv (Reg Reg VState) Reg) (rule (rv_vadd_vv vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.Vadd) vs2 vs1 vstate)) + (vec_alu_rrr (VecAluOpRRR.VaddVV) vs2 vs1 vstate)) ;; Helper for emitting the `vadd.vi` instruction. (decl rv_vadd_vi (Reg Imm5 VState) Reg) @@ -191,34 +191,34 @@ ;; Helper for emitting the `vsub.vv` instruction. (decl rv_vsub_vv (Reg Reg VState) Reg) (rule (rv_vsub_vv vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.Vsub) vs2 vs1 vstate)) + (vec_alu_rrr (VecAluOpRRR.VsubVV) vs2 vs1 vstate)) ;; Helper for emitting the `vmul.vv` instruction. (decl rv_vmul_vv (Reg Reg VState) Reg) (rule (rv_vmul_vv vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.Vmul) vs2 vs1 vstate)) + (vec_alu_rrr (VecAluOpRRR.VmulVV) vs2 vs1 vstate)) ;; Helper for emitting the `vmulh.vv` instruction. (decl rv_vmulh_vv (Reg Reg VState) Reg) (rule (rv_vmulh_vv vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.Vmulh) vs2 vs1 vstate)) + (vec_alu_rrr (VecAluOpRRR.VmulhVV) vs2 vs1 vstate)) ;; Helper for emitting the `vmulhu.vv` instruction. (decl rv_vmulhu_vv (Reg Reg VState) Reg) (rule (rv_vmulhu_vv vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.Vmulhu) vs2 vs1 vstate)) + (vec_alu_rrr (VecAluOpRRR.VmulhuVV) vs2 vs1 vstate)) ;; Helper for emitting the `vand.vv` instruction. (decl rv_vand_vv (Reg Reg VState) Reg) (rule (rv_vand_vv vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.Vand) vs2 vs1 vstate)) + (vec_alu_rrr (VecAluOpRRR.VandVV) vs2 vs1 vstate)) ;; Helper for emitting the `vor.vv` instruction. (decl rv_vor_vv (Reg Reg VState) Reg) (rule (rv_vor_vv vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.Vor) vs2 vs1 vstate)) + (vec_alu_rrr (VecAluOpRRR.VorVV) vs2 vs1 vstate)) ;; Helper for emitting the `vxor.vv` instruction. (decl rv_vxor_vv (Reg Reg VState) Reg) (rule (rv_vxor_vv vs2 vs1 vstate) - (vec_alu_rrr (VecAluOpRRR.Vxor) vs2 vs1 vstate)) + (vec_alu_rrr (VecAluOpRRR.VxorVV) vs2 vs1 vstate)) From 9939732aaa08eec7d6b2d088cf3878529a0a59e4 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 6 May 2023 09:05:56 +0100 Subject: [PATCH 2/8] riscv64: Rename VecAluOpRRImm5 Arms --- cranelift/codegen/src/isa/riscv64/inst/vector.rs | 6 +++--- cranelift/codegen/src/isa/riscv64/inst_vector.isle | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index e1c9b41a102f..34960f614ecf 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -292,7 +292,7 @@ impl VecAluOpRRImm5 { pub fn funct6(&self) -> u32 { // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc match self { - VecAluOpRRImm5::Vadd => 0b000000, + VecAluOpRRImm5::VaddVI => 0b000000, } } } @@ -301,8 +301,8 @@ impl fmt::Display for VecAluOpRRImm5 { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut s = format!("{self:?}"); s.make_ascii_lowercase(); - s.push_str(".vi"); - f.write_str(&s) + let (opcode, category) = s.split_at(s.len() - 2); + f.write_str(&format!("{}.{}", opcode, category)) } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index a91eaecec844..8b26dacc70dc 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -95,7 +95,7 @@ ;; Register-Imm ALU Ops (type VecAluOpRRImm5 (enum - (Vadd) + (VaddVI) )) @@ -186,7 +186,7 @@ ;; Helper for emitting the `vadd.vi` instruction. (decl rv_vadd_vi (Reg Imm5 VState) Reg) (rule (rv_vadd_vi vs2 imm vstate) - (vec_alu_rr_imm5 (VecAluOpRRImm5.Vadd) vs2 imm vstate)) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VaddVI) vs2 imm vstate)) ;; Helper for emitting the `vsub.vv` instruction. (decl rv_vsub_vv (Reg Reg VState) Reg) From 9e90014c07af76ad9bfd0025d2be3049e21fb557 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 6 May 2023 09:51:42 +0100 Subject: [PATCH 3/8] riscv64: Add `vsub.vx` --- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 22 ++++- .../codegen/src/isa/riscv64/inst/vector.rs | 28 ++++--- .../codegen/src/isa/riscv64/inst_vector.isle | 9 +++ cranelift/codegen/src/isa/riscv64/lower.isle | 3 + .../filetests/isa/riscv64/simd-isub.clif | 40 +++++++++ .../filetests/runtests/simd-isub-splat.clif | 81 +++++++++++++++++++ 6 files changed, 168 insertions(+), 15 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-isub-splat.clif diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 0e662d8054bc..8124b3adb938 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -4,7 +4,7 @@ #![allow(dead_code)] #![allow(non_camel_case_types)] -use super::lower::isle::generated_code::{VecAMode, VecElementWidth}; +use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpCategory}; use crate::binemit::{Addend, CodeOffset, Reloc}; pub use crate::ir::condcodes::IntCC; use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64}; @@ -17,7 +17,7 @@ use crate::{settings, CodegenError, CodegenResult}; pub use crate::ir::condcodes::FloatCC; use alloc::vec::Vec; -use regalloc2::{PRegSet, VReg}; +use regalloc2::{PRegSet, RegClass, VReg}; use smallvec::{smallvec, SmallVec}; use std::boxed::Box; use std::string::{String, ToString}; @@ -624,7 +624,23 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan // gen_prologue is called at emit stage. // no need let reg alloc know. } - &Inst::VecAluRRR { vd, vs1, vs2, .. } => { + &Inst::VecAluRRR { + op, vd, vs1, vs2, .. + } => { + debug_assert_eq!(vs2.class(), RegClass::Vector); + match op.category() { + VecOpCategory::OPIVV | VecOpCategory::OPFVV | VecOpCategory::OPMVV => { + debug_assert_eq!(vs1.class(), RegClass::Vector); + } + VecOpCategory::OPIVX | VecOpCategory::OPMVX => { + debug_assert_eq!(vs1.class(), RegClass::Int); + } + VecOpCategory::OPFVF => { + debug_assert_eq!(vs1.class(), RegClass::Float); + } + _ => unreachable!(), + } + collector.reg_use(vs1); collector.reg_use(vs2); collector.reg_def(vd); diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 34960f614ecf..72e21aa9b206 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -245,23 +245,13 @@ impl VecAluOpRRR { 0x57 } pub fn funct3(&self) -> u32 { - match self { - VecAluOpRRR::VaddVV - | VecAluOpRRR::VsubVV - | VecAluOpRRR::VandVV - | VecAluOpRRR::VorVV - | VecAluOpRRR::VxorVV => VecOpCategory::OPIVV, - VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => { - VecOpCategory::OPMVV - } - } - .encode() + self.category().encode() } pub fn funct6(&self) -> u32 { // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc match self { VecAluOpRRR::VaddVV => 0b000000, - VecAluOpRRR::VsubVV => 0b000010, + VecAluOpRRR::VsubVV | VecAluOpRRR::VsubVX => 0b000010, VecAluOpRRR::VmulVV => 0b100101, VecAluOpRRR::VmulhVV => 0b100111, VecAluOpRRR::VmulhuVV => 0b100100, @@ -270,6 +260,20 @@ impl VecAluOpRRR { VecAluOpRRR::VxorVV => 0b001011, } } + + pub fn category(&self) -> VecOpCategory { + match self { + VecAluOpRRR::VaddVV + | VecAluOpRRR::VsubVV + | VecAluOpRRR::VandVV + | VecAluOpRRR::VorVV + | VecAluOpRRR::VxorVV => VecOpCategory::OPIVV, + VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => { + VecOpCategory::OPMVV + } + VecAluOpRRR::VsubVX => VecOpCategory::OPIVX, + } + } } impl fmt::Display for VecAluOpRRR { diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 8b26dacc70dc..baac1872a363 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -83,6 +83,7 @@ ;; Register to Register ALU Ops (type VecAluOpRRR (enum + ;; Vector-Vector Opcodes (VaddVV) (VsubVV) (VmulVV) @@ -91,6 +92,9 @@ (VandVV) (VorVV) (VxorVV) + + ;; Vector-Scalar Opcodes + (VsubVX) )) ;; Register-Imm ALU Ops @@ -193,6 +197,11 @@ (rule (rv_vsub_vv vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.VsubVV) vs2 vs1 vstate)) +;; Helper for emitting the `vsub.vx` instruction. +(decl rv_vsub_vx (Reg Reg VState) Reg) +(rule (rv_vsub_vx vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VsubVX) vs2 vs1 vstate)) + ;; Helper for emitting the `vmul.vv` instruction. (decl rv_vmul_vv (Reg Reg VState) Reg) (rule (rv_vmul_vv vs2 vs1 vstate) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 207ea631d6b4..0f14752acfe0 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -140,6 +140,9 @@ (rule 3 (lower (has_type (ty_vec_fits_in_register ty) (isub x y))) (rv_vsub_vv x y ty)) +(rule 4 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat y)))) + (rv_vsub_vx x y ty)) + ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif b/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif index 550cddb7dd09..76720c12e325 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif @@ -169,3 +169,43 @@ block0(v0: i64x2, v1: i64x2): ; addi sp, sp, 0x10 ; ret +function %isub_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = isub v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vsub.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x0a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-isub-splat.clif b/cranelift/filetests/filetests/runtests/simd-isub-splat.clif new file mode 100644 index 000000000000..df0433af1400 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-isub-splat.clif @@ -0,0 +1,81 @@ +test interpret +test run +target aarch64 +target s390x +target x86_64 has_sse41=false +set enable_simd +target x86_64 +target x86_64 skylake +target riscv64 has_v + + +function %isub_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = isub v0, v2 + return v3 +} +; run: %isub_splat_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], 22) == [-21 -20 -19 -18 -17 -16 -15 -14 -13 -12 -11 -10 -9 -8 -7 -6] + +function %isub_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = isub v0, v2 + return v3 +} +; run: %isub_splat_i16x8([1 2 3 4 5 6 7 8], 22) == [-21 -20 -19 -18 -17 -16 -15 -14] + +function %isub_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = isub v0, v2 + return v3 +} +; run: %isub_splat_i32x4([1 2 3 4], 22) == [-21 -20 -19 -18] + +function %isub_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = isub v0, v2 + return v3 +} +; run: %isub_splat_i64x2([1 2], 22) == [-21 -20] + + + +function %isub_splat_const_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = isub v0, v2 + return v3 +} +; run: %isub_splat_const_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [-4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11] + +function %isub_splat_const_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = isub v0, v2 + return v3 +} +; run: %isub_splat_const_i16x8([1 2 3 4 5 6 7 8]) == [17 18 19 20 21 22 23 24] + +function %isub_splat_const_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = isub v0, v2 + return v3 +} +; run: %isub_splat_const_i32x4([1 2 3 4]) == [-14 -13 -12 -11] + +function %isub_splat_const_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = isub v0, v2 + return v3 +} +; run: %isub_splat_const_i64x2([1 2]) == [6 7] + From 1c9d067c7036b6aef5500dc62d028448871cf1f1 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 6 May 2023 09:59:41 +0100 Subject: [PATCH 4/8] riscv64: Add `vrsub.vx` --- .../codegen/src/isa/riscv64/inst/vector.rs | 3 +- .../codegen/src/isa/riscv64/inst_vector.isle | 6 +++ cranelift/codegen/src/isa/riscv64/lower.isle | 4 ++ .../filetests/isa/riscv64/simd-isub.clif | 40 +++++++++++++++++++ .../filetests/runtests/simd-isub-splat.clif | 34 ++++++++++++++++ 5 files changed, 86 insertions(+), 1 deletion(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 72e21aa9b206..9656dc1d00f1 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -252,6 +252,7 @@ impl VecAluOpRRR { match self { VecAluOpRRR::VaddVV => 0b000000, VecAluOpRRR::VsubVV | VecAluOpRRR::VsubVX => 0b000010, + VecAluOpRRR::VrsubVX => 0b000011, VecAluOpRRR::VmulVV => 0b100101, VecAluOpRRR::VmulhVV => 0b100111, VecAluOpRRR::VmulhuVV => 0b100100, @@ -271,7 +272,7 @@ impl VecAluOpRRR { VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => { VecOpCategory::OPMVV } - VecAluOpRRR::VsubVX => VecOpCategory::OPIVX, + VecAluOpRRR::VsubVX | VecAluOpRRR::VrsubVX => VecOpCategory::OPIVX, } } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index baac1872a363..75ac8312654b 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -95,6 +95,7 @@ ;; Vector-Scalar Opcodes (VsubVX) + (VrsubVX) )) ;; Register-Imm ALU Ops @@ -202,6 +203,11 @@ (rule (rv_vsub_vx vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.VsubVX) vs2 vs1 vstate)) +;; Helper for emitting the `vrsub.vx` instruction. +(decl rv_vrsub_vx (Reg Reg VState) Reg) +(rule (rv_vrsub_vx vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 vs1 vstate)) + ;; Helper for emitting the `vmul.vv` instruction. (decl rv_vmul_vv (Reg Reg VState) Reg) (rule (rv_vmul_vv vs2 vs1 vstate) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 0f14752acfe0..ee4c415a7b43 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -143,6 +143,10 @@ (rule 4 (lower (has_type (ty_vec_fits_in_register ty) (isub x (splat y)))) (rv_vsub_vx x y ty)) +(rule 5 (lower (has_type (ty_vec_fits_in_register ty) (isub (splat x) y))) + (rv_vrsub_vx y x ty)) + + ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif b/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif index 76720c12e325..9c7f08e2fa7e 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-isub.clif @@ -209,3 +209,43 @@ block0(v0: i64x2, v1: i64): ; addi sp, sp, 0x10 ; ret +function %isub_splat_reverse_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = isub v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vrsub.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x0e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-isub-splat.clif b/cranelift/filetests/filetests/runtests/simd-isub-splat.clif index df0433af1400..a24ee3a777cc 100644 --- a/cranelift/filetests/filetests/runtests/simd-isub-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-isub-splat.clif @@ -9,6 +9,40 @@ target x86_64 skylake target riscv64 has_v +function %isub_splat_reverse_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = isub v2, v0 + return v3 +} +; run: %isub_splat_reverse_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], 22) == [21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6] + +function %isub_splat_reverse_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = isub v2, v0 + return v3 +} +; run: %isub_splat_reverse_i16x8([1 2 3 4 5 6 7 8], 22) == [21 20 19 18 17 16 15 14] + +function %isub_splat_reverse_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = isub v2, v0 + return v3 +} +; run: %isub_splat_reverse_i32x4([1 2 3 4], 22) == [21 20 19 18] + +function %isub_splat_reverse_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = isub v2, v0 + return v3 +} +; run: %isub_splat_reverse_i64x2([1 2], 22) == [21 20] + + + function %isub_splat_i8x16(i8x16, i8) -> i8x16 { block0(v0: i8x16, v1: i8): v2 = splat.i8x16 v1 From 5d2b42d41753507b3e2528e76b0aaf853bad804a Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 6 May 2023 10:05:26 +0100 Subject: [PATCH 5/8] riscv64: Add `vrsub.vi` --- .../codegen/src/isa/riscv64/inst/vector.rs | 1 + .../codegen/src/isa/riscv64/inst_vector.isle | 6 +++ cranelift/codegen/src/isa/riscv64/lower.isle | 3 ++ .../filetests/runtests/simd-isub-splat.clif | 38 +++++++++++++++++++ 4 files changed, 48 insertions(+) diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index 9656dc1d00f1..f75f7e3ad3f5 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -298,6 +298,7 @@ impl VecAluOpRRImm5 { // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc match self { VecAluOpRRImm5::VaddVI => 0b000000, + VecAluOpRRImm5::VrsubVI => 0b000011, } } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 75ac8312654b..0f65edc91853 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -101,6 +101,7 @@ ;; Register-Imm ALU Ops (type VecAluOpRRImm5 (enum (VaddVI) + (VrsubVI) )) @@ -208,6 +209,11 @@ (rule (rv_vrsub_vx vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 vs1 vstate)) +;; Helper for emitting the `vrsub.vi` instruction. +(decl rv_vrsub_vi (Reg Imm5 VState) Reg) +(rule (rv_vrsub_vi vs2 imm vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VrsubVI) vs2 imm vstate)) + ;; Helper for emitting the `vmul.vv` instruction. (decl rv_vmul_vv (Reg Reg VState) Reg) (rule (rv_vmul_vv vs2 vs1 vstate) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index ee4c415a7b43..60c84969a87a 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -146,6 +146,9 @@ (rule 5 (lower (has_type (ty_vec_fits_in_register ty) (isub (splat x) y))) (rv_vrsub_vx y x ty)) +(rule 6 (lower (has_type (ty_vec_fits_in_register ty) (isub (replicated_imm5 x) y))) + (rv_vrsub_vi y x ty)) + ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/filetests/filetests/runtests/simd-isub-splat.clif b/cranelift/filetests/filetests/runtests/simd-isub-splat.clif index a24ee3a777cc..975c2fadb006 100644 --- a/cranelift/filetests/filetests/runtests/simd-isub-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-isub-splat.clif @@ -113,3 +113,41 @@ block0(v0: i64x2): } ; run: %isub_splat_const_i64x2([1 2]) == [6 7] + + +function %isub_splat_const_reverse_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iconst.i8 5 + v2 = splat.i8x16 v1 + v3 = isub v2, v0 + return v3 +} +; run: %isub_splat_const_reverse_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [4 3 2 1 0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11] + +function %isub_splat_const_reverse_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iconst.i16 -16 + v2 = splat.i16x8 v1 + v3 = isub v2, v0 + return v3 +} +; run: %isub_splat_const_reverse_i16x8([1 2 3 4 5 6 7 8]) == [-17 -18 -19 -20 -21 -22 -23 -24] + +function %isub_splat_const_reverse_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iconst.i32 15 + v2 = splat.i32x4 v1 + v3 = isub v2, v0 + return v3 +} +; run: %isub_splat_const_reverse_i32x4([1 2 3 4]) == [14 13 12 11] + +function %isub_splat_const_reverse_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iconst.i64 -5 + v2 = splat.i64x2 v1 + v3 = isub v2, v0 + return v3 +} +; run: %isub_splat_const_reverse_i64x2([1 2]) == [-6 -7] + From 84621b1e0b544bfecc359c136185662a045c4335 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 6 May 2023 10:24:11 +0100 Subject: [PATCH 6/8] riscv64: Add `vneg.v` --- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 9 +- .../codegen/src/isa/riscv64/inst_vector.isle | 5 + cranelift/codegen/src/isa/riscv64/lower.isle | 7 +- .../filetests/isa/riscv64/simd-ineg.clif | 159 ++++++++++++++++++ .../filetests/runtests/simd-ineg.clif | 23 +++ 5 files changed, 199 insertions(+), 4 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/simd-ineg.clif diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 8124b3adb938..2f65d01e4d83 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -53,11 +53,11 @@ pub(crate) type VecWritableReg = Vec>; //============================================================================= // Instructions (top level): definition -use crate::isa::riscv64::lower::isle::generated_code::MInst; pub use crate::isa::riscv64::lower::isle::generated_code::{ AluOPRRI, AluOPRRR, AtomicOP, FClassResult, FFlagsException, FloatRoundOP, FloatSelectOP, FpuOPRR, FpuOPRRR, FpuOPRRRR, IntSelectOP, LoadOP, MInst as Inst, StoreOP, FRM, }; +use crate::isa::riscv64::lower::isle::generated_code::{MInst, VecAluOpRRR}; type BoxCallInfo = Box; type BoxCallIndInfo = Box; @@ -1575,7 +1575,12 @@ impl Inst { // Note: vs2 and vs1 here are opposite to the standard scalar ordering. // This is noted in Section 10.1 of the RISC-V Vector spec. - format!("{} {},{},{} {}", op, vd_s, vs2_s, vs1_s, vstate) + match (op, vs1) { + (VecAluOpRRR::VrsubVX, vs1) if vs1 == zero_reg() => { + format!("vneg.v {},{} {}", vd_s, vs2_s, vstate) + } + _ => format!("{} {},{},{} {}", op, vd_s, vs2_s, vs1_s, vstate), + } } &Inst::VecAluRRImm5 { op, diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index 0f65edc91853..a84a6c8af0a6 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -209,6 +209,11 @@ (rule (rv_vrsub_vx vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 vs1 vstate)) +;; Helper for emitting the `vneg.v` pseudo-instruction. +(decl rv_vneg_v (Reg VState) Reg) +(rule (rv_vneg_v vs2 vstate) + (vec_alu_rrr (VecAluOpRRR.VrsubVX) vs2 (zero_reg) vstate)) + ;; Helper for emitting the `vrsub.vi` instruction. (decl rv_vrsub_vi (Reg Imm5 VState) Reg) (rule (rv_vrsub_vi vs2 imm vstate) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 60c84969a87a..59d36e58ffb3 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -152,10 +152,13 @@ ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; `i64` and smaller. -(rule (lower (has_type ty (ineg val))) +(rule (lower (has_type (ty_int ty) (ineg val))) (neg ty val)) +(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (ineg x))) + (rv_vneg_v x ty)) + + ;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y))) diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-ineg.clif b/cranelift/filetests/filetests/isa/riscv64/simd-ineg.clif new file mode 100644 index 000000000000..36aba8eb32a8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/simd-ineg.clif @@ -0,0 +1,159 @@ +test compile precise-output +set unwind_info=false +target riscv64 has_v + + +function %ineg_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = ineg v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vneg.v v4,v1 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x42, 0x10, 0x0e +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ineg_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = ineg v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vneg.v v4,v1 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0x57, 0x42, 0x10, 0x0e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ineg_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = ineg v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vneg.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x42, 0x10, 0x0e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %ineg_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = ineg v0 + return v1 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vneg.v v4,v1 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x42, 0x10, 0x0e +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x02, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-ineg.clif b/cranelift/filetests/filetests/runtests/simd-ineg.clif index 4cc78bdf795b..ff26ea5c6521 100644 --- a/cranelift/filetests/filetests/runtests/simd-ineg.clif +++ b/cranelift/filetests/filetests/runtests/simd-ineg.clif @@ -4,6 +4,21 @@ target s390x set enable_simd target x86_64 target x86_64 skylake +target riscv64 has_v + +function %ineg_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = ineg v0 + return v1 +} +; run: %ineg_i8x16([-1 10 2 4 5 6 7 8 9 10 -11 -12 -13 -14 -15 -16]) == [1 -10 -2 -4 -5 -6 -7 -8 -9 -10 11 12 13 14 15 16] + +function %ineg_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = ineg v0 + return v1 +} +; run: %ineg_i16x8([1 2 -3 -4 5 6 -7 -8]) == [-1 -2 3 4 -5 -6 7 8] function %ineg_i32x4(i32x4) -> i32x4 { block0(v0: i32x4): @@ -11,3 +26,11 @@ block0(v0: i32x4): return v1 } ; run: %ineg_i32x4([1 1 1 1]) == [-1 -1 -1 -1] +; run: %ineg_i32x4([1 -9 1 -10]) == [-1 9 -1 10] + +function %ineg_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = ineg v0 + return v1 +} +; run: %ineg_i64x2([99 -10]) == [-99 10] From 6a5e40e536093d0931533af9b2bab6f0e8a427cb Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 6 May 2023 17:42:29 +0100 Subject: [PATCH 7/8] riscv64: Add `vadd.vx` --- .../codegen/src/isa/riscv64/inst/vector.rs | 6 +- .../codegen/src/isa/riscv64/inst_vector.isle | 6 + cranelift/codegen/src/isa/riscv64/lower.isle | 10 +- .../filetests/isa/riscv64/simd-iadd.clif | 158 ++++++++++++++++++ .../filetests/runtests/simd-iadd-splat.clif | 49 +++++- 5 files changed, 217 insertions(+), 12 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index f75f7e3ad3f5..e94d7c435f9c 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -250,7 +250,7 @@ impl VecAluOpRRR { pub fn funct6(&self) -> u32 { // See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc match self { - VecAluOpRRR::VaddVV => 0b000000, + VecAluOpRRR::VaddVV | VecAluOpRRR::VaddVX => 0b000000, VecAluOpRRR::VsubVV | VecAluOpRRR::VsubVX => 0b000010, VecAluOpRRR::VrsubVX => 0b000011, VecAluOpRRR::VmulVV => 0b100101, @@ -272,7 +272,9 @@ impl VecAluOpRRR { VecAluOpRRR::VmulVV | VecAluOpRRR::VmulhVV | VecAluOpRRR::VmulhuVV => { VecOpCategory::OPMVV } - VecAluOpRRR::VsubVX | VecAluOpRRR::VrsubVX => VecOpCategory::OPIVX, + VecAluOpRRR::VaddVX | VecAluOpRRR::VsubVX | VecAluOpRRR::VrsubVX => { + VecOpCategory::OPIVX + } } } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index a84a6c8af0a6..ee02f7b7c503 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -94,6 +94,7 @@ (VxorVV) ;; Vector-Scalar Opcodes + (VaddVX) (VsubVX) (VrsubVX) )) @@ -189,6 +190,11 @@ (rule (rv_vadd_vv vs2 vs1 vstate) (vec_alu_rrr (VecAluOpRRR.VaddVV) vs2 vs1 vstate)) +;; Helper for emitting the `vadd.vx` instruction. +(decl rv_vadd_vx (Reg Reg VState) Reg) +(rule (rv_vadd_vx vs2 vs1 vstate) + (vec_alu_rrr (VecAluOpRRR.VaddVX) vs2 vs1 vstate)) + ;; Helper for emitting the `vadd.vi` instruction. (decl rv_vadd_vi (Reg Imm5 VState) Reg) (rule (rv_vadd_vi vs2 imm vstate) diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 59d36e58ffb3..cf30e6e89b0f 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -110,10 +110,16 @@ (rule 8 (lower (has_type (ty_vec_fits_in_register ty) (iadd x y))) (rv_vadd_vv x y ty)) -(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (replicated_imm5 y)))) +(rule 9 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (splat y)))) + (rv_vadd_vx x y ty)) + +(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd (splat x) y))) + (rv_vadd_vx y x ty)) + +(rule 11 (lower (has_type (ty_vec_fits_in_register ty) (iadd x (replicated_imm5 y)))) (rv_vadd_vi x y ty)) -(rule 10 (lower (has_type (ty_vec_fits_in_register ty) (iadd (replicated_imm5 x) y))) +(rule 12 (lower (has_type (ty_vec_fits_in_register ty) (iadd (replicated_imm5 x) y))) (rv_vadd_vi y x ty)) ;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;; diff --git a/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif b/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif index f37d39bc19bf..dc5790303ced 100644 --- a/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif +++ b/cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif @@ -331,3 +331,161 @@ block0(v0: i64x2): ; addi sp, sp, 0x10 ; ret +function %iadd_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = iadd v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vadd.vx v5,v1,a0 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0xd7, 0x42, 0x15, 0x02 +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %iadd_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = iadd v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vadd.vx v5,v1,a0 #avl=8, #vtype=(e16, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x84, 0xcc +; .byte 0xd7, 0x42, 0x15, 0x02 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %iadd_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = iadd v0, v2 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vadd.vx v5,v1,a0 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x02 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %iadd_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = iadd v2, v0 + return v3 +} + +; VCode: +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma) +; vadd.vx v5,v1,a0 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v5,0(a1) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; ori s0, sp, 0 +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, s0, 0x10 +; .byte 0x87, 0x80, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0xd7, 0x42, 0x15, 0x02 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0xa7, 0x82, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif b/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif index 2fa55bc142aa..bebad9eb6383 100644 --- a/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif @@ -8,38 +8,71 @@ target x86_64 target x86_64 skylake target riscv64 has_v -function %iadd_splat_i8x16(i8x16) -> i8x16 { +function %iadd_splat_const_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): v1 = iconst.i8 5 v2 = splat.i8x16 v1 v3 = iadd v0, v2 return v3 } -; run: %iadd_splat_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21] +; run: %iadd_splat_const_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21] -function %iadd_splat_i16x8(i16x8) -> i16x8 { +function %iadd_splat_const_i16x8(i16x8) -> i16x8 { block0(v0: i16x8): v1 = iconst.i16 -16 v2 = splat.i16x8 v1 v3 = iadd v0, v2 return v3 } -; run: %iadd_splat_i16x8([1 2 3 4 5 6 7 8]) == [-15 -14 -13 -12 -11 -10 -9 -8] +; run: %iadd_splat_const_i16x8([1 2 3 4 5 6 7 8]) == [-15 -14 -13 -12 -11 -10 -9 -8] -function %iadd_splat_i32x4(i32x4) -> i32x4 { +function %iadd_splat_const_i32x4(i32x4) -> i32x4 { block0(v0: i32x4): v1 = iconst.i32 15 v2 = splat.i32x4 v1 v3 = iadd v0, v2 return v3 } -; run: %iadd_splat_i32x4([1 2 3 4]) == [16 17 18 19] +; run: %iadd_splat_const_i32x4([1 2 3 4]) == [16 17 18 19] -function %iadd_splat_i64x2(i64x2) -> i64x2 { +function %iadd_splat_const_i64x2(i64x2) -> i64x2 { block0(v0: i64x2): v1 = iconst.i64 -5 v2 = splat.i64x2 v1 v3 = iadd v2, v0 return v3 } -; run: %iadd_splat_i64x2([1 2]) == [-4 -3] +; run: %iadd_splat_const_i64x2([1 2]) == [-4 -3] + + +function %iadd_splat_i8x16(i8x16, i8) -> i8x16 { +block0(v0: i8x16, v1: i8): + v2 = splat.i8x16 v1 + v3 = iadd v0, v2 + return v3 +} +; run: %iadd_splat_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], -15) == [-14 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1] + +function %iadd_splat_i16x8(i16x8, i16) -> i16x8 { +block0(v0: i16x8, v1: i16): + v2 = splat.i16x8 v1 + v3 = iadd v0, v2 + return v3 +} +; run: %iadd_splat_i16x8([1 2 3 4 5 6 7 8], -10) == [-9 -8 -7 -6 -5 -4 -3 -2] + +function %iadd_splat_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = splat.i32x4 v1 + v3 = iadd v0, v2 + return v3 +} +; run: %iadd_splat_i32x4([1 2 3 4], 22) == [23 24 25 26] + +function %iadd_splat_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = splat.i64x2 v1 + v3 = iadd v2, v0 + return v3 +} +; run: %iadd_splat_i64x2([1 2], 10) == [11 12] From d905232a5274388beb3d664fe75d478a674cdd83 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 10 May 2023 12:08:06 +0100 Subject: [PATCH 8/8] riscv64: Refactor Inst RegClass asserts --- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 19 ++++++------------- .../codegen/src/isa/riscv64/inst/vector.rs | 11 +++++++++++ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 2f65d01e4d83..d654e77765a6 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -4,7 +4,7 @@ #![allow(dead_code)] #![allow(non_camel_case_types)] -use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpCategory}; +use super::lower::isle::generated_code::{VecAMode, VecElementWidth}; use crate::binemit::{Addend, CodeOffset, Reloc}; pub use crate::ir::condcodes::IntCC; use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64}; @@ -627,25 +627,18 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan &Inst::VecAluRRR { op, vd, vs1, vs2, .. } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); debug_assert_eq!(vs2.class(), RegClass::Vector); - match op.category() { - VecOpCategory::OPIVV | VecOpCategory::OPFVV | VecOpCategory::OPMVV => { - debug_assert_eq!(vs1.class(), RegClass::Vector); - } - VecOpCategory::OPIVX | VecOpCategory::OPMVX => { - debug_assert_eq!(vs1.class(), RegClass::Int); - } - VecOpCategory::OPFVF => { - debug_assert_eq!(vs1.class(), RegClass::Float); - } - _ => unreachable!(), - } + debug_assert_eq!(vs1.class(), op.vs1_regclass()); collector.reg_use(vs1); collector.reg_use(vs2); collector.reg_def(vd); } &Inst::VecAluRRImm5 { vd, vs2, .. } => { + debug_assert_eq!(vd.to_reg().class(), RegClass::Vector); + debug_assert_eq!(vs2.class(), RegClass::Vector); + collector.reg_use(vs2); collector.reg_def(vd); } diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index e94d7c435f9c..603da8690ea4 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -4,6 +4,7 @@ use crate::isa::riscv64::lower::isle::generated_code::{ VecAMode, VecAluOpRRImm5, VecAluOpRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, VecOpCategory, VecOpMasking, VecTailMode, }; +use crate::machinst::RegClass; use crate::Reg; use core::fmt; @@ -277,6 +278,16 @@ impl VecAluOpRRR { } } } + + // vs1 is the only variable source, vs2 is fixed. + pub fn vs1_regclass(&self) -> RegClass { + match self.category() { + VecOpCategory::OPIVV | VecOpCategory::OPFVV | VecOpCategory::OPMVV => RegClass::Vector, + VecOpCategory::OPIVX | VecOpCategory::OPMVX => RegClass::Int, + VecOpCategory::OPFVF => RegClass::Float, + _ => unreachable!(), + } + } } impl fmt::Display for VecAluOpRRR {