riscv64: Use Vector RegClass for Vectors (#6366)

* riscv64: Use Vector Regclass
* riscv64: Add assert to `Inst::Mov`
  It isn't ready yet
* riscv64: Add SIMD vconst large test
  This was meant to exercise the changes in #6324 but was failing in RISC-V due to some missing regalloc bits.
* riscv64: Restrict spill slot size
* riscv64: Mark v0 as preferred
* riscv64: Const compute clobbers
bytecodealliance · May 16, 2023 · b13bbc8 · b13bbc8
1 parent 5471fde
commit b13bbc8
Show file tree

Hide file tree

Showing 28 changed files with 1,860 additions and 460 deletions.
diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -1069,7 +1069,11 @@ impl ABIMachineSpec for AArch64MachineDeps {
         insts
     }
 
-    fn get_number_of_spillslots_for_value(rc: RegClass, vector_size: u32) -> u32 {
+    fn get_number_of_spillslots_for_value(
+        rc: RegClass,
+        vector_size: u32,
+        _isa_flags: &Self::F,
+    ) -> u32 {
         assert_eq!(vector_size % 8, 0);
         // We allocate in terms of 8-byte slots.
         match rc {

diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs
@@ -43,6 +43,40 @@ pub struct Riscv64MachineDeps;
 
 impl IsaFlags for RiscvFlags {}
 
+impl RiscvFlags {
+    pub(crate) fn min_vec_reg_size(&self) -> u64 {
+        let entries = [
+            (self.has_zvl65536b(), 65536),
+            (self.has_zvl32768b(), 32768),
+            (self.has_zvl16384b(), 16384),
+            (self.has_zvl8192b(), 8192),
+            (self.has_zvl4096b(), 4096),
+            (self.has_zvl2048b(), 2048),
+            (self.has_zvl1024b(), 1024),
+            (self.has_zvl512b(), 512),
+            (self.has_zvl256b(), 256),
+            // In order to claim the Application Profile V extension, a minimum
+            // register size of 128 is required. i.e. V implies Zvl128b.
+            (self.has_v(), 128),
+            (self.has_zvl128b(), 128),
+            (self.has_zvl64b(), 64),
+            (self.has_zvl32b(), 32),
+        ];
+
+        for (has_flag, size) in entries.into_iter() {
+            if !has_flag {
+                continue;
+            }
+
+            // Due to a limitation in regalloc2, we can't support types
+            // larger than 1024 bytes. So limit that here.
+            return std::cmp::min(size, 1024);
+        }
+
+        return 0;
+    }
+}
+
 impl ABIMachineSpec for Riscv64MachineDeps {
     type I = Inst;
     type F = RiscvFlags;
@@ -415,9 +449,9 @@ impl ABIMachineSpec for Riscv64MachineDeps {
             for reg in clobbered_callee_saves {
                 let r_reg = reg.to_reg();
                 let ty = match r_reg.class() {
-                    regalloc2::RegClass::Int => I64,
-                    regalloc2::RegClass::Float => F64,
-                    RegClass::Vector => unreachable!(),
+                    RegClass::Int => I64,
+                    RegClass::Float => F64,
+                    RegClass::Vector => unimplemented!("Vector Clobber Saves"),
                 };
                 if flags.unwind_info() {
                     insts.push(Inst::Unwind {
@@ -462,9 +496,9 @@ impl ABIMachineSpec for Riscv64MachineDeps {
         for reg in &clobbered_callee_saves {
             let rreg = reg.to_reg();
             let ty = match rreg.class() {
-                regalloc2::RegClass::Int => I64,
-                regalloc2::RegClass::Float => F64,
-                RegClass::Vector => unreachable!(),
+                RegClass::Int => I64,
+                RegClass::Float => F64,
+                RegClass::Vector => unimplemented!("Vector Clobber Restores"),
             };
             insts.push(Self::gen_load_stack(
                 StackAMode::SPOffset(-cur_offset, ty),
@@ -572,12 +606,16 @@ impl ABIMachineSpec for Riscv64MachineDeps {
         insts
     }
 
-    fn get_number_of_spillslots_for_value(rc: RegClass, _target_vector_bytes: u32) -> u32 {
+    fn get_number_of_spillslots_for_value(
+        rc: RegClass,
+        _target_vector_bytes: u32,
+        isa_flags: &RiscvFlags,
+    ) -> u32 {
         // We allocate in terms of 8-byte slots.
         match rc {
             RegClass::Int => 1,
             RegClass::Float => 1,
-            RegClass::Vector => unreachable!(),
+            RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32,
         }
     }
 
@@ -592,20 +630,7 @@ impl ABIMachineSpec for Riscv64MachineDeps {
     }
 
     fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet {
-        let mut v = PRegSet::empty();
-        for (k, need_save) in CALLER_SAVE_X_REG.iter().enumerate() {
-            if !*need_save {
-                continue;
-            }
-            v.add(px_reg(k));
-        }
-        for (k, need_save) in CALLER_SAVE_F_REG.iter().enumerate() {
-            if !*need_save {
-                continue;
-            }
-            v.add(pf_reg(k));
-        }
-        v
+        CLOBBERS
     }
 
     fn get_clobbered_callee_saves(
@@ -652,24 +677,12 @@ impl ABIMachineSpec for Riscv64MachineDeps {
     }
 }
 
-const CALLER_SAVE_X_REG: [bool; 32] = [
-    false, true, false, false, false, true, true, true, // 0-7
-    false, false, true, true, true, true, true, true, // 8-15
-    true, true, false, false, false, false, false, false, // 16-23
-    false, false, false, false, true, true, true, true, // 24-31
-];
 const CALLEE_SAVE_X_REG: [bool; 32] = [
     false, false, true, false, false, false, false, false, // 0-7
     true, true, false, false, false, false, false, false, // 8-15
     false, false, true, true, true, true, true, true, // 16-23
     true, true, true, true, false, false, false, false, // 24-31
 ];
-const CALLER_SAVE_F_REG: [bool; 32] = [
-    true, true, true, true, true, true, true, true, // 0-7
-    false, true, true, true, true, true, true, true, // 8-15
-    true, true, false, false, false, false, false, false, // 16-23
-    false, false, false, false, true, true, true, true, // 24-31
-];
 const CALLEE_SAVE_F_REG: [bool; 32] = [
     false, false, false, false, false, false, false, false, // 0-7
     true, false, false, false, false, false, false, false, // 8-15
@@ -680,10 +693,11 @@ const CALLEE_SAVE_F_REG: [bool; 32] = [
 /// This should be the registers that must be saved by callee.
 #[inline]
 fn is_reg_saved_in_prologue(_conv: CallConv, reg: RealReg) -> bool {
-    if reg.class() == RegClass::Int {
-        CALLEE_SAVE_X_REG[reg.hw_enc() as usize]
-    } else {
-        CALLEE_SAVE_F_REG[reg.hw_enc() as usize]
+    match reg.class() {
+        RegClass::Int => CALLEE_SAVE_X_REG[reg.hw_enc() as usize],
+        RegClass::Float => CALLEE_SAVE_F_REG[reg.hw_enc() as usize],
+        // All vector registers are caller saved.
+        RegClass::Vector => false,
     }
 }
 
@@ -697,12 +711,89 @@ fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
             RegClass::Float => {
                 clobbered_size += 8;
             }
-            RegClass::Vector => unreachable!(),
+            RegClass::Vector => unimplemented!("Vector Size Clobbered"),
         }
     }
     align_to(clobbered_size, 16)
 }
 
+const fn clobbers() -> PRegSet {
+    PRegSet::empty()
+        .with(px_reg(1))
+        .with(px_reg(5))
+        .with(px_reg(6))
+        .with(px_reg(7))
+        .with(px_reg(10))
+        .with(px_reg(11))
+        .with(px_reg(12))
+        .with(px_reg(13))
+        .with(px_reg(14))
+        .with(px_reg(15))
+        .with(px_reg(16))
+        .with(px_reg(17))
+        .with(px_reg(28))
+        .with(px_reg(29))
+        .with(px_reg(30))
+        .with(px_reg(31))
+        // F Regs
+        .with(pf_reg(0))
+        .with(pf_reg(1))
+        .with(pf_reg(2))
+        .with(pf_reg(3))
+        .with(pf_reg(4))
+        .with(pf_reg(5))
+        .with(pf_reg(6))
+        .with(pf_reg(7))
+        .with(pf_reg(9))
+        .with(pf_reg(10))
+        .with(pf_reg(11))
+        .with(pf_reg(12))
+        .with(pf_reg(13))
+        .with(pf_reg(14))
+        .with(pf_reg(15))
+        .with(pf_reg(16))
+        .with(pf_reg(17))
+        .with(pf_reg(28))
+        .with(pf_reg(29))
+        .with(pf_reg(30))
+        .with(pf_reg(31))
+        // V Regs - All vector regs get clobbered
+        .with(pv_reg(0))
+        .with(pv_reg(1))
+        .with(pv_reg(2))
+        .with(pv_reg(3))
+        .with(pv_reg(4))
+        .with(pv_reg(5))
+        .with(pv_reg(6))
+        .with(pv_reg(7))
+        .with(pv_reg(8))
+        .with(pv_reg(9))
+        .with(pv_reg(10))
+        .with(pv_reg(11))
+        .with(pv_reg(12))
+        .with(pv_reg(13))
+        .with(pv_reg(14))
+        .with(pv_reg(15))
+        .with(pv_reg(16))
+        .with(pv_reg(17))
+        .with(pv_reg(18))
+        .with(pv_reg(19))
+        .with(pv_reg(20))
+        .with(pv_reg(21))
+        .with(pv_reg(22))
+        .with(pv_reg(23))
+        .with(pv_reg(24))
+        .with(pv_reg(25))
+        .with(pv_reg(26))
+        .with(pv_reg(27))
+        .with(pv_reg(28))
+        .with(pv_reg(29))
+        .with(pv_reg(30))
+        .with(pv_reg(31))
+}
+
+const CLOBBERS: PRegSet = clobbers();
+
 impl Riscv64MachineDeps {
     fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
         insts.reserve(probe_count as usize);

diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
@@ -927,6 +927,8 @@ impl MachInstEmit for Inst {
             }
 
             &Inst::Mov { rd, rm, ty } => {
+                debug_assert_ne!(rd.to_reg().class(), RegClass::Vector);
+                debug_assert_ne!(rm.class(), RegClass::Vector);
                 if rd.to_reg() != rm {
                     let rm = allocs.next(rm);
                     let rd = allocs.next_writable(rd);

diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
@@ -7,7 +7,7 @@
 use super::lower::isle::generated_code::{VecAMode, VecElementWidth};
 use crate::binemit::{Addend, CodeOffset, Reloc};
 pub use crate::ir::condcodes::IntCC;
-use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, R32, R64};
+use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64};
 
 pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel};
 use crate::isa::{CallConv, FunctionAlignment};
@@ -667,7 +667,7 @@ impl MachInst for Inst {
         match rc {
             regalloc2::RegClass::Int => I64,
             regalloc2::RegClass::Float => F64,
-            regalloc2::RegClass::Vector => unreachable!(),
+            regalloc2::RegClass::Vector => I8X16,
         }
     }
 
@@ -770,7 +770,7 @@ impl MachInst for Inst {
                 let idx = (ty.bytes().ilog2() - 1) as usize;
                 let ty = &SIMD_TYPES[idx][..];
 
-                Ok((&[RegClass::Float], ty))
+                Ok((&[RegClass::Vector], ty))
             }
             _ => Err(CodegenError::Unsupported(format!(
                 "Unexpected SSA-value type: {}",
@@ -830,24 +830,13 @@ pub fn reg_name(reg: Reg) -> String {
                 28..=31 => format!("ft{}", real.hw_enc() - 20),
                 _ => unreachable!(),
             },
-            RegClass::Vector => unreachable!(),
+            RegClass::Vector => format!("v{}", real.hw_enc()),
         },
         None => {
             format!("{:?}", reg)
         }
     }
 }
-pub fn vec_reg_name(reg: Reg) -> String {
-    match reg.to_real_reg() {
-        Some(real) => {
-            assert_eq!(real.class(), RegClass::Float);
-            format!("v{}", real.hw_enc())
-        }
-        None => {
-            format!("{:?}", reg)
-        }
-    }
-}
 
 impl Inst {
     fn print_with_state(
@@ -859,10 +848,6 @@ impl Inst {
             let reg = allocs.next(reg);
             reg_name(reg)
         };
-        let format_vec_reg = |reg: Reg, allocs: &mut AllocationConsumer<'_>| -> String {
-            let reg = allocs.next(reg);
-            vec_reg_name(reg)
-        };
 
         let format_vec_amode = |amode: &VecAMode, allocs: &mut AllocationConsumer<'_>| -> String {
             match amode {
@@ -1568,9 +1553,9 @@ impl Inst {
                 vs2,
                 ref vstate,
             } => {
-                let vs1_s = format_vec_reg(vs1, allocs);
-                let vs2_s = format_vec_reg(vs2, allocs);
-                let vd_s = format_vec_reg(vd.to_reg(), allocs);
+                let vs1_s = format_reg(vs1, allocs);
+                let vs2_s = format_reg(vs2, allocs);
+                let vd_s = format_reg(vd.to_reg(), allocs);
 
                 // Note: vs2 and vs1 here are opposite to the standard scalar ordering.
                 // This is noted in Section 10.1 of the RISC-V Vector spec.
@@ -1583,8 +1568,8 @@ impl Inst {
                 vs2,
                 ref vstate,
             } => {
-                let vs2_s = format_vec_reg(vs2, allocs);
-                let vd_s = format_vec_reg(vd.to_reg(), allocs);
+                let vs2_s = format_reg(vs2, allocs);
+                let vd_s = format_reg(vd.to_reg(), allocs);
 
                 format!("{} {},{},{} {}", op, vd_s, vs2_s, imm, vstate)
             }
@@ -1601,7 +1586,7 @@ impl Inst {
                 ..
             } => {
                 let base = format_vec_amode(from, allocs);
-                let vd = format_vec_reg(to.to_reg(), allocs);
+                let vd = format_reg(to.to_reg(), allocs);
                 format!("vl{}.v {},{} {}", eew, vd, base, vstate)
             }
             Inst::VecStore {
@@ -1612,7 +1597,7 @@ impl Inst {
                 ..
             } => {
                 let dst = format_vec_amode(to, allocs);
-                let vs3 = format_vec_reg(*from, allocs);
+                let vs3 = format_reg(*from, allocs);
                 format!("vs{}.v {},{} {}", eew, vs3, dst, vstate)
             }
         }