Skip to content

Commit

Permalink
riscv64: Use Vector RegClass for Vectors (#6366)
Browse files Browse the repository at this point in the history
* riscv64: Use Vector RegClass

* riscv64: Add assert to `Inst::Mov`

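`Inst::Mov` isn't ready to handle vector registers yet, so assert that neither operand is in the Vector register class.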

* riscv64: Add SIMD vconst large test

This was meant to exercise the changes in #6324 but was failing on RISC-V due to some missing regalloc bits.

* riscv64: Restrict spill slot size

* riscv64: Mark v0 as preferred

* riscv64: Const compute clobbers
  • Loading branch information
afonso360 authored May 16, 2023
1 parent 5471fde commit b13bbc8
Show file tree
Hide file tree
Showing 28 changed files with 1,860 additions and 460 deletions.
6 changes: 5 additions & 1 deletion cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,11 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}

fn get_number_of_spillslots_for_value(rc: RegClass, vector_size: u32) -> u32 {
fn get_number_of_spillslots_for_value(
rc: RegClass,
vector_size: u32,
_isa_flags: &Self::F,
) -> u32 {
assert_eq!(vector_size % 8, 0);
// We allocate in terms of 8-byte slots.
match rc {
Expand Down
169 changes: 130 additions & 39 deletions cranelift/codegen/src/isa/riscv64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,40 @@ pub struct Riscv64MachineDeps;

impl IsaFlags for RiscvFlags {}

impl RiscvFlags {
    /// Returns the minimum vector register size implied by the enabled ISA
    /// flags, or `0` when none of the vector-length flags is enabled.
    ///
    /// The flags are checked from the largest size to the smallest, so the
    /// strongest enabled guarantee wins. The result is clamped to 1024.
    pub(crate) fn min_vec_reg_size(&self) -> u64 {
        // Ordered from largest to smallest: the first enabled entry decides.
        let candidates: [(bool, u64); 13] = [
            (self.has_zvl65536b(), 65536),
            (self.has_zvl32768b(), 32768),
            (self.has_zvl16384b(), 16384),
            (self.has_zvl8192b(), 8192),
            (self.has_zvl4096b(), 4096),
            (self.has_zvl2048b(), 2048),
            (self.has_zvl1024b(), 1024),
            (self.has_zvl512b(), 512),
            (self.has_zvl256b(), 256),
            // Claiming the Application Profile V extension requires a minimum
            // register size of 128, i.e. V implies Zvl128b.
            (self.has_v(), 128),
            (self.has_zvl128b(), 128),
            (self.has_zvl64b(), 64),
            (self.has_zvl32b(), 32),
        ];

        candidates
            .iter()
            .copied()
            .find(|&(enabled, _)| enabled)
            // Due to a limitation in regalloc2 we can't support types larger
            // than 1024 bytes, so the reported size is capped here.
            // NOTE(review): the Zvl*b values look like bit widths rather than
            // bytes — confirm the intended unit of this cap.
            .map(|(_, size)| size.min(1024))
            .unwrap_or(0)
    }
}

impl ABIMachineSpec for Riscv64MachineDeps {
type I = Inst;
type F = RiscvFlags;
Expand Down Expand Up @@ -415,9 +449,9 @@ impl ABIMachineSpec for Riscv64MachineDeps {
for reg in clobbered_callee_saves {
let r_reg = reg.to_reg();
let ty = match r_reg.class() {
regalloc2::RegClass::Int => I64,
regalloc2::RegClass::Float => F64,
RegClass::Vector => unreachable!(),
RegClass::Int => I64,
RegClass::Float => F64,
RegClass::Vector => unimplemented!("Vector Clobber Saves"),
};
if flags.unwind_info() {
insts.push(Inst::Unwind {
Expand Down Expand Up @@ -462,9 +496,9 @@ impl ABIMachineSpec for Riscv64MachineDeps {
for reg in &clobbered_callee_saves {
let rreg = reg.to_reg();
let ty = match rreg.class() {
regalloc2::RegClass::Int => I64,
regalloc2::RegClass::Float => F64,
RegClass::Vector => unreachable!(),
RegClass::Int => I64,
RegClass::Float => F64,
RegClass::Vector => unimplemented!("Vector Clobber Restores"),
};
insts.push(Self::gen_load_stack(
StackAMode::SPOffset(-cur_offset, ty),
Expand Down Expand Up @@ -572,12 +606,16 @@ impl ABIMachineSpec for Riscv64MachineDeps {
insts
}

fn get_number_of_spillslots_for_value(rc: RegClass, _target_vector_bytes: u32) -> u32 {
fn get_number_of_spillslots_for_value(
rc: RegClass,
_target_vector_bytes: u32,
isa_flags: &RiscvFlags,
) -> u32 {
// We allocate in terms of 8-byte slots.
match rc {
RegClass::Int => 1,
RegClass::Float => 1,
RegClass::Vector => unreachable!(),
RegClass::Vector => (isa_flags.min_vec_reg_size() / 8) as u32,
}
}

Expand All @@ -592,20 +630,7 @@ impl ABIMachineSpec for Riscv64MachineDeps {
}

fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet {
let mut v = PRegSet::empty();
for (k, need_save) in CALLER_SAVE_X_REG.iter().enumerate() {
if !*need_save {
continue;
}
v.add(px_reg(k));
}
for (k, need_save) in CALLER_SAVE_F_REG.iter().enumerate() {
if !*need_save {
continue;
}
v.add(pf_reg(k));
}
v
CLOBBERS
}

fn get_clobbered_callee_saves(
Expand Down Expand Up @@ -652,24 +677,12 @@ impl ABIMachineSpec for Riscv64MachineDeps {
}
}

const CALLER_SAVE_X_REG: [bool; 32] = [
false, true, false, false, false, true, true, true, // 0-7
false, false, true, true, true, true, true, true, // 8-15
true, true, false, false, false, false, false, false, // 16-23
false, false, false, false, true, true, true, true, // 24-31
];
const CALLEE_SAVE_X_REG: [bool; 32] = [
false, false, true, false, false, false, false, false, // 0-7
true, true, false, false, false, false, false, false, // 8-15
false, false, true, true, true, true, true, true, // 16-23
true, true, true, true, false, false, false, false, // 24-31
];
const CALLER_SAVE_F_REG: [bool; 32] = [
true, true, true, true, true, true, true, true, // 0-7
false, true, true, true, true, true, true, true, // 8-15
true, true, false, false, false, false, false, false, // 16-23
false, false, false, false, true, true, true, true, // 24-31
];
const CALLEE_SAVE_F_REG: [bool; 32] = [
false, false, false, false, false, false, false, false, // 0-7
true, false, false, false, false, false, false, false, // 8-15
Expand All @@ -680,10 +693,11 @@ const CALLEE_SAVE_F_REG: [bool; 32] = [
/// This should be the registers that must be saved by callee.
#[inline]
fn is_reg_saved_in_prologue(_conv: CallConv, reg: RealReg) -> bool {
if reg.class() == RegClass::Int {
CALLEE_SAVE_X_REG[reg.hw_enc() as usize]
} else {
CALLEE_SAVE_F_REG[reg.hw_enc() as usize]
match reg.class() {
RegClass::Int => CALLEE_SAVE_X_REG[reg.hw_enc() as usize],
RegClass::Float => CALLEE_SAVE_F_REG[reg.hw_enc() as usize],
// All vector registers are caller saved.
RegClass::Vector => false,
}
}

Expand All @@ -697,12 +711,89 @@ fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
RegClass::Float => {
clobbered_size += 8;
}
RegClass::Vector => unreachable!(),
RegClass::Vector => unimplemented!("Vector Size Clobbered"),
}
}
align_to(clobbered_size, 16)
}

/// Builds the register set used to initialise `CLOBBERS`, which
/// `get_regs_clobbered_by_call` returns.
///
/// Written as a `const fn` builder chain so the whole set can be evaluated
/// in the `CLOBBERS` constant initialiser.
const fn clobbers() -> PRegSet {
PRegSet::empty()
// X Regs: x1, x5-x7, x10-x17 and x28-x31.
// NOTE(review): presumably ra, t0-t2, a0-a7 and t3-t6 of the standard
// RISC-V calling convention — confirm against the ABI document.
.with(px_reg(1))
.with(px_reg(5))
.with(px_reg(6))
.with(px_reg(7))
.with(px_reg(10))
.with(px_reg(11))
.with(px_reg(12))
.with(px_reg(13))
.with(px_reg(14))
.with(px_reg(15))
.with(px_reg(16))
.with(px_reg(17))
.with(px_reg(28))
.with(px_reg(29))
.with(px_reg(30))
.with(px_reg(31))
// F Regs: f0-f7, f9-f17 and f28-f31 (f8 and f18-f27 are not listed).
.with(pf_reg(0))
.with(pf_reg(1))
.with(pf_reg(2))
.with(pf_reg(3))
.with(pf_reg(4))
.with(pf_reg(5))
.with(pf_reg(6))
.with(pf_reg(7))
.with(pf_reg(9))
.with(pf_reg(10))
.with(pf_reg(11))
.with(pf_reg(12))
.with(pf_reg(13))
.with(pf_reg(14))
.with(pf_reg(15))
.with(pf_reg(16))
.with(pf_reg(17))
.with(pf_reg(28))
.with(pf_reg(29))
.with(pf_reg(30))
.with(pf_reg(31))
// V Regs - All vector regs get clobbered (v0-v31)
.with(pv_reg(0))
.with(pv_reg(1))
.with(pv_reg(2))
.with(pv_reg(3))
.with(pv_reg(4))
.with(pv_reg(5))
.with(pv_reg(6))
.with(pv_reg(7))
.with(pv_reg(8))
.with(pv_reg(9))
.with(pv_reg(10))
.with(pv_reg(11))
.with(pv_reg(12))
.with(pv_reg(13))
.with(pv_reg(14))
.with(pv_reg(15))
.with(pv_reg(16))
.with(pv_reg(17))
.with(pv_reg(18))
.with(pv_reg(19))
.with(pv_reg(20))
.with(pv_reg(21))
.with(pv_reg(22))
.with(pv_reg(23))
.with(pv_reg(24))
.with(pv_reg(25))
.with(pv_reg(26))
.with(pv_reg(27))
.with(pv_reg(28))
.with(pv_reg(29))
.with(pv_reg(30))
.with(pv_reg(31))
}

/// Set of registers clobbered by a call, evaluated once in this constant
/// initialiser via `clobbers()` and returned by `get_regs_clobbered_by_call`.
const CLOBBERS: PRegSet = clobbers();

impl Riscv64MachineDeps {
fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
insts.reserve(probe_count as usize);
Expand Down
2 changes: 2 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -927,6 +927,8 @@ impl MachInstEmit for Inst {
}

&Inst::Mov { rd, rm, ty } => {
debug_assert_ne!(rd.to_reg().class(), RegClass::Vector);
debug_assert_ne!(rm.class(), RegClass::Vector);
if rd.to_reg() != rm {
let rm = allocs.next(rm);
let rd = allocs.next_writable(rd);
Expand Down
37 changes: 11 additions & 26 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use super::lower::isle::generated_code::{VecAMode, VecElementWidth};
use crate::binemit::{Addend, CodeOffset, Reloc};
pub use crate::ir::condcodes::IntCC;
use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, R32, R64};
use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64};

pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel};
use crate::isa::{CallConv, FunctionAlignment};
Expand Down Expand Up @@ -667,7 +667,7 @@ impl MachInst for Inst {
match rc {
regalloc2::RegClass::Int => I64,
regalloc2::RegClass::Float => F64,
regalloc2::RegClass::Vector => unreachable!(),
regalloc2::RegClass::Vector => I8X16,
}
}

Expand Down Expand Up @@ -770,7 +770,7 @@ impl MachInst for Inst {
let idx = (ty.bytes().ilog2() - 1) as usize;
let ty = &SIMD_TYPES[idx][..];

Ok((&[RegClass::Float], ty))
Ok((&[RegClass::Vector], ty))
}
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
Expand Down Expand Up @@ -830,24 +830,13 @@ pub fn reg_name(reg: Reg) -> String {
28..=31 => format!("ft{}", real.hw_enc() - 20),
_ => unreachable!(),
},
RegClass::Vector => unreachable!(),
RegClass::Vector => format!("v{}", real.hw_enc()),
},
None => {
format!("{:?}", reg)
}
}
}
pub fn vec_reg_name(reg: Reg) -> String {
match reg.to_real_reg() {
Some(real) => {
assert_eq!(real.class(), RegClass::Float);
format!("v{}", real.hw_enc())
}
None => {
format!("{:?}", reg)
}
}
}

impl Inst {
fn print_with_state(
Expand All @@ -859,10 +848,6 @@ impl Inst {
let reg = allocs.next(reg);
reg_name(reg)
};
let format_vec_reg = |reg: Reg, allocs: &mut AllocationConsumer<'_>| -> String {
let reg = allocs.next(reg);
vec_reg_name(reg)
};

let format_vec_amode = |amode: &VecAMode, allocs: &mut AllocationConsumer<'_>| -> String {
match amode {
Expand Down Expand Up @@ -1568,9 +1553,9 @@ impl Inst {
vs2,
ref vstate,
} => {
let vs1_s = format_vec_reg(vs1, allocs);
let vs2_s = format_vec_reg(vs2, allocs);
let vd_s = format_vec_reg(vd.to_reg(), allocs);
let vs1_s = format_reg(vs1, allocs);
let vs2_s = format_reg(vs2, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);

// Note: vs2 and vs1 here are opposite to the standard scalar ordering.
// This is noted in Section 10.1 of the RISC-V Vector spec.
Expand All @@ -1583,8 +1568,8 @@ impl Inst {
vs2,
ref vstate,
} => {
let vs2_s = format_vec_reg(vs2, allocs);
let vd_s = format_vec_reg(vd.to_reg(), allocs);
let vs2_s = format_reg(vs2, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);

format!("{} {},{},{} {}", op, vd_s, vs2_s, imm, vstate)
}
Expand All @@ -1601,7 +1586,7 @@ impl Inst {
..
} => {
let base = format_vec_amode(from, allocs);
let vd = format_vec_reg(to.to_reg(), allocs);
let vd = format_reg(to.to_reg(), allocs);
format!("vl{}.v {},{} {}", eew, vd, base, vstate)
}
Inst::VecStore {
Expand All @@ -1612,7 +1597,7 @@ impl Inst {
..
} => {
let dst = format_vec_amode(to, allocs);
let vs3 = format_vec_reg(*from, allocs);
let vs3 = format_reg(*from, allocs);
format!("vs{}.v {},{} {}", eew, vs3, dst, vstate)
}
}
Expand Down
Loading

0 comments on commit b13bbc8

Please sign in to comment.