Skip to content

Commit

Permalink
riscv64: Load constants from the constant pool (#6933)
Browse files Browse the repository at this point in the history
* riscv64: Use `Inst::load_u64_constant` when loading load offsets

* riscv64: Load ISLE constants from pool

* riscv64: Delete `offset32_imm`

* riscv64: Delete `pack_float_rounding_mode`

* riscv64: Delete `LoadConst32` instruction

* riscv64: Delete LoadConstant code

Move that logic into `LoadInlineConst`

* riscv64: Fix `LoadAddr` instructions for RegOffset

We were accidentally overriding the offset with the base.

* riscv64: Update inst_worst_case_size
  • Loading branch information
afonso360 authored Aug 30, 2023
1 parent 2bb65c9 commit 134dddc
Show file tree
Hide file tree
Showing 29 changed files with 343 additions and 381 deletions.
58 changes: 26 additions & 32 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,9 @@
(rd WritableReg)
(imm Imm20))

(LoadConst32
(rd WritableReg)
(imm u32))

(LoadConst64
(LoadInlineConst
(rd WritableReg)
(ty Type)
(imm u64))

(Auipc
Expand Down Expand Up @@ -1525,6 +1522,7 @@
;; Immediate Loading rules
;; TODO: Loading the zero reg directly causes a bunch of regalloc errors, we should look into it.
;; TODO: We can load constants like 0x3ff0000000000000 by loading the bits then doing a shift left.
;; TODO: Load floats using `fld` instead of `ld`
(decl imm (Type u64) Reg)

;; Refs get loaded as integers.
Expand Down Expand Up @@ -1553,10 +1551,12 @@
(if-let (i64_generate_imm imm20 imm12) (i64_sextend_u64 ty c))
(rv_addi (rv_lui imm20) imm12))

;; Otherwise we fall back to loading the immediate from memory
;; TODO: This doesen't yet emit the constant into the memory pool, but it would
;; be pretty neat if it did.
(rule 0 (imm (ty_int ty) c) (emit_load_const_64 c))
;; Otherwise we fall back to loading the immediate from the constant pool.
(rule 0 (imm (ty_int ty) c)
(emit_load
(LoadOP.Ld)
(mem_flags_trusted)
(gen_const_amode (emit_u64_le_const c))))

;; Imm12 Rules

Expand Down Expand Up @@ -1679,10 +1679,6 @@
(_ Unit (emit (MInst.AluRRR op dst src1 src2))))
dst))


(decl pack_float_rounding_mode (FRM) OptionFloatRoundingMode)
(extern constructor pack_float_rounding_mode pack_float_rounding_mode)

;; Helper for emitting `MInst.AluRRR` instructions.
(decl fpu_rrr (FpuOPRRR Type Reg Reg) Reg)
(rule (fpu_rrr op ty src1 src2)
Expand Down Expand Up @@ -1714,12 +1710,6 @@
(_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero)))))
dst))

;; Helper for emitting the `LoadConst64` instruction.
(decl emit_load_const_64 (u64) XReg)
(rule (emit_load_const_64 imm)
(let ((dst WritableXReg (temp_writable_xreg))
(_ Unit (emit (MInst.LoadConst64 dst imm))))
dst))

;; Helper for emitting the `Lui` instruction.
;; TODO: This should be something like `emit_u_type`. And should share the
Expand Down Expand Up @@ -2298,24 +2288,28 @@
(decl gen_const_amode (VCodeConstant) AMode)
(extern constructor gen_const_amode gen_const_amode)

(decl offset32_imm (i32) Offset32)
(extern constructor offset32_imm offset32_imm)

;; Returns a canonical type for a LoadOP. We only return I64 or F64.
(decl load_op_reg_type (LoadOP) Type)
(rule 1 (load_op_reg_type (LoadOP.Fld)) $F64)
(rule 1 (load_op_reg_type (LoadOP.Flw)) $F64)
(rule 0 (load_op_reg_type _) $I64)

(decl emit_load (LoadOP MemFlags AMode) Reg)
(rule (emit_load op flags from)
(let ((dst WritableReg (temp_writable_reg (load_op_reg_type op)))
(_ Unit (emit (MInst.Load dst op flags from))))
dst))

;; helper function to load from memory.
(decl gen_load (Reg Offset32 LoadOP MemFlags Type) Reg)
(rule
(gen_load p offset op flags ty)
(let
((tmp WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.Load tmp op flags (gen_amode p offset $I64)))))
tmp))
(rule (gen_load p offset op flags ty)
(emit_load op flags (gen_amode p offset $I64)))

(decl gen_load_128 (Reg Offset32 MemFlags) ValueRegs)
(rule
(gen_load_128 p offset flags)
(let
((low Reg (gen_load p offset (LoadOP.Ld) flags $I64))
(high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64)))
(rule (gen_load_128 p offset flags)
(let ((low Reg (gen_load p offset (LoadOP.Ld) flags $I64))
(high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64)))
(value_regs low high)))

(decl default_memflags () MemFlags)
Expand Down
128 changes: 40 additions & 88 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,78 +25,6 @@ impl EmitInfo {
}
}

/// load constant by put the constant in the code stream.
/// calculate the pc and using load instruction.
/// This is only allow used in the emit stage.
/// Because of those instruction must execute together.
/// see https://github.com/bytecodealliance/wasmtime/pull/5612
#[derive(Clone, Copy)]
pub(crate) enum LoadConstant {
U32(u32),
U64(u64),
}

impl LoadConstant {
fn to_le_bytes(self) -> Vec<u8> {
match self {
LoadConstant::U32(x) => Vec::from_iter(x.to_le_bytes().into_iter()),
LoadConstant::U64(x) => Vec::from_iter(x.to_le_bytes().into_iter()),
}
}
fn load_op(self) -> LoadOP {
match self {
LoadConstant::U32(_) => LoadOP::Lwu,
LoadConstant::U64(_) => LoadOP::Ld,
}
}
fn load_ty(self) -> Type {
match self {
LoadConstant::U32(_) => R32,
LoadConstant::U64(_) => R64,
}
}

pub(crate) fn load_constant<F: FnMut(Type) -> Writable<Reg>>(
self,
rd: Writable<Reg>,
alloc_tmp: &mut F,
) -> SmallInstVec<Inst> {
let mut insts = SmallInstVec::new();
// get current pc.
let pc = alloc_tmp(I64);
insts.push(Inst::Auipc {
rd: pc,
imm: Imm20 { bits: 0 },
});
// load
insts.push(Inst::Load {
rd,
op: self.load_op(),
flags: MemFlags::new(),
from: AMode::RegOffset(pc.to_reg(), 12, self.load_ty()),
});
let data = self.to_le_bytes();
// jump over.
insts.push(Inst::Jal {
dest: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE + data.len() as i32),
});
insts.push(Inst::RawData { data });
insts
}

// load and perform an extra add.
pub(crate) fn load_constant_and_add(self, rd: Writable<Reg>, rs: Reg) -> SmallInstVec<Inst> {
let mut insts = self.load_constant(rd, &mut |_| rd);
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd,
rs1: rd.to_reg(),
rs2: rs,
});
insts
}
}

pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap()
}
Expand Down Expand Up @@ -399,8 +327,7 @@ impl Inst {
| Inst::BrTable { .. }
| Inst::Auipc { .. }
| Inst::Lui { .. }
| Inst::LoadConst32 { .. }
| Inst::LoadConst64 { .. }
| Inst::LoadInlineConst { .. }
| Inst::AluRRR { .. }
| Inst::FpuRRR { .. }
| Inst::AluRRImm12 { .. }
Expand Down Expand Up @@ -533,19 +460,34 @@ impl MachInstEmit for Inst {
let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12);
sink.put4(x);
}
&Inst::LoadConst32 { rd, imm } => {
&Inst::LoadInlineConst { rd, ty, imm } => {
let rd = allocs.next_writable(rd);
LoadConstant::U32(imm)
.load_constant(rd, &mut |_| rd)
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
}
&Inst::LoadConst64 { rd, imm } => {
let rd = allocs.next_writable(rd);
LoadConstant::U64(imm)
.load_constant(rd, &mut |_| rd)
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));

let data = &imm.to_le_bytes()[..ty.bytes() as usize];

let label_data: MachLabel = sink.get_label();
let label_end: MachLabel = sink.get_label();

// Load into rd
Inst::Load {
rd,
op: LoadOP::from_type(ty),
flags: MemFlags::new(),
from: AMode::Label(label_data),
}
.emit(&[], sink, emit_info, state);

// Jump over the inline pool
Inst::Jal {
dest: BranchTarget::Label(label_end),
}
.emit(&[], sink, emit_info, state);

// Emit the inline data
sink.bind_label(label_data, &mut state.ctrl_plane);
Inst::RawData { data: data.into() }.emit(&[], sink, emit_info, state);

sink.bind_label(label_end, &mut state.ctrl_plane);
}
&Inst::FpuRR {
frm,
Expand Down Expand Up @@ -666,6 +608,8 @@ impl MachInstEmit for Inst {
let offset = from.get_offset_with_state(state);
let offset_imm12 = Imm12::maybe_from_i64(offset);

// TODO: We shouldn't just fall back to `LoadAddr` immediately. For `MachLabel`s
// we should try to emit the `auipc` and add a relocation on this load.
let (addr, imm12) = match (base, offset_imm12) {
// If the offset fits into an imm12 we can directly encode it.
(Some(base), Some(imm12)) => (base, imm12),
Expand Down Expand Up @@ -693,6 +637,8 @@ impl MachInstEmit for Inst {
let offset = to.get_offset_with_state(state);
let offset_imm12 = Imm12::maybe_from_i64(offset);

// TODO: We shouldn't just fall back to `LoadAddr` immediately. For `MachLabel`s
// we should try to emit the `auipc` and add a relocation on this store.
let (addr, imm12) = match (base, offset_imm12) {
// If the offset fits into an imm12 we can directly encode it.
(Some(base), Some(imm12)) => (base, imm12),
Expand Down Expand Up @@ -1265,8 +1211,14 @@ impl MachInstEmit for Inst {
.emit(&[], sink, emit_info, state);
}
(_, Some(rs), None) => {
LoadConstant::U64(offset as u64)
.load_constant_and_add(rd, rs)
let mut insts = Inst::load_constant_u64(rd, offset as u64);
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd,
rs1: rd.to_reg(),
rs2: rs,
});
insts
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
}
Expand Down
29 changes: 13 additions & 16 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,16 +227,23 @@ impl Inst {
pub(crate) fn load_constant_u32(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value);
insts.unwrap_or_else(|| {
smallvec![Inst::LoadConst32 {
smallvec![Inst::LoadInlineConst {
rd,
imm: value as u32
ty: I32,
imm: value
}]
})
}

pub fn load_constant_u64(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value);
insts.unwrap_or_else(|| smallvec![Inst::LoadConst64 { rd, imm: value }])
insts.unwrap_or_else(|| {
smallvec![Inst::LoadInlineConst {
rd,
ty: I64,
imm: value
}]
})
}

pub(crate) fn construct_auipc_and_jalr(
Expand Down Expand Up @@ -377,8 +384,7 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
}
&Inst::Auipc { rd, .. } => collector.reg_def(rd),
&Inst::Lui { rd, .. } => collector.reg_def(rd),
&Inst::LoadConst32 { rd, .. } => collector.reg_def(rd),
&Inst::LoadConst64 { rd, .. } => collector.reg_def(rd),
&Inst::LoadInlineConst { rd, .. } => collector.reg_def(rd),
&Inst::AluRRR { rd, rs1, rs2, .. } => {
collector.reg_use(rs1);
collector.reg_use(rs2);
Expand Down Expand Up @@ -958,7 +964,7 @@ impl MachInst for Inst {

fn worst_case_size() -> CodeOffset {
// calculate by test function riscv64_worst_case_instruction_size()
116
124
}

fn ref_type_regclass(_settings: &settings::Flags) -> RegClass {
Expand Down Expand Up @@ -1371,16 +1377,7 @@ impl Inst {
&Inst::Lui { rd, ref imm } => {
format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits)
}
&Inst::LoadConst32 { rd, imm } => {
let rd = format_reg(rd.to_reg(), allocs);
let mut buf = String::new();
write!(&mut buf, "auipc {},0; ", rd).unwrap();
write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap();
write!(&mut buf, "j {}; ", Inst::INSTRUCTION_SIZE + 4).unwrap();
write!(&mut buf, ".4byte 0x{:x}", imm).unwrap();
buf
}
&Inst::LoadConst64 { rd, imm } => {
&Inst::LoadInlineConst { rd, imm, .. } => {
let rd = format_reg(rd.to_reg(), allocs);
let mut buf = String::new();
write!(&mut buf, "auipc {},0; ", rd).unwrap();
Expand Down
7 changes: 0 additions & 7 deletions cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -440,17 +440,10 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
self.backend.isa_flags.has_zbs()
}

fn offset32_imm(&mut self, offset: i32) -> Offset32 {
Offset32::new(offset)
}
fn default_memflags(&mut self) -> MemFlags {
MemFlags::new()
}

fn pack_float_rounding_mode(&mut self, f: &FRM) -> OptionFloatRoundingMode {
Some(*f)
}

fn int_convert_2_float_op(&mut self, from: Type, is_signed: bool, to: Type) -> FpuOPRR {
FpuOPRR::int_convert_2_float_op(from, is_signed, to)
}
Expand Down
Loading

0 comments on commit 134dddc

Please sign in to comment.