Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix spillslot size bug in SIMD by removing type-dependent spillslot allocation. #3645

Merged
merged 1 commit into from
Jan 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1145,12 +1145,11 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}

fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
(RegClass::V128, F32) | (RegClass::V128, F64) => 1,
(RegClass::V128, _) => 2,
match rc {
RegClass::I64 => 1,
RegClass::V128 => 2,
_ => panic!("Unexpected register class!"),
}
}
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/arm32/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
unimplemented!("StructArgs not implemented for ARM32 yet");
}

fn get_number_of_spillslots_for_value(rc: RegClass, _ty: Type) -> u32 {
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 {
match rc {
RegClass::I32 => 1,
_ => panic!("Unexpected register class!"),
Expand Down
8 changes: 4 additions & 4 deletions cranelift/codegen/src/isa/s390x/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -685,11 +685,11 @@ impl ABIMachineSpec for S390xMachineDeps {
unimplemented!("StructArgs not implemented for S390X yet");
}

fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
(RegClass::F64, _) => 1,
match rc {
RegClass::I64 => 1,
RegClass::F64 => 1,
_ => panic!("Unexpected register class!"),
}
}
Expand Down
9 changes: 4 additions & 5 deletions cranelift/codegen/src/isa/x64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -719,12 +719,11 @@ impl ABIMachineSpec for X64ABIMachineSpec {
insts
}

fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 {
// We allocate in terms of 8-byte slots.
match (rc, ty) {
(RegClass::I64, _) => 1,
(RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1,
(RegClass::V128, _) => 2,
match rc {
RegClass::I64 => 1,
RegClass::V128 => 2,
_ => panic!("Unexpected register class!"),
}
}
Expand Down
19 changes: 5 additions & 14 deletions cranelift/codegen/src/machinst/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,22 +161,13 @@ pub trait ABICallee {
fn stack_args_size(&self) -> u32;

/// Get the spill-slot size.
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;
fn get_spillslot_size(&self, rc: RegClass) -> u32;

/// Generate a spill. The type, if known, is given; this can be used to
/// generate a store instruction optimized for the particular type rather
/// than the RegClass (e.g., only F64 that resides in a V128 register). If
/// no type is given, the implementation should spill the whole register.
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I;
/// Generate a spill.
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I;

/// Generate a reload (fill). As for spills, the type may be given to allow
/// a more optimized load instruction to be generated.
fn gen_reload(
&self,
to_reg: Writable<RealReg>,
from_slot: SpillSlot,
ty: Option<Type>,
) -> Self::I;
/// Generate a reload (fill).
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> Self::I;
}

/// Trait implemented by an object that tracks ABI-related state and can
Expand Down
73 changes: 19 additions & 54 deletions cranelift/codegen/src/machinst/abi_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -502,9 +502,8 @@ pub trait ABIMachineSpec {
size: usize,
) -> SmallVec<[Self::I; 8]>;

/// Get the number of spillslots required for the given register-class and
/// type.
fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32;
/// Get the number of spillslots required for the given register-class.
fn get_number_of_spillslots_for_value(rc: RegClass) -> u32;

/// Get the current virtual-SP offset from an instruction-emission state.
fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64;
Expand Down Expand Up @@ -658,6 +657,17 @@ fn get_special_purpose_param_register(
}
}

fn ty_from_class(class: RegClass) -> Type {
match class {
RegClass::I32 => I32,
RegClass::I64 => I64,
RegClass::F32 => F32,
RegClass::F64 => F64,
RegClass::V128 => I8X16,
_ => panic!("Unknown regclass: {:?}", class),
}
}

impl<M: ABIMachineSpec> ABICalleeImpl<M> {
/// Create a new body ABI instance.
pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult<Self> {
Expand Down Expand Up @@ -856,26 +866,6 @@ fn generate_gv<M: ABIMachineSpec>(
}
}

/// Return a type either from an optional type hint, or if not, from the default
/// type associated with the given register's class. This is used to generate
/// loads/spills appropriately given the type of value loaded/stored (which may
/// be narrower than the spillslot). We usually have the type because the
/// regalloc usually provides the vreg being spilled/reloaded, and we know every
/// vreg's type. However, the regalloc *can* request a spill/reload without an
/// associated vreg when needed to satisfy a safepoint (which requires all
/// ref-typed values, even those in real registers in the original vcode, to be
/// in spillslots).
fn ty_from_ty_hint_or_reg_class<M: ABIMachineSpec>(r: Reg, ty: Option<Type>) -> Type {
match (ty, r.get_class()) {
// If the type is provided
(Some(t), _) => t,
// If no type is provided, this should be a register spill for a
// safepoint, so we only expect I32/I64 (integer) registers.
(None, rc) if rc == M::word_reg_class() => M::word_type(),
_ => panic!("Unexpected register class!"),
}
}

fn gen_load_stack_multi<M: ABIMachineSpec>(
from: StackAMode,
dst: ValueRegs<Writable<Reg>>,
Expand Down Expand Up @@ -1203,14 +1193,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);

// Integer types smaller than word size have been spilled as words below,
// and therefore must be reloaded in the same type.
let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
M::word_type()
} else {
ty
};

gen_load_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), into_regs, ty)
}

Expand All @@ -1227,18 +1209,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);

// When reloading from a spill slot, we might have lost information about real integer
// types. For instance, on the x64 backend, a zero-extension can become spurious and
// optimized into a move, causing vregs of types I32 and I64 to share the same coalescing
// equivalency class. As a matter of fact, such a value can be spilled as an I32 and later
// reloaded as an I64; to make sure the high bits are always defined, do a word-sized store
// all the time, in this case.
let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
M::word_type()
} else {
ty
};

gen_store_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty)
}

Expand Down Expand Up @@ -1383,25 +1353,20 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.sig.stack_arg_space as u32
}

fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
M::get_number_of_spillslots_for_value(rc, ty)
fn get_spillslot_size(&self, rc: RegClass) -> u32 {
M::get_number_of_spillslots_for_value(rc)
}

fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I {
let ty = ty_from_ty_hint_or_reg_class::<M>(from_reg.to_reg(), ty);
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I {
let ty = ty_from_class(from_reg.to_reg().get_class());
self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg()))
.into_iter()
.next()
.unwrap()
}

fn gen_reload(
&self,
to_reg: Writable<RealReg>,
from_slot: SpillSlot,
ty: Option<Type>,
) -> Self::I {
let ty = ty_from_ty_hint_or_reg_class::<M>(to_reg.to_reg().to_reg(), ty);
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> Self::I {
let ty = ty_from_class(to_reg.to_reg().get_class());
self.load_spillslot(
from_slot,
ty,
Expand Down
15 changes: 6 additions & 9 deletions cranelift/codegen/src/machinst/vcode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -688,24 +688,21 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
self.vreg_types.len()
}

fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 {
let ty = self.vreg_type(vreg);
self.abi.get_spillslot_size(regclass, ty)
fn get_spillslot_size(&self, regclass: RegClass, _: VirtualReg) -> u32 {
self.abi.get_spillslot_size(regclass)
}

fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: Option<VirtualReg>) -> I {
let ty = vreg.map(|v| self.vreg_type(v));
self.abi.gen_spill(to_slot, from_reg, ty)
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, _: Option<VirtualReg>) -> I {
self.abi.gen_spill(to_slot, from_reg)
}

fn gen_reload(
&self,
to_reg: Writable<RealReg>,
from_slot: SpillSlot,
vreg: Option<VirtualReg>,
_: Option<VirtualReg>,
) -> I {
let ty = vreg.map(|v| self.vreg_type(v));
self.abi.gen_reload(to_reg, from_slot, ty)
self.abi.gen_reload(to_reg, from_slot)
}

fn gen_move(&self, to_reg: Writable<RealReg>, from_reg: RealReg, vreg: VirtualReg) -> I {
Expand Down
32 changes: 16 additions & 16 deletions cranelift/filetests/filetests/isa/aarch64/call.clif
Original file line number Diff line number Diff line change
Expand Up @@ -133,28 +133,28 @@ block0:

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #32
; nextln: sub sp, sp, #48
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str s0, [sp]
; nextln: str q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str d0, [sp, #8]
; nextln: str q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str d0, [sp, #16]
; nextln: str q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr s0, [sp]
; nextln: ldr q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr d0, [sp, #8]
; nextln: ldr q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr d0, [sp, #16]
; nextln: ldr q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: add sp, sp, #32
; nextln: add sp, sp, #48
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

Expand Down Expand Up @@ -223,28 +223,28 @@ block0:

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #32
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str s0, [sp]
; nextln: sub sp, sp, #48
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str d0, [sp, #8]
; nextln: str q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: str q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr s0, [sp]
; nextln: str q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr d0, [sp, #8]
; nextln: ldr q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: add sp, sp, #32
; nextln: ldr q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: add sp, sp, #48
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

Expand Down
Loading