Skip to content

Commit

Permalink
Cranelift: add alignment parameter to stack slots. (#8635)
Browse files Browse the repository at this point in the history
* Cranelift: add alignment parameter to stack slots.

Fixes #6716.

Currently, stack slots on the stack are aligned only to a machine-word
boundary. This is insufficient for some use-cases: for example, storing
SIMD data or structs that require a larger alignment.

This PR adds a parameter to the `StackSlotData` to specify alignment,
and the associated logic to the CLIF parser and printer. It updates the
shared ABI code to compute the stackslot layout taking the alignment
into account. In order to ensure the alignment is always a power of two,
it is stored as a shift amount (log2 of actual alignment) in the IR.

* Apply suggestions from code review

Co-authored-by: Trevor Elliott <awesomelyawesome@gmail.com>

* Update filetest.

* Update alignment to ValRaw vector.

* Fix printer test.

* cargo-fmt from suggestion update.

---------

Co-authored-by: Trevor Elliott <awesomelyawesome@gmail.com>
  • Loading branch information
cfallin and elliottt authored May 16, 2024
1 parent 934bf9d commit 45bc736
Show file tree
Hide file tree
Showing 12 changed files with 140 additions and 47 deletions.
62 changes: 27 additions & 35 deletions cranelift/codegen/src/ir/stackslot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,32 +70,38 @@ pub struct StackSlotData {

/// Size of stack slot in bytes.
pub size: StackSize,

/// Alignment of stack slot as a power-of-two exponent (log2
/// value). The stack slot will be at least this aligned; it may
/// be aligned according to other considerations, such as minimum
/// stack slot size or machine word size, as well.
pub align_shift: u8,
}

impl StackSlotData {
/// Create a stack slot with the specified byte size.
pub fn new(kind: StackSlotKind, size: StackSize) -> Self {
Self { kind, size }
}

/// Get the alignment in bytes of this stack slot given the stack pointer alignment.
pub fn alignment(&self, max_align: StackSize) -> StackSize {
debug_assert!(max_align.is_power_of_two());
if self.kind == StackSlotKind::ExplicitDynamicSlot {
max_align
} else {
// We want to find the largest power of two that divides both `self.size` and `max_align`.
// That is the same as isolating the rightmost bit in `x`.
let x = self.size | max_align;
// C.f. Hacker's delight.
x & x.wrapping_neg()
/// Create a stack slot with the specified byte size and alignment.
pub fn new(kind: StackSlotKind, size: StackSize, align_shift: u8) -> Self {
Self {
kind,
size,
align_shift,
}
}
}

impl fmt::Display for StackSlotData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} {}", self.kind, self.size)
if self.align_shift != 0 {
write!(
f,
"{} {}, align = {}",
self.kind,
self.size,
1u32 << self.align_shift
)
} else {
write!(f, "{} {}", self.kind, self.size)
}
}
}

Expand Down Expand Up @@ -146,8 +152,10 @@ mod tests {
fn stack_slot() {
let mut func = Function::new();

let ss0 = func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4));
let ss1 = func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8));
let ss0 =
func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4, 0));
let ss1 =
func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8, 0));
assert_eq!(ss0.to_string(), "ss0");
assert_eq!(ss1.to_string(), "ss1");

Expand Down Expand Up @@ -197,20 +205,4 @@ mod tests {
"explicit_dynamic_slot dt1"
);
}

#[test]
fn alignment() {
let slot = StackSlotData::new(StackSlotKind::ExplicitSlot, 8);

assert_eq!(slot.alignment(4), 4);
assert_eq!(slot.alignment(8), 8);
assert_eq!(slot.alignment(16), 8);

let slot2 = StackSlotData::new(StackSlotKind::ExplicitSlot, 24);

assert_eq!(slot2.alignment(4), 4);
assert_eq!(slot2.alignment(8), 8);
assert_eq!(slot2.alignment(16), 8);
assert_eq!(slot2.alignment(32), 8);
}
}
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ mod tests {

let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));

let code = context
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ mod tests {

let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));

let code = context
Expand Down
4 changes: 2 additions & 2 deletions cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ mod tests {

let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));

let code = context
Expand Down Expand Up @@ -160,7 +160,7 @@ mod tests {

let mut context = Context::for_function(create_multi_return_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));

let code = context
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ mod tests {

let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));

let code = context
Expand Down
6 changes: 5 additions & 1 deletion cranelift/codegen/src/machinst/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1178,7 +1178,11 @@ impl<M: ABIMachineSpec> Callee<M> {
sized_stack_offset = sized_stack_offset
.checked_add(data.size)
.ok_or(CodegenError::ImplLimitExceeded)?;
let mask = M::word_bytes() - 1;
// Always at least machine-word-align slots, but also
// satisfy the user's requested alignment.
debug_assert!(data.align_shift < 32);
let align = std::cmp::max(M::word_bytes(), 1u32 << data.align_shift);
let mask = align - 1;
sized_stack_offset = checked_round_up(sized_stack_offset, mask)
.ok_or(CodegenError::ImplLimitExceeded)?;
debug_assert_eq!(stackslot.as_u32() as usize, sized_stackslots.len());
Expand Down
9 changes: 8 additions & 1 deletion cranelift/codegen/src/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ mod tests {
f.name = UserFuncName::testcase("foo");
assert_eq!(f.to_string(), "function %foo() fast {\n}\n");

f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4));
f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4, 0));
assert_eq!(
f.to_string(),
"function %foo() fast {\n ss0 = explicit_slot 4\n}\n"
Expand Down Expand Up @@ -569,6 +569,13 @@ mod tests {
f.to_string(),
"function %foo() fast {\n ss0 = explicit_slot 4\n\nblock0(v0: i8, v1: f32x4):\n return\n}\n"
);

let mut f = Function::new();
f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4, 2));
assert_eq!(
f.to_string(),
"function u0:0() fast {\n ss0 = explicit_slot 4, align = 4\n}\n"
);
}

#[test]
Expand Down
61 changes: 61 additions & 0 deletions cranelift/filetests/filetests/isa/x64/stackslot.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
test compile precise-output
target x86_64

function %f0(i64 vmctx) -> i64, i64, i64, i64 {
gv0 = vmctx
stack_limit = gv0
ss0 = explicit_slot 8, align=16
ss1 = explicit_slot 8, align=16
ss2 = explicit_slot 4
ss3 = explicit_slot 4

block0(v0: i64):
v1 = stack_addr.i64 ss0
v2 = stack_addr.i64 ss1
v3 = stack_addr.i64 ss2
v4 = stack_addr.i64 ss3
return v1, v2, v3, v4
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; movq %rdi, %r10
; addq %r10, $48, %r10
; cmpq %rsp, %r10
; jnbe #trap=stk_ovf
; subq %rsp, $48, %rsp
; block0:
; lea rsp(0 + virtual offset), %rax
; lea rsp(16 + virtual offset), %rdx
; lea rsp(32 + virtual offset), %r8
; lea rsp(40 + virtual offset), %r9
; movq %r8, 0(%rsi)
; movq %r9, 8(%rsi)
; addq %rsp, $48, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; movq %rdi, %r10
; addq $0x30, %r10
; cmpq %rsp, %r10
; ja 0x3b
; subq $0x30, %rsp
; block1: ; offset 0x18
; leaq (%rsp), %rax
; leaq 0x10(%rsp), %rdx
; leaq 0x20(%rsp), %r8
; leaq 0x28(%rsp), %r9
; movq %r8, (%rsi)
; movq %r9, 8(%rsi)
; addq $0x30, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq
; ud2 ; trap: stk_ovf

2 changes: 1 addition & 1 deletion cranelift/fuzzgen/src/function_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1605,7 +1605,7 @@ where
fn generate_stack_slots(&mut self, builder: &mut FunctionBuilder) -> Result<()> {
for _ in 0..self.param(&self.config.static_stack_slots_per_function)? {
let bytes = self.param(&self.config.static_stack_slot_size)? as u32;
let ss_data = StackSlotData::new(StackSlotKind::ExplicitSlot, bytes);
let ss_data = StackSlotData::new(StackSlotKind::ExplicitSlot, bytes, 0);
let slot = builder.create_sized_stack_slot(ss_data);

// Generate one Alias Analysis Category for each slot
Expand Down
33 changes: 30 additions & 3 deletions cranelift/reader/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,11 @@ impl Context {
fn add_ss(&mut self, ss: StackSlot, data: StackSlotData, loc: Location) -> ParseResult<()> {
self.map.def_ss(ss, loc)?;
while self.function.sized_stack_slots.next_key().index() <= ss.index() {
self.function
.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 0));
self.function.create_sized_stack_slot(StackSlotData::new(
StackSlotKind::ExplicitSlot,
0,
0,
));
}
self.function.sized_stack_slots[ss] = data;
Ok(())
Expand Down Expand Up @@ -1483,6 +1486,7 @@ impl<'a> Parser<'a> {
// | "spill_slot"
// | "incoming_arg"
// | "outgoing_arg"
// stack-slot-flag ::= "align" "=" Bytes
fn parse_stack_slot_decl(&mut self) -> ParseResult<(StackSlot, StackSlotData)> {
let ss = self.match_ss("expected stack slot number: ss«n»")?;
self.match_token(Token::Equal, "expected '=' in stack slot declaration")?;
Expand All @@ -1498,7 +1502,30 @@ impl<'a> Parser<'a> {
if bytes > i64::from(u32::MAX) {
return err!(self.loc, "stack slot too large");
}
let data = StackSlotData::new(kind, bytes as u32);

// Parse flags.
let align = if self.token() == Some(Token::Comma) {
self.consume();
self.match_token(
Token::Identifier("align"),
"expected a valid stack-slot flag (currently only `align`)",
)?;
self.match_token(Token::Equal, "expected `=` after flag")?;
let align: i64 = self
.match_imm64("expected alignment-size after `align` flag")?
.into();
u32::try_from(align)
.map_err(|_| self.error("alignment must be a 32-bit unsigned integer"))?
} else {
1
};

if !align.is_power_of_two() {
return err!(self.loc, "stack slot alignment is not a power of two");
}
let align_shift = u8::try_from(align.ilog2()).unwrap(); // Always succeeds: range 0..=31.

let data = StackSlotData::new(kind, bytes as u32, align_shift);

// Collect any trailing comments.
self.token();
Expand Down
3 changes: 2 additions & 1 deletion crates/cranelift/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ impl wasmtime_environ::Compiler for Compiler {
let slot = match &ret {
NativeRet::Bare => None,
NativeRet::Retptr { size, .. } => Some(builder.func.create_sized_stack_slot(
ir::StackSlotData::new(ir::StackSlotKind::ExplicitSlot, *size),
ir::StackSlotData::new(ir::StackSlotKind::ExplicitSlot, *size, 0),
)),
};
if let Some(slot) = slot {
Expand Down Expand Up @@ -942,6 +942,7 @@ impl Compiler {
let slot = builder.func.create_sized_stack_slot(ir::StackSlotData::new(
ir::StackSlotKind::ExplicitSlot,
values_vec_byte_size,
4,
));
let values_vec_ptr = builder.ins().stack_addr(pointer_type, slot, 0);

Expand Down
1 change: 1 addition & 0 deletions crates/cranelift/src/compiler/component.rs
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ impl TrampolineCompiler<'_> {
.create_sized_stack_slot(ir::StackSlotData::new(
ir::StackSlotKind::ExplicitSlot,
pointer_type.bytes(),
0,
));
args.push(self.builder.ins().stack_addr(pointer_type, slot, 0));
}
Expand Down

0 comments on commit 45bc736

Please sign in to comment.