Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cranelift: add alignment parameter to stack slots. #8635

Merged
merged 6 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 27 additions & 35 deletions cranelift/codegen/src/ir/stackslot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,32 +70,38 @@ pub struct StackSlotData {

/// Size of stack slot in bytes.
pub size: StackSize,

/// Alignment of stack slot as a power-of-two exponent (log2
/// value). The stack slot will be at least this aligned; it may
/// be aligned according to other considerations, such as minimum
/// stack slot size or machine word size, as well.
pub align_shift: u8,
}

impl StackSlotData {
/// Create a stack slot with the specified byte size.
pub fn new(kind: StackSlotKind, size: StackSize) -> Self {
Self { kind, size }
}

/// Get the alignment in bytes of this stack slot given the stack pointer alignment.
pub fn alignment(&self, max_align: StackSize) -> StackSize {
debug_assert!(max_align.is_power_of_two());
if self.kind == StackSlotKind::ExplicitDynamicSlot {
max_align
} else {
// We want to find the largest power of two that divides both `self.size` and `max_align`.
// That is the same as isolating the rightmost bit in `x`.
let x = self.size | max_align;
// C.f. Hacker's delight.
x & x.wrapping_neg()
/// Create a stack slot with the specified byte size and alignment.
pub fn new(kind: StackSlotKind, size: StackSize, align_shift: u8) -> Self {
Self {
kind,
size,
align_shift,
}
}
}

impl fmt::Display for StackSlotData {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} {}", self.kind, self.size)
if self.align_shift != 0 {
write!(
f,
"{} {}, align = {}",
self.kind,
self.size,
1u32 << self.align_shift
)
} else {
write!(f, "{} {}", self.kind, self.size)
}
}
}

Expand Down Expand Up @@ -146,8 +152,10 @@ mod tests {
fn stack_slot() {
let mut func = Function::new();

let ss0 = func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4));
let ss1 = func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8));
let ss0 =
func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4, 0));
let ss1 =
func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8, 0));
assert_eq!(ss0.to_string(), "ss0");
assert_eq!(ss1.to_string(), "ss1");

Expand Down Expand Up @@ -197,20 +205,4 @@ mod tests {
"explicit_dynamic_slot dt1"
);
}

#[test]
fn alignment() {
let slot = StackSlotData::new(StackSlotKind::ExplicitSlot, 8);

assert_eq!(slot.alignment(4), 4);
assert_eq!(slot.alignment(8), 8);
assert_eq!(slot.alignment(16), 8);

let slot2 = StackSlotData::new(StackSlotKind::ExplicitSlot, 24);

assert_eq!(slot2.alignment(4), 4);
assert_eq!(slot2.alignment(8), 8);
assert_eq!(slot2.alignment(16), 8);
assert_eq!(slot2.alignment(32), 8);
}
}
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ mod tests {

let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));

let code = context
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ mod tests {

let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));

let code = context
Expand Down
4 changes: 2 additions & 2 deletions cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ mod tests {

let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));

let code = context
Expand Down Expand Up @@ -160,7 +160,7 @@ mod tests {

let mut context = Context::for_function(create_multi_return_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));

let code = context
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ mod tests {

let mut context = Context::for_function(create_function(
CallConv::SystemV,
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)),
));

let code = context
Expand Down
6 changes: 5 additions & 1 deletion cranelift/codegen/src/machinst/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1178,7 +1178,11 @@ impl<M: ABIMachineSpec> Callee<M> {
sized_stack_offset = sized_stack_offset
.checked_add(data.size)
.ok_or(CodegenError::ImplLimitExceeded)?;
let mask = M::word_bytes() - 1;
// Always at least machine-word-align slots, but also
// satisfy the user's requested alignment.
debug_assert!(data.align_shift < 32);
let align = std::cmp::max(M::word_bytes(), 1u32 << data.align_shift);
let mask = align - 1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the alignment is bigger than the ABI stack alignment it would be necessary to adjust the stack pointer to correctly align it. This is missing here. Rust allows arbitrary power of two alignments for stack variables, even something crazy like a 1GB alignment. More realistically though, cache line alignment is used by crates like rayon for performance reasons. Without correctly aligning, rustc will trigger a debug assertion in some cases like for the aforementioned rayon crate.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yes, that's a good point -- to summarize: alignment of offset within the stack frame to alignment A does not imply alignment to A overall, because SP itself may be aligned less (e.g. only to A/4).

I don't have time to work further on this at the moment, but I'd be happy to review a PR that addressed this.

sized_stack_offset = checked_round_up(sized_stack_offset, mask)
.ok_or(CodegenError::ImplLimitExceeded)?;
debug_assert_eq!(stackslot.as_u32() as usize, sized_stackslots.len());
Expand Down
9 changes: 8 additions & 1 deletion cranelift/codegen/src/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ mod tests {
f.name = UserFuncName::testcase("foo");
assert_eq!(f.to_string(), "function %foo() fast {\n}\n");

f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4));
f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4, 0));
assert_eq!(
f.to_string(),
"function %foo() fast {\n ss0 = explicit_slot 4\n}\n"
Expand Down Expand Up @@ -569,6 +569,13 @@ mod tests {
f.to_string(),
"function %foo() fast {\n ss0 = explicit_slot 4\n\nblock0(v0: i8, v1: f32x4):\n return\n}\n"
);

let mut f = Function::new();
f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4, 2));
assert_eq!(
f.to_string(),
"function u0:0() fast {\n ss0 = explicit_slot 4, align = 4\n}\n"
);
}

#[test]
Expand Down
61 changes: 61 additions & 0 deletions cranelift/filetests/filetests/isa/x64/stackslot.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
test compile precise-output
target x86_64

function %f0(i64 vmctx) -> i64, i64, i64, i64 {
gv0 = vmctx
stack_limit = gv0
ss0 = explicit_slot 8, align=16
ss1 = explicit_slot 8, align=16
ss2 = explicit_slot 4
ss3 = explicit_slot 4

block0(v0: i64):
v1 = stack_addr.i64 ss0
v2 = stack_addr.i64 ss1
v3 = stack_addr.i64 ss2
v4 = stack_addr.i64 ss3
return v1, v2, v3, v4
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; movq %rdi, %r10
; addq %r10, $48, %r10
; cmpq %rsp, %r10
; jnbe #trap=stk_ovf
; subq %rsp, $48, %rsp
; block0:
; lea rsp(0 + virtual offset), %rax
; lea rsp(16 + virtual offset), %rdx
; lea rsp(32 + virtual offset), %r8
; lea rsp(40 + virtual offset), %r9
; movq %r8, 0(%rsi)
; movq %r9, 8(%rsi)
; addq %rsp, $48, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; movq %rdi, %r10
; addq $0x30, %r10
; cmpq %rsp, %r10
; ja 0x3b
; subq $0x30, %rsp
; block1: ; offset 0x18
; leaq (%rsp), %rax
; leaq 0x10(%rsp), %rdx
; leaq 0x20(%rsp), %r8
; leaq 0x28(%rsp), %r9
; movq %r8, (%rsi)
; movq %r9, 8(%rsi)
; addq $0x30, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq
; ud2 ; trap: stk_ovf

2 changes: 1 addition & 1 deletion cranelift/fuzzgen/src/function_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1605,7 +1605,7 @@ where
fn generate_stack_slots(&mut self, builder: &mut FunctionBuilder) -> Result<()> {
for _ in 0..self.param(&self.config.static_stack_slots_per_function)? {
let bytes = self.param(&self.config.static_stack_slot_size)? as u32;
let ss_data = StackSlotData::new(StackSlotKind::ExplicitSlot, bytes);
let ss_data = StackSlotData::new(StackSlotKind::ExplicitSlot, bytes, 0);
let slot = builder.create_sized_stack_slot(ss_data);

// Generate one Alias Analysis Category for each slot
Expand Down
33 changes: 30 additions & 3 deletions cranelift/reader/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,11 @@ impl Context {
fn add_ss(&mut self, ss: StackSlot, data: StackSlotData, loc: Location) -> ParseResult<()> {
self.map.def_ss(ss, loc)?;
while self.function.sized_stack_slots.next_key().index() <= ss.index() {
self.function
.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 0));
self.function.create_sized_stack_slot(StackSlotData::new(
StackSlotKind::ExplicitSlot,
0,
0,
));
}
self.function.sized_stack_slots[ss] = data;
Ok(())
Expand Down Expand Up @@ -1483,6 +1486,7 @@ impl<'a> Parser<'a> {
// | "spill_slot"
// | "incoming_arg"
// | "outgoing_arg"
// stack-slot-flag ::= "align" "=" Bytes
fn parse_stack_slot_decl(&mut self) -> ParseResult<(StackSlot, StackSlotData)> {
let ss = self.match_ss("expected stack slot number: ss«n»")?;
self.match_token(Token::Equal, "expected '=' in stack slot declaration")?;
Expand All @@ -1498,7 +1502,30 @@ impl<'a> Parser<'a> {
if bytes > i64::from(u32::MAX) {
return err!(self.loc, "stack slot too large");
}
let data = StackSlotData::new(kind, bytes as u32);

// Parse flags.
let align = if self.token() == Some(Token::Comma) {
self.consume();
self.match_token(
Token::Identifier("align"),
"expected a valid stack-slot flag (currently only `align`)",
)?;
self.match_token(Token::Equal, "expected `=` after flag")?;
let align: i64 = self
.match_imm64("expected alignment-size after `align` flag")?
.into();
u32::try_from(align)
.map_err(|_| self.error("alignment must be a 32-bit unsigned integer"))?
} else {
1
};

if !align.is_power_of_two() {
return err!(self.loc, "stack slot alignment is not a power of two");
}
let align_shift = u8::try_from(align.ilog2()).unwrap(); // Always succeeds: range 0..=31.

let data = StackSlotData::new(kind, bytes as u32, align_shift);

// Collect any trailing comments.
self.token();
Expand Down
3 changes: 2 additions & 1 deletion crates/cranelift/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ impl wasmtime_environ::Compiler for Compiler {
let slot = match &ret {
NativeRet::Bare => None,
NativeRet::Retptr { size, .. } => Some(builder.func.create_sized_stack_slot(
ir::StackSlotData::new(ir::StackSlotKind::ExplicitSlot, *size),
ir::StackSlotData::new(ir::StackSlotKind::ExplicitSlot, *size, 0),
)),
};
if let Some(slot) = slot {
Expand Down Expand Up @@ -942,6 +942,7 @@ impl Compiler {
let slot = builder.func.create_sized_stack_slot(ir::StackSlotData::new(
ir::StackSlotKind::ExplicitSlot,
values_vec_byte_size,
4,
));
let values_vec_ptr = builder.ins().stack_addr(pointer_type, slot, 0);

Expand Down
1 change: 1 addition & 0 deletions crates/cranelift/src/compiler/component.rs
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ impl TrampolineCompiler<'_> {
.create_sized_stack_slot(ir::StackSlotData::new(
ir::StackSlotKind::ExplicitSlot,
pointer_type.bytes(),
0,
));
args.push(self.builder.ins().stack_addr(pointer_type, slot, 0));
}
Expand Down
Loading