diff --git a/cranelift/codegen/src/ir/stackslot.rs b/cranelift/codegen/src/ir/stackslot.rs index dfe0109e90c5..d906fb291324 100644 --- a/cranelift/codegen/src/ir/stackslot.rs +++ b/cranelift/codegen/src/ir/stackslot.rs @@ -70,32 +70,38 @@ pub struct StackSlotData { /// Size of stack slot in bytes. pub size: StackSize, + + /// Alignment of stack slot as a power-of-two exponent (log2 + /// value). The stack slot will be at least this aligned; it may + /// be aligned according to other considerations, such as minimum + /// stack slot size or machine word size, as well. + pub align_shift: u8, } impl StackSlotData { - /// Create a stack slot with the specified byte size. - pub fn new(kind: StackSlotKind, size: StackSize) -> Self { - Self { kind, size } - } - - /// Get the alignment in bytes of this stack slot given the stack pointer alignment. - pub fn alignment(&self, max_align: StackSize) -> StackSize { - debug_assert!(max_align.is_power_of_two()); - if self.kind == StackSlotKind::ExplicitDynamicSlot { - max_align - } else { - // We want to find the largest power of two that divides both `self.size` and `max_align`. - // That is the same as isolating the rightmost bit in `x`. - let x = self.size | max_align; - // C.f. Hacker's delight. - x & x.wrapping_neg() + /// Create a stack slot with the specified byte size and alignment. + pub fn new(kind: StackSlotKind, size: StackSize, align_shift: u8) -> Self { + Self { + kind, + size, + align_shift, } } } impl fmt::Display for StackSlotData { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} {}", self.kind, self.size) + if self.align_shift != 0 { + write!( + f, + "{} {}, align = {}", + self.kind, + self.size, + 1u32 << self.align_shift + ) + } else { + write!(f, "{} {}", self.kind, self.size) + } } } @@ -146,8 +152,10 @@ mod tests { fn stack_slot() { let mut func = Function::new(); - let ss0 = func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); - let ss1 = func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8)); + let ss0 = + func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4, 0)); + let ss1 = + func.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 8, 0)); assert_eq!(ss0.to_string(), "ss0"); assert_eq!(ss1.to_string(), "ss1"); @@ -197,20 +205,4 @@ mod tests { "explicit_dynamic_slot dt1" ); } - - #[test] - fn alignment() { - let slot = StackSlotData::new(StackSlotKind::ExplicitSlot, 8); - - assert_eq!(slot.alignment(4), 4); - assert_eq!(slot.alignment(8), 8); - assert_eq!(slot.alignment(16), 8); - - let slot2 = StackSlotData::new(StackSlotKind::ExplicitSlot, 24); - - assert_eq!(slot2.alignment(4), 4); - assert_eq!(slot2.alignment(8), 8); - assert_eq!(slot2.alignment(16), 8); - assert_eq!(slot2.alignment(32), 8); - } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs index a22b9e787cd8..da31d7d80db1 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs @@ -85,7 +85,7 @@ mod tests { let mut context = Context::for_function(create_function( CallConv::SystemV, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)), )); let code = context diff --git a/cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs index 971201b85977..b0260ceefcc2 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs @@ -80,7 +80,7 @@ mod tests { let mut context = Context::for_function(create_function( CallConv::SystemV, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)), )); let code = context diff --git a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs index de287d87d9e7..9be66c14f7bc 100644 --- a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs @@ -116,7 +116,7 @@ mod tests { let mut context = Context::for_function(create_function( CallConv::SystemV, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)), )); let code = context @@ -160,7 +160,7 @@ mod tests { let mut context = Context::for_function(create_multi_return_function( CallConv::SystemV, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)), )); let code = context diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index 25d7237517f3..576cc060c557 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -112,7 +112,7 @@ mod tests { let mut context = Context::for_function(create_function( CallConv::SystemV, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64, 0)), )); let code = context diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 1edec7269361..38c4952e1f0f 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -1178,7 +1178,11 @@ impl Callee { sized_stack_offset = sized_stack_offset .checked_add(data.size) .ok_or(CodegenError::ImplLimitExceeded)?; - let mask = M::word_bytes() - 1; + // Always at least machine-word-align slots, but also + // satisfy the user's requested alignment. + debug_assert!(data.align_shift < 32); + let align = std::cmp::max(M::word_bytes(), 1u32 << data.align_shift); + let mask = align - 1; sized_stack_offset = checked_round_up(sized_stack_offset, mask) .ok_or(CodegenError::ImplLimitExceeded)?; debug_assert_eq!(stackslot.as_u32() as usize, sized_stackslots.len()); diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index 190e1c20e63b..e8008162598e 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -535,7 +535,7 @@ mod tests { f.name = UserFuncName::testcase("foo"); assert_eq!(f.to_string(), "function %foo() fast {\n}\n"); - f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); + f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4, 0)); assert_eq!( f.to_string(), "function %foo() fast {\n ss0 = explicit_slot 4\n}\n" @@ -569,6 +569,13 @@ mod tests { f.to_string(), "function %foo() fast {\n ss0 = explicit_slot 4\n\nblock0(v0: i8, v1: f32x4):\n return\n}\n" ); + + let mut f = Function::new(); + f.create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4, 2)); + assert_eq!( + f.to_string(), + "function u0:0() fast {\n ss0 = explicit_slot 4, align = 4\n}\n" + ); } #[test] diff --git a/cranelift/filetests/filetests/isa/x64/stackslot.clif b/cranelift/filetests/filetests/isa/x64/stackslot.clif new file mode 100644 index 000000000000..b4c013839172 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/stackslot.clif @@ -0,0 +1,61 @@ +test compile precise-output +target x86_64 + +function %f0(i64 vmctx) -> i64, i64, i64, i64 { + gv0 = vmctx + stack_limit = gv0 + ss0 = explicit_slot 8, align=16 + ss1 = explicit_slot 8, align=16 + ss2 = explicit_slot 4 + ss3 = explicit_slot 4 + +block0(v0: i64): + v1 = stack_addr.i64 ss0 + v2 = stack_addr.i64 ss1 + v3 = stack_addr.i64 ss2 + v4 = stack_addr.i64 ss3 + return v1, v2, v3, v4 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; movq %rdi, %r10 +; addq %r10, $48, %r10 +; cmpq %rsp, %r10 +; jnbe #trap=stk_ovf +; subq %rsp, $48, %rsp +; block0: +; lea rsp(0 + virtual offset), %rax +; lea rsp(16 + virtual offset), %rdx +; lea rsp(32 + virtual offset), %r8 +; lea rsp(40 + virtual offset), %r9 +; movq %r8, 0(%rsi) +; movq %r9, 8(%rsi) +; addq %rsp, $48, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; movq %rdi, %r10 +; addq $0x30, %r10 +; cmpq %rsp, %r10 +; ja 0x3b +; subq $0x30, %rsp +; block1: ; offset 0x18 +; leaq (%rsp), %rax +; leaq 0x10(%rsp), %rdx +; leaq 0x20(%rsp), %r8 +; leaq 0x28(%rsp), %r9 +; movq %r8, (%rsi) +; movq %r9, 8(%rsi) +; addq $0x30, %rsp +; movq %rbp, %rsp +; popq %rbp +; retq +; ud2 ; trap: stk_ovf + diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index 4e2506bdbc43..182336dfdc14 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -1605,7 +1605,7 @@ where fn generate_stack_slots(&mut self, builder: &mut FunctionBuilder) -> Result<()> { for _ in 0..self.param(&self.config.static_stack_slots_per_function)? { let bytes = self.param(&self.config.static_stack_slot_size)? as u32; - let ss_data = StackSlotData::new(StackSlotKind::ExplicitSlot, bytes); + let ss_data = StackSlotData::new(StackSlotKind::ExplicitSlot, bytes, 0); let slot = builder.create_sized_stack_slot(ss_data); // Generate one Alias Analysis Category for each slot diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 0dc9289ef593..9e176ccf9e10 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -245,8 +245,11 @@ impl Context { fn add_ss(&mut self, ss: StackSlot, data: StackSlotData, loc: Location) -> ParseResult<()> { self.map.def_ss(ss, loc)?; while self.function.sized_stack_slots.next_key().index() <= ss.index() { - self.function - .create_sized_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 0)); + self.function.create_sized_stack_slot(StackSlotData::new( + StackSlotKind::ExplicitSlot, + 0, + 0, + )); } self.function.sized_stack_slots[ss] = data; Ok(()) @@ -1483,6 +1486,7 @@ impl<'a> Parser<'a> { // | "spill_slot" // | "incoming_arg" // | "outgoing_arg" + // stack-slot-flag ::= "align" "=" Bytes fn parse_stack_slot_decl(&mut self) -> ParseResult<(StackSlot, StackSlotData)> { let ss = self.match_ss("expected stack slot number: ss«n»")?; self.match_token(Token::Equal, "expected '=' in stack slot declaration")?; @@ -1498,7 +1502,30 @@ impl<'a> Parser<'a> { if bytes > i64::from(u32::MAX) { return err!(self.loc, "stack slot too large"); } - let data = StackSlotData::new(kind, bytes as u32); + + // Parse flags. + let align = if self.token() == Some(Token::Comma) { + self.consume(); + self.match_token( + Token::Identifier("align"), + "expected a valid stack-slot flag (currently only `align`)", + )?; + self.match_token(Token::Equal, "expected `=` after flag")?; + let align: i64 = self + .match_imm64("expected alignment-size after `align` flag")? + .into(); + u32::try_from(align) + .map_err(|_| self.error("alignment must be a 32-bit unsigned integer"))? + } else { + 1 + }; + + if !align.is_power_of_two() { + return err!(self.loc, "stack slot alignment is not a power of two"); + } + let align_shift = u8::try_from(align.ilog2()).unwrap(); // Always succeeds: range 0..=31. + + let data = StackSlotData::new(kind, bytes as u32, align_shift); // Collect any trailing comments. self.token(); diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index fed9ac988f2c..c121802c1a55 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -407,7 +407,7 @@ impl wasmtime_environ::Compiler for Compiler { let slot = match &ret { NativeRet::Bare => None, NativeRet::Retptr { size, .. } => Some(builder.func.create_sized_stack_slot( - ir::StackSlotData::new(ir::StackSlotKind::ExplicitSlot, *size), + ir::StackSlotData::new(ir::StackSlotKind::ExplicitSlot, *size, 0), )), }; if let Some(slot) = slot { @@ -942,6 +942,7 @@ impl Compiler { let slot = builder.func.create_sized_stack_slot(ir::StackSlotData::new( ir::StackSlotKind::ExplicitSlot, values_vec_byte_size, + 4, )); let values_vec_ptr = builder.ins().stack_addr(pointer_type, slot, 0); diff --git a/crates/cranelift/src/compiler/component.rs b/crates/cranelift/src/compiler/component.rs index 250f7c3044f7..8068cade5ea3 100644 --- a/crates/cranelift/src/compiler/component.rs +++ b/crates/cranelift/src/compiler/component.rs @@ -775,6 +775,7 @@ impl TrampolineCompiler<'_> { .create_sized_stack_slot(ir::StackSlotData::new( ir::StackSlotKind::ExplicitSlot, pointer_type.bytes(), + 0, )); args.push(self.builder.ins().stack_addr(pointer_type, slot, 0)); }