From 806080fe9fd2b3b330869e90ced44c53458ddee9 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Thu, 4 Apr 2024 13:28:11 -0700 Subject: [PATCH] cranelift: Include clobbers and outgoing args in stack limit When we compute the amount of space that we need in a stack frame for the stack limit check, we were only counting spill-slots and explicit stack-slots. However, we need to account for all uses of the stack which occur before the next stack limit check. That includes clobbers and any stack arguments we want to pass to callees. The maximum amount that we could have missed by is essentially bounded by the number of arguments which could be passed to a function. In Wasmtime, that is limited by `MAX_WASM_FUNCTION_PARAMS` in `wasmparser::limits`, which is set to 1,000, and the largest arguments are 16-byte vectors, so this could undercount by about 16kB. This is not a security issue according to Wasmtime's security policy (https://docs.wasmtime.dev/security-what-is-considered-a-security-vulnerability.html) because it's the embedder's responsibility to ensure that the stack where Wasmtime is running has enough extra space on top of the configured `max_wasm_stack` size, and getting within 16kB of the host stack size is too small to be safe even with this fixed. However, this was definitely not the intended behavior when stack limit checks or stack probes are enabled, and anyone with non-default configurations or non-Wasmtime uses of Cranelift should evaluate whether this bug impacts your use case. (For reference: When Wasmtime is used in async mode or on Linux, the default stack size is 1.5MB larger than the default WebAssembly stack limit, so such configurations are typically safe regardless. On the other hand, on macOS the default non-async stack size for threads other than the main thread is the same size as the default for `max_wasm_stack`, so that is too small with or without this bug fix.) --- cranelift/codegen/src/isa/aarch64/abi.rs | 1 - cranelift/codegen/src/isa/riscv64/abi.rs | 1 - cranelift/codegen/src/isa/x64/abi.rs | 1 - cranelift/codegen/src/machinst/abi.rs | 18 +++++++++++++- cranelift/codegen/src/machinst/isle.rs | 6 +++++ .../filetests/isa/aarch64/stack-limit.clif | 24 +++++++++++-------- .../filetests/isa/riscv64/stack-limit.clif | 12 ++++++---- .../filetests/isa/s390x/stack-limit.clif | 16 ++++++++----- 8 files changed, 55 insertions(+), 24 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index fded724480bc..87d1c6aee416 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -1166,7 +1166,6 @@ impl ABIMachineSpec for AArch64MachineDeps { }; // Return FrameLayout structure. - debug_assert!(outgoing_args_size == 0); FrameLayout { stack_args_size, setup_area_size, diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index abda85b0bf26..931e834374a6 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -697,7 +697,6 @@ impl ABIMachineSpec for Riscv64MachineDeps { }; // Return FrameLayout structure. - debug_assert!(outgoing_args_size == 0); FrameLayout { stack_args_size, setup_area_size, diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index ab0859a46143..386b5a5b8e0f 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -943,7 +943,6 @@ impl ABIMachineSpec for X64ABIMachineSpec { let setup_area_size = 16; // RBP, return address // Return FrameLayout structure. - debug_assert!(outgoing_args_size == 0); FrameLayout { stack_args_size, setup_area_size, diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index d86f1cf31b86..3f195baaaa27 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -1806,9 +1806,25 @@ impl Callee { &frame_layout, )); + // The stack limit check needs to cover all the stack adjustments we + // might make, up to the next stack limit check in any function we + // call. Since this happens after frame setup, the current function's + // setup area needs to be accounted for in the caller's stack limit + // check, but we need to account for any setup area that our callees + // might need. Note that s390x may also use the outgoing args area for + // backtrace support even in leaf functions, so that should be accounted + // for unconditionally. + let total_stacksize = frame_layout.clobber_size + + frame_layout.fixed_frame_storage_size + + frame_layout.outgoing_args_size + + if self.is_leaf { + 0 + } else { + frame_layout.setup_area_size + }; + // Leaf functions with zero stack don't need a stack check if one's // specified, otherwise always insert the stack check. - let total_stacksize = frame_layout.fixed_frame_storage_size; if total_stacksize > 0 || !self.is_leaf { if let Some((reg, stack_limit_load)) = &self.stack_limit { insts.extend(stack_limit_load.clone()); diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 3ef9b92a5b13..91462bc2299e 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -838,6 +838,12 @@ macro_rules! isle_prelude_method_helpers { mut caller: $abicaller, args: ValueSlice, ) -> InstOutput { + let off = self.lower_ctx.sigs()[abi].sized_stack_arg_space() + + self.lower_ctx.sigs()[abi].sized_stack_ret_space(); + self.lower_ctx + .abi_mut() + .accumulate_outgoing_args_size(off as u32); + caller.emit_stack_pre_adjust(self.lower_ctx); self.gen_call_common_args(&mut caller, args); diff --git a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif index 6bd7520ba92d..e30a32152994 100644 --- a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif +++ b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif @@ -55,7 +55,8 @@ block0(v0: i64): ; VCode: ; stp fp, lr, [sp, #-16]! ; mov fp, sp -; subs xzr, sp, x0, UXTX +; add x16, x0, #16 +; subs xzr, sp, x16, UXTX ; b.lo #trap=stk_ovf ; block0: ; load_ext_name x2, TestCase(%foo)+0 @@ -67,11 +68,12 @@ block0(v0: i64): ; block0: ; offset 0x0 ; stp x29, x30, [sp, #-0x10]! ; mov x29, sp -; cmp sp, x0 -; b.lo #0x2c -; block1: ; offset 0x10 -; ldr x2, #0x18 -; b #0x20 +; add x16, x0, #0x10 +; cmp sp, x16 +; b.lo #0x30 +; block1: ; offset 0x14 +; ldr x2, #0x1c +; b #0x24 ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x2 @@ -95,6 +97,7 @@ block0(v0: i64): ; mov fp, sp ; ldr x16, [x0] ; ldr x16, [x16, #4] +; add x16, x16, #16 ; subs xzr, sp, x16, UXTX ; b.lo #trap=stk_ovf ; block0: @@ -109,11 +112,12 @@ block0(v0: i64): ; mov x29, sp ; ldur x16, [x0] ; ldur x16, [x16, #4] +; add x16, x16, #0x10 ; cmp sp, x16 -; b.lo #0x34 -; block1: ; offset 0x18 -; ldr x2, #0x20 -; b #0x28 +; b.lo #0x38 +; block1: ; offset 0x1c +; ldr x2, #0x24 +; b #0x2c ; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0 ; .byte 0x00, 0x00, 0x00, 0x00 ; blr x2 diff --git a/cranelift/filetests/filetests/isa/riscv64/stack-limit.clif b/cranelift/filetests/filetests/isa/riscv64/stack-limit.clif index be0c64af3ec5..6dd424c2a8f8 100644 --- a/cranelift/filetests/filetests/isa/riscv64/stack-limit.clif +++ b/cranelift/filetests/filetests/isa/riscv64/stack-limit.clif @@ -58,7 +58,8 @@ block0(v0: i64): ; sd ra,8(sp) ; sd fp,0(sp) ; mv fp,sp -; trap_if stk_ovf##(sp ult a0) +; addi t6,a0,16 +; trap_if stk_ovf##(sp ult t6) ; block0: ; load_sym a2,%foo+0 ; callind a2 @@ -73,9 +74,10 @@ block0(v0: i64): ; sd ra, 8(sp) ; sd s0, 0(sp) ; mv s0, sp -; bgeu sp, a0, 8 +; addi t6, a0, 0x10 +; bgeu sp, t6, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: stk_ovf -; block1: ; offset 0x18 +; block1: ; offset 0x1c ; auipc a2, 0 ; ld a2, 0xc(a2) ; j 0xc @@ -105,6 +107,7 @@ block0(v0: i64): ; mv fp,sp ; ld t6,0(a0) ; ld t6,4(t6) +; addi t6,t6,16 ; trap_if stk_ovf##(sp ult t6) ; block0: ; load_sym a2,%foo+0 @@ -122,9 +125,10 @@ block0(v0: i64): ; mv s0, sp ; ld t6, 0(a0) ; ld t6, 4(t6) +; addi t6, t6, 0x10 ; bgeu sp, t6, 8 ; .byte 0x00, 0x00, 0x00, 0x00 ; trap: stk_ovf -; block1: ; offset 0x20 +; block1: ; offset 0x24 ; auipc a2, 0 ; ld a2, 0xc(a2) ; j 0xc diff --git a/cranelift/filetests/filetests/isa/s390x/stack-limit.clif b/cranelift/filetests/filetests/isa/s390x/stack-limit.clif index 0a2387b2fd6e..9f69853de6ba 100644 --- a/cranelift/filetests/filetests/isa/s390x/stack-limit.clif +++ b/cranelift/filetests/filetests/isa/s390x/stack-limit.clif @@ -52,7 +52,8 @@ block0(v0: i64): } ; VCode: -; clgrtle %r15, %r2 +; la %r1, 160(%r2) +; clgrtle %r15, %r1 ; stmg %r14, %r15, 112(%r15) ; aghi %r15, -160 ; virtual_sp_offset_adjust 160 @@ -64,11 +65,12 @@ block0(v0: i64): ; ; Disassembled: ; block0: ; offset 0x0 -; clgrtle %r15, %r2 ; trap: stk_ovf +; la %r1, 0xa0(%r2) +; clgrtle %r15, %r1 ; trap: stk_ovf ; stmg %r14, %r15, 0x70(%r15) ; aghi %r15, -0xa0 -; block1: ; offset 0xe -; bras %r1, 0x1a +; block1: ; offset 0x12 +; bras %r1, 0x1e ; .byte 0x00, 0x00 ; reloc_external Abs8 %foo 0 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00 @@ -92,6 +94,7 @@ block0(v0: i64): ; VCode: ; lg %r1, 0(%r2) ; lg %r1, 4(%r1) +; la %r1, 160(%r1) ; clgrtle %r15, %r1 ; stmg %r14, %r15, 112(%r15) ; aghi %r15, -160 @@ -106,11 +109,12 @@ block0(v0: i64): ; block0: ; offset 0x0 ; lg %r1, 0(%r2) ; lg %r1, 4(%r1) +; la %r1, 0xa0(%r1) ; clgrtle %r15, %r1 ; trap: stk_ovf ; stmg %r14, %r15, 0x70(%r15) ; aghi %r15, -0xa0 -; block1: ; offset 0x1a -; bras %r1, 0x26 +; block1: ; offset 0x1e +; bras %r1, 0x2a ; .byte 0x00, 0x00 ; reloc_external Abs8 %foo 0 ; .byte 0x00, 0x00 ; .byte 0x00, 0x00