From a1d9030becf585ad715bdcfb5da290ea7583870c Mon Sep 17 00:00:00 2001 From: Christopher Serr Date: Sat, 25 Apr 2020 21:45:17 +0200 Subject: [PATCH] Revert "Windows FPRs preservation (#1216)" This reverts commit 4cca5100858cb7aa4bec9f68e80d3c309ae1007e. --- build.rs | 11 - cranelift/codegen/src/context.rs | 4 +- cranelift/codegen/src/isa/mod.rs | 3 +- cranelift/codegen/src/isa/x86/abi.rs | 241 ++---------------- cranelift/codegen/src/isa/x86/fde.rs | 13 +- cranelift/codegen/src/isa/x86/mod.rs | 4 +- cranelift/codegen/src/isa/x86/unwind.rs | 198 ++------------ .../isa/x86/windows_fastcall_x64.clif | 51 ---- .../isa/x86/windows_fastcall_x64_unwind.clif | 97 +------ cranelift/filetests/src/test_fde.rs | 4 +- cranelift/filetests/src/test_unwind.rs | 28 +- crates/api/src/trampoline/func.rs | 4 +- crates/environ/src/compilation.rs | 16 +- crates/environ/src/cranelift.rs | 4 +- crates/jit/src/compiler.rs | 8 +- 15 files changed, 76 insertions(+), 610 deletions(-) diff --git a/build.rs b/build.rs index 3eeb27c1796f..ea6e6c64071b 100644 --- a/build.rs +++ b/build.rs @@ -194,17 +194,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { ("reference_types", "table_copy_on_imported_tables") => return false, ("reference_types", _) => return true, - ("misc_testsuite", "export_large_signature") - | ("spec_testsuite", "call") - | ("multi_value", "call") - | ("multi_value", "func") => { - // FIXME These involves functions with very large stack frames that Cranelift currently - // cannot compile using the fastcall (Windows) calling convention. - // See https://github.com/bytecodealliance/wasmtime/pull/1216. - #[cfg(windows)] - return true; - } - _ => {} }, _ => panic!("unrecognized strategy"), diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 967d302ecb9f..ca70293c05fb 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -206,8 +206,8 @@ impl Context { isa: &dyn TargetIsa, kind: FrameUnwindKind, sink: &mut dyn FrameUnwindSink, - ) -> CodegenResult<()> { - isa.emit_unwind_info(&self.func, kind, sink) + ) { + isa.emit_unwind_info(&self.func, kind, sink); } /// Run the verifier on the function. diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index af263c2b5bf0..9c91d4219390 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -398,9 +398,8 @@ pub trait TargetIsa: fmt::Display + Send + Sync { _func: &ir::Function, _kind: binemit::FrameUnwindKind, _sink: &mut dyn binemit::FrameUnwindSink, - ) -> CodegenResult<()> { + ) { // No-op by default - Ok(()) } } diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs index c683f101ed2d..db67457a6c76 100644 --- a/cranelift/codegen/src/isa/x86/abi.rs +++ b/cranelift/codegen/src/isa/x86/abi.rs @@ -12,10 +12,8 @@ use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion}; use crate::binemit::{FrameUnwindKind, FrameUnwindSink}; use crate::cursor::{Cursor, CursorPosition, EncCursor}; use crate::ir; -use crate::ir::entities::StackSlot; use crate::ir::immediates::Imm64; use crate::ir::stackslot::{StackOffset, StackSize}; -use crate::ir::types; use crate::ir::{ get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, FrameLayoutChange, InstBuilder, ValueLoc, @@ -25,7 +23,6 @@ use crate::regalloc::RegisterSet; use crate::result::CodegenResult; use crate::stack_layout::layout_stack; use alloc::borrow::Cow; -use alloc::vec::Vec; use core::i32; use std::boxed::Box; use target_lexicon::{PointerWidth, Triple}; @@ -369,18 +366,17 @@ pub fn allocatable_registers(triple: &Triple, flags: &shared_settings::Flags) -> regs } -/// Get the set of callee-saved general-purpose registers. +/// Get the set of callee-saved registers. fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] { match isa.triple().pointer_width().unwrap() { PointerWidth::U16 => panic!(), PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi], PointerWidth::U64 => { if call_conv.extends_windows_fastcall() { - // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15, and XMM6-15 are - // considered nonvolatile and must be saved and restored by a function that uses - // them." + // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15 are considered nonvolatile + // and must be saved and restored by a function that uses them." // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention - // RSP & RBP are not listed below, since they are restored automatically during + // RSP & RSB are not listed below, since they are restored automatically during // a function call. If that wasn't the case, function calls (RET) would not work. &[ RU::rbx, @@ -398,45 +394,12 @@ fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] } } -/// Get the set of callee-saved floating-point (SIMD) registers. -fn callee_saved_fprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] { - match isa.triple().pointer_width().unwrap() { - PointerWidth::U16 => panic!(), - PointerWidth::U32 => &[], - PointerWidth::U64 => { - if call_conv.extends_windows_fastcall() { - // "registers RBX, ... , and XMM6-15 are considered nonvolatile and must be saved - // and restored by a function that uses them." - // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention as of - // February 5th, 2020. - &[ - RU::xmm6, - RU::xmm7, - RU::xmm8, - RU::xmm9, - RU::xmm10, - RU::xmm11, - RU::xmm12, - RU::xmm13, - RU::xmm14, - RU::xmm15, - ] - } else { - &[] - } - } - } -} - /// Get the set of callee-saved registers that are used. -fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet { +fn callee_saved_gprs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet { let mut all_callee_saved = RegisterSet::empty(); for reg in callee_saved_gprs(isa, func.signature.call_conv) { all_callee_saved.free(GPR, *reg as RegUnit); } - for reg in callee_saved_fprs(isa, func.signature.call_conv) { - all_callee_saved.free(FPR, *reg as RegUnit); - } let mut used = RegisterSet::empty(); for value_loc in func.locations.values() { @@ -444,14 +407,8 @@ fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterS // register. We don't use registers that overlap each other in the x86 ISA, but in others // we do. So this should not be blindly reused. if let ValueLoc::Reg(ru) = *value_loc { - if GPR.contains(ru) { - if !used.is_avail(GPR, ru) { - used.free(GPR, ru); - } - } else if FPR.contains(ru) { - if !used.is_avail(FPR, ru) { - used.free(FPR, ru); - } + if !used.is_avail(GPR, ru) { + used.free(GPR, ru); } } } @@ -467,14 +424,8 @@ fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterS match func.dfg[inst] { ir::instructions::InstructionData::RegMove { dst, .. } | ir::instructions::InstructionData::RegFill { dst, .. } => { - if GPR.contains(dst) { - if !used.is_avail(GPR, dst) { - used.free(GPR, dst); - } - } else if FPR.contains(dst) { - if !used.is_avail(FPR, dst) { - used.free(FPR, dst); - } + if !used.is_avail(GPR, dst) { + used.free(GPR, dst); } } _ => (), @@ -558,7 +509,7 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C panic!("TODO: windows-fastcall: x86-32 not implemented yet"); } - let csrs = callee_saved_regs_used(isa, func); + let csrs = callee_saved_gprs_used(isa, func); // The reserved stack area is composed of: // return address + frame pointer + all callee-saved registers + shadow space @@ -568,28 +519,11 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C // will adjust the stack pointer to make room for the rest of the required // space for this frame. let word_size = isa.pointer_bytes() as usize; - let num_fprs = csrs.iter(FPR).len(); let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32; - // Only create an FPR stack slot if we're going to save FPRs. - let fpr_slot = if num_fprs > 0 { - // Create a stack slot for FPRs to be preserved in. This is an `ExplicitSlot` because it - // seems to most closely map to it as a `StackSlotKind`: FPR preserve/restore should be - // through `stack_load` and `stack_store` (see later comment about issue #1198). Even - // though in a certain light FPR preserve/restore is "spilling" an argument, regalloc - // implies that `SpillSlot` may be eligible for certain optimizations, and we know with - // certainty that this space may not be reused in the function, nor moved around. - Some(func.create_stack_slot(ir::StackSlotData { - kind: ir::StackSlotKind::ExplicitSlot, - size: (num_fprs * types::F64X2.bytes() as usize) as u32, - offset: None, - })) - } else { - None - }; - // TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work // since cranelift does not support spill slots before incoming args + func.create_stack_slot(ir::StackSlotData { kind: ir::StackSlotKind::IncomingArg, size: csr_stack_size as u32, @@ -610,22 +544,8 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C func.signature.params.push(fp_arg); func.signature.returns.push(fp_arg); - for gp_csr in csrs.iter(GPR) { - let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, gp_csr); - func.signature.params.push(csr_arg); - func.signature.returns.push(csr_arg); - } - - for fp_csr in csrs.iter(FPR) { - // The calling convention described in - // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires - // preserving the low 128 bits of XMM6-XMM15. - // - // TODO: For now, add just an `F64` rather than `F64X2` because `F64X2` would require - // encoding a fstDisp8 with REX bits set, and we currently can't encode that. F64 causes a - // whole XMM register to be preserved anyway. - let csr_arg = - ir::AbiParam::special_reg(types::F64, ir::ArgumentPurpose::CalleeSaved, fp_csr); + for csr in csrs.iter(GPR) { + let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr); func.signature.params.push(csr_arg); func.signature.returns.push(csr_arg); } @@ -633,14 +553,8 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C // Set up the cursor and insert the prologue let entry_block = func.layout.entry_block().expect("missing entry block"); let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block); - let prologue_cfa_state = insert_common_prologue( - &mut pos, - local_stack_size, - reg_type, - &csrs, - fpr_slot.as_ref(), - isa, - ); + let prologue_cfa_state = + insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa); // Reset the cursor and insert the epilogue let mut pos = pos.at_position(CursorPosition::Nowhere); @@ -649,7 +563,6 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C local_stack_size, reg_type, &csrs, - fpr_slot.as_ref(), isa, prologue_cfa_state, ); @@ -662,11 +575,7 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C let pointer_width = isa.triple().pointer_width().unwrap(); let word_size = pointer_width.bytes() as usize; - let csrs = callee_saved_regs_used(isa, func); - assert!( - csrs.iter(FPR).len() == 0, - "SysV ABI does not have callee-save SIMD registers" - ); + let csrs = callee_saved_gprs_used(isa, func); // The reserved stack area is composed of: // return address + frame pointer + all callee-saved registers @@ -706,7 +615,7 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C let entry_block = func.layout.entry_block().expect("missing entry block"); let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block); let prologue_cfa_state = - insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, None, isa); + insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa); // Reset the cursor and insert the epilogue let mut pos = pos.at_position(CursorPosition::Nowhere); @@ -715,7 +624,6 @@ fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C local_stack_size, reg_type, &csrs, - None, isa, prologue_cfa_state, ); @@ -730,7 +638,6 @@ fn insert_common_prologue( stack_size: i64, reg_type: ir::types::Type, csrs: &RegisterSet, - fpr_slot: Option<&StackSlot>, isa: &dyn TargetIsa, ) -> Option { let word_size = isa.pointer_bytes() as isize; @@ -741,11 +648,8 @@ fn insert_common_prologue( // pushed CSRs, frame pointer. // Also, the size of a return address, implicitly pushed by a x86 `call` instruction, // also should be accounted for. - // If any FPR are present, count them as well as necessary alignment space. // TODO: Check if the function body actually contains a `call` instruction. - let mut total_stack_size = (csrs.iter(GPR).len() + 1 + 1) as i64 * word_size as i64; - - total_stack_size += csrs.iter(FPR).len() as i64 * types::F64X2.bytes() as i64; + let total_stack_size = (csrs.iter(GPR).len() + 1 + 1) as i64 * word_size as i64; insert_stack_check(pos, total_stack_size, stack_limit_arg); } @@ -892,55 +796,6 @@ fn insert_common_prologue( } } - // Now that RSP is prepared for the function, we can use stack slots: - if let Some(fpr_slot) = fpr_slot { - debug_assert!(csrs.iter(FPR).len() != 0); - - // `stack_store` is not directly encodable in x86_64 at the moment, so we'll need a base - // address. We are well after postopt could run, so load the CSR region base once here, - // instead of hoping that the addr/store will be combined later. - // See also: https://github.com/bytecodealliance/wasmtime/pull/1198 - let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0); - - // Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at - // function entry. - pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16); - - let mut fpr_offset = 0; - - for reg in csrs.iter(FPR) { - // Append param to entry Block - let csr_arg = pos.func.dfg.append_block_param(block, types::F64); - - // Since regalloc has already run, we must assign a location. - pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); - - let reg_store_inst = - pos.ins() - .store(ir::MemFlags::trusted(), csr_arg, stack_addr, fpr_offset); - - // If we preserve FPRs, they occur after SP is adjusted, so also fix up the end point - // to this new instruction. - pos.func.prologue_end = Some(reg_store_inst); - fpr_offset += types::F64X2.bytes() as i32; - - if let Some(ref mut frame_layout) = pos.func.frame_layout { - let mut cfa_state = cfa_state - .as_mut() - .expect("cfa state exists when recording frame layout"); - cfa_state.current_depth -= types::F64X2.bytes() as isize; - frame_layout.instructions.insert( - reg_store_inst, - vec![FrameLayoutChange::RegAt { - reg, - cfa_offset: cfa_state.current_depth, - }] - .into_boxed_slice(), - ); - } - } - } - cfa_state } @@ -973,7 +828,6 @@ fn insert_common_epilogues( stack_size: i64, reg_type: ir::types::Type, csrs: &RegisterSet, - fpr_slot: Option<&StackSlot>, isa: &dyn TargetIsa, cfa_state: Option, ) { @@ -988,7 +842,6 @@ fn insert_common_epilogues( pos, reg_type, csrs, - fpr_slot, isa, is_last, cfa_state.clone(), @@ -1006,57 +859,11 @@ fn insert_common_epilogue( pos: &mut EncCursor, reg_type: ir::types::Type, csrs: &RegisterSet, - fpr_slot: Option<&StackSlot>, isa: &dyn TargetIsa, is_last: bool, mut cfa_state: Option, ) { let word_size = isa.pointer_bytes() as isize; - - // Even though instructions to restore FPRs are inserted first, we have to append them after - // restored GPRs to satisfy parameter order in the return. - let mut restored_fpr_values = Vec::new(); - - // Restore FPRs before we move RSP and invalidate stack slots. - if let Some(fpr_slot) = fpr_slot { - debug_assert!(csrs.iter(FPR).len() != 0); - - // `stack_load` is not directly encodable in x86_64 at the moment, so we'll need a base - // address. We are well after postopt could run, so load the CSR region base once here, - // instead of hoping that the addr/store will be combined later. - // - // See also: https://github.com/bytecodealliance/wasmtime/pull/1198 - let stack_addr = pos.ins().stack_addr(types::I64, *fpr_slot, 0); - - // Use r11 as fastcall allows it to be clobbered, and it won't have a meaningful value at - // function exit. - pos.func.locations[stack_addr] = ir::ValueLoc::Reg(RU::r11 as u16); - - let mut fpr_offset = 0; - - for reg in csrs.iter(FPR) { - let value = pos - .ins() - .load(types::F64, ir::MemFlags::trusted(), stack_addr, fpr_offset); - fpr_offset += types::F64X2.bytes() as i32; - - if let Some(ref mut cfa_state) = cfa_state.as_mut() { - // Note: don't bother recording a frame layout change because the popped value is - // still correct in memory, and won't be overwritten until we've returned where the - // current frame's layout would no longer matter. Only adjust `current_depth` for a - // consistency check later. - cfa_state.current_depth += types::F64X2.bytes() as isize; - } - // Unlike GPRs before, we don't need to step back after reach restoration because FPR - // restoration is order-insensitive. Furthermore: we want GPR restoration to begin - // after FPR restoration, so that stack adjustments occur after we're done relying on - // StackSlot validity. - - pos.func.locations[value] = ir::ValueLoc::Reg(reg); - restored_fpr_values.push(value); - } - } - if stack_size > 0 { pos.ins().adjust_sp_up_imm(Imm64::new(stack_size)); } @@ -1096,10 +903,6 @@ fn insert_common_epilogue( pos.func.dfg.append_inst_arg(inst, csr_ret); } - for value in restored_fpr_values.into_iter() { - pos.func.dfg.append_inst_arg(inst, value); - } - if let Some(ref mut frame_layout) = pos.func.frame_layout { let cfa_state = cfa_state .as_mut() @@ -1150,21 +953,19 @@ pub fn emit_unwind_info( isa: &dyn TargetIsa, kind: FrameUnwindKind, sink: &mut dyn FrameUnwindSink, -) -> CodegenResult<()> { +) { match kind { FrameUnwindKind::Fastcall => { // Assumption: RBP is being used as the frame pointer // In the future, Windows fastcall codegen should usually omit the frame pointer - if let Some(info) = UnwindInfo::try_from_func(func, isa, Some(RU::rbp.into()))? { + if let Some(info) = UnwindInfo::try_from_func(func, isa, Some(RU::rbp.into())) { info.emit(sink); } } FrameUnwindKind::Libunwind => { if func.frame_layout.is_some() { - emit_fde(func, isa, sink)?; + emit_fde(func, isa, sink); } } } - - Ok(()) } diff --git a/cranelift/codegen/src/isa/x86/fde.rs b/cranelift/codegen/src/isa/x86/fde.rs index 85ed5b5f2a75..9d6e38de31af 100644 --- a/cranelift/codegen/src/isa/x86/fde.rs +++ b/cranelift/codegen/src/isa/x86/fde.rs @@ -4,7 +4,6 @@ use crate::binemit::{FrameUnwindOffset, FrameUnwindSink, Reloc}; use crate::ir::{FrameLayoutChange, Function}; use crate::isa::fde::RegisterMappingError; use crate::isa::{CallConv, RegUnit, TargetIsa}; -use crate::result::CodegenResult; use alloc::vec::Vec; use core::convert::TryInto; use gimli::write::{ @@ -179,11 +178,7 @@ fn to_cfi( } /// Creates FDE structure from FrameLayout. -pub fn emit_fde( - func: &Function, - isa: &dyn TargetIsa, - sink: &mut dyn FrameUnwindSink, -) -> CodegenResult<()> { +pub fn emit_fde(func: &Function, isa: &dyn TargetIsa, sink: &mut dyn FrameUnwindSink) { assert!(isa.name() == "x86"); // Expecting function with System V prologue @@ -271,8 +266,6 @@ pub fn emit_fde( // Need 0 marker for GCC unwind to end FDE "list". sink.bytes(&[0, 0, 0, 0]); - - Ok(()) } #[cfg(test)] @@ -321,7 +314,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new()); - emit_fde(&context.func, &*isa, &mut sink).expect("can emit fde"); + emit_fde(&context.func, &*isa, &mut sink); assert_eq!( sink.0, @@ -383,7 +376,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new()); - emit_fde(&context.func, &*isa, &mut sink).expect("can emit fde"); + emit_fde(&context.func, &*isa, &mut sink); assert_eq!( sink.0, diff --git a/cranelift/codegen/src/isa/x86/mod.rs b/cranelift/codegen/src/isa/x86/mod.rs index 881f12bdb1b4..042874ea69e5 100644 --- a/cranelift/codegen/src/isa/x86/mod.rs +++ b/cranelift/codegen/src/isa/x86/mod.rs @@ -177,8 +177,8 @@ impl TargetIsa for Isa { func: &ir::Function, kind: FrameUnwindKind, sink: &mut dyn FrameUnwindSink, - ) -> CodegenResult<()> { - abi::emit_unwind_info(func, self, kind, sink) + ) { + abi::emit_unwind_info(func, self, kind, sink); } } diff --git a/cranelift/codegen/src/isa/x86/unwind.rs b/cranelift/codegen/src/isa/x86/unwind.rs index 707b9e6d2c2b..de653b89a68f 100644 --- a/cranelift/codegen/src/isa/x86/unwind.rs +++ b/cranelift/codegen/src/isa/x86/unwind.rs @@ -1,13 +1,11 @@ //! Unwind information for x64 Windows. -use super::registers::{FPR, GPR, RU}; +use super::registers::{GPR, RU}; use crate::binemit::FrameUnwindSink; -use crate::ir::{Function, InstructionData, Opcode, ValueLoc}; +use crate::ir::{Function, InstructionData, Opcode}; use crate::isa::{CallConv, RegUnit, TargetIsa}; -use crate::result::{CodegenError, CodegenResult}; use alloc::vec::Vec; use byteorder::{ByteOrder, LittleEndian}; -use log::warn; /// Maximum (inclusive) size of a "small" stack allocation const SMALL_ALLOC_MAX_SIZE: u32 = 128; @@ -37,34 +35,18 @@ fn write_u32(sink: &mut dyn FrameUnwindSink, v: u32) { /// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values. #[derive(Debug, PartialEq, Eq)] enum UnwindCode { - PushRegister { - offset: u8, - reg: RegUnit, - }, - SaveXmm { - offset: u8, - reg: RegUnit, - stack_offset: u32, - }, - StackAlloc { - offset: u8, - size: u32, - }, - SetFramePointer { - offset: u8, - sp_offset: u8, - }, + PushRegister { offset: u8, reg: RegUnit }, + StackAlloc { offset: u8, size: u32 }, + SetFramePointer { offset: u8, sp_offset: u8 }, } impl UnwindCode { fn emit(&self, sink: &mut dyn FrameUnwindSink) { enum UnwindOperation { - PushNonvolatileRegister = 0, - LargeStackAlloc = 1, - SmallStackAlloc = 2, - SetFramePointer = 3, - SaveXmm128 = 8, - SaveXmm128Far = 9, + PushNonvolatileRegister, + LargeStackAlloc, + SmallStackAlloc, + SetFramePointer, } match self { @@ -76,28 +58,6 @@ impl UnwindCode { | (UnwindOperation::PushNonvolatileRegister as u8), ); } - Self::SaveXmm { - offset, - reg, - stack_offset, - } => { - write_u8(sink, *offset); - let stack_offset = stack_offset / 16; - if stack_offset <= core::u16::MAX as u32 { - write_u8( - sink, - (FPR.index_of(*reg) << 4) as u8 | (UnwindOperation::SaveXmm128 as u8), - ); - write_u16::(sink, stack_offset as u16); - } else { - write_u8( - sink, - (FPR.index_of(*reg) << 4) as u8 | (UnwindOperation::SaveXmm128Far as u8), - ); - write_u16::(sink, stack_offset as u16); - write_u16::(sink, (stack_offset >> 16) as u16); - } - } Self::StackAlloc { offset, size } => { // Stack allocations on Windows must be a multiple of 8 and be at least 1 slot assert!(*size >= 8); @@ -138,13 +98,6 @@ impl UnwindCode { 3 } } - Self::SaveXmm { stack_offset, .. } => { - if *stack_offset <= core::u16::MAX as u32 { - 2 - } else { - 3 - } - } _ => 1, } } @@ -168,10 +121,10 @@ impl UnwindInfo { func: &Function, isa: &dyn TargetIsa, frame_register: Option, - ) -> CodegenResult> { + ) -> Option { // Only Windows fastcall is supported for unwind information if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() { - return Ok(None); + return None; } let prologue_end = func.prologue_end.unwrap(); @@ -183,27 +136,10 @@ impl UnwindInfo { let mut unwind_codes = Vec::new(); let mut found_end = false; - // Have we saved at least one FPR? if so, we might have to check additional constraints. - let mut saved_fpr = false; - - // In addition to the min offset for a callee-save, we need to know the offset from the - // frame base to the stack pointer, so that we can record an unwind offset that spans only - // to the end of callee-save space. - let mut static_frame_allocation_size = 0u32; - - // For the time being, FPR preservation is split into a stack_addr and later store/load. - // Store the register used for stack store and ensure it is the same register with no - // intervening changes to the frame size. - let mut callee_save_region_reg = None; - // Also record the callee-save region's offset from RSP, because it must be added to FPR - // save offsets to compute an offset from the frame base. - let mut callee_save_offset = None; - for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) { // x64 ABI prologues cannot exceed 255 bytes in length if (offset + size) > 255 { - warn!("function prologues cannot exceed 255 bytes in size for Windows x64"); - return Err(CodegenError::CodeTooLarge); + panic!("function prologues cannot exceed 255 bytes in size for Windows x64"); } prologue_size += size; @@ -214,23 +150,18 @@ impl UnwindInfo { InstructionData::Unary { opcode, arg } => { match opcode { Opcode::X86Push => { - static_frame_allocation_size += 8; - unwind_codes.push(UnwindCode::PushRegister { offset: unwind_offset, reg: func.locations[arg].unwrap_reg(), }); } Opcode::AdjustSpDown => { - let stack_size = - stack_size.expect("expected a previous stack size instruction"); - static_frame_allocation_size += stack_size; - // This is used when calling a stack check function // We need to track the assignment to RAX which has the size of the stack unwind_codes.push(UnwindCode::StackAlloc { offset: unwind_offset, - size: stack_size, + size: stack_size + .expect("expected a previous stack size instruction"), }); } _ => {} @@ -239,10 +170,6 @@ impl UnwindInfo { InstructionData::CopySpecial { src, dst, .. } => { if let Some(frame_register) = frame_register { if src == (RU::rsp as RegUnit) && dst == frame_register { - // Constructing an rbp-based stack frame, so the static frame - // allocation restarts at 0 from here. - static_frame_allocation_size = 0; - unwind_codes.push(UnwindCode::SetFramePointer { offset: unwind_offset, sp_offset: 0, @@ -267,8 +194,6 @@ impl UnwindInfo { let imm: i64 = imm.into(); assert!(imm <= core::u32::MAX as i64); - static_frame_allocation_size += imm as u32; - unwind_codes.push(UnwindCode::StackAlloc { offset: unwind_offset, size: imm as u32, @@ -277,55 +202,6 @@ impl UnwindInfo { _ => {} } } - InstructionData::StackLoad { - opcode: Opcode::StackAddr, - stack_slot, - offset: _, - } => { - let result = func.dfg.inst_results(inst).get(0).unwrap(); - if let ValueLoc::Reg(frame_reg) = func.locations[*result] { - callee_save_region_reg = Some(frame_reg); - - // Figure out the offset in the call frame that `frame_reg` will have. - let frame_size = func - .stack_slots - .layout_info - .expect("func's stack slots have layout info if stack operations exist") - .frame_size; - // Because we're well after the prologue has been constructed, stack slots - // must have been laid out... - let slot_offset = func.stack_slots[stack_slot] - .offset - .expect("callee-save slot has an offset computed"); - let frame_offset = frame_size as i32 + slot_offset; - - callee_save_offset = Some(frame_offset as u32); - } - } - InstructionData::Store { - opcode: Opcode::Store, - args: [arg1, arg2], - flags: _flags, - offset, - } => { - if let (ValueLoc::Reg(ru), ValueLoc::Reg(base_ru)) = - (func.locations[arg1], func.locations[arg2]) - { - if Some(base_ru) == callee_save_region_reg { - let offset_int: i32 = offset.into(); - assert!(offset_int >= 0, "negative fpr offset would store outside the stack frame, and is almost certainly an error"); - let offset_int: u32 = offset_int as u32 + callee_save_offset.expect("FPR presevation requires an FPR save region, which has some stack offset"); - if FPR.contains(ru) { - saved_fpr = true; - unwind_codes.push(UnwindCode::SaveXmm { - offset: unwind_offset, - reg: ru, - stack_offset: offset_int, - }); - } - } - } - } _ => {} }; @@ -336,46 +212,16 @@ impl UnwindInfo { } if !found_end { - return Ok(None); + return None; } - if saved_fpr { - if static_frame_allocation_size > 240 && saved_fpr { - warn!("stack frame is too large ({} bytes) to use with Windows x64 SEH when preserving FPRs. \ - This is a Cranelift implementation limit, see \ - https://github.com/bytecodealliance/wasmtime/issues/1475", - static_frame_allocation_size); - return Err(CodegenError::ImplLimitExceeded); - } - // Only test static frame size is 16-byte aligned when an FPR is saved to avoid - // panicking when alignment is elided because no FPRs are saved and no child calls are - // made. - assert!( - static_frame_allocation_size % 16 == 0, - "static frame allocation must be a multiple of 16" - ); - } - - // Hack to avoid panicking unnecessarily. Because Cranelift generates prologues with RBP at - // one end of the call frame, and RSP at the other, required offsets are arbitrarily large. - // Windows x64 SEH only allows this offset be up to 240 bytes, however, meaning large - // frames are inexpressible, and we cannot actually compile the function. In case there are - // no preserved FPRs, we can lie without error and claim the offset to RBP is 0 - nothing - // will actually check it. This, then, avoids panics when compiling functions with large - // call frames. - let reported_frame_offset = if saved_fpr { - (static_frame_allocation_size / 16) as u8 - } else { - 0 - }; - - Ok(Some(Self { + Some(Self { flags: 0, // this assumes cranelift functions have no SEH handlers prologue_size: prologue_size as u8, frame_register, - frame_register_offset: reported_frame_offset, + frame_register_offset: 0, unwind_codes, - })) + }) } pub fn size(&self) -> usize { @@ -474,10 +320,7 @@ mod tests { context.compile(&*isa).expect("expected compilation"); - assert_eq!( - UnwindInfo::try_from_func(&context.func, &*isa, None).expect("can emit unwind info"), - None - ); + assert_eq!(UnwindInfo::try_from_func(&context.func, &*isa, None), None); } #[test] @@ -494,7 +337,6 @@ mod tests { context.compile(&*isa).expect("expected compilation"); let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into())) - .expect("can emit unwind info") .expect("expected unwind info"); assert_eq!( @@ -559,7 +401,6 @@ mod tests { context.compile(&*isa).expect("expected compilation"); let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into())) - .expect("can emit unwind info") .expect("expected unwind info"); assert_eq!( @@ -624,7 +465,6 @@ mod tests { context.compile(&*isa).expect("expected compilation"); let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into())) - .expect("can emit unwind info") .expect("expected unwind info"); assert_eq!( diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif index 917d179ccf74..55a6c59bed2e 100644 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif @@ -32,31 +32,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64): } ; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; check that we preserve xmm6 and above if we're using them locally -function %float_callee_saves(f64, f64, f64, f64) windows_fastcall { -block0(v0: f64, v1: f64, v2: f64, v3: f64): -; explicitly use a callee-save register -[-, %xmm6] v4 = fadd v0, v1 -[-, %xmm7] v5 = fadd v0, v1 - return -} -; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7]) -> i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7] windows_fastcall { -; nextln: ss0 = explicit_slot 32, offset -80 -; nextln: ss1 = incoming_arg 16, offset -48 -; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64 [%xmm6], v9: f64 [%xmm7]): -; nextln: x86_push v6 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 64 -; nextln: v7 = stack_addr.i64 ss0 -; nextln: store notrap aligned v8, v7 -; nextln: store notrap aligned v9, v7+16 -; check: v10 = stack_addr.i64 ss0 -; nextln: v11 = load.f64 notrap aligned v10 -; nextln: v12 = load.f64 notrap aligned v10+16 -; nextln: adjust_sp_up_imm 64 -; nextln: v13 = x86_pop.i64 -; nextln: v13, v11, v12 - function %mixed_int_float(i64, f64, i64, f32) windows_fastcall { block0(v0: i64, v1: f64, v2: i64, v3: f32): return @@ -68,29 +43,3 @@ block0(v0: f32, v1: f64, v2: i64, v3: i64): return v1 } ; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall { - -function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall { - fn0 = %foo(i64, i64, i64, i64) -> i64 - fn1 = %foo2(i64, i64, i64, i64) -> i64 -block0(v0: i64): - v1 = load.i64 v0+0 - v2 = load.i64 v0+8 - v3 = load.i64 v0+16 - v4 = load.i64 v0+24 - v5 = load.i64 v0+32 - v6 = load.i64 v0+40 - v7 = load.i64 v0+48 - v8 = load.i64 v0+56 - v9 = load.i64 v0+64 - v10 = call fn0(v1, v2, v3, v4) - store.i64 v1, v0+8 - store.i64 v2, v0+16 - store.i64 v3, v0+24 - store.i64 v4, v0+32 - store.i64 v5, v0+40 - store.i64 v6, v0+48 - store.i64 v7, v0+56 - store.i64 v8, v0+64 - store.i64 v9, v0+72 - return v10 -} diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif index 1119c550128f..b146f0ac7696 100644 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif @@ -118,53 +118,6 @@ block0: ; nextln: ], ; nextln: } -function %fpr_with_function_call(i64, i64) windows_fastcall { - fn0 = %foo(f64, f64, i64, i64, i64) windows_fastcall; -block0(v0: i64, v1: i64): - v2 = load.f64 v0+0 - v3 = load.f64 v0+8 - v4 = load.i64 v0+16 - v15 = load.f64 v0+104 - v16 = load.f64 v0+112 - v17 = load.f64 v0+120 - v18 = load.f64 v0+128 - v19 = load.f64 v0+136 - v20 = load.f64 v0+144 - v21 = load.f64 v0+152 - v22 = load.f64 v0+160 - v23 = load.f64 v0+168 - call fn0(v2, v3, v4, v1, v1) - store.f64 v15, v1+104 - store.f64 v16, v1+112 - store.f64 v17, v1+120 - store.f64 v18, v1+128 - store.f64 v19, v1+136 - store.f64 v20, v1+144 - store.f64 v21, v1+152 - store.f64 v22, v1+160 - store.f64 v23, v1+168 - return -} -; Only check the first unwind code here because this test specifically looks to -; see that in a function that is not a leaf, a callee-save FPR is stored in an -; area that does not overlap either the callee's shadow space or stack argument -; space. -; -; sameln: UnwindInfo { -; nextln: version: 1, -; nextln: flags: 0, -; nextln: prologue_size: 26, -; nextln: unwind_code_count_raw: 7, -; nextln: frame_register: 5, -; nextln: frame_register_offset: 12, -; nextln: unwind_codes: [ -; nextln: UnwindCode { -; nextln: offset: 26, -; nextln: op: SaveXmm128, -; nextln: info: 15, -; nextln: value: U16( -; nextln: 3, - ; check a function that has CSRs function %lots_of_registers(i64, i64) windows_fastcall { block0(v0: i64, v1: i64): @@ -181,15 +134,6 @@ block0(v0: i64, v1: i64): v12 = load.i32 v0+80 v13 = load.i32 v0+88 v14 = load.i32 v0+96 - v15 = load.f64 v0+104 - v16 = load.f64 v0+112 - v17 = load.f64 v0+120 - v18 = load.f64 v0+128 - v19 = load.f64 v0+136 - v20 = load.f64 v0+144 - v21 = load.f64 v0+152 - v22 = load.f64 v0+160 - v23 = load.f64 v0+168 store.i32 v2, v1+0 store.i32 v3, v1+8 store.i32 v4, v1+16 @@ -203,53 +147,20 @@ block0(v0: i64, v1: i64): store.i32 v12, v1+80 store.i32 v13, v1+88 store.i32 v14, v1+96 - store.f64 v15, v1+104 - store.f64 v16, v1+112 - store.f64 v17, v1+120 - store.f64 v18, v1+128 - store.f64 v19, v1+136 - store.f64 v20, v1+144 - store.f64 v21, v1+152 - store.f64 v22, v1+160 - store.f64 v23, v1+168 return } ; sameln: UnwindInfo { ; nextln: version: 1, ; nextln: flags: 0, -; nextln: prologue_size: 44, -; nextln: unwind_code_count_raw: 16, +; nextln: prologue_size: 19, +; nextln: unwind_code_count_raw: 10, ; nextln: frame_register: 5, -; nextln: frame_register_offset: 10, +; nextln: frame_register_offset: 0, ; nextln: unwind_codes: [ ; nextln: UnwindCode { -; nextln: offset: 44, -; nextln: op: SaveXmm128, -; nextln: info: 8, -; nextln: value: U16( -; nextln: 2, -; nextln: ), -; nextln: }, -; nextln: UnwindCode { -; nextln: offset: 38, -; nextln: op: SaveXmm128, -; nextln: info: 7, -; nextln: value: U16( -; nextln: 1, -; nextln: ), -; nextln: }, -; nextln: UnwindCode { -; nextln: offset: 32, -; nextln: op: SaveXmm128, -; nextln: info: 6, -; nextln: value: U16( -; nextln: 0, -; nextln: ), -; nextln: }, -; nextln: UnwindCode { ; nextln: offset: 19, ; nextln: op: SmallStackAlloc, -; nextln: info: 12, +; nextln: info: 3, ; nextln: value: None, ; nextln: }, ; nextln: UnwindCode { diff --git a/cranelift/filetests/src/test_fde.rs b/cranelift/filetests/src/test_fde.rs index 5a9305479bba..3e3747fdde2c 100644 --- a/cranelift/filetests/src/test_fde.rs +++ b/cranelift/filetests/src/test_fde.rs @@ -64,9 +64,7 @@ impl SubTest for TestUnwind { } let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new()); - comp_ctx - .emit_unwind_info(isa, FrameUnwindKind::Libunwind, &mut sink) - .expect("can emit unwind info"); + comp_ctx.emit_unwind_info(isa, FrameUnwindKind::Libunwind, &mut sink); let mut text = String::new(); if sink.0.is_empty() { diff --git a/cranelift/filetests/src/test_unwind.rs b/cranelift/filetests/src/test_unwind.rs index bf51cb73db0f..3db1cbf8299e 100644 --- a/cranelift/filetests/src/test_unwind.rs +++ b/cranelift/filetests/src/test_unwind.rs @@ -59,9 +59,7 @@ impl SubTest for TestUnwind { } let mut sink = Sink(Vec::new()); - comp_ctx - .emit_unwind_info(isa, FrameUnwindKind::Fastcall, &mut sink) - .expect("can emit unwind info"); + comp_ctx.emit_unwind_info(isa, FrameUnwindKind::Fastcall, &mut sink); let mut text = String::new(); if sink.0.is_empty() { @@ -179,15 +177,15 @@ impl UnwindCode { #[derive(Debug)] enum UnwindOperation { - PushNonvolatileRegister = 0, - LargeStackAlloc = 1, - SmallStackAlloc = 2, - SetFramePointer = 3, - SaveNonvolatileRegister = 4, - SaveNonvolatileRegisterFar = 5, - SaveXmm128 = 8, - SaveXmm128Far = 9, - PushMachineFrame = 10, + PushNonvolatileRegister, + LargeStackAlloc, + SmallStackAlloc, + SetFramePointer, + SaveNonvolatileRegister, + SaveNonvolatileRegisterFar, + SaveXmm128, + SaveXmm128Far, + PushMachineFrame, } impl From for UnwindOperation { @@ -200,9 +198,9 @@ impl From for UnwindOperation { 3 => Self::SetFramePointer, 4 => Self::SaveNonvolatileRegister, 5 => Self::SaveNonvolatileRegisterFar, - 8 => Self::SaveXmm128, - 9 => Self::SaveXmm128Far, - 10 => Self::PushMachineFrame, + 6 => Self::SaveXmm128, + 7 => Self::SaveXmm128Far, + 8 => Self::PushMachineFrame, _ => panic!("unsupported unwind operation"), } } diff --git a/crates/api/src/trampoline/func.rs b/crates/api/src/trampoline/func.rs index d5d0e4dd262a..b9e1af961743 100644 --- a/crates/api/src/trampoline/func.rs +++ b/crates/api/src/trampoline/func.rs @@ -188,9 +188,7 @@ fn make_trampoline( .map_err(|error| pretty_error(&context.func, Some(isa), error)) .expect("compile_and_emit"); - let unwind_info = CompiledFunctionUnwindInfo::new(isa, &context) - .map_err(|error| pretty_error(&context.func, Some(isa), error)) - .expect("emit unwind info"); + let unwind_info = CompiledFunctionUnwindInfo::new(isa, &context); code_memory .allocate_for_function(&CompiledFunction { diff --git a/crates/environ/src/compilation.rs b/crates/environ/src/compilation.rs index 82f8786d1792..8c7bde36215d 100644 --- a/crates/environ/src/compilation.rs +++ b/crates/environ/src/compilation.rs @@ -4,7 +4,7 @@ use crate::cache::ModuleCacheDataTupleType; use crate::CacheConfig; use crate::ModuleTranslation; -use cranelift_codegen::{binemit, ir, isa, CodegenResult, Context}; +use cranelift_codegen::{binemit, ir, isa, Context}; use cranelift_entity::PrimaryMap; use cranelift_wasm::{DefinedFuncIndex, FuncIndex, WasmError}; use serde::{Deserialize, Serialize}; @@ -36,7 +36,7 @@ pub enum CompiledFunctionUnwindInfo { impl CompiledFunctionUnwindInfo { /// Constructs unwind info object. - pub fn new(isa: &dyn isa::TargetIsa, context: &Context) -> CodegenResult { + pub fn new(isa: &dyn isa::TargetIsa, context: &Context) -> Self { use cranelift_codegen::binemit::{ FrameUnwindKind, FrameUnwindOffset, FrameUnwindSink, Reloc, }; @@ -75,26 +75,24 @@ impl CompiledFunctionUnwindInfo { CallConv::SystemV | CallConv::Fast | CallConv::Cold => FrameUnwindKind::Libunwind, CallConv::WindowsFastcall => FrameUnwindKind::Fastcall, _ => { - return Ok(CompiledFunctionUnwindInfo::None); + return CompiledFunctionUnwindInfo::None; } }; let mut sink = Sink(Vec::new(), 0, Vec::new()); - context.emit_unwind_info(isa, kind, &mut sink)?; + context.emit_unwind_info(isa, kind, &mut sink); let Sink(data, offset, relocs) = sink; if data.is_empty() { - return Ok(CompiledFunctionUnwindInfo::None); + return CompiledFunctionUnwindInfo::None; } - let info = match kind { + match kind { FrameUnwindKind::Fastcall => CompiledFunctionUnwindInfo::Windows(data), FrameUnwindKind::Libunwind => { CompiledFunctionUnwindInfo::FrameLayout(data, offset, relocs) } - }; - - Ok(info) + } } /// Retuns true is no unwind info data. diff --git a/crates/environ/src/cranelift.rs b/crates/environ/src/cranelift.rs index 424eb779ea62..41cad1797e27 100644 --- a/crates/environ/src/cranelift.rs +++ b/crates/environ/src/cranelift.rs @@ -264,9 +264,7 @@ fn compile(env: CompileEnv<'_>) -> Result