diff --git a/Cargo.lock b/Cargo.lock index f69f4b57eb14..dd1158a161aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2353,8 +2353,6 @@ dependencies = [ [[package]] name = "regalloc2" version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d37148700dbb38f994cd99a1431613057f37ed934d7e4d799b7ab758c482461" dependencies = [ "fxhash", "log", diff --git a/Cargo.toml b/Cargo.toml index 8315036a9a53..844e1eae4187 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -138,3 +138,6 @@ harness = false [[bench]] name = "call" harness = false + +[profile.release] +debug = true diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 6db3e5eb081d..f12e0dac13c5 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -23,7 +23,7 @@ serde = { version = "1.0.94", features = ["derive"], optional = true } bincode = { version = "1.2.1", optional = true } gimli = { version = "0.26.0", default-features = false, features = ["write"], optional = true } smallvec = { version = "1.6.1" } -regalloc2 = { version = "0.2.2", features = ["checker"] } +regalloc2 = { version = "0.2.2", path = "../../../regalloc2/", features = ["checker", "trace-log"] } souper-ir = { version = "2.1.0", optional = true } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. 
# Please don't add any unless they are essential to the task of creating binary diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index dde4e1b3b8d8..9f79f50ea47d 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -14,7 +14,7 @@ use crate::settings; use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; -use regalloc2::VReg; +use regalloc2::{PRegMask, VReg}; use smallvec::{smallvec, SmallVec}; // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because @@ -1062,8 +1062,9 @@ impl ABIMachineSpec for AArch64MachineDeps { fn gen_call( dest: &CallDest, - uses: Vec, - defs: Vec>, + uses: SmallVec<[Reg; 8]>, + defs: SmallVec<[Writable; 8]>, + clobbers: PRegMask, opcode: ir::Opcode, tmp: Writable, callee_conv: isa::CallConv, @@ -1076,6 +1077,7 @@ impl ABIMachineSpec for AArch64MachineDeps { dest: name.clone(), uses, defs, + clobbers, opcode, caller_callconv: caller_conv, callee_callconv: callee_conv, @@ -1092,6 +1094,7 @@ impl ABIMachineSpec for AArch64MachineDeps { rn: tmp.to_reg(), uses, defs, + clobbers, opcode, caller_callconv: caller_conv, callee_callconv: callee_conv, @@ -1103,6 +1106,7 @@ impl ABIMachineSpec for AArch64MachineDeps { rn: *reg, uses, defs, + clobbers, opcode, caller_callconv: caller_conv, callee_callconv: callee_conv, @@ -1131,8 +1135,9 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::Call { info: Box::new(CallInfo { dest: ExternalName::LibCall(LibCall::Memcpy), - uses: vec![arg0.to_reg(), arg1.to_reg(), arg2.to_reg()], - defs: Self::get_regs_clobbered_by_call(call_conv), + uses: smallvec![arg0.to_reg(), arg1.to_reg(), arg2.to_reg()], + defs: smallvec![], + clobbers: Self::get_regs_clobbered_by_call(call_conv), opcode: Opcode::Call, caller_callconv: call_conv, callee_callconv: call_conv, @@ -1159,21 +1164,17 @@ impl ABIMachineSpec for AArch64MachineDeps { 
s.nominal_sp_to_fp } - fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec> { - let mut caller_saved = Vec::new(); - for i in 0..29 { - let x = writable_xreg(i); - if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg().unwrap()) { - caller_saved.push(x); - } - } - for i in 0..32 { - let v = writable_vreg(i); - if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg().unwrap()) { - caller_saved.push(v); - } + fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegMask { + let mut clobbers = DEFAULT_AAPCS_CLOBBERS; + + if call_conv_of_callee.extends_baldrdash() { + clobbers.add(xreg_preg(16)); + clobbers.add(xreg_preg(17)); + clobbers.add(xreg_preg(18)); + clobbers.add(vreg_preg(31)); } - caller_saved + + clobbers } fn get_ext_mode( @@ -1290,47 +1291,53 @@ fn get_regs_restored_in_epilogue( (int_saves, vec_saves) } -fn is_reg_clobbered_by_call(call_conv_of_callee: isa::CallConv, r: RealReg) -> bool { - if call_conv_of_callee.extends_baldrdash() { - match r.class() { - RegClass::Int => { - let enc = r.hw_enc() & 31; - if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc as usize] { - return true; - } - // Otherwise, fall through to preserve native's ABI caller-saved. - } - RegClass::Float => { - let enc = r.hw_enc() & 31; - if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc as usize] { - return true; - } - // Otherwise, fall through to preserve native's ABI caller-saved. - } - }; - } - - match r.class() { - RegClass::Int => { - // x0 - x17 inclusive are caller-saves. - r.hw_enc() <= 17 - } - RegClass::Float => { - // v0 - v7 inclusive and v16 - v31 inclusive are caller-saves. The - // upper 64 bits of v8 - v15 inclusive are also caller-saves. - // However, because we cannot currently represent partial registers - // to regalloc.rs, we indicate here that every vector register is - // caller-save. 
Because this function is used at *callsites*, - // approximating in this direction (save more than necessary) is - // conservative and thus safe. - // - // Note that we set the 'not included in clobber set' flag in the - // regalloc.rs API when a call instruction's callee has the same ABI - // as the caller (the current function body); this is safe (anything - // clobbered by callee can be clobbered by caller as well) and - // avoids unnecessary saves of v8-v15 in the prologue even though we - // include them as defs here. - true - } - } +const fn default_aapcs_clobbers() -> PRegMask { + PRegMask::empty() + // x0 - x17 inclusive are caller-saves. + .with(xreg_preg(0)) + .with(xreg_preg(1)) + .with(xreg_preg(2)) + .with(xreg_preg(3)) + .with(xreg_preg(4)) + .with(xreg_preg(5)) + .with(xreg_preg(6)) + .with(xreg_preg(7)) + .with(xreg_preg(8)) + .with(xreg_preg(9)) + .with(xreg_preg(10)) + .with(xreg_preg(11)) + .with(xreg_preg(12)) + .with(xreg_preg(13)) + .with(xreg_preg(14)) + .with(xreg_preg(15)) + .with(xreg_preg(16)) + .with(xreg_preg(17)) + // v0 - v7 inclusive are caller-saves. + .with(vreg_preg(0)) + .with(vreg_preg(1)) + .with(vreg_preg(2)) + .with(vreg_preg(3)) + .with(vreg_preg(4)) + .with(vreg_preg(5)) + .with(vreg_preg(6)) + .with(vreg_preg(7)) + // v16 - v31 inclusive are caller-saves. 
+ .with(vreg_preg(16)) + .with(vreg_preg(17)) + .with(vreg_preg(18)) + .with(vreg_preg(19)) + .with(vreg_preg(20)) + .with(vreg_preg(21)) + .with(vreg_preg(22)) + .with(vreg_preg(23)) + .with(vreg_preg(24)) + .with(vreg_preg(25)) + .with(vreg_preg(26)) + .with(vreg_preg(27)) + .with(vreg_preg(28)) + .with(vreg_preg(29)) + .with(vreg_preg(30)) + .with(vreg_preg(31)) } + +const DEFAULT_AAPCS_CLOBBERS: PRegMask = default_aapcs_clobbers(); diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index c475dd261ba9..612cd3aaca13 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -5278,8 +5278,9 @@ fn test_aarch64_binemit() { Inst::Call { info: Box::new(CallInfo { dest: ExternalName::testcase("test0"), - uses: Vec::new(), - defs: Vec::new(), + uses: smallvec![], + defs: smallvec![], + clobbers: PRegMask::empty(), opcode: Opcode::Call, caller_callconv: CallConv::SystemV, callee_callconv: CallConv::SystemV, @@ -5293,8 +5294,9 @@ fn test_aarch64_binemit() { Inst::CallInd { info: Box::new(CallIndInfo { rn: xreg(10), - uses: Vec::new(), - defs: Vec::new(), + uses: smallvec![], + defs: smallvec![], + clobbers: PRegMask::empty(), opcode: Opcode::CallIndirect, caller_callconv: CallConv::SystemV, callee_callconv: CallConv::SystemV, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index e43e9ad7ee95..a54482b7c3b5 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -13,7 +13,7 @@ use crate::machinst::{PrettyPrint, Reg, RegClass, Writable}; use alloc::vec::Vec; use core::convert::TryFrom; -use regalloc2::VReg; +use regalloc2::{PRegMask, VReg}; use smallvec::{smallvec, SmallVec}; use std::string::{String, ToString}; @@ -70,8 +70,9 @@ impl BitOp { #[derive(Clone, Debug)] pub struct CallInfo { pub dest: ExternalName, - pub uses: 
Vec, - pub defs: Vec>, + pub uses: SmallVec<[Reg; 8]>, + pub defs: SmallVec<[Writable; 8]>, + pub clobbers: PRegMask, pub opcode: Opcode, pub caller_callconv: CallConv, pub callee_callconv: CallConv, @@ -82,8 +83,9 @@ pub struct CallInfo { #[derive(Clone, Debug)] pub struct CallIndInfo { pub rn: Reg, - pub uses: Vec, - pub defs: Vec>, + pub uses: SmallVec<[Reg; 8]>, + pub defs: SmallVec<[Writable; 8]>, + pub clobbers: PRegMask, pub opcode: Opcode, pub caller_callconv: CallConv, pub callee_callconv: CallConv, @@ -983,11 +985,13 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan &Inst::Call { ref info, .. } => { collector.reg_uses(&info.uses[..]); collector.reg_defs(&info.defs[..]); + collector.reg_clobbers(info.clobbers); } &Inst::CallInd { ref info, .. } => { collector.reg_use(info.rn); collector.reg_uses(&info.uses[..]); collector.reg_defs(&info.defs[..]); + collector.reg_clobbers(info.clobbers); } &Inst::CondBr { ref kind, .. } => match kind { CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { @@ -1028,9 +1032,9 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan &Inst::VirtualSPOffsetAdj { .. } => {} &Inst::ElfTlsGetAddr { .. } => { - for reg in AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV) { - collector.reg_def(reg); - } + collector.reg_clobbers(AArch64MachineDeps::get_regs_clobbered_by_call( + CallConv::SystemV, + )); } &Inst::Unwind { .. } => {} &Inst::EmitIsland { .. } => {} diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index 9e486e332a28..2cbfe5f3326a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -24,9 +24,13 @@ pub const PINNED_REG: u8 = 21; /// Get a reference to an X-register (integer register). Do not use /// this for xsp / xzr; we have two special registers for those. 
pub fn xreg(num: u8) -> Reg { + Reg::from(xreg_preg(num)) +} + +/// Get the given X-register as a PReg. +pub(crate) const fn xreg_preg(num: u8) -> PReg { assert!(num < 31); - let preg = PReg::new(num as usize, RegClass::Int); - Reg::from(VReg::new(preg.index(), RegClass::Int)) + PReg::new(num as usize, RegClass::Int) } /// Get a writable reference to an X-register. @@ -36,9 +40,13 @@ pub fn writable_xreg(num: u8) -> Writable { /// Get a reference to a V-register (vector/FP register). pub fn vreg(num: u8) -> Reg { + Reg::from(vreg_preg(num)) +} + +/// Get the given V-register as a PReg. +pub(crate) const fn vreg_preg(num: u8) -> PReg { assert!(num < 32); - let preg = PReg::new(num as usize, RegClass::Float); - Reg::from(VReg::new(preg.index(), RegClass::Float)) + PReg::new(num as usize, RegClass::Float) } /// Get a writable reference to a V-register. diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 55396731e7dc..0366c89a6359 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -71,8 +71,7 @@ use crate::settings; use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; -use regalloc2::PReg; -use regalloc2::VReg; +use regalloc2::{PReg, PRegMask, VReg}; use smallvec::{smallvec, SmallVec}; use std::convert::TryFrom; @@ -618,8 +617,9 @@ impl ABIMachineSpec for S390xMachineDeps { fn gen_call( dest: &CallDest, - uses: Vec, - defs: Vec>, + uses: SmallVec<[Reg; 8]>, + defs: SmallVec<[Writable; 8]>, + clobbers: PRegMask, opcode: ir::Opcode, tmp: Writable, _callee_conv: isa::CallConv, @@ -633,6 +633,7 @@ impl ABIMachineSpec for S390xMachineDeps { dest: name.clone(), uses, defs, + clobbers, opcode, }), }), @@ -648,6 +649,7 @@ impl ABIMachineSpec for S390xMachineDeps { rn: tmp.to_reg(), uses, defs, + clobbers, opcode, }), }); @@ -658,6 +660,7 @@ impl ABIMachineSpec for S390xMachineDeps { rn: *reg, uses, defs, + clobbers, opcode, }), }), @@ -693,21 +696,8 
@@ impl ABIMachineSpec for S390xMachineDeps { s.initial_sp_offset } - fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec> { - let mut caller_saved = Vec::new(); - for i in 0..15 { - let x = writable_gpr(i); - if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg().unwrap()) { - caller_saved.push(x); - } - } - for i in 0..15 { - let v = writable_fpr(i); - if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg().unwrap()) { - caller_saved.push(v); - } - } - caller_saved + fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegMask { + CLOBBERS } fn get_ext_mode( @@ -783,15 +773,22 @@ fn get_regs_saved_in_prologue( (int_saves, fpr_saves) } -fn is_reg_clobbered_by_call(_call_conv: isa::CallConv, r: RealReg) -> bool { - match r.class() { - RegClass::Int => { - // r0 - r5 inclusive are caller-saves. - r.hw_enc() <= 5 - } - RegClass::Float => { - // f0 - f7 inclusive are caller-saves. - r.hw_enc() <= 7 - } - } +const fn clobbers() -> PRegMask { + PRegMask::empty() + .with(gpr_preg(0)) + .with(gpr_preg(1)) + .with(gpr_preg(2)) + .with(gpr_preg(3)) + .with(gpr_preg(4)) + .with(gpr_preg(5)) + .with(fpr_preg(0)) + .with(fpr_preg(1)) + .with(fpr_preg(2)) + .with(fpr_preg(3)) + .with(fpr_preg(4)) + .with(fpr_preg(5)) + .with(fpr_preg(6)) + .with(fpr_preg(7)) } + +const CLOBBERS: PRegMask = clobbers(); diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index a3fc7cc319e2..3ab551db9405 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -7,7 +7,7 @@ use crate::{settings, CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use core::convert::TryFrom; -use regalloc2::VReg; +use regalloc2::{PRegMask, VReg}; use smallvec::{smallvec, SmallVec}; use std::string::{String, ToString}; pub mod regs; @@ -36,8 +36,9 @@ pub use crate::isa::s390x::lower::isle::generated_code::{ 
#[derive(Clone, Debug)] pub struct CallInfo { pub dest: ExternalName, - pub uses: Vec, - pub defs: Vec>, + pub uses: SmallVec<[Reg; 8]>, + pub defs: SmallVec<[Writable; 8]>, + pub clobbers: PRegMask, pub opcode: Opcode, } @@ -46,8 +47,9 @@ pub struct CallInfo { #[derive(Clone, Debug)] pub struct CallIndInfo { pub rn: Reg, - pub uses: Vec, - pub defs: Vec>, + pub uses: SmallVec<[Reg; 8]>, + pub defs: SmallVec<[Writable; 8]>, + pub clobbers: PRegMask, pub opcode: Opcode, } @@ -660,12 +662,14 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_def(link); collector.reg_uses(&*info.uses); collector.reg_defs(&*info.defs); + collector.reg_clobbers(info.clobbers); } &Inst::CallInd { link, ref info } => { collector.reg_def(link); collector.reg_use(info.rn); collector.reg_uses(&*info.uses); collector.reg_defs(&*info.defs); + collector.reg_clobbers(info.clobbers); } &Inst::Ret { link, ref rets } => { collector.reg_use(link); diff --git a/cranelift/codegen/src/isa/s390x/inst/regs.rs b/cranelift/codegen/src/isa/s390x/inst/regs.rs index 2782a3d1ff4b..a3af1b5568b9 100644 --- a/cranelift/codegen/src/isa/s390x/inst/regs.rs +++ b/cranelift/codegen/src/isa/s390x/inst/regs.rs @@ -13,11 +13,15 @@ use crate::settings; /// Get a reference to a GPR (integer register). pub fn gpr(num: u8) -> Reg { - assert!(num < 16); - let preg = PReg::new(num as usize, RegClass::Int); + let preg = gpr_preg(num); Reg::from(VReg::new(preg.index(), RegClass::Int)) } +pub(crate) const fn gpr_preg(num: u8) -> PReg { + assert!(num < 16); + PReg::new(num as usize, RegClass::Int) +} + /// Get a writable reference to a GPR. pub fn writable_gpr(num: u8) -> Writable { Writable::from_reg(gpr(num)) @@ -25,14 +29,13 @@ pub fn writable_gpr(num: u8) -> Writable { /// Get a reference to a FPR (floating-point register). 
pub fn fpr(num: u8) -> Reg { - assert!(num < 16); - let preg = PReg::new(num as usize, RegClass::Float); + let preg = fpr_preg(num); Reg::from(VReg::new(preg.index(), RegClass::Float)) } -/// Get a writable reference to a V-register. -pub fn writable_fpr(num: u8) -> Writable { - Writable::from_reg(fpr(num)) +pub(crate) const fn fpr_preg(num: u8) -> PReg { + assert!(num < 16); + PReg::new(num as usize, RegClass::Float) } /// Get a reference to the stack-pointer register. diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 4fe588d17df3..888cd531b514 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -11,7 +11,7 @@ use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use args::*; -use regalloc2::VReg; +use regalloc2::{PRegMask, VReg}; use smallvec::{smallvec, SmallVec}; use std::convert::TryFrom; @@ -488,9 +488,12 @@ impl ABIMachineSpec for X64ABIMachineSpec { )); insts.push(Inst::CallKnown { dest: ExternalName::LibCall(LibCall::Probestack), - uses: vec![regs::rax()], - defs: vec![], - opcode: Opcode::Call, + info: Box::new(CallInfo { + uses: smallvec![regs::rax()], + defs: smallvec![], + clobbers: PRegMask::empty(), + opcode: Opcode::Call, + }), }); insts } @@ -633,8 +636,9 @@ impl ABIMachineSpec for X64ABIMachineSpec { /// Generate a call instruction/sequence. 
fn gen_call( dest: &CallDest, - uses: Vec, - defs: Vec>, + uses: SmallVec<[Reg; 8]>, + defs: SmallVec<[Writable; 8]>, + clobbers: PRegMask, opcode: ir::Opcode, tmp: Writable, _callee_conv: isa::CallConv, @@ -643,7 +647,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { let mut insts = SmallVec::new(); match dest { &CallDest::ExtName(ref name, RelocDistance::Near) => { - insts.push(Inst::call_known(name.clone(), uses, defs, opcode)); + insts.push(Inst::call_known(name.clone(), uses, defs, clobbers, opcode)); } &CallDest::ExtName(ref name, RelocDistance::Far) => { insts.push(Inst::LoadExtName { @@ -655,11 +659,18 @@ impl ABIMachineSpec for X64ABIMachineSpec { RegMem::reg(tmp.to_reg()), uses, defs, + clobbers, opcode, )); } &CallDest::Reg(reg) => { - insts.push(Inst::call_unknown(RegMem::reg(reg), uses, defs, opcode)); + insts.push(Inst::call_unknown( + RegMem::reg(reg), + uses, + defs, + clobbers, + opcode, + )); } } insts @@ -703,8 +714,9 @@ impl ABIMachineSpec for X64ABIMachineSpec { }); insts.push(Inst::call_unknown( RegMem::reg(memcpy_addr), - /* uses = */ vec![arg0, arg1, arg2], - /* defs = */ Self::get_regs_clobbered_by_call(call_conv), + /* uses = */ smallvec![arg0, arg1, arg2], + /* defs = */ smallvec![], + /* clobbers = */ Self::get_regs_clobbered_by_call(call_conv), Opcode::Call, )); insts @@ -726,51 +738,21 @@ impl ABIMachineSpec for X64ABIMachineSpec { s.nominal_sp_to_fp } - fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec> { - let mut caller_saved = vec![ - // intersection of Systemv and FastCall calling conventions: - // - GPR: all except RDI, RSI, RBX, RBP, R12 to R15. - // SysV adds RDI, RSI (FastCall makes these callee-saved). - Writable::from_reg(regs::rax()), - Writable::from_reg(regs::rcx()), - Writable::from_reg(regs::rdx()), - Writable::from_reg(regs::r8()), - Writable::from_reg(regs::r9()), - Writable::from_reg(regs::r10()), - Writable::from_reg(regs::r11()), - // - XMM: XMM0-5. SysV adds the rest (XMM6-XMM15). 
- Writable::from_reg(regs::xmm0()), - Writable::from_reg(regs::xmm1()), - Writable::from_reg(regs::xmm2()), - Writable::from_reg(regs::xmm3()), - Writable::from_reg(regs::xmm4()), - Writable::from_reg(regs::xmm5()), - ]; - - if !call_conv_of_callee.extends_windows_fastcall() { - caller_saved.push(Writable::from_reg(regs::rsi())); - caller_saved.push(Writable::from_reg(regs::rdi())); - caller_saved.push(Writable::from_reg(regs::xmm6())); - caller_saved.push(Writable::from_reg(regs::xmm7())); - caller_saved.push(Writable::from_reg(regs::xmm8())); - caller_saved.push(Writable::from_reg(regs::xmm9())); - caller_saved.push(Writable::from_reg(regs::xmm10())); - caller_saved.push(Writable::from_reg(regs::xmm11())); - caller_saved.push(Writable::from_reg(regs::xmm12())); - caller_saved.push(Writable::from_reg(regs::xmm13())); - caller_saved.push(Writable::from_reg(regs::xmm14())); - caller_saved.push(Writable::from_reg(regs::xmm15())); - } + fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegMask { + let mut clobbers = if call_conv_of_callee.extends_windows_fastcall() { + WINDOWS_CLOBBERS + } else { + SYSV_CLOBBERS + }; if call_conv_of_callee.extends_baldrdash() { - caller_saved.push(Writable::from_reg(regs::r12())); - caller_saved.push(Writable::from_reg(regs::r13())); - // Not r14; implicitly preserved in the entry. 
- caller_saved.push(Writable::from_reg(regs::r15())); - caller_saved.push(Writable::from_reg(regs::rbx())); + clobbers.add(regs::gpr_preg(regs::ENC_R12)); + clobbers.add(regs::gpr_preg(regs::ENC_R13)); + clobbers.add(regs::gpr_preg(regs::ENC_R15)); + clobbers.add(regs::gpr_preg(regs::ENC_RBX)); } - caller_saved + clobbers } fn get_ext_mode( @@ -1032,3 +1014,52 @@ fn compute_clobber_size(clobbers: &[Writable]) -> u32 { } align_to(clobbered_size, 16) } + +const WINDOWS_CLOBBERS: PRegMask = windows_clobbers(); +const SYSV_CLOBBERS: PRegMask = sysv_clobbers(); + +const fn windows_clobbers() -> PRegMask { + PRegMask::empty() + .with(regs::gpr_preg(regs::ENC_RAX)) + .with(regs::gpr_preg(regs::ENC_RCX)) + .with(regs::gpr_preg(regs::ENC_RDX)) + .with(regs::gpr_preg(regs::ENC_R8)) + .with(regs::gpr_preg(regs::ENC_R9)) + .with(regs::gpr_preg(regs::ENC_R10)) + .with(regs::gpr_preg(regs::ENC_R11)) + .with(regs::fpr_preg(0)) + .with(regs::fpr_preg(1)) + .with(regs::fpr_preg(2)) + .with(regs::fpr_preg(3)) + .with(regs::fpr_preg(4)) + .with(regs::fpr_preg(5)) +} + +const fn sysv_clobbers() -> PRegMask { + PRegMask::empty() + .with(regs::gpr_preg(regs::ENC_RAX)) + .with(regs::gpr_preg(regs::ENC_RCX)) + .with(regs::gpr_preg(regs::ENC_RDX)) + .with(regs::gpr_preg(regs::ENC_RSI)) + .with(regs::gpr_preg(regs::ENC_RDI)) + .with(regs::gpr_preg(regs::ENC_R8)) + .with(regs::gpr_preg(regs::ENC_R9)) + .with(regs::gpr_preg(regs::ENC_R10)) + .with(regs::gpr_preg(regs::ENC_R11)) + .with(regs::fpr_preg(0)) + .with(regs::fpr_preg(1)) + .with(regs::fpr_preg(2)) + .with(regs::fpr_preg(3)) + .with(regs::fpr_preg(4)) + .with(regs::fpr_preg(5)) + .with(regs::fpr_preg(6)) + .with(regs::fpr_preg(7)) + .with(regs::fpr_preg(8)) + .with(regs::fpr_preg(9)) + .with(regs::fpr_preg(10)) + .with(regs::fpr_preg(11)) + .with(regs::fpr_preg(12)) + .with(regs::fpr_preg(13)) + .with(regs::fpr_preg(14)) + .with(regs::fpr_preg(15)) +} diff --git a/cranelift/codegen/src/isa/x64/inst.isle 
b/cranelift/codegen/src/isa/x64/inst.isle index a3c06d26ad37..a6bba2fc6182 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -315,15 +315,11 @@ ;; Direct call: call simm32. (CallKnown (dest ExternalName) - (uses VecReg) - (defs VecWritableReg) - (opcode Opcode)) + (info BoxCallInfo)) ;; Indirect call: callq (reg mem) (CallUnknown (dest RegMem) - (uses VecReg) - (defs VecWritableReg) - (opcode Opcode)) + (info BoxCallInfo)) ;; Return. (Ret (rets VecReg)) @@ -502,6 +498,8 @@ LFence SFence)) +(type BoxCallInfo extern (enum)) + ;; Get the `OperandSize` for a given `Type`, rounding smaller types up to 32 bits. (decl operand_size_of_type_32_64 (Type) OperandSize) (extern constructor operand_size_of_type_32_64 operand_size_of_type_32_64) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 0192945c478a..77c5b025927f 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1195,7 +1195,11 @@ pub(crate) fn emit( sink.put1(0x58 + (enc_dst & 7)); } - Inst::CallKnown { dest, opcode, .. } => { + Inst::CallKnown { + dest, + info: call_info, + .. + } => { if info.flags.enable_probestack() { sink.add_trap(TrapCode::StackOverflow); } @@ -1207,12 +1211,16 @@ pub(crate) fn emit( // beginning of the immediate field. emit_reloc(sink, Reloc::X86CallPCRel4, &dest, -4); sink.put4(0); - if opcode.is_call() { - sink.add_call_site(*opcode); + if call_info.opcode.is_call() { + sink.add_call_site(call_info.opcode); } } - Inst::CallUnknown { dest, opcode, .. } => { + Inst::CallUnknown { + dest, + info: call_info, + .. 
+ } => { let dest = dest.with_allocs(allocs); if info.flags.enable_probestack() { @@ -1251,8 +1259,8 @@ pub(crate) fn emit( if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s); } - if opcode.is_call() { - sink.add_call_site(*opcode); + if call_info.opcode.is_call() { + sink.add_call_site(call_info.opcode); } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index d1e2929c1d7e..a575757f0bdd 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3572,8 +3572,9 @@ fn test_x64_emit() { namespace: 0, index: 0, }, - Vec::new(), - Vec::new(), + smallvec![], + smallvec![], + PRegMask::default(), Opcode::Call, ), "E800000000", @@ -3583,7 +3584,13 @@ fn test_x64_emit() { // ======================================================== // CallUnknown fn call_unknown(rm: RegMem) -> Inst { - Inst::call_unknown(rm, Vec::new(), Vec::new(), Opcode::CallIndirect) + Inst::call_unknown( + rm, + smallvec![], + smallvec![], + PRegMask::default(), + Opcode::CallIndirect, + ) } insns.push((call_unknown(RegMem::reg(rbp)), "FFD5", "call *%rbp")); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index c022d16b2dea..e95f60d1ab60 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -8,8 +8,9 @@ use crate::isa::x64::settings as x64_settings; use crate::isa::CallConv; use crate::machinst::*; use crate::{settings, CodegenError, CodegenResult}; +use alloc::boxed::Box; use alloc::vec::Vec; -use regalloc2::{Allocation, VReg}; +use regalloc2::{Allocation, PRegMask, VReg}; use smallvec::{smallvec, SmallVec}; use std::fmt; use std::string::{String, ToString}; @@ -29,6 +30,19 @@ use args::*; // `Inst` is defined inside ISLE as `MInst`. We publicly re-export it here. 
pub use super::lower::isle::generated_code::MInst as Inst; +// Out-of-line data for calls, to keep the size of `Inst` down. +#[derive(Clone, Debug)] +pub struct CallInfo { + /// Register uses of this call. + pub uses: SmallVec<[Reg; 8]>, + /// Register defs of this call. + pub defs: SmallVec<[Writable; 8]>, + /// Registers clobbered by this call, as per its calling convention. + pub clobbers: PRegMask, + /// The opcode of this call. + pub opcode: Opcode, +} + pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool { let xs = x as i64; xs == ((xs << 32) >> 32) @@ -646,30 +660,38 @@ impl Inst { pub(crate) fn call_known( dest: ExternalName, - uses: Vec, - defs: Vec>, + uses: SmallVec<[Reg; 8]>, + defs: SmallVec<[Writable; 8]>, + clobbers: PRegMask, opcode: Opcode, ) -> Inst { Inst::CallKnown { dest, - uses, - defs, - opcode, + info: Box::new(CallInfo { + uses, + defs, + clobbers, + opcode, + }), } } pub(crate) fn call_unknown( dest: RegMem, - uses: Vec, - defs: Vec>, + uses: SmallVec<[Reg; 8]>, + defs: SmallVec<[Writable; 8]>, + clobbers: PRegMask, opcode: Opcode, ) -> Inst { dest.assert_regclass_is(RegClass::Int); Inst::CallUnknown { dest, - uses, - defs, - opcode, + info: Box::new(CallInfo { + uses, + defs, + clobbers, + opcode, + }), } } @@ -1967,34 +1989,25 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_def(dst.to_writable_reg()); } - Inst::CallKnown { - ref uses, ref defs, .. - } => { - for &u in uses { + Inst::CallKnown { ref info, .. } => { + for &u in &info.uses { collector.reg_use(u); } - for &d in defs { + for &d in &info.defs { collector.reg_def(d); } - // FIXME: keep clobbers separate in the Inst and use - // `reg_clobber()`. + collector.reg_clobbers(info.clobbers); } - Inst::CallUnknown { - ref uses, - ref defs, - dest, - .. - } => { + Inst::CallUnknown { ref info, dest, .. 
} => { dest.get_operands(collector); - for &u in uses { + for &u in &info.uses { collector.reg_use(u); } - for &d in defs { + for &d in &info.defs { collector.reg_def(d); } - // FIXME: keep clobbers separate in the Inst and use - // `reg_clobber()`. + collector.reg_clobbers(info.clobbers); } Inst::JmpTableSeq { @@ -2066,10 +2079,9 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol // pseudoinstruction (and relocation that it emits) is specific to // ELF systems; other x86-64 targets with other conventions (i.e., // Windows) use different TLS strategies. - for reg in X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV) { - // FIXME: use actual clobber functionality. - collector.reg_def(reg); - } + collector.reg_clobbers(X64ABIMachineSpec::get_regs_clobbered_by_call( + CallConv::SystemV, + )); } Inst::Unwind { .. } => {} diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs index 7dee9a9a17bf..8d460964443c 100644 --- a/cranelift/codegen/src/isa/x64/inst/regs.rs +++ b/cranelift/codegen/src/isa/x64/inst/regs.rs @@ -33,9 +33,12 @@ pub const ENC_R15: u8 = 15; // Constructors for Regs. 
fn gpr(enc: u8) -> Reg { - let preg = PReg::new(enc as usize, RegClass::Int); + let preg = gpr_preg(enc); Reg::from(VReg::new(preg.index(), RegClass::Int)) } +pub(crate) const fn gpr_preg(enc: u8) -> PReg { + PReg::new(enc as usize, RegClass::Int) +} pub(crate) fn rsi() -> Reg { gpr(ENC_RSI) @@ -96,10 +99,14 @@ pub(crate) fn pinned_reg() -> Reg { } fn fpr(enc: u8) -> Reg { - let preg = PReg::new(enc as usize, RegClass::Float); + let preg = fpr_preg(enc); Reg::from(VReg::new(preg.index(), RegClass::Float)) } +pub(crate) const fn fpr_preg(enc: u8) -> PReg { + PReg::new(enc as usize, RegClass::Float) +} + pub(crate) fn xmm0() -> Reg { fpr(0) } diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index f5948d75a5c7..66e31a315a8e 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -18,7 +18,7 @@ use crate::{ settings::Flags, unwind::UnwindInst, x64::{ - inst::{args::*, regs}, + inst::{args::*, regs, CallInfo}, settings::Flags as IsaFlags, }, }, @@ -26,8 +26,11 @@ use crate::{ isle::*, AtomicRmwOp, InsnInput, InsnOutput, LowerCtx, VCodeConstant, VCodeConstantData, }, }; +use std::boxed::Box; use std::convert::TryFrom; +type BoxCallInfo = Box; + pub struct SinkableLoad { inst: Inst, addr_input: InsnInput, diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index 41e81197eead..a734d5d51081 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -125,7 +125,6 @@ use super::abi::*; use crate::binemit::StackMap; -use crate::fx::FxHashSet; use crate::ir::types::*; use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot}; use crate::machinst::*; @@ -133,6 +132,7 @@ use crate::settings; use crate::CodegenResult; use crate::{ir, isa}; use alloc::vec::Vec; +use regalloc2::{PReg, PRegMask}; use smallvec::{smallvec, SmallVec}; use std::convert::TryFrom; use 
std::marker::PhantomData; @@ -475,8 +475,9 @@ pub trait ABIMachineSpec { /// temporary register to use to synthesize the called address, if needed. fn gen_call( dest: &CallDest, - uses: Vec, - defs: Vec>, + uses: SmallVec<[Reg; 8]>, + defs: SmallVec<[Writable; 8]>, + clobbers: PRegMask, opcode: ir::Opcode, tmp: Writable, callee_conv: isa::CallConv, @@ -504,7 +505,7 @@ pub trait ABIMachineSpec { /// Get all caller-save registers, that is, registers that we expect /// not to be saved across a call to a callee with the given ABI. - fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec>; + fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegMask; /// Get the needed extension mode, given the mode attached to the argument /// in the signature and the calling convention. The input (the attribute in @@ -1356,15 +1357,17 @@ impl ABICallee for ABICalleeImpl { } } -fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { +fn abisig_to_uses_defs_clobbers( + sig: &ABISig, +) -> (SmallVec<[Reg; 8]>, SmallVec<[Writable; 8]>, PRegMask) { // Compute uses: all arg regs. - let mut uses = FxHashSet::default(); + let mut uses = smallvec![]; for arg in &sig.args { if let &ABIArg::Slots { ref slots, .. } = arg { for slot in slots { match slot { &ABIArgSlot::Reg { reg, .. } => { - uses.insert(Reg::from(reg)); + uses.push(Reg::from(reg)); } _ => {} } @@ -1372,16 +1375,19 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec = M::get_regs_clobbered_by_call(sig.call_conv) - .into_iter() - .collect(); + let mut defs = smallvec![]; for ret in &sig.rets { if let &ABIArg::Slots { ref slots, .. } = ret { for slot in slots { match slot { &ABIArgSlot::Reg { reg, .. 
} => { - defs.insert(Writable::from_reg(Reg::from(reg))); + defs.push(Writable::from_reg(Reg::from(reg))); + clobbers.remove(PReg::from(reg)); } _ => {} } @@ -1389,12 +1395,7 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>(); - let mut defs = defs.into_iter().collect::>(); - uses.sort_unstable(); - defs.sort_unstable(); - - (uses, defs) + (uses, defs, clobbers) } /// ABI object for a callsite. @@ -1404,9 +1405,11 @@ pub struct ABICallerImpl { /// The called function's signature. sig: ABISig, /// All uses for the callsite, i.e., function args. - uses: Vec, - /// All defs for the callsite, i.e., return values and caller-saves. - defs: Vec>, + uses: SmallVec<[Reg; 8]>, + /// All defs for the callsite, i.e., return values. + defs: SmallVec<[Writable; 8]>, + /// Caller-save clobbers. + clobbers: PRegMask, /// Call destination. dest: CallDest, /// Actual call opcode; used to distinguish various types of calls. @@ -1439,12 +1442,13 @@ impl ABICallerImpl { ) -> CodegenResult> { let ir_sig = ensure_struct_return_ptr_is_returned(sig); let sig = ABISig::from_func_sig::(&ir_sig, flags)?; - let (uses, defs) = abisig_to_uses_and_defs::(&sig); + let (uses, defs, clobbers) = abisig_to_uses_defs_clobbers::(&sig); Ok(ABICallerImpl { ir_sig, sig, uses, defs, + clobbers, dest: CallDest::ExtName(extname.clone(), dist), opcode: ir::Opcode::Call, caller_conv, @@ -1464,12 +1468,13 @@ impl ABICallerImpl { ) -> CodegenResult> { let ir_sig = ensure_struct_return_ptr_is_returned(sig); let sig = ABISig::from_func_sig::(&ir_sig, flags)?; - let (uses, defs) = abisig_to_uses_and_defs::(&sig); + let (uses, defs, clobbers) = abisig_to_uses_defs_clobbers::(&sig); Ok(ABICallerImpl { ir_sig, sig, uses, defs, + clobbers, dest: CallDest::Reg(ptr), opcode, caller_conv, @@ -1695,6 +1700,7 @@ impl ABICaller for ABICallerImpl { &self.dest, uses, defs, + self.clobbers, self.opcode, tmp, self.sig.call_conv, diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs 
index 24cee437a8e5..55f9f83d44e6 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -16,7 +16,6 @@ pub type ValueArray2 = [Value; 2]; pub type ValueArray3 = [Value; 3]; pub type WritableReg = Writable; pub type VecReg = Vec; -pub type VecWritableReg = Vec; pub type ValueRegs = crate::machinst::ValueRegs; pub type InstOutput = SmallVec<[ValueRegs; 2]>; pub type InstOutputBuilder = Cell; diff --git a/cranelift/codegen/src/machinst/reg.rs b/cranelift/codegen/src/machinst/reg.rs index 8b0b835aac8c..b7d377f32206 100644 --- a/cranelift/codegen/src/machinst/reg.rs +++ b/cranelift/codegen/src/machinst/reg.rs @@ -5,7 +5,7 @@ use crate::machinst::MachInst; use alloc::{string::String, vec::Vec}; use core::{fmt::Debug, hash::Hash}; -use regalloc2::{Allocation, Operand, PReg, VReg}; +use regalloc2::{Allocation, Operand, PReg, PRegMask, VReg}; use smallvec::{smallvec, SmallVec}; #[cfg(feature = "enable-serde")] @@ -290,7 +290,7 @@ pub type RegClass = regalloc2::RegClass; pub struct OperandCollector<'a, F: Fn(VReg) -> VReg> { operands: &'a mut Vec, operands_start: usize, - clobbers: Vec, + clobbers: PRegMask, renamer: F, } @@ -301,7 +301,7 @@ impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> { Self { operands, operands_start, - clobbers: vec![], + clobbers: PRegMask::default(), renamer, } } @@ -313,15 +313,10 @@ impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> { self.operands.push(operand); } - /// Add a clobber. - fn add_clobber(&mut self, clobber: PReg) { - self.clobbers.push(clobber); - } - /// Finish the operand collection and return the tuple giving the /// range of indices in the flattened operand array, and the - /// clobber array. - pub fn finish(self) -> ((u32, u32), Vec) { + /// clobber set. 
+ pub fn finish(self) -> ((u32, u32), PRegMask) { let start = self.operands_start as u32; let end = self.operands.len() as u32; ((start, end), self.clobbers) @@ -403,12 +398,11 @@ impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> { )); } - /// Add a register clobber. This is a register that is written by - /// the instruction, so must be reserved (not used) for the whole - /// instruction, but is not used afterward. - #[allow(dead_code)] // FIXME: use clobbers rather than defs for calls! - pub fn reg_clobber(&mut self, reg: Writable) { - self.add_clobber(PReg::from(reg.to_reg())); + /// Add a register clobber set. This is a set of registers that + /// are written by the instruction, so must be reserved (not used) + /// for the whole instruction, but are not used afterward. + pub fn reg_clobbers(&mut self, regs: PRegMask) { + self.clobbers.add_all(regs); } } diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 68a0549d791a..82037ac7520e 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -25,7 +25,7 @@ use crate::timing; use crate::ValueLocRange; use regalloc2::{ Edit, Function as RegallocFunction, InstOrEdit, InstRange, Operand, OperandKind, PReg, - RegClass, VReg, + PRegMask, RegClass, VReg, }; use alloc::boxed::Box; @@ -79,12 +79,8 @@ pub struct VCode { /// instruction's operands. operand_ranges: Vec<(u32, u32)>, - /// Clobbers: a sparse map from instruction indices to clobber lists. - clobber_ranges: FxHashMap, - - /// A flat list of clobbered registers, with index ranges held by - /// `clobber_ranges`. - clobbers: Vec, + /// Clobbers: a sparse map from instruction indices to clobber masks. + clobbers: FxHashMap, /// Move information: for a given InsnIndex, (src, dst) operand pair. 
is_move: FxHashMap, @@ -568,13 +564,8 @@ impl VCodeBuilder { let (ops, clobbers) = op_collector.finish(); self.vcode.operand_ranges.push(ops); - if !clobbers.is_empty() { - let start = self.vcode.clobbers.len(); - self.vcode.clobbers.extend(clobbers.into_iter()); - let end = self.vcode.clobbers.len(); - self.vcode - .clobber_ranges - .insert(InsnIndex::new(i), (start as u32, end as u32)); + if clobbers != PRegMask::default() { + self.vcode.clobbers.insert(InsnIndex::new(i), clobbers); } if let Some((dst, src)) = insn.is_move() { @@ -628,8 +619,7 @@ impl VCode { insts: Vec::with_capacity(10 * n_blocks), operands: Vec::with_capacity(30 * n_blocks), operand_ranges: Vec::with_capacity(10 * n_blocks), - clobber_ranges: FxHashMap::default(), - clobbers: vec![], + clobbers: FxHashMap::default(), is_move: FxHashMap::default(), srclocs: Vec::with_capacity(10 * n_blocks), entry: BlockIndex::new(0), @@ -710,13 +700,15 @@ impl VCode { } // Also add explicitly-clobbered registers. - if let Some(&(start, end)) = self.clobber_ranges.get(&InsnIndex::new(i)) { - let inst_clobbers = &self.clobbers[(start as usize)..(end as usize)]; - for &preg in inst_clobbers { - let reg = RealReg::from(preg); - if clobbered_set.insert(reg) { - clobbered.push(Writable::from_reg(reg)); - } + for preg in self + .clobbers + .get(&InsnIndex::new(i)) + .cloned() + .unwrap_or_default() + { + let reg = RealReg::from(preg); + if clobbered_set.insert(reg) { + clobbered.push(Writable::from_reg(reg)); } } } @@ -1192,12 +1184,8 @@ impl RegallocFunction for VCode { &self.operands[start as usize..end as usize] } - fn inst_clobbers(&self, insn: InsnIndex) -> &[PReg] { - if let Some(&(start, end)) = self.clobber_ranges.get(&insn) { - &self.clobbers[start as usize..end as usize] - } else { - &[] - } + fn inst_clobbers(&self, insn: InsnIndex) -> PRegMask { + self.clobbers.get(&insn).cloned().unwrap_or_default() } fn num_vregs(&self) -> usize { diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif 
b/cranelift/filetests/filetests/isa/aarch64/call.clif index 41ed9a3e9d50..21a0c715bdfc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -29,28 +29,10 @@ block0(v0: i32): ; stp fp, lr, [sp, #-16]! ; mov fp, sp -; stp x27, x28, [sp, #-16]! -; stp x25, x26, [sp, #-16]! -; stp x23, x24, [sp, #-16]! -; stp x21, x22, [sp, #-16]! -; stp x19, x20, [sp, #-16]! -; stp d14, d15, [sp, #-16]! -; stp d12, d13, [sp, #-16]! -; stp d10, d11, [sp, #-16]! -; stp d8, d9, [sp, #-16]! ; block0: ; mov w0, w0 ; ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x5 -; ldp d8, d9, [sp], #16 -; ldp d10, d11, [sp], #16 -; ldp d12, d13, [sp], #16 -; ldp d14, d15, [sp], #16 -; ldp x19, x20, [sp], #16 -; ldp x21, x22, [sp], #16 -; ldp x23, x24, [sp], #16 -; ldp x25, x26, [sp], #16 -; ldp x27, x28, [sp], #16 ; ldp fp, lr, [sp], #16 ; ret @@ -72,28 +54,10 @@ block0(v0: i32): ; stp fp, lr, [sp, #-16]! ; mov fp, sp -; stp x27, x28, [sp, #-16]! -; stp x25, x26, [sp, #-16]! -; stp x23, x24, [sp, #-16]! -; stp x21, x22, [sp, #-16]! -; stp x19, x20, [sp, #-16]! -; stp d14, d15, [sp, #-16]! -; stp d12, d13, [sp, #-16]! -; stp d10, d11, [sp, #-16]! -; stp d8, d9, [sp, #-16]! ; block0: ; sxtw x0, w0 ; ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x5 -; ldp d8, d9, [sp], #16 -; ldp d10, d11, [sp], #16 -; ldp d12, d13, [sp], #16 -; ldp d14, d15, [sp], #16 -; ldp x19, x20, [sp], #16 -; ldp x21, x22, [sp], #16 -; ldp x23, x24, [sp], #16 -; ldp x25, x26, [sp], #16 -; ldp x27, x28, [sp], #16 ; ldp fp, lr, [sp], #16 ; ret @@ -176,29 +140,31 @@ block0: ; stp fp, lr, [sp, #-16]! ; mov fp, sp -; sub sp, sp, #48 +; str d15, [sp, #-16]! +; stp d11, d13, [sp, #-16]! 
; block0: ; ldr x9, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x9 -; str q0, [sp] +; mov v15.16b, v0.16b ; ldr x11, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x11 -; str q0, [sp, #16] +; mov v13.16b, v0.16b ; ldr x13, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x13 -; str q0, [sp, #32] +; mov v11.16b, v0.16b ; ldr x15, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x15 -; ldr q0, [sp] +; mov v0.16b, v15.16b ; ldr x1, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x1 -; ldr q0, [sp, #16] +; mov v0.16b, v13.16b ; ldr x3, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x3 -; ldr q0, [sp, #32] +; mov v0.16b, v11.16b ; ldr x5, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x5 -; add sp, sp, #48 +; ldp d11, d13, [sp], #16 +; ldr d15, [sp], #16 ; ldp fp, lr, [sp], #16 ; ret @@ -220,29 +186,31 @@ block0: ; stp fp, lr, [sp, #-16]! ; mov fp, sp -; sub sp, sp, #48 +; str d15, [sp, #-16]! +; stp d11, d13, [sp, #-16]! 
; block0: ; ldr x9, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x9 -; str q0, [sp] +; mov v15.16b, v0.16b ; ldr x11, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x11 -; str q0, [sp, #16] +; mov v13.16b, v0.16b ; ldr x13, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x13 -; str q0, [sp, #32] +; mov v11.16b, v0.16b ; ldr x15, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x15 -; ldr q0, [sp] +; mov v0.16b, v15.16b ; ldr x1, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x1 -; ldr q0, [sp, #16] +; mov v0.16b, v13.16b ; ldr x3, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x3 -; ldr q0, [sp, #32] +; mov v0.16b, v11.16b ; ldr x5, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x5 -; add sp, sp, #48 +; ldp d11, d13, [sp], #16 +; ldr d15, [sp], #16 ; ldp fp, lr, [sp], #16 ; ret @@ -268,29 +236,31 @@ block0: ; stp fp, lr, [sp, #-16]! ; mov fp, sp -; sub sp, sp, #48 +; str d15, [sp, #-16]! +; stp d11, d13, [sp, #-16]! 
; block0: ; ldr x9, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x9 -; str q0, [sp] +; mov v15.16b, v0.16b ; ldr x11, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x11 -; str q0, [sp, #16] +; mov v13.16b, v0.16b ; ldr x13, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x13 -; str q0, [sp, #32] +; mov v11.16b, v0.16b ; ldr x15, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x15 -; ldr q0, [sp] +; mov v0.16b, v15.16b ; ldr x1, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x1 -; ldr q0, [sp, #16] +; mov v0.16b, v13.16b ; ldr x3, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x3 -; ldr q0, [sp, #32] +; mov v0.16b, v11.16b ; ldr x5, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 ; blr x5 -; add sp, sp, #48 +; ldp d11, d13, [sp], #16 +; ldr d15, [sp], #16 ; ldp fp, lr, [sp], #16 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif index 7cc42b191bdf..64b527f0ae88 100644 --- a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif @@ -13,19 +13,11 @@ block0(v0: i32): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; str x25, [sp, #-16]! -; stp d14, d15, [sp, #-16]! -; stp d12, d13, [sp, #-16]! -; stp d10, d11, [sp, #-16]! -; stp d8, d9, [sp, #-16]! ; block0: ; mov x25, x0 ; elf_tls_get_addr u1:0 ; mov x1, x0 ; mov x0, x25 -; ldp d8, d9, [sp], #16 -; ldp d10, d11, [sp], #16 -; ldp d12, d13, [sp], #16 -; ldp d14, d15, [sp], #16 ; ldr x25, [sp], #16 ; ldp fp, lr, [sp], #16 ; ret