diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 961e8216e50f..38614438c40b 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -396,27 +396,21 @@ impl ABIMachineSpec for AArch64MachineDeps { fn gen_ret( setup_frame: bool, isa_flags: &aarch64_settings::Flags, + call_conv: isa::CallConv, rets: Vec, stack_bytes_to_pop: u32, ) -> Inst { - if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) { - let key = if isa_flags.sign_return_address_with_bkey() { - APIKey::B - } else { - APIKey::A - }; - - Inst::AuthenticatedRet { + match select_api_key(isa_flags, call_conv, setup_frame) { + Some(key) => Inst::AuthenticatedRet { key, is_hint: !isa_flags.has_pauth(), rets, stack_bytes_to_pop, - } - } else { - Inst::Ret { + }, + None => Inst::Ret { rets, stack_bytes_to_pop, - } + }, } } @@ -558,36 +552,32 @@ impl ABIMachineSpec for AArch64MachineDeps { ) -> SmallInstVec { let mut insts = SmallVec::new(); - if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) { - let key = if isa_flags.sign_return_address_with_bkey() { - APIKey::B - } else { - APIKey::A - }; - - insts.push(Inst::Pacisp { key }); - - if flags.unwind_info() { - insts.push(Inst::Unwind { - inst: UnwindInst::Aarch64SetPointerAuth { - return_addresses: true, - }, - }); - } - } else { - if isa_flags.use_bti() { - insts.push(Inst::Bti { - targets: BranchTargetType::C, - }); + match select_api_key(isa_flags, call_conv, setup_frame) { + Some(key) => { + insts.push(Inst::Paci { key }); + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::Aarch64SetPointerAuth { + return_addresses: true, + }, + }); + } } + None => { + if isa_flags.use_bti() { + insts.push(Inst::Bti { + targets: BranchTargetType::C, + }); + } - if flags.unwind_info() && call_conv.extends_apple_aarch64() { - // The macOS unwinder seems to require this. - insts.push(Inst::Unwind { - inst: UnwindInst::Aarch64SetPointerAuth { - return_addresses: false, - }, - }); + if flags.unwind_info() && call_conv.extends_apple_aarch64() { + // The macOS unwinder seems to require this. + insts.push(Inst::Unwind { + inst: UnwindInst::Aarch64SetPointerAuth { + return_addresses: false, + }, + }); + } } } @@ -1161,8 +1151,39 @@ impl ABIMachineSpec for AArch64MachineDeps { } } +fn select_api_key( + isa_flags: &aarch64_settings::Flags, + call_conv: isa::CallConv, + setup_frame: bool, +) -> Option { + if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) { + // The `tail` calling convention uses a zero modifier rather than SP + // because tail calls may happen with a different stack pointer than + // when the function was entered, meaning that it won't be the same when + // the return address is decrypted. + Some(if isa_flags.sign_return_address_with_bkey() { + match call_conv { + isa::CallConv::Tail => APIKey::BZ, + _ => APIKey::BSP, + } + } else { + match call_conv { + isa::CallConv::Tail => APIKey::AZ, + _ => APIKey::ASP, + } + }) + } else { + None + } +} + impl AArch64CallSite { - pub fn emit_return_call(mut self, ctx: &mut Lower, args: isle::ValueSlice) { + pub fn emit_return_call( + mut self, + ctx: &mut Lower, + args: isle::ValueSlice, + isa_flags: &aarch64_settings::Flags, + ) { let (new_stack_arg_size, old_stack_arg_size) = self.emit_temporary_tail_call_frame(ctx, args); @@ -1174,6 +1195,7 @@ impl AArch64CallSite { opcode, old_stack_arg_size, new_stack_arg_size, + key: select_api_key(isa_flags, isa::CallConv::Tail, true), }); match dest { diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 47a85cf59f4b..30e397eef856 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -902,7 +902,7 @@ ;; Pointer authentication code for instruction address with modifier in SP; ;; equivalent to a no-op if Pointer authentication (FEAT_PAuth) is not ;; supported. - (Pacisp + (Paci (key APIKey)) ;; Strip pointer authentication code from instruction address in LR; @@ -1645,8 +1645,14 @@ ;; Keys for instruction address PACs (type APIKey (enum - (A) - (B) + ;; API key A with the modifier of SP + (ASP) + ;; API key B with the modifier of SP + (BSP) + ;; API key A with the modifier of zero + (AZ) + ;; API key B with the modifier of zero + (BZ) )) ;; Branch target types diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 1c29591ba2d0..cdbf8ff5ca1a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -739,3 +739,17 @@ impl VectorSize { } } } + +impl APIKey { + /// Returns the encoding of the `auti{key}` instruction used to decrypt the + /// `lr` register. + pub fn enc_auti_hint(&self) -> u32 { + let (crm, op2) = match self { + APIKey::AZ => (0b0011, 0b100), + APIKey::ASP => (0b0011, 0b101), + APIKey::BZ => (0b0011, 0b110), + APIKey::BSP => (0b0011, 0b111), + }; + 0xd503201f | (crm << 8) | (op2 << 5) + } +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 085daaf27153..280d00fcb0ec 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -3139,35 +3139,38 @@ impl MachInstEmit for Inst { stack_bytes_to_pop, .. } => { - let key = match key { - APIKey::A => 0b0, - APIKey::B => 0b1, + if stack_bytes_to_pop != 0 { + // The requirement that `stack_bytes_to_pop` fit in an + // `Imm12` isn't fundamental, but lifting it is left for + // future PRs. + let imm12 = Imm12::maybe_from_u64(u64::from(stack_bytes_to_pop)) + .expect("stack bytes to pop must fit in Imm12"); + Inst::AluRRImm12 { + alu_op: ALUOp::Add, + size: OperandSize::Size64, + rd: writable_stack_reg(), + rn: stack_reg(), + imm12, + } + .emit(&[], sink, emit_info, state); + } + + let (op2, is_hint) = match key { + APIKey::AZ => (0b100, true), + APIKey::ASP => (0b101, is_hint), + APIKey::BZ => (0b110, true), + APIKey::BSP => (0b111, is_hint), }; if is_hint { - sink.put4(0xd50323bf | key << 6); // autiasp / autibsp + sink.put4(key.enc_auti_hint()); Inst::Ret { rets: vec![], - stack_bytes_to_pop, + stack_bytes_to_pop: 0, } .emit(&[], sink, emit_info, state); } else { - if stack_bytes_to_pop != 0 { - // The requirement that `stack_bytes_to_pop` fit in an - // `Imm12` isn't fundamental, but lifting it is left for - // future PRs. - let imm12 = Imm12::maybe_from_u64(u64::from(stack_bytes_to_pop)) - .expect("stack bytes to pop must fit in Imm12"); - Inst::AluRRImm12 { - alu_op: ALUOp::Add, - size: OperandSize::Size64, - rd: writable_stack_reg(), - rn: stack_reg(), - imm12, - } - .emit(&[], sink, emit_info, state); - } - sink.put4(0xd65f0bff | key << 10); // retaa / retab + sink.put4(0xd65f0bff | (op2 << 9)); // reta{key} } } &Inst::Call { ref info } => { @@ -3208,15 +3211,7 @@ impl MachInstEmit for Inst { ref callee, ref info, } => { - emit_return_call_common_sequence( - &mut allocs, - sink, - emit_info, - state, - info.new_stack_arg_size, - info.old_stack_arg_size, - &info.uses, - ); + emit_return_call_common_sequence(&mut allocs, sink, emit_info, state, info); // Note: this is not `Inst::Jump { .. }.emit(..)` because we // have different metadata in this case: we don't have a label @@ -3233,15 +3228,7 @@ impl MachInstEmit for Inst { &Inst::ReturnCallInd { callee, ref info } => { let callee = allocs.next(callee); - emit_return_call_common_sequence( - &mut allocs, - sink, - emit_info, - state, - info.new_stack_arg_size, - info.old_stack_arg_size, - &info.uses, - ); + emit_return_call_common_sequence(&mut allocs, sink, emit_info, state, info); Inst::IndirectBr { rn: callee, @@ -3556,13 +3543,15 @@ impl MachInstEmit for Inst { add.emit(&[], sink, emit_info, state); } } - &Inst::Pacisp { key } => { - let key = match key { - APIKey::A => 0b0, - APIKey::B => 0b1, + &Inst::Paci { key } => { + let (crm, op2) = match key { + APIKey::AZ => (0b0011, 0b000), + APIKey::ASP => (0b0011, 0b001), + APIKey::BZ => (0b0011, 0b010), + APIKey::BSP => (0b0011, 0b011), }; - sink.put4(0xd503233f | key << 6); + sink.put4(0xd503211f | (crm << 8) | (op2 << 5)); } &Inst::Xpaclri => sink.put4(0xd50320ff), &Inst::Bti { targets } => { @@ -3768,18 +3757,16 @@ fn emit_return_call_common_sequence( sink: &mut MachBuffer, emit_info: &EmitInfo, state: &mut EmitState, - new_stack_arg_size: u32, - old_stack_arg_size: u32, - uses: &CallArgList, + info: &ReturnCallInfo, ) { - for u in uses { + for u in info.uses.iter() { let _ = allocs.next(u.vreg); } // We are emitting a dynamic number of instructions and might need an // island. We emit four instructions regardless of how many stack arguments // we have, and then two instructions per word of stack argument space. - let new_stack_words = new_stack_arg_size / 8; + let new_stack_words = info.new_stack_arg_size / 8; let insts = 4 + 2 * new_stack_words; let size_of_inst = 4; let space_needed = insts * size_of_inst; @@ -3820,7 +3807,7 @@ fn emit_return_call_common_sequence( // actual jump happens outside this helper function. assert_eq!( - new_stack_arg_size % 8, + info.new_stack_arg_size % 8, 0, "size of new stack arguments must be 8-byte aligned" ); @@ -3830,7 +3817,8 @@ fn emit_return_call_common_sequence( // arguments as well as accounting for the two words we pushed onto the // stack upon entry to this function (the return address and old frame // pointer). - let fp_to_callee_sp = i64::from(old_stack_arg_size) - i64::from(new_stack_arg_size) + 16; + let fp_to_callee_sp = + i64::from(info.old_stack_arg_size) - i64::from(info.new_stack_arg_size) + 16; let tmp1 = regs::writable_spilltmp_reg(); let tmp2 = regs::writable_tmp2_reg(); @@ -3910,10 +3898,14 @@ fn emit_return_call_common_sequence( } .emit(&[], sink, emit_info, state); - state.virtual_sp_offset -= i64::from(new_stack_arg_size); + state.virtual_sp_offset -= i64::from(info.new_stack_arg_size); trace!( "return_call[_ind] adjusts virtual sp offset by {} -> {}", - new_stack_arg_size, + info.new_stack_arg_size, state.virtual_sp_offset ); + + if let Some(key) = info.key { + sink.put4(key.enc_auti_hint()); + } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index bcbe1fb84c5c..c5d41c7f62c7 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -136,6 +136,8 @@ pub struct ReturnCallInfo { /// for copying the frame over our current frame. It must already be /// allocated on the stack. pub new_stack_arg_size: u32, + /// API key to use to restore the return address, if any. + pub key: Option, } /// Additional information for JTSequence instructions, left out of line to lower the size of the Inst @@ -939,7 +941,7 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); memarg_operands(mem, collector); } - &Inst::Pacisp { .. } | &Inst::Xpaclri => { + &Inst::Paci { .. } | &Inst::Xpaclri => { // Neither LR nor SP is an allocatable register, so there is no need // to do anything. } @@ -2613,15 +2615,17 @@ impl Inst { ref rets, } => { let key = match key { - APIKey::A => "a", - APIKey::B => "b", + APIKey::AZ => "az", + APIKey::BZ => "bz", + APIKey::ASP => "asp", + APIKey::BSP => "bsp", }; let mut s = match (is_hint, stack_bytes_to_pop) { (false, 0) => format!("reta{key}"), (false, n) => { format!("add sp, sp, #{n} ; reta{key}") } - (true, 0) => format!("auti{key}sp ; ret"), + (true, 0) => format!("auti{key} ; ret"), (true, n) => { format!("add sp, sp, #{n} ; auti{key} ; ret") } @@ -2814,13 +2818,15 @@ impl Inst { } ret } - &Inst::Pacisp { key } => { + &Inst::Paci { key } => { let key = match key { - APIKey::A => "a", - APIKey::B => "b", + APIKey::AZ => "az", + APIKey::BZ => "bz", + APIKey::ASP => "asp", + APIKey::BSP => "bsp", }; - "paci".to_string() + key + "sp" + "paci".to_string() + key } &Inst::Xpaclri => "xpaclri".to_string(), &Inst::Bti { targets } => { diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 72f843372524..22adc76e3f98 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -110,7 +110,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { caller_conv, self.backend.flags().clone(), ); - call_site.emit_return_call(self.lower_ctx, args); + call_site.emit_return_call(self.lower_ctx, args, &self.backend.isa_flags); InstOutput::new() } @@ -138,7 +138,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { caller_conv, self.backend.flags().clone(), ); - call_site.emit_return_call(self.lower_ctx, args); + call_site.emit_return_call(self.lower_ctx, args, &self.backend.isa_flags); InstOutput::new() } diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index e4cfb0e101c7..8a8c807c19c5 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -253,6 +253,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { fn gen_ret( _setup_frame: bool, _isa_flags: &Self::F, + _call_conv: isa::CallConv, rets: Vec, stack_bytes_to_pop: u32, ) -> Inst { diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 8c0fd25f1bd7..fdfae057eefb 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -443,6 +443,7 @@ impl ABIMachineSpec for S390xMachineDeps { fn gen_ret( _setup_frame: bool, _isa_flags: &s390x_settings::Flags, + _call_conv: isa::CallConv, rets: Vec, stack_bytes_to_pop: u32, ) -> Inst { diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index feb3d447276c..0131275a2ae9 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -351,6 +351,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn gen_ret( _setup_frame: bool, _isa_flags: &x64_settings::Flags, + _call_conv: isa::CallConv, rets: Vec, stack_bytes_to_pop: u32, ) -> Self::I { diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index e3b40d583088..2511e1994904 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -451,6 +451,7 @@ pub trait ABIMachineSpec { fn gen_ret( setup_frame: bool, isa_flags: &Self::F, + call_conv: isa::CallConv, rets: Vec, stack_bytes_to_pop: u32, ) -> Self::I; @@ -1713,6 +1714,7 @@ impl Callee { M::gen_ret( self.setup_frame, &self.isa_flags, + self.call_conv, rets, self.stack_bytes_to_pop(sigs), ) @@ -1980,6 +1982,7 @@ impl Callee { insts.push(M::gen_ret( self.setup_frame, &self.isa_flags, + self.call_conv, vec![], self.stack_bytes_to_pop(sigs), )); diff --git a/cranelift/filetests/filetests/isa/aarch64/call-pauth-bkey.clif b/cranelift/filetests/filetests/isa/aarch64/call-pauth-bkey.clif new file mode 100644 index 000000000000..06e6ecc54e48 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/call-pauth-bkey.clif @@ -0,0 +1,120 @@ +test compile precise-output +set unwind_info=false +target aarch64 sign_return_address sign_return_address_with_bkey has_pauth + +function %f1(i64) -> i64 { + fn0 = %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; VCode: +; pacibsp +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; load_ext_name x3, TestCase(%g)+0 +; blr x3 +; ldp fp, lr, [sp], #16 +; retabsp +; +; Disassembled: +; block0: ; offset 0x0 +; pacibsp +; stp x29, x30, [sp, #-0x10]! +; mov x29, sp +; block1: ; offset 0xc +; ldr x3, #0x14 +; b #0x1c +; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 +; .byte 0x00, 0x00, 0x00, 0x00 +; blr x3 +; ldp x29, x30, [sp], #0x10 +; retab + +function %f2(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iadd.i64 v0, v1 + return v2 +} + +; VCode: +; block0: +; add x0, x0, x1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; add x0, x0, x1 +; ret + +function %tail_convention(i64) -> i64 tail { + fn0 = %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; VCode: +; pacibz +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; mov x0, x2 +; load_ext_name x3, TestCase(%g)+0 +; blr x3 +; mov x2, x0 +; ldp fp, lr, [sp], #16 +; retabz +; +; Disassembled: +; block0: ; offset 0x0 +; pacibz +; stp x29, x30, [sp, #-0x10]! +; mov x29, sp +; block1: ; offset 0xc +; mov x0, x2 +; ldr x3, #0x18 +; b #0x20 +; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 +; .byte 0x00, 0x00, 0x00, 0x00 +; blr x3 +; mov x2, x0 +; ldp x29, x30, [sp], #0x10 +; autibz +; ret + +function %tail_call(i64) -> i64 tail { + fn0 = %g(i64) -> i64 tail + +block0(v0: i64): + return_call fn0(v0) +} + +; VCode: +; pacibz +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; load_ext_name x3, TestCase(%g)+0 +; return_call_ind x3 old_stack_arg_size:0 new_stack_arg_size:0 x2=x2 +; +; Disassembled: +; block0: ; offset 0x0 +; pacibz +; stp x29, x30, [sp, #-0x10]! +; mov x29, sp +; block1: ; offset 0xc +; ldr x3, #0x14 +; b #0x1c +; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 +; .byte 0x00, 0x00, 0x00, 0x00 +; ldp x16, x30, [x29] +; add sp, x29, #0x10 +; mov x29, x16 +; autibz +; br x3 + diff --git a/cranelift/filetests/filetests/isa/aarch64/call-pauth.clif b/cranelift/filetests/filetests/isa/aarch64/call-pauth.clif index 4de61e59a9dd..74dbb2cfd82c 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call-pauth.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call-pauth.clif @@ -51,3 +51,71 @@ block0(v0: i64, v1: i64): ; add x0, x0, x1 ; ret +function %tail_convention(i64) -> i64 tail { + fn0 = %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; VCode: +; paciaz +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; mov x0, x2 +; load_ext_name x3, TestCase(%g)+0 +; blr x3 +; mov x2, x0 +; ldp fp, lr, [sp], #16 +; autiaz ; ret +; +; Disassembled: +; block0: ; offset 0x0 +; paciaz +; stp x29, x30, [sp, #-0x10]! +; mov x29, sp +; block1: ; offset 0xc +; mov x0, x2 +; ldr x3, #0x18 +; b #0x20 +; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 +; .byte 0x00, 0x00, 0x00, 0x00 +; blr x3 +; mov x2, x0 +; ldp x29, x30, [sp], #0x10 +; autiaz +; ret + +function %tail_call(i64) -> i64 tail { + fn0 = %g(i64) -> i64 tail + +block0(v0: i64): + return_call fn0(v0) +} + +; VCode: +; paciaz +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; load_ext_name x3, TestCase(%g)+0 +; return_call_ind x3 old_stack_arg_size:0 new_stack_arg_size:0 x2=x2 +; +; Disassembled: +; block0: ; offset 0x0 +; paciaz +; stp x29, x30, [sp, #-0x10]! +; mov x29, sp +; block1: ; offset 0xc +; ldr x3, #0x14 +; b #0x1c +; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %g 0 +; .byte 0x00, 0x00, 0x00, 0x00 +; ldp x16, x30, [x29] +; add sp, x29, #0x10 +; mov x29, x16 +; autiaz +; br x3 +