From 2d8714233107c1592a0406e4340b2f951c244802 Mon Sep 17 00:00:00 2001 From: Anton Kirilov Date: Fri, 7 Jan 2022 14:38:39 +0000 Subject: [PATCH] Initial forward-edge CFI implementation Give the user the option to start all basic blocks that are targets of indirect branches with the BTI instruction introduced by the Branch Target Identification extension to the Arm instruction set architecture. Copyright (c) 2022, Arm Limited. --- cranelift/codegen/meta/src/isa/arm64.rs | 11 +- cranelift/codegen/src/alias_analysis.rs | 2 +- cranelift/codegen/src/inst_predicates.rs | 16 ++- cranelift/codegen/src/isa/aarch64/abi.rs | 28 +++-- cranelift/codegen/src/isa/aarch64/inst.isle | 14 +++ .../codegen/src/isa/aarch64/inst/emit.rs | 10 ++ .../src/isa/aarch64/inst/emit_tests.rs | 7 ++ cranelift/codegen/src/isa/aarch64/inst/mod.rs | 30 ++++- .../codegen/src/isa/aarch64/lower/isle.rs | 2 +- .../codegen/src/isa/aarch64/lower_inst.rs | 16 ++- cranelift/codegen/src/machinst/abi_impl.rs | 11 +- cranelift/codegen/src/machinst/blockorder.rs | 16 ++- cranelift/codegen/src/machinst/mod.rs | 10 ++ cranelift/codegen/src/machinst/vcode.rs | 22 ++++ .../filetests/filetests/isa/aarch64/bti.clif | 111 ++++++++++++++++++ .../filetests/isa/aarch64/jumptable.clif | 4 +- cranelift/jit/src/backend.rs | 15 ++- cranelift/jit/src/memory.rs | 42 +++++-- .../src/generators/codegen_settings.rs | 1 + crates/jit/src/code_memory.rs | 6 +- crates/jit/src/instantiate.rs | 3 +- crates/runtime/src/mmap.rs | 30 ++++- crates/runtime/src/mmap_vec.rs | 12 +- crates/wasmtime/src/component/component.rs | 13 +- crates/wasmtime/src/engine.rs | 14 ++- crates/wasmtime/src/module.rs | 40 ++++--- crates/wasmtime/src/module/serialization.rs | 13 +- crates/wasmtime/src/trampoline/func.rs | 5 +- 28 files changed, 424 insertions(+), 80 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/aarch64/bti.clif diff --git a/cranelift/codegen/meta/src/isa/arm64.rs b/cranelift/codegen/meta/src/isa/arm64.rs index 7fc17738bb27..9e1aac536422 100644 --- a/cranelift/codegen/meta/src/isa/arm64.rs +++ b/cranelift/codegen/meta/src/isa/arm64.rs @@ -5,13 +5,13 @@ use crate::shared::Definitions as SharedDefinitions; fn define_settings(_shared: &SettingGroup) -> SettingGroup { let mut setting = SettingGroupBuilder::new("arm64"); - let has_lse = setting.add_bool( + + setting.add_bool( "has_lse", "Has Large System Extensions (FEAT_LSE) support.", "", false, ); - setting.add_bool( "has_pauth", "Has Pointer authentication (FEAT_PAuth) support; enables the use of \ @@ -44,8 +44,13 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup { "", false, ); + setting.add_bool( + "use_bti", + "Use Branch Target Identification (FEAT_BTI) instructions.", + "", + false, + ); - setting.add_predicate("use_lse", predicate!(has_lse)); setting.build() } diff --git a/cranelift/codegen/src/alias_analysis.rs b/cranelift/codegen/src/alias_analysis.rs index 53d3ba60cfc6..2e2087b0642f 100644 --- a/cranelift/codegen/src/alias_analysis.rs +++ b/cranelift/codegen/src/alias_analysis.rs @@ -237,7 +237,7 @@ impl<'a> AliasAnalysis<'a> { trace!("after inst{}: state is {:?}", inst.index(), state); } - visit_block_succs(self.func, block, |_inst, succ| { + visit_block_succs(self.func, block, |_inst, succ, _from_table| { let succ_first_inst = self .func .layout diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index 8d36742979ce..bdd8e21a1cde 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -130,7 +130,11 @@ pub fn has_memory_fence_semantics(op: Opcode) -> bool { } /// Visit all successors of a block with a given visitor closure. -pub(crate) fn visit_block_succs(f: &Function, block: Block, mut visit: F) { +pub(crate) fn visit_block_succs( + f: &Function, + block: Block, + mut visit: F, +) { for inst in f.layout.block_likely_branches(block) { if f.dfg[inst].opcode().is_branch() { visit_branch_targets(f, inst, &mut visit); @@ -138,18 +142,20 @@ pub(crate) fn visit_block_succs(f: &Function, block: Bloc } } -fn visit_branch_targets(f: &Function, inst: Inst, visit: &mut F) { +fn visit_branch_targets(f: &Function, inst: Inst, visit: &mut F) { match f.dfg[inst].analyze_branch(&f.dfg.value_lists) { BranchInfo::NotABranch => {} BranchInfo::SingleDest(dest, _) => { - visit(inst, dest); + visit(inst, dest, false); } BranchInfo::Table(table, maybe_dest) => { if let Some(dest) = maybe_dest { - visit(inst, dest); + // The default block is reached via a direct conditional branch, + // so it is not part of the table. + visit(inst, dest, false); } for &dest in f.jump_tables[table].as_slice() { - visit(inst, dest); + visit(inst, dest, true); } } } diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index b30c4e5cb55e..6e231e857f92 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -67,7 +67,11 @@ fn saved_reg_stack_size( /// point for the trait; it is never actually instantiated. pub struct AArch64MachineDeps; -impl IsaFlags for aarch64_settings::Flags {} +impl IsaFlags for aarch64_settings::Flags { + fn is_forward_edge_cfi_enabled(&self) -> bool { + self.use_bti() + } +} impl ABIMachineSpec for AArch64MachineDeps { type I = Inst; @@ -541,13 +545,21 @@ impl ABIMachineSpec for AArch64MachineDeps { }, }); } - } else if flags.unwind_info() && call_conv.extends_apple_aarch64() { - // The macOS unwinder seems to require this. - insts.push(Inst::Unwind { - inst: UnwindInst::Aarch64SetPointerAuth { - return_addresses: false, - }, - }); + } else { + if isa_flags.use_bti() { + insts.push(Inst::Bti { + targets: BranchTargetType::C, + }); + } + + if flags.unwind_info() && call_conv.extends_apple_aarch64() { + // The macOS unwinder seems to require this. + insts.push(Inst::Unwind { + inst: UnwindInst::Aarch64SetPointerAuth { + return_addresses: false, + }, + }); + } } insts diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 1f8122188d28..c20fb6fe2ab0 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -780,6 +780,11 @@ (Pacisp (key APIKey)) + ;; Branch target identification; equivalent to a no-op if Branch Target + ;; Identification (FEAT_BTI) is not supported. + (Bti + (targets BranchTargetType)) + ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This ;; controls how AMode::NominalSPOffset args are lowered. (VirtualSPOffsetAdj @@ -1355,6 +1360,15 @@ (B) )) +;; Branch target types +(type BranchTargetType + (enum + (None) + (C) + (J) + (JC) +)) + ;; Extractors for target features ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl use_lse () Inst) (extern extractor use_lse use_lse) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 3c4baa63f9af..7c3628c216ec 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -3121,6 +3121,16 @@ impl MachInstEmit for Inst { sink.put4(0xd503233f | key << 6); } + &Inst::Bti { targets } => { + let targets = match targets { + BranchTargetType::None => 0b00, + BranchTargetType::C => 0b01, + BranchTargetType::J => 0b10, + BranchTargetType::JC => 0b11, + }; + + sink.put4(0xd503241f | targets << 6); + } &Inst::VirtualSPOffsetAdj { offset } => { trace!( "virtual sp offset adjusted by {} -> {}", diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index daa33fed4677..6d043d640ba9 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -57,6 +57,13 @@ fn test_aarch64_binemit() { "retab", )); insns.push((Inst::Pacisp { key: APIKey::B }, "7F2303D5", "pacibsp")); + insns.push(( + Inst::Bti { + targets: BranchTargetType::J, + }, + "9F2403D5", + "bti j", + )); insns.push((Inst::Nop0, "", "nop-zero-len")); insns.push((Inst::Nop4, "1F2003D5", "nop")); insns.push((Inst::Csdb, "9F2203D5", "csdb")); diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index fb4f4e394527..efcce63f236e 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -36,8 +36,8 @@ mod emit_tests; // Instructions (top level): definition pub use crate::isa::aarch64::lower::isle::generated_code::{ - ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, - FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp, + ALUOp, ALUOp3, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1, FPUOp2, + FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, VecShiftImmOp, }; @@ -1040,6 +1040,7 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan memarg_operands(mem, collector); } &Inst::Pacisp { .. } => {} + &Inst::Bti { .. } => {} &Inst::VirtualSPOffsetAdj { .. } => {} &Inst::ElfTlsGetAddr { .. } => { @@ -1234,6 +1235,19 @@ impl MachInst for Inst { fn ref_type_regclass(_: &settings::Flags) -> RegClass { RegClass::Int } + + fn gen_block_start( + is_indirect_branch_target: bool, + is_forward_edge_cfi_enabled: bool, + ) -> Option { + if is_indirect_branch_target && is_forward_edge_cfi_enabled { + Some(Inst::Bti { + targets: BranchTargetType::J, + }) + } else { + None + } + } } //============================================================================= @@ -2602,7 +2616,7 @@ impl Inst { "csel {}, xzr, {}, hs ; ", "csdb ; ", "adr {}, pc+16 ; ", - "ldrsw {}, [{}, {}, LSL 2] ; ", + "ldrsw {}, [{}, {}, uxtw #2] ; ", "add {}, {}, {} ; ", "br {} ; ", "jt_entries {:?}" @@ -2715,6 +2729,16 @@ impl Inst { "paci".to_string() + key + "sp" } + &Inst::Bti { targets } => { + let targets = match targets { + BranchTargetType::None => "", + BranchTargetType::C => " c", + BranchTargetType::J => " j", + BranchTargetType::JC => " jc", + }; + + "bti".to_string() + targets + } &Inst::VirtualSPOffsetAdj { offset } => { state.virtual_sp_offset += offset; format!("virtual_sp_offset_adjust {}", offset) diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 49ea91e1081c..c081c8f21c64 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -72,7 +72,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> isle_prelude_methods!(); fn use_lse(&mut self, _: Inst) -> Option<()> { - if self.isa_flags.use_lse() { + if self.isa_flags.has_lse() { Some(()) } else { None diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 69e795c01846..5bb502029d8c 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1796,18 +1796,20 @@ pub(crate) fn lower_branch( // emit_island // this forces an island at this point // // if the jumptable would push us past // // the deadline - // subs idx, #jt_size + // cmp idx, #jt_size // b.hs default + // csel vTmp2, xzr, idx, hs + // csdb // adr vTmp1, PC+16 - // ldr vTmp2, [vTmp1, idx, lsl #2] - // add vTmp2, vTmp2, vTmp1 - // br vTmp2 + // ldr vTmp2, [vTmp1, vTmp2, uxtw #2] + // add vTmp1, vTmp1, vTmp2 + // br vTmp1 // [jumptable offsets relative to JT base] let jt_size = targets.len() - 1; assert!(jt_size <= std::u32::MAX as usize); ctx.emit(Inst::EmitIsland { - needed_space: 4 * (6 + jt_size) as CodeOffset, + needed_space: 4 * (8 + jt_size) as CodeOffset, }); let ridx = put_input_in_reg( @@ -1846,8 +1848,10 @@ pub(crate) fn lower_branch( // Emit the compound instruction that does: // // b.hs default + // csel rB, xzr, rIndex, hs + // csdb // adr rA, jt - // ldrsw rB, [rA, rIndex, UXTW 2] + // ldrsw rB, [rA, rB, uxtw #2] // add rA, rA, rB // br rA // [jt entries] diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index ca51d1078f72..7b4103153ebd 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -281,7 +281,12 @@ impl StackAMode { } /// Trait implemented by machine-specific backend to represent ISA flags. -pub trait IsaFlags: Clone {} +pub trait IsaFlags: Clone { + /// Get a flag indicating whether forward-edge CFI is enabled. + fn is_forward_edge_cfi_enabled(&self) -> bool { + false + } +} /// Trait implemented by machine-specific backend to provide information about /// register assignments and to allow generating the specific instructions for @@ -1110,6 +1115,10 @@ impl Callee { } } + pub fn is_forward_edge_cfi_enabled(&self) -> bool { + self.isa_flags.is_forward_edge_cfi_enabled() + } + /// Get the calling convention implemented by this ABI object. pub fn call_conv(&self) -> isa::CallConv { self.sig.call_conv diff --git a/cranelift/codegen/src/machinst/blockorder.rs b/cranelift/codegen/src/machinst/blockorder.rs index 4d1708dc4bb4..8a0f8bfac8e9 100644 --- a/cranelift/codegen/src/machinst/blockorder.rs +++ b/cranelift/codegen/src/machinst/blockorder.rs @@ -106,6 +106,8 @@ pub struct BlockLoweringOrder { /// which is used by VCode emission to sink the blocks at the last /// moment (when we actually emit bytes into the MachBuffer). cold_blocks: FxHashSet, + /// CLIF BBs that are indirect branch targets. + indirect_branch_targets: FxHashSet, } /// The origin of a block in the lowered block-order: either an original CLIF @@ -230,14 +232,20 @@ impl BlockLoweringOrder { // Cache the block successors to avoid re-examining branches below. let mut block_succs: SmallVec<[(Inst, usize, Block); 128]> = SmallVec::new(); let mut block_succ_range = SecondaryMap::with_default((0, 0)); + let mut indirect_branch_targets = FxHashSet::default(); + for block in f.layout.blocks() { let block_succ_start = block_succs.len(); let mut succ_idx = 0; - visit_block_succs(f, block, |inst, succ| { + visit_block_succs(f, block, |inst, succ, from_table| { block_out_count[block] += 1; block_in_count[succ] += 1; block_succs.push((inst, succ_idx, succ)); succ_idx += 1; + + if from_table { + indirect_branch_targets.insert(succ); + } }); let block_succ_end = block_succs.len(); block_succ_range[block] = (block_succ_start, block_succ_end); @@ -474,6 +482,7 @@ impl BlockLoweringOrder { lowered_succ_ranges, orig_map, cold_blocks, + indirect_branch_targets, }; trace!("BlockLoweringOrder: {:?}", result); result @@ -494,6 +503,11 @@ impl BlockLoweringOrder { pub fn is_cold(&self, block: BlockIndex) -> bool { self.cold_blocks.contains(&block) } + + /// Determine whether the given CLIF BB is an indirect branch target. + pub fn is_indirect_branch_target(&self, block: Block) -> bool { + self.indirect_branch_targets.contains(&block) + } } #[cfg(test)] diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 52d3557e3c97..7fef2aa587f6 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -170,6 +170,16 @@ pub trait MachInst: Clone + Debug { /// Is this a safepoint? fn is_safepoint(&self) -> bool; + /// Generate an instruction that must appear at the beginning of a basic + /// block, if any. Note that the return value must not be subject to + /// register allocation. + fn gen_block_start( + _is_indirect_branch_target: bool, + _is_forward_edge_cfi_enabled: bool, + ) -> Option { + None + } + /// A label-use kind: a type that describes the types of label references that /// can occur in an instruction. type LabelUse: MachInstLabelUse; diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index bcb0e02721a8..687c8cb0a539 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -820,6 +820,8 @@ impl VCode { ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32); } + let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled(); + for (block_order_idx, &block) in final_order.iter().enumerate() { trace!("emitting block {:?}", block); let new_offset = I::align_basic_block(buffer.cur_offset()); @@ -877,6 +879,26 @@ impl VCode { last_offset = Some(cur_offset); } + let lb = self.block_order.lowered_order()[block.index()]; + let b = if let Some(b) = lb.orig_block() { + b + } else { + // If there is no original block, then this must be a pure edge + // block. Note that the successor must have an original block. + let (_, succ) = self.block_order.succ_indices(block)[0]; + + self.block_order.lowered_order()[succ.index()] + .orig_block() + .expect("Edge block successor must be body block.") + }; + + if let Some(block_start) = I::gen_block_start( + self.block_order.is_indirect_branch_target(b), + is_forward_edge_cfi_enabled, + ) { + do_emit(&block_start, &[], &mut disasm, &mut buffer, &mut state); + } + for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { match inst_or_edit { InstOrEdit::Inst(iix) => { diff --git a/cranelift/filetests/filetests/isa/aarch64/bti.clif b/cranelift/filetests/filetests/isa/aarch64/bti.clif new file mode 100644 index 000000000000..6aab429c7198 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/bti.clif @@ -0,0 +1,111 @@ +test compile precise-output +set unwind_info=false +target aarch64 use_bti + +function %f1(i32) -> i32 { + jt0 = jump_table [block1, block2, block3] + +block0(v0: i32): + br_table v0, block4, jt0 + +block1: + v1 = iconst.i32 1 + jump block5(v1) + +block2: + v2 = iconst.i32 2 + jump block5(v2) + +block3: + v3 = iconst.i32 3 + jump block5(v3) + +block4: + v4 = iconst.i32 4 + jump block5(v4) + +block5(v5: i32): + v6 = iadd.i32 v0, v5 + return v6 +} + +; bti c +; block0: +; emit_island 44 +; subs wzr, w0, #3 +; b.hs label1 ; csel x1, xzr, x0, hs ; csdb ; adr x15, pc+16 ; ldrsw x1, [x15, x1, uxtw #2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] +; block1: +; movz x5, #4 +; b label2 +; block2: +; b label9 +; block3: +; bti j +; movz x5, #1 +; b label4 +; block4: +; b label9 +; block5: +; bti j +; movz x5, #2 +; b label6 +; block6: +; b label9 +; block7: +; bti j +; movz x5, #3 +; b label8 +; block8: +; b label9 +; block9: +; add w0, w0, w5 +; ret + +function %f2(i64) -> i64 { + jt0 = jump_table [block2] + +block0(v0: i64): + v1 = ireduce.i32 v0 + v2 = load.i64 notrap aligned table v0 + br_table v1, block1, jt0 + +block1: + return v2 + +block2: + v3 = iconst.i64 42 + v4 = iadd.i64 v2, v3 + return v4 +} + +; bti c +; block0: +; ldr x2, [x0] +; emit_island 36 +; subs wzr, w0, #1 +; b.hs label1 ; csel x9, xzr, x0, hs ; csdb ; adr x8, pc+16 ; ldrsw x9, [x8, x9, uxtw #2] ; add x8, x8, x9 ; br x8 ; jt_entries [Label(MachLabel(2))] +; block1: +; mov x0, x2 +; ret +; block2: +; bti j +; mov x0, x2 +; add x0, x0, #42 +; ret + +function %f3(i64) -> i64 { + fn0 = %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; bti c +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; ldr x5, 8 ; b 12 ; data TestCase(TestcaseName { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] }) + 0 +; blr x5 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif index efd0697d82e3..37f3798e46bb 100644 --- a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif +++ b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif @@ -30,9 +30,9 @@ block5(v5: i32): } ; block0: -; emit_island 36 +; emit_island 44 ; subs wzr, w0, #3 -; b.hs label1 ; csel x1, xzr, x0, hs ; csdb ; adr x15, pc+16 ; ldrsw x1, [x15, x1, LSL 2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] +; b.hs label1 ; csel x1, xzr, x0, hs ; csdb ; adr x15, pc+16 ; ldrsw x1, [x15, x1, uxtw #2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] ; block1: ; movz x5, #4 ; b label2 diff --git a/cranelift/jit/src/backend.rs b/cranelift/jit/src/backend.rs index a24d723a4065..cec135aa91e6 100644 --- a/cranelift/jit/src/backend.rs +++ b/cranelift/jit/src/backend.rs @@ -476,6 +476,8 @@ impl JITModule { ); } + let enable_branch_protection = + cfg!(target_arch = "aarch64") && use_bti(&builder.isa.isa_flags()); let mut module = Self { isa: builder.isa, hotswap_enabled: builder.hotswap_enabled, @@ -483,9 +485,9 @@ impl JITModule { lookup_symbols: builder.lookup_symbols, libcall_names: builder.libcall_names, memory: MemoryHandle { - code: Memory::new(), - readonly: Memory::new(), - writable: Memory::new(), + code: Memory::new(enable_branch_protection), + readonly: Memory::new(false), + writable: Memory::new(false), }, declarations: ModuleDeclarations::default(), function_got_entries: SecondaryMap::new(), @@ -947,3 +949,10 @@ fn lookup_with_dlsym(name: &str) -> Option<*const u8> { None } } + +fn use_bti(isa_flags: &Vec) -> bool { + isa_flags + .iter() + .find(|&f| f.name == "use_bti") + .map_or(false, |f| f.as_bool().unwrap_or(false)) +} diff --git a/cranelift/jit/src/memory.rs b/cranelift/jit/src/memory.rs index 02f274c72ff4..de116d0da7e3 100644 --- a/cranelift/jit/src/memory.rs +++ b/cranelift/jit/src/memory.rs @@ -113,15 +113,17 @@ pub(crate) struct Memory { already_protected: usize, current: PtrLen, position: usize, + enable_branch_protection: bool, } impl Memory { - pub(crate) fn new() -> Self { + pub(crate) fn new(enable_branch_protection: bool) -> Self { Self { allocations: Vec::new(), already_protected: 0, current: PtrLen::new(), position: 0, + enable_branch_protection, } } @@ -157,14 +159,35 @@ impl Memory { pub(crate) fn set_readable_and_executable(&mut self) { self.finish_current(); + let set_region_readable_and_executable = |ptr, len| { + if len != 0 { + if self.enable_branch_protection { + #[cfg(all(target_arch = "aarch64", target_os = "linux"))] + if std::arch::is_aarch64_feature_detected!("bti") { + let prot = libc::PROT_EXEC | libc::PROT_READ | 0x10; // PROT_BTI + + unsafe { + if libc::mprotect(ptr as *mut libc::c_void, len, prot) < 0 { + panic!("unable to make memory readable+executable"); + } + } + + return; + } + } + + unsafe { + region::protect(ptr, len, region::Protection::READ_EXECUTE) + .expect("unable to make memory readable+executable"); + } + } + }; + #[cfg(feature = "selinux-fix")] { for &PtrLen { ref map, ptr, len } in &self.allocations[self.already_protected..] { - if len != 0 && map.is_some() { - unsafe { - region::protect(ptr, len, region::Protection::READ_EXECUTE) - .expect("unable to make memory readable+executable"); - } + if map.is_some() { + set_region_readable_and_executable(ptr, len); } } } @@ -172,12 +195,7 @@ impl Memory { #[cfg(not(feature = "selinux-fix"))] { for &PtrLen { ptr, len } in &self.allocations[self.already_protected..] { - if len != 0 { - unsafe { - region::protect(ptr, len, region::Protection::READ_EXECUTE) - .expect("unable to make memory readable+executable"); - } - } + set_region_readable_and_executable(ptr, len); } } diff --git a/crates/fuzzing/src/generators/codegen_settings.rs b/crates/fuzzing/src/generators/codegen_settings.rs index 767800687149..ab75c28db688 100644 --- a/crates/fuzzing/src/generators/codegen_settings.rs +++ b/crates/fuzzing/src/generators/codegen_settings.rs @@ -127,6 +127,7 @@ impl<'a> Arbitrary<'a> for CodegenSettings { "aarch64" => { test: is_aarch64_feature_detected, + std: "bti" => clif: "use_bti", std: "lse" => clif: "has_lse", }, }; diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs index 5dfe1a111593..73d217c15690 100644 --- a/crates/jit/src/code_memory.rs +++ b/crates/jit/src/code_memory.rs @@ -16,6 +16,7 @@ pub struct CodeMemory { mmap: ManuallyDrop, unwind_registration: ManuallyDrop>, published: bool, + enable_branch_protection: bool, } impl Drop for CodeMemory { @@ -53,7 +54,7 @@ impl CodeMemory { /// /// The returned `CodeMemory` manages the internal `MmapVec` and the /// `publish` method is used to actually make the memory executable. - pub fn new(mmap: MmapVec) -> Self { + pub fn new(mmap: MmapVec, enable_branch_protection: bool) -> Self { #[cfg(all(target_arch = "aarch64", target_os = "linux"))] { // This is a requirement of the `membarrier` call executed by the `publish` method. @@ -67,6 +68,7 @@ impl CodeMemory { mmap: ManuallyDrop::new(mmap), unwind_registration: ManuallyDrop::new(None), published: false, + enable_branch_protection, } } @@ -159,7 +161,7 @@ impl CodeMemory { // read/execute, notably not using read/write/execute to prevent // modifications. self.mmap - .make_executable(text_range.clone()) + .make_executable(text_range.clone(), self.enable_branch_protection) .expect("unable to make memory executable"); #[cfg(all(target_arch = "aarch64", target_os = "linux"))] diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index ecc516c42f2d..400512cc7342 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -397,12 +397,13 @@ impl CompiledModule { info: Option, profiler: &dyn ProfilingAgent, id_allocator: &CompiledModuleIdAllocator, + enable_branch_protection: bool, ) -> Result { // Transfer ownership of `obj` to a `CodeMemory` object which will // manage permissions, such as the executable bit. Once it's located // there we also publish it for being able to execute. Note that this // step will also resolve pending relocations in the compiled image. - let mut code_memory = CodeMemory::new(mmap); + let mut code_memory = CodeMemory::new(mmap, enable_branch_protection); let code = code_memory .publish() .context("failed to publish code memory")?; diff --git a/crates/runtime/src/mmap.rs b/crates/runtime/src/mmap.rs index a00a47c7dbbd..44c0296f8450 100644 --- a/crates/runtime/src/mmap.rs +++ b/crates/runtime/src/mmap.rs @@ -412,7 +412,11 @@ impl Mmap { } /// Makes the specified `range` within this `Mmap` to be read/execute. - pub unsafe fn make_executable(&self, range: Range) -> Result<()> { + pub unsafe fn make_executable( + &self, + range: Range, + enable_branch_protection: bool, + ) -> Result<()> { assert!(range.start <= self.len()); assert!(range.end <= self.len()); assert!(range.start <= range.end); @@ -428,8 +432,15 @@ impl Mmap { use std::io; use windows_sys::Win32::System::Memory::*; + let flags = if enable_branch_protection { + // TODO: We use this check to avoid an unused variable warning, + // but some of the CFG-related flags might be applicable + PAGE_EXECUTE_READ + } else { + PAGE_EXECUTE_READ + }; let mut old = 0; - let result = VirtualProtect(base, len, PAGE_EXECUTE_READ, &mut old); + let result = VirtualProtect(base, len, flags, &mut old); if result == 0 { return Err(io::Error::last_os_error().into()); } @@ -438,8 +449,23 @@ impl Mmap { #[cfg(not(windows))] { use rustix::mm::{mprotect, MprotectFlags}; + + if enable_branch_protection { + #[cfg(all(target_arch = "aarch64", target_os = "linux"))] + if std::arch::is_aarch64_feature_detected!("bti") { + let prot = libc::PROT_EXEC | libc::PROT_READ | 0x10; // PROT_BTI + + return if libc::mprotect(base as *mut libc::c_void, len, prot) < 0 { + Err(std::io::Error::last_os_error()).context("mprotect() failed") + } else { + Ok(()) + }; + } + } + mprotect(base, len, MprotectFlags::READ | MprotectFlags::EXEC)?; } + Ok(()) } diff --git a/crates/runtime/src/mmap_vec.rs b/crates/runtime/src/mmap_vec.rs index 2eaedc5a3034..9249f2a5e132 100644 --- a/crates/runtime/src/mmap_vec.rs +++ b/crates/runtime/src/mmap_vec.rs @@ -102,9 +102,15 @@ impl MmapVec { } /// Makes the specified `range` within this `mmap` to be read/execute. - pub unsafe fn make_executable(&self, range: Range) -> Result<()> { - self.mmap - .make_executable(range.start + self.range.start..range.end + self.range.start) + pub unsafe fn make_executable( + &self, + range: Range, + enable_branch_protection: bool, + ) -> Result<()> { + self.mmap.make_executable( + range.start + self.range.start..range.end + self.range.start, + enable_branch_protection, + ) } /// Returns the underlying file that this mmap is mapping, if present. diff --git a/crates/wasmtime/src/component/component.rs b/crates/wasmtime/src/component/component.rs index f4afc9789f73..c3e231f357c1 100644 --- a/crates/wasmtime/src/component/component.rs +++ b/crates/wasmtime/src/component/component.rs @@ -152,7 +152,13 @@ impl Component { // do so. This should build up a mapping from // `SignatureIndex` to `VMSharedSignatureIndex` once and // then reuse that for each module somehow. - Module::from_parts(engine, mmap, info, types.clone()) + Module::from_parts( + engine, + mmap, + info, + types.clone(), + engine.is_branch_protection_enabled(&engine.compiler().isa_flags()), + ) })?; Ok(modules.into_iter().collect::>()) @@ -163,7 +169,10 @@ impl Component { ); let static_modules = static_modules?; let (lowerings, always_trap, transcoders, trampolines, trampoline_obj) = trampolines?; - let mut trampoline_obj = CodeMemory::new(trampoline_obj); + let mut trampoline_obj = CodeMemory::new( + trampoline_obj, + engine.is_branch_protection_enabled(&engine.compiler().isa_flags()), + ); let code = trampoline_obj.publish()?; let text = wasmtime_jit::subslice_range(code.text, code.mmap); diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index a77155532ee7..8d845fd98354 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -4,6 +4,7 @@ use anyhow::Result; use once_cell::sync::OnceCell; #[cfg(feature = "parallel-compilation")] use rayon::prelude::*; +use std::collections::BTreeMap; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; #[cfg(feature = "cache")] @@ -216,7 +217,7 @@ impl Engine { pub fn precompile_module(&self, bytes: &[u8]) -> Result> { #[cfg(feature = "wat")] let bytes = wat::parse_bytes(&bytes)?; - let (mmap, _, types) = crate::Module::build_artifacts(self, &bytes)?; + let (mmap, _, types, _) = crate::Module::build_artifacts(self, &bytes)?; crate::module::SerializedModule::from_artifacts(self, &mmap, &types) .to_bytes(&self.config().module_version) } @@ -461,6 +462,9 @@ impl Engine { "sign_return_address" => Some(true), // No effect on its own. "sign_return_address_with_bkey" => Some(true), + // The `BTI` instruction acts as a `NOP` when unsupported, so it + // is safe to enable it. + "use_bti" => Some(true), // fall through to the very bottom to indicate that support is // not enabled to test whether this feature is enabled on the // host. @@ -530,6 +534,14 @@ impl Engine { flag )) } + + pub(crate) fn is_branch_protection_enabled( + &self, + isa_flags: &BTreeMap, + ) -> bool { + cfg!(target_arch = "aarch64") + && matches!(isa_flags.get("use_bti"), Some(FlagValue::Bool(true))) + } } impl Default for Engine { diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 43a8280b6cea..e4ca596fe0d0 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -1,7 +1,7 @@ -use crate::Engine; use crate::{ signatures::SignatureCollection, types::{ExportType, ExternType, ImportType}, + Engine, }; use anyhow::{bail, Context, Result}; use once_cell::sync::OnceCell; @@ -288,7 +288,7 @@ impl Module { cfg_if::cfg_if! { if #[cfg(feature = "cache")] { let state = (HashedEngineCompileEnv(engine), binary); - let (mmap, info, types) = wasmtime_cache::ModuleCacheEntry::new( + let (mmap, info, types, enable_branch_protection) = wasmtime_cache::ModuleCacheEntry::new( "wasmtime", engine.cache_config(), ) @@ -299,7 +299,7 @@ impl Module { |(engine, wasm)| Module::build_artifacts(engine.0, wasm), // Implementation of how to serialize artifacts - |(engine, _wasm), (mmap, _info, types)| { + |(engine, _wasm), (mmap, _info, types, _enable_branch_protection)| { SerializedModule::from_artifacts( engine.0, mmap, @@ -316,11 +316,11 @@ impl Module { }, )?; } else { - let (mmap, info, types) = Module::build_artifacts(engine, binary)?; + let (mmap, info, types, enable_branch_protection) = Module::build_artifacts(engine, binary)?; } }; - Self::from_parts(engine, mmap, info, types) + Self::from_parts(engine, mmap, info, types, enable_branch_protection) } /// Converts an input binary-encoded WebAssembly module to compilation @@ -328,23 +328,20 @@ impl Module { /// /// This is where compilation actually happens of WebAssembly modules and /// translation/parsing/validation of the binary input occurs. The actual - /// result here is a triple of: - /// - /// * The index into the second field of the "main module". The "main - /// module" in this case is the outermost module described by the `wasm` - /// input, and is here for the module linking proposal. - /// * A list of compilation artifacts for each module found within `wasm`. - /// Note that if module linking is disabled then this list will always - /// have a size of exactly 1. These pairs are returned by - /// `wasmtime_jit::finish_compile`. - /// * Type information about all the modules returned. All returned modules - /// have local type information with indices that refer to these returned + /// result here is a combination of: + /// + /// * The compilation artifacts for the module found within `wasm`, as + /// returned by `wasmtime_jit::finish_compile`. + /// * Type information about the module returned. All returned modules have + /// local type information with indices that refer to these returned /// tables. + /// * A boolean value indicating whether forward-edge CFI has been applied + /// to the compiled module. #[cfg(compiler)] pub(crate) fn build_artifacts( engine: &Engine, wasm: &[u8], - ) -> Result<(MmapVec, Option, ModuleTypes)> { + ) -> Result<(MmapVec, Option, ModuleTypes, bool)> { let tunables = &engine.config().tunables; // First a `ModuleEnvironment` is created which records type information @@ -360,7 +357,12 @@ impl Module { .context("failed to parse WebAssembly module")?; let types = types.finish(); let (mmap, info) = Module::compile_functions(engine, translation, &types)?; - Ok((mmap, info, types)) + Ok(( + mmap, + info, + types, + engine.is_branch_protection_enabled(&engine.compiler().isa_flags()), + )) } #[cfg(compiler)] @@ -517,12 +519,14 @@ impl Module { mmap: MmapVec, info: Option, types: impl Into, + enable_branch_protection: bool, ) -> Result { let module = Arc::new(CompiledModule::from_artifacts( mmap, info, engine.profiler(), engine.unique_id_allocator(), + enable_branch_protection, )?); // Validate the module can be used with the current allocator diff --git a/crates/wasmtime/src/module/serialization.rs b/crates/wasmtime/src/module/serialization.rs index bb0d7fd401dc..a6b7b7add1ff 100644 --- a/crates/wasmtime/src/module/serialization.rs +++ b/crates/wasmtime/src/module/serialization.rs @@ -204,14 +204,14 @@ impl<'a> SerializedModule<'a> { } pub fn into_module(self, engine: &Engine) -> Result { - let (mmap, info, types) = self.into_parts(engine)?; - Module::from_parts(engine, mmap, info, types) + let (mmap, info, types, enable_branch_protection) = self.into_parts(engine)?; + Module::from_parts(engine, mmap, info, types, enable_branch_protection) } pub fn into_parts( mut self, engine: &Engine, - ) -> Result<(MmapVec, Option, ModuleTypes)> { + ) -> Result<(MmapVec, Option, ModuleTypes, bool)> { // Verify that the compilation settings in the engine match the // compilation settings of the module that's being loaded. self.check_triple(engine)?; @@ -223,7 +223,12 @@ impl<'a> SerializedModule<'a> { let module = self.artifacts.unwrap_owned(); - Ok((module, None, self.metadata.types.unwrap_owned())) + Ok(( + module, + None, + self.metadata.types.unwrap_owned(), + engine.is_branch_protection_enabled(&self.metadata.isa_flags), + )) } pub fn to_bytes(&self, version_strat: &ModuleVersionStrategy) -> Result> { diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs index cc9cd570e3ee..5b2c48958f04 100644 --- a/crates/wasmtime/src/trampoline/func.rs +++ b/crates/wasmtime/src/trampoline/func.rs @@ -117,7 +117,10 @@ where // Copy the results of JIT compilation into executable memory, and this will // also take care of unwind table registration. - let mut code_memory = CodeMemory::new(obj); + let mut code_memory = CodeMemory::new( + obj, + engine.is_branch_protection_enabled(&engine.compiler().isa_flags()), + ); let code = code_memory.publish()?; register_trampolines(engine.profiler(), &code.obj);