From cfe59d002afa9d9724b320abdbcd86f9d3671df4 Mon Sep 17 00:00:00 2001 From: Anton Kirilov Date: Fri, 7 Jan 2022 14:38:39 +0000 Subject: [PATCH] Initial forward-edge CFI implementation Give the user the option to start all basic blocks that are targets of indirect branches with the BTI instruction introduced by the Branch Target Identification extension to the Arm instruction set architecture. Copyright (c) 2022, Arm Limited. --- cranelift/codegen/meta/src/isa/arm64.rs | 14 ++- cranelift/codegen/src/alias_analysis.rs | 2 +- cranelift/codegen/src/inst_predicates.rs | 16 ++- cranelift/codegen/src/isa/aarch64/abi.rs | 11 +- cranelift/codegen/src/isa/aarch64/inst.isle | 14 +++ .../codegen/src/isa/aarch64/inst/emit.rs | 10 ++ .../src/isa/aarch64/inst/emit_tests.rs | 7 ++ cranelift/codegen/src/isa/aarch64/inst/mod.rs | 38 +++++- cranelift/codegen/src/machinst/abi.rs | 3 + cranelift/codegen/src/machinst/abi_impl.rs | 13 ++- cranelift/codegen/src/machinst/blockorder.rs | 15 ++- cranelift/codegen/src/machinst/mod.rs | 14 ++- cranelift/codegen/src/machinst/vcode.rs | 20 ++++ .../filetests/filetests/isa/aarch64/bti.clif | 110 ++++++++++++++++++ cranelift/jit/src/backend.rs | 15 ++- cranelift/jit/src/memory.rs | 42 +++++-- crates/fuzzing/src/generators.rs | 1 + crates/jit/src/code_memory.rs | 6 +- crates/jit/src/instantiate.rs | 3 +- crates/runtime/src/mmap.rs | 29 ++++- crates/runtime/src/mmap_vec.rs | 12 +- crates/wasmtime/src/component/component.rs | 13 ++- crates/wasmtime/src/engine.rs | 5 +- crates/wasmtime/src/lib.rs | 7 ++ crates/wasmtime/src/module.rs | 47 ++++---- crates/wasmtime/src/module/serialization.rs | 15 ++- crates/wasmtime/src/trampoline/func.rs | 7 +- 27 files changed, 409 insertions(+), 80 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/aarch64/bti.clif diff --git a/cranelift/codegen/meta/src/isa/arm64.rs b/cranelift/codegen/meta/src/isa/arm64.rs index 5fd7b69309d6..6c242186de6f 100644 --- a/cranelift/codegen/meta/src/isa/arm64.rs +++ b/cranelift/codegen/meta/src/isa/arm64.rs @@ -5,7 +5,19 @@ use crate::shared::Definitions as SharedDefinitions; fn define_settings(_shared: &SettingGroup) -> SettingGroup { let mut setting = SettingGroupBuilder::new("arm64"); - let has_lse = setting.add_bool("has_lse", "Has Large System Extensions support.", "", false); + let has_lse = setting.add_bool( + "has_lse", + "Has Large System Extensions (FEAT_LSE) support.", + "", + false, + ); + + setting.add_bool( + "use_bti", + "Use Branch Target Identification (FEAT_BTI) instructions.", + "", + false, + ); setting.add_predicate("use_lse", predicate!(has_lse)); setting.build() diff --git a/cranelift/codegen/src/alias_analysis.rs b/cranelift/codegen/src/alias_analysis.rs index ba76d13c9eb0..ba48175ae7d8 100644 --- a/cranelift/codegen/src/alias_analysis.rs +++ b/cranelift/codegen/src/alias_analysis.rs @@ -236,7 +236,7 @@ impl<'a> AliasAnalysis<'a> { log::trace!("after inst{}: state is {:?}", inst.index(), state); } - visit_block_succs(self.func, block, |_inst, succ| { + visit_block_succs(self.func, block, |_inst, succ, _from_table| { let succ_first_inst = self .func .layout diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index 8d36742979ce..bdd8e21a1cde 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -130,7 +130,11 @@ pub fn has_memory_fence_semantics(op: Opcode) -> bool { } /// Visit all successors of a block with a given visitor closure. -pub(crate) fn visit_block_succs(f: &Function, block: Block, mut visit: F) { +pub(crate) fn visit_block_succs( + f: &Function, + block: Block, + mut visit: F, +) { for inst in f.layout.block_likely_branches(block) { if f.dfg[inst].opcode().is_branch() { visit_branch_targets(f, inst, &mut visit); @@ -138,18 +142,20 @@ pub(crate) fn visit_block_succs(f: &Function, block: Bloc } } -fn visit_branch_targets(f: &Function, inst: Inst, visit: &mut F) { +fn visit_branch_targets(f: &Function, inst: Inst, visit: &mut F) { match f.dfg[inst].analyze_branch(&f.dfg.value_lists) { BranchInfo::NotABranch => {} BranchInfo::SingleDest(dest, _) => { - visit(inst, dest); + visit(inst, dest, false); } BranchInfo::Table(table, maybe_dest) => { if let Some(dest) = maybe_dest { - visit(inst, dest); + // The default block is reached via a direct conditional branch, + // so it is not part of the table. + visit(inst, dest, false); } for &dest in f.jump_tables[table].as_slice() { - visit(inst, dest); + visit(inst, dest, true); } } } diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index dde4e1b3b8d8..eb32d943e0cb 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -622,10 +622,10 @@ impl ABIMachineSpec for AArch64MachineDeps { } } - fn gen_debug_frame_info( + fn gen_prologue_start( call_conv: isa::CallConv, flags: &settings::Flags, - _isa_flags: &Vec, + isa_flags: &Vec, ) -> SmallInstVec { let mut insts = SmallVec::new(); if flags.unwind_info() && call_conv.extends_apple_aarch64() { @@ -635,6 +635,13 @@ impl ABIMachineSpec for AArch64MachineDeps { }, }); } + + if has_bool_setting("use_bti", isa_flags) { + insts.push(Inst::Bti { + targets: BranchTargetType::C, + }); + } + insts } diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 18398ca73676..78cab62d144b 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -732,6 +732,11 @@ (rd WritableReg) (mem AMode)) + ;; Branch target identification; equivalent to a no-op if Branch Target + ;; Identification (FEAT_BTI) is not supported. + (Bti + (targets BranchTargetType)) + ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This ;; controls how AMode::NominalSPOffset args are lowered. (VirtualSPOffsetAdj @@ -1282,6 +1287,15 @@ (Xchg) )) +;; Branch target types +(type BranchTargetType + (enum + (None) + (C) + (J) + (JC) +)) + ;; Extractors for target features ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl use_lse () Inst) (extern extractor use_lse use_lse) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 57def924b1f1..757727fb1d5d 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -3044,6 +3044,16 @@ impl MachInstEmit for Inst { add.emit(&[], sink, emit_info, state); } } + &Inst::Bti { targets } => { + let targets = match targets { + BranchTargetType::None => 0b00, + BranchTargetType::C => 0b01, + BranchTargetType::J => 0b10, + BranchTargetType::JC => 0b11, + }; + + sink.put4(0xd503241f | targets << 6); + } &Inst::VirtualSPOffsetAdj { offset } => { log::trace!( "virtual sp offset adjusted by {} -> {}", diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index c475dd261ba9..9b9392e2f840 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -38,6 +38,13 @@ fn test_aarch64_binemit() { // // $ echo "mov x1, x2" | aarch64inst.sh insns.push((Inst::Ret { rets: vec![] }, "C0035FD6", "ret")); + insns.push(( + Inst::Bti { + targets: BranchTargetType::J, + }, + "9F2403D5", + "bti j", + )); insns.push((Inst::Nop0, "", "nop-zero-len")); insns.push((Inst::Nop4, "1F2003D5", "nop")); insns.push(( diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index e43e9ad7ee95..e6f6de4fc3db 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -36,9 +36,10 @@ mod emit_tests; // Instructions (top level): definition pub use crate::isa::aarch64::lower::isle::generated_code::{ - ALUOp, ALUOp3, AtomicRMWLoopOp, AtomicRMWOp, BitOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, - FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, - VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, VecShiftImmOp, + ALUOp, ALUOp3, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1, FPUOp2, FPUOp3, + FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUOp, VecExtendOp, + VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongOp, + VecShiftImmOp, }; /// A floating-point unit (FPU) operation with two args, a register and an immediate. @@ -1025,6 +1026,7 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_def(rd); memarg_operands(mem, collector); } + &Inst::Bti { .. } => {} &Inst::VirtualSPOffsetAdj { .. } => {} &Inst::ElfTlsGetAddr { .. } => { @@ -1224,6 +1226,19 @@ impl MachInst for Inst { fn ref_type_regclass(_: &settings::Flags) -> RegClass { RegClass::Int } + + fn gen_block_start( + is_indirect_branch_target: bool, + isa_flags: &Vec, + ) -> Option { + if is_indirect_branch_target && has_bool_setting("use_bti", isa_flags) { + Some(Inst::Bti { + targets: BranchTargetType::J, + }) + } else { + None + } + } } //============================================================================= @@ -2703,6 +2718,16 @@ impl Inst { } ret } + &Inst::Bti { targets } => { + let targets = match targets { + BranchTargetType::None => "", + BranchTargetType::C => " c", + BranchTargetType::J => " j", + BranchTargetType::JC => " jc", + }; + + "bti".to_string() + targets + } &Inst::VirtualSPOffsetAdj { offset } => { state.virtual_sp_offset += offset; format!("virtual_sp_offset_adjust {}", offset) @@ -2896,3 +2921,10 @@ impl MachInstLabelUse for LabelUse { } } } + +pub fn has_bool_setting(name: &str, isa_flags: &Vec) -> bool { + isa_flags + .iter() + .find(|&f| f.name == name) + .map_or(false, |f| f.as_bool().unwrap_or(false)) +} diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 4d7442b67012..9062dc516379 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -38,6 +38,9 @@ pub trait ABICallee { /// Get the settings controlling this function's compilation. fn flags(&self) -> &settings::Flags; + /// Get the ISA-specific flag values controlling this function's compilation. + fn isa_flags(&self) -> &Vec; + /// Get the calling convention implemented by this ABI object. fn call_conv(&self) -> CallConv; diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index 41e81197eead..374630d6b36c 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -403,9 +403,10 @@ pub trait ABIMachineSpec { /// Generate a meta-instruction that adjusts the nominal SP offset. fn gen_nominal_sp_adj(amount: i32) -> Self::I; - /// Generates extra unwind instructions for a new frame for this - /// architecture, whether the frame has a prologue sequence or not. - fn gen_debug_frame_info( + /// Generates the mandatory part of the prologue, irrespective of whether + /// the usual frame-setup sequence for this architecture is required or not, + /// e.g. extra unwind instructions. + fn gen_prologue_start( _call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &Vec, @@ -935,6 +936,10 @@ impl ABICallee for ABICalleeImpl { &self.flags } + fn isa_flags(&self) -> &Vec { + &self.isa_flags + } + fn call_conv(&self) -> isa::CallConv { self.sig.call_conv } @@ -1240,7 +1245,7 @@ impl ABICallee for ABICalleeImpl { ); insts.extend( - M::gen_debug_frame_info(self.call_conv, &self.flags, &self.isa_flags).into_iter(), + M::gen_prologue_start(self.call_conv, &self.flags, &self.isa_flags).into_iter(), ); if self.setup_frame { diff --git a/cranelift/codegen/src/machinst/blockorder.rs b/cranelift/codegen/src/machinst/blockorder.rs index 82fec1968b18..06dc995c1552 100644 --- a/cranelift/codegen/src/machinst/blockorder.rs +++ b/cranelift/codegen/src/machinst/blockorder.rs @@ -106,6 +106,8 @@ pub struct BlockLoweringOrder { /// which is used by VCode emission to sink the blocks at the last /// moment (when we actually emit bytes into the MachBuffer). cold_blocks: FxHashSet, + /// CLIF BBs that are indirect branch targets. + indirect_branch_targets: FxHashSet, } /// The origin of a block in the lowered block-order: either an original CLIF @@ -224,14 +226,19 @@ impl BlockLoweringOrder { let mut block_succs: SmallVec<[(Inst, usize, Block); 128]> = SmallVec::new(); let mut block_succ_range = SecondaryMap::with_default((0, 0)); let mut fallthrough_return_block = None; + let mut indirect_branch_targets = FxHashSet::default(); for block in f.layout.blocks() { let block_succ_start = block_succs.len(); let mut succ_idx = 0; - visit_block_succs(f, block, |inst, succ| { + visit_block_succs(f, block, |inst, succ, from_table| { block_out_count[block] += 1; block_in_count[succ] += 1; block_succs.push((inst, succ_idx, succ)); succ_idx += 1; + + if from_table { + indirect_branch_targets.insert(succ); + } }); let block_succ_end = block_succs.len(); block_succ_range[block] = (block_succ_start, block_succ_end); @@ -476,6 +483,7 @@ impl BlockLoweringOrder { lowered_succ_ranges, orig_map, cold_blocks, + indirect_branch_targets, }; log::trace!("BlockLoweringOrder: {:?}", result); result @@ -496,6 +504,11 @@ impl BlockLoweringOrder { pub fn is_cold(&self, block: BlockIndex) -> bool { self.cold_blocks.contains(&block) } + + /// Determine whether the given CLIF BB is an indirect branch target. + pub fn is_indirect_branch_target(&self, block: Block) -> bool { + self.indirect_branch_targets.contains(&block) + } } #[cfg(test)] diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 6d0d145349ad..c4ec79f8be68 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -47,7 +47,7 @@ use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap}; use crate::ir::{SourceLoc, StackSlot, Type}; use crate::result::CodegenResult; -use crate::settings::Flags; +use crate::settings; use crate::value_label::ValueLabelsRanges; use alloc::boxed::Box; use alloc::vec::Vec; @@ -162,11 +162,21 @@ pub trait MachInst: Clone + Debug { /// What is the register class used for reference types (GC-observable pointers)? Can /// be dependent on compilation flags. - fn ref_type_regclass(_flags: &Flags) -> RegClass; + fn ref_type_regclass(_flags: &settings::Flags) -> RegClass; /// Is this a safepoint? fn is_safepoint(&self) -> bool; + /// Generate an instruction that must appear at the beginning of a basic + /// block, if any. Note that the return value must not be subject to + /// register allocation. + fn gen_block_start( + _is_indirect_branch_target: bool, + _isa_flags: &Vec, + ) -> Option { + None + } + /// A label-use kind: a type that describes the types of label references that /// can occur in an instruction. type LabelUse: MachInstLabelUse; diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 68a0549d791a..19583e9b5f01 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -851,6 +851,26 @@ impl VCode { last_offset = Some(cur_offset); } + let lb = self.block_order.lowered_order()[block.index()]; + let b = if let Some(b) = lb.orig_block() { + b + } else { + // If there is no original block, then this must be a pure edge + // block. Note that the successor must have an original block. + let (_, succ) = self.block_order.succ_indices(block)[0]; + + self.block_order.lowered_order()[succ.index()] + .orig_block() + .expect("Edge block successor must be body block.") + }; + + if let Some(block_start) = I::gen_block_start( + self.block_order.is_indirect_branch_target(b), + self.abi.isa_flags(), + ) { + do_emit(&block_start, &[], &mut disasm, &mut buffer, &mut state); + } + for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { match inst_or_edit { InstOrEdit::Inst(iix) => { diff --git a/cranelift/filetests/filetests/isa/aarch64/bti.clif b/cranelift/filetests/filetests/isa/aarch64/bti.clif new file mode 100644 index 000000000000..3a5bfb712c86 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/bti.clif @@ -0,0 +1,110 @@ +test compile precise-output +set unwind_info=false +target aarch64 use_bti + +function %f1(i64) -> i64 { + jt0 = jump_table [block1, block2, block3] + +block0(v0: i64): + br_table v0, block4, jt0 + +block1: + v1 = iconst.i64 1 + jump block5(v1) + +block2: + v2 = iconst.i64 2 + jump block5(v2) + +block3: + v3 = iconst.i64 3 + jump block5(v3) + +block4: + v4 = iconst.i64 4 + jump block5(v4) + +block5(v5: i64): + v6 = iadd.i64 v0, v5 + return v6 +} + +; bti c +; block0: +; emit_island 36 +; subs wzr, w0, #3 +; b.hs label1 ; adr x15, pc+16 ; ldrsw x1, [x15, x0, LSL 2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] +; block1: +; movz x5, #4 +; b label2 +; block2: +; b label9 +; block3: +; bti j +; movz x5, #1 +; b label4 +; block4: +; b label9 +; block5: +; bti j +; movz x5, #2 +; b label6 +; block6: +; b label9 +; block7: +; bti j +; movz x5, #3 +; b label8 +; block8: +; b label9 +; block9: +; add x0, x0, x5 +; ret + +function %f2(i64) -> i64 { + jt0 = jump_table [block2] + +block0(v0: i64): + v1 = load.i64 notrap aligned table v0 + br_table v0, block1, jt0 + +block1: + return v1 + +block2: + v2 = iconst.i64 42 + v3 = iadd.i64 v1, v2 + return v3 +} + +; bti c +; block0: +; ldr x2, [x0] +; emit_island 28 +; subs wzr, w0, #1 +; b.hs label1 ; adr x8, pc+16 ; ldrsw x9, [x8, x0, LSL 2] ; add x8, x8, x9 ; br x8 ; jt_entries [Label(MachLabel(2))] +; block1: +; mov x0, x2 +; ret +; block2: +; bti j +; mov x0, x2 +; add x0, x0, #42 +; ret + +function %f3(i64) -> i64 { + fn0 = %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; bti c +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/jit/src/backend.rs b/cranelift/jit/src/backend.rs index 3e0cc7368bb2..bf8c6b9dfea1 100644 --- a/cranelift/jit/src/backend.rs +++ b/cranelift/jit/src/backend.rs @@ -479,6 +479,8 @@ impl JITModule { ); } + let enable_branch_protection = + cfg!(target_arch = "aarch64") && use_bti(&builder.isa.isa_flags()); let mut module = Self { isa: builder.isa, hotswap_enabled: builder.hotswap_enabled, @@ -486,9 +488,9 @@ impl JITModule { lookup_symbols: builder.lookup_symbols, libcall_names: builder.libcall_names, memory: MemoryHandle { - code: Memory::new(), - readonly: Memory::new(), - writable: Memory::new(), + code: Memory::new(enable_branch_protection), + readonly: Memory::new(false), + writable: Memory::new(false), }, declarations: ModuleDeclarations::default(), function_got_entries: SecondaryMap::new(), @@ -913,3 +915,10 @@ fn lookup_with_dlsym(name: &str) -> Option<*const u8> { None } } + +fn use_bti(isa_flags: &Vec) -> bool { + isa_flags + .iter() + .find(|&f| f.name == "use_bti") + .map_or(false, |f| f.as_bool().unwrap_or(false)) +} diff --git a/cranelift/jit/src/memory.rs b/cranelift/jit/src/memory.rs index c183da9db248..88fea142757a 100644 --- a/cranelift/jit/src/memory.rs +++ b/cranelift/jit/src/memory.rs @@ -112,15 +112,17 @@ pub(crate) struct Memory { already_protected: usize, current: PtrLen, position: usize, + enable_branch_protection: bool, } impl Memory { - pub(crate) fn new() -> Self { + pub(crate) fn new(enable_branch_protection: bool) -> Self { Self { allocations: Vec::new(), already_protected: 0, current: PtrLen::new(), position: 0, + enable_branch_protection, } } @@ -156,14 +158,35 @@ impl Memory { pub(crate) fn set_readable_and_executable(&mut self) { self.finish_current(); + let set_region_readable_and_executable = |ptr, len| { + if len != 0 { + if self.enable_branch_protection { + #[cfg(all(target_arch = "aarch64", target_os = "linux"))] + if std::arch::is_aarch64_feature_detected!("bti") { + let prot = libc::PROT_EXEC | libc::PROT_READ | 0x10; // PROT_BTI + + unsafe { + if libc::mprotect(ptr as *mut libc::c_void, len, prot) < 0 { + panic!("unable to make memory readable+executable"); + } + } + + return; + } + } + + unsafe { + region::protect(ptr, len, region::Protection::READ_EXECUTE) + .expect("unable to make memory readable+executable"); + } + } + }; + #[cfg(feature = "selinux-fix")] { for &PtrLen { ref map, ptr, len } in &self.allocations[self.already_protected..] { - if len != 0 && map.is_some() { - unsafe { - region::protect(ptr, len, region::Protection::READ_EXECUTE) - .expect("unable to make memory readable+executable"); - } + if map.is_some() { + set_region_readable_and_executable(ptr, len); } } } @@ -171,12 +194,7 @@ impl Memory { #[cfg(not(feature = "selinux-fix"))] { for &PtrLen { ptr, len } in &self.allocations[self.already_protected..] { - if len != 0 { - unsafe { - region::protect(ptr, len, region::Protection::READ_EXECUTE) - .expect("unable to make memory readable+executable"); - } - } + set_region_readable_and_executable(ptr, len); } } diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index 19c82b2654ef..9c6b422571b8 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -780,6 +780,7 @@ impl<'a> Arbitrary<'a> for CodegenSettings { "aarch64" => { test: is_aarch64_feature_detected, + std: "bti" => clif: "use_bti", std: "lse" => clif: "has_lse", }, }; diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs index 5dfe1a111593..73d217c15690 100644 --- a/crates/jit/src/code_memory.rs +++ b/crates/jit/src/code_memory.rs @@ -16,6 +16,7 @@ pub struct CodeMemory { mmap: ManuallyDrop, unwind_registration: ManuallyDrop>, published: bool, + enable_branch_protection: bool, } impl Drop for CodeMemory { @@ -53,7 +54,7 @@ impl CodeMemory { /// /// The returned `CodeMemory` manages the internal `MmapVec` and the /// `publish` method is used to actually make the memory executable. - pub fn new(mmap: MmapVec) -> Self { + pub fn new(mmap: MmapVec, enable_branch_protection: bool) -> Self { #[cfg(all(target_arch = "aarch64", target_os = "linux"))] { // This is a requirement of the `membarrier` call executed by the `publish` method. @@ -67,6 +68,7 @@ impl CodeMemory { mmap: ManuallyDrop::new(mmap), unwind_registration: ManuallyDrop::new(None), published: false, + enable_branch_protection, } } @@ -159,7 +161,7 @@ impl CodeMemory { // read/execute, notably not using read/write/execute to prevent // modifications. self.mmap - .make_executable(text_range.clone()) + .make_executable(text_range.clone(), self.enable_branch_protection) .expect("unable to make memory executable"); #[cfg(all(target_arch = "aarch64", target_os = "linux"))] diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index ecc516c42f2d..400512cc7342 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -397,12 +397,13 @@ impl CompiledModule { info: Option, profiler: &dyn ProfilingAgent, id_allocator: &CompiledModuleIdAllocator, + enable_branch_protection: bool, ) -> Result { // Transfer ownership of `obj` to a `CodeMemory` object which will // manage permissions, such as the executable bit. Once it's located // there we also publish it for being able to execute. Note that this // step will also resolve pending relocations in the compiled image. - let mut code_memory = CodeMemory::new(mmap); + let mut code_memory = CodeMemory::new(mmap, enable_branch_protection); let code = code_memory .publish() .context("failed to publish code memory")?; diff --git a/crates/runtime/src/mmap.rs b/crates/runtime/src/mmap.rs index 7c2fb99cd52d..2dfa3a428775 100644 --- a/crates/runtime/src/mmap.rs +++ b/crates/runtime/src/mmap.rs @@ -403,7 +403,11 @@ impl Mmap { } /// Makes the specified `range` within this `Mmap` to be read/execute. - pub unsafe fn make_executable(&self, range: Range) -> Result<()> { + pub unsafe fn make_executable( + &self, + range: Range, + enable_branch_protection: bool, + ) -> Result<()> { assert!(range.start <= self.len()); assert!(range.end <= self.len()); assert!(range.start <= range.end); @@ -412,11 +416,24 @@ impl Mmap { "changing of protections isn't page-aligned", ); - region::protect( - self.as_ptr().add(range.start), - range.end - range.start, - region::Protection::READ_EXECUTE, - )?; + let address = self.as_ptr().add(range.start); + let size = range.end - range.start; + + if enable_branch_protection { + #[cfg(all(target_arch = "aarch64", target_os = "linux"))] + if std::arch::is_aarch64_feature_detected!("bti") { + let prot = libc::PROT_EXEC | libc::PROT_READ | 0x10; // PROT_BTI + + return if libc::mprotect(address as *mut libc::c_void, size, prot) < 0 { + Err(anyhow!("mprotect() failed")) + } else { + Ok(()) + }; + } + } + + region::protect(address, size, region::Protection::READ_EXECUTE)?; + Ok(()) } diff --git a/crates/runtime/src/mmap_vec.rs b/crates/runtime/src/mmap_vec.rs index 2779668b20f0..c06e8b0cd23f 100644 --- a/crates/runtime/src/mmap_vec.rs +++ b/crates/runtime/src/mmap_vec.rs @@ -113,9 +113,15 @@ impl MmapVec { } /// Makes the specified `range` within this `mmap` to be read/execute. - pub unsafe fn make_executable(&self, range: Range) -> Result<()> { - self.mmap - .make_executable(range.start + self.range.start..range.end + self.range.start) + pub unsafe fn make_executable( + &self, + range: Range, + enable_branch_protection: bool, + ) -> Result<()> { + self.mmap.make_executable( + range.start + self.range.start..range.end + self.range.start, + enable_branch_protection, + ) } /// Returns the underlying file that this mmap is mapping, if present. diff --git a/crates/wasmtime/src/component/component.rs b/crates/wasmtime/src/component/component.rs index c2f57c76942b..8bc51e68c8df 100644 --- a/crates/wasmtime/src/component/component.rs +++ b/crates/wasmtime/src/component/component.rs @@ -1,5 +1,5 @@ use crate::signatures::SignatureCollection; -use crate::{Engine, Module}; +use crate::{is_branch_protection_enabled, Engine, Module}; use anyhow::{bail, Context, Result}; use std::fs; use std::ops::Range; @@ -9,7 +9,7 @@ use std::sync::Arc; use wasmtime_environ::component::{ ComponentTypes, GlobalInitializer, LoweredIndex, StaticModuleIndex, TrampolineInfo, Translator, }; -use wasmtime_environ::PrimaryMap; +use wasmtime_environ::{FlagValue, PrimaryMap}; use wasmtime_jit::CodeMemory; use wasmtime_runtime::VMFunctionBody; @@ -122,7 +122,7 @@ impl Component { || -> Result<_> { let upvars = modules.into_iter().map(|(_, t)| t).collect::>(); let modules = engine.run_maybe_parallel(upvars, |module| { - let (mmap, info) = + let (mmap, info, enable_branch_protection) = Module::compile_functions(engine, module, types.module_types())?; // FIXME: the `SignatureCollection` here is re-registering // the entire list of wasm types within `types` on each @@ -130,7 +130,7 @@ impl Component { // do so. This should build up a mapping from // `SignatureIndex` to `VMSharedSignatureIndex` once and // then reuse that for each module somehow. - Module::from_parts(engine, mmap, info, types.clone()) + Module::from_parts(engine, mmap, info, types.clone(), enable_branch_protection) })?; Ok(modules.into_iter().collect::>()) @@ -160,7 +160,10 @@ impl Component { ); let static_modules = static_modules?; let (trampolines, trampoline_obj) = trampolines?; - let mut trampoline_obj = CodeMemory::new(trampoline_obj); + let mut trampoline_obj = CodeMemory::new( + trampoline_obj, + is_branch_protection_enabled(&engine.compiler().isa_flags()), + ); let code = trampoline_obj.publish()?; let text = wasmtime_jit::subslice_range(code.text, code.mmap); diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index b0361ad3cd17..7699bf0425c4 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -216,7 +216,7 @@ impl Engine { pub fn precompile_module(&self, bytes: &[u8]) -> Result> { #[cfg(feature = "wat")] let bytes = wat::parse_bytes(&bytes)?; - let (mmap, _, types) = crate::Module::build_artifacts(self, &bytes)?; + let (mmap, _, types, _) = crate::Module::build_artifacts(self, &bytes)?; crate::module::SerializedModule::from_artifacts(self, &mmap, &types) .to_bytes(&self.config().module_version) } @@ -439,6 +439,9 @@ impl Engine { { enabled = match flag { "has_lse" => Some(std::arch::is_aarch64_feature_detected!("lse")), + // The `BTI` instruction acts as a `NOP` when unsupported, so it + // is safe to enable it. + "use_bti" => Some(true), // fall through to the very bottom to indicate that support is // not enabled to test whether this feature is enabled on the // host. diff --git a/crates/wasmtime/src/lib.rs b/crates/wasmtime/src/lib.rs index 64cf5f84de1a..f7719dffd3cf 100644 --- a/crates/wasmtime/src/lib.rs +++ b/crates/wasmtime/src/lib.rs @@ -425,6 +425,9 @@ pub use crate::values::*; #[cfg(feature = "component-model")] pub mod component; +use std::collections::BTreeMap; +use wasmtime_environ::FlagValue; + cfg_if::cfg_if! { if #[cfg(all(target_os = "macos", not(feature = "posix-signals-on-macos")))] { // no extensions for macOS at this time @@ -468,3 +471,7 @@ fn _assert_send_sync() { _assert_send(Instance::new_async(s, m, &[])) } } + +fn is_branch_protection_enabled(isa_flags: &BTreeMap) -> bool { + cfg!(target_arch = "aarch64") && matches!(isa_flags.get("use_bti"), Some(FlagValue::Bool(true))) +} diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 1ee02855830b..eddc0beb22eb 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -1,7 +1,8 @@ -use crate::Engine; use crate::{ + is_branch_protection_enabled, signatures::SignatureCollection, types::{ExportType, ExternType, ImportType}, + Engine, }; use anyhow::{bail, Context, Result}; use once_cell::sync::OnceCell; @@ -286,7 +287,7 @@ impl Module { cfg_if::cfg_if! { if #[cfg(feature = "cache")] { let state = (HashedEngineCompileEnv(engine), binary); - let (mmap, info, types) = wasmtime_cache::ModuleCacheEntry::new( + let (mmap, info, types, enable_branch_protection) = wasmtime_cache::ModuleCacheEntry::new( "wasmtime", engine.cache_config(), ) @@ -297,7 +298,7 @@ impl Module { |(engine, wasm)| Module::build_artifacts(engine.0, wasm), // Implementation of how to serialize artifacts - |(engine, _wasm), (mmap, _info, types)| { + |(engine, _wasm), (mmap, _info, types, _enable_branch_protection)| { SerializedModule::from_artifacts( engine.0, mmap, @@ -314,11 +315,11 @@ impl Module { }, )?; } else { - let (mmap, info, types) = Module::build_artifacts(engine, binary)?; + let (mmap, info, types, enable_branch_protection) = Module::build_artifacts(engine, binary)?; } }; - Self::from_parts(engine, mmap, info, types) + Self::from_parts(engine, mmap, info, types, enable_branch_protection) } /// Converts an input binary-encoded WebAssembly module to compilation @@ -326,23 +327,20 @@ impl Module { /// /// This is where compilation actually happens of WebAssembly modules and /// translation/parsing/validation of the binary input occurs. The actual - /// result here is a triple of: - /// - /// * The index into the second field of the "main module". The "main - /// module" in this case is the outermost module described by the `wasm` - /// input, and is here for the module linking proposal. - /// * A list of compilation artifacts for each module found within `wasm`. - /// Note that if module linking is disabled then this list will always - /// have a size of exactly 1. These pairs are returned by - /// `wasmtime_jit::finish_compile`. - /// * Type information about all the modules returned. All returned modules - /// have local type information with indices that refer to these returned + /// result here is a combination of: + /// + /// * The compilation artifacts for the module found within `wasm`, as + /// returned by `wasmtime_jit::finish_compile`. + /// * Type information about the module returned. All returned modules have + /// local type information with indices that refer to these returned /// tables. + /// * A boolean value indicating whether forward-edge CFI has been applied + /// to the compiled module. #[cfg(compiler)] pub(crate) fn build_artifacts( engine: &Engine, wasm: &[u8], - ) -> Result<(MmapVec, Option, ModuleTypes)> { + ) -> Result<(MmapVec, Option, ModuleTypes, bool)> { let tunables = &engine.config().tunables; // First a `ModuleEnvironment` is created which records type information @@ -357,8 +355,9 @@ impl Module { .translate(parser, wasm) .context("failed to parse WebAssembly module")?; let types = types.finish(); - let (mmap, info) = Module::compile_functions(engine, translation, &types)?; - Ok((mmap, info, types)) + let (mmap, info, enable_branch_protection) = + Module::compile_functions(engine, translation, &types)?; + Ok((mmap, info, types, enable_branch_protection)) } #[cfg(compiler)] @@ -366,7 +365,7 @@ impl Module { engine: &Engine, mut translation: ModuleTranslation<'_>, types: &ModuleTypes, - ) -> Result<(MmapVec, Option)> { + ) -> Result<(MmapVec, Option, bool)> { // Compile all functions in parallel using rayon. This will also perform // validation of function bodies. let tunables = &engine.config().tunables; @@ -406,7 +405,11 @@ impl Module { let (mmap, info) = wasmtime_jit::finish_compile(translation, obj, funcs, trampolines, tunables)?; - Ok((mmap, Some(info))) + Ok(( + mmap, + Some(info), + is_branch_protection_enabled(&engine.compiler().isa_flags()), + )) } /// Deserializes an in-memory compiled module previously created with @@ -489,12 +492,14 @@ impl Module { mmap: MmapVec, info: Option, types: impl Into, + enable_branch_protection: bool, ) -> Result { let module = Arc::new(CompiledModule::from_artifacts( mmap, info, engine.profiler(), engine.unique_id_allocator(), + enable_branch_protection, )?); // Validate the module can be used with the current allocator diff --git a/crates/wasmtime/src/module/serialization.rs b/crates/wasmtime/src/module/serialization.rs index cb45b6ed79e7..860d0cfe5552 100644 --- a/crates/wasmtime/src/module/serialization.rs +++ b/crates/wasmtime/src/module/serialization.rs @@ -39,7 +39,7 @@ //! //! This format is implemented by the `to_bytes` and `from_mmap` function. -use crate::{Engine, Module, ModuleVersionStrategy}; +use crate::{is_branch_protection_enabled, Engine, Module, ModuleVersionStrategy}; use anyhow::{anyhow, bail, Context, Result}; use object::read::elf::FileHeader; use object::Bytes; @@ -204,14 +204,14 @@ impl<'a> SerializedModule<'a> { } pub fn into_module(self, engine: &Engine) -> Result { - let (mmap, info, types) = self.into_parts(engine)?; - Module::from_parts(engine, mmap, info, types) + let (mmap, info, types, enable_branch_protection) = self.into_parts(engine)?; + Module::from_parts(engine, mmap, info, types, enable_branch_protection) } pub fn into_parts( mut self, engine: &Engine, - ) -> Result<(MmapVec, Option, ModuleTypes)> { + ) -> Result<(MmapVec, Option, ModuleTypes, bool)> { // Verify that the compilation settings in the engine match the // compilation settings of the module that's being loaded. self.check_triple(engine)?; @@ -223,7 +223,12 @@ impl<'a> SerializedModule<'a> { let module = self.artifacts.unwrap_owned(); - Ok((module, None, self.metadata.types.unwrap_owned())) + Ok(( + module, + None, + self.metadata.types.unwrap_owned(), + is_branch_protection_enabled(&self.metadata.isa_flags), + )) } pub fn to_bytes(&self, version_strat: &ModuleVersionStrategy) -> Result> { diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs index fce77bd2b928..af1713c05a16 100644 --- a/crates/wasmtime/src/trampoline/func.rs +++ b/crates/wasmtime/src/trampoline/func.rs @@ -1,7 +1,7 @@ //! Support for a calling of an imported function. use crate::module::BareModuleInfo; -use crate::{Engine, FuncType, Trap, ValRaw}; +use crate::{is_branch_protection_enabled, Engine, FuncType, Trap, ValRaw}; use anyhow::Result; use std::any::Any; use std::panic::{self, AssertUnwindSafe}; @@ -124,7 +124,10 @@ where // Copy the results of JIT compilation into executable memory, and this will // also take care of unwind table registration. - let mut code_memory = CodeMemory::new(obj); + let mut code_memory = CodeMemory::new( + obj, + is_branch_protection_enabled(&engine.compiler().isa_flags()), + ); let code = code_memory.publish()?; register_trampolines(engine.profiler(), &code.obj);