From f8c03d5acbb755c695a4a47efce99f5f8dfb1e34 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 25 Aug 2023 01:49:43 +0200 Subject: [PATCH] Don't force veneers on island emission (#6902) * Don't force veneers on island emission This commit is a follow-up to #6804 with the discussion on #6818. This undoes some of the changes from #6804, such as bringing a size parameter back to `emit_island`, and implements the changes discussed in #6818. Namely, fixups are now tracked in a `pending_fixups` list for editing and modification and then during island emission they're flushed into a `BinaryHeap` tracked as `fixup_records`. Additionally, calculation of the size of the pending island is now done as a single calculation rather than updating metadata as we go along. This is required because fixups are no longer entirely cleared during island emission, so the previous logic of "reset the island size to zero" and have it get recalculated as the island is emitted is no longer applicable. I opted to go with a simplistic version of this for now which assumes that all veneers are the worst case size, but if necessary I think this could be more optimal by tracking each veneer in a counter. Closes #6818 * Keep a running count for pending fixup deadline Update this as pending fixups are added and then clear this out when islands are emitted. * Don't force all fixups to go through a binary heap Instead process them immediately if they're ready. 
--- .../codegen/src/isa/aarch64/inst/emit.rs | 4 +- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 4 + .../codegen/src/isa/riscv64/inst/emit.rs | 4 +- cranelift/codegen/src/isa/riscv64/inst/mod.rs | 4 + cranelift/codegen/src/isa/s390x/inst/mod.rs | 4 + cranelift/codegen/src/isa/x64/inst/mod.rs | 4 + cranelift/codegen/src/machinst/buffer.rs | 406 ++++++++---------- cranelift/codegen/src/machinst/mod.rs | 2 + cranelift/codegen/src/machinst/vcode.rs | 2 +- 9 files changed, 213 insertions(+), 221 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index a9afd6b0463c..0877db54b0a5 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -3579,7 +3579,7 @@ impl MachInstEmit for Inst { dest: BranchTarget::Label(jump_around_label), }; jmp.emit(&[], sink, emit_info, state); - sink.emit_island(&mut state.ctrl_plane); + sink.emit_island(needed_space + 4, &mut state.ctrl_plane); sink.bind_label(jump_around_label, &mut state.ctrl_plane); } } @@ -3776,7 +3776,7 @@ fn emit_return_call_common_sequence( dest: BranchTarget::Label(jump_around_label), }; jmp.emit(&[], sink, emit_info, state); - sink.emit_island(&mut state.ctrl_plane); + sink.emit_island(space_needed + 4, &mut state.ctrl_plane); sink.bind_label(jump_around_label, &mut state.ctrl_plane); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 006a6b807d3d..d1053577dd6c 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -2984,6 +2984,10 @@ impl MachInstLabelUse for LabelUse { } } + fn worst_case_veneer_size() -> CodeOffset { + 20 + } + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return /// an offset and label-use for the veneer's use of the original label. 
fn generate_veneer( diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index a0a724f5ecf8..e15d6413127a 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -1189,7 +1189,7 @@ impl MachInstEmit for Inst { // we need to emit a jump table here to support that jump. let distance = (targets.len() * 2 * Inst::INSTRUCTION_SIZE as usize) as u32; if sink.island_needed(distance) { - sink.emit_island(&mut state.ctrl_plane); + sink.emit_island(distance, &mut state.ctrl_plane); } // Emit the jumps back to back @@ -3104,7 +3104,7 @@ fn emit_return_call_common_sequence( dest: BranchTarget::Label(jump_around_label), } .emit(&[], sink, emit_info, state); - sink.emit_island(&mut state.ctrl_plane); + sink.emit_island(space_needed + 4, &mut state.ctrl_plane); sink.bind_label(jump_around_label, &mut state.ctrl_plane); } diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index a6c90991ec66..0939f61f3c63 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -2014,6 +2014,10 @@ impl MachInstLabelUse for LabelUse { } } + fn worst_case_veneer_size() -> CodeOffset { + 8 + } + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return /// an offset and label-use for the veneer's use of the original label. 
fn generate_veneer( diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index e3b963aacb57..2d50c43b67cf 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -3406,6 +3406,10 @@ impl MachInstLabelUse for LabelUse { 0 } + fn worst_case_veneer_size() -> CodeOffset { + 0 + } + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return /// an offset and label-use for the veneer's use of the original label. fn generate_veneer( diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index a1047d6709d0..c09df5fc2d3f 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -2742,6 +2742,10 @@ impl MachInstLabelUse for LabelUse { } } + fn worst_case_veneer_size() -> CodeOffset { + 0 + } + fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) { match self { LabelUse::JmpRel32 | LabelUse::PCRel32 => { diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index 5f7b4508b3b5..e5de3b40ee66 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -162,29 +162,13 @@ //! alone. The way this was implemented put them back on a list and //! resulted in quadratic behavior. //! -//! To avoid this, we could use a better data structure that allows -//! us to query for fixups with deadlines "coming soon" and generate -//! veneers for only those fixups. However, there is some -//! interaction with the branch peephole optimizations: the -//! invariant there is that branches in the "most recent branches -//! contiguous with end of buffer" list have corresponding fixups in -//! order (so that when we chomp the branch, we can chomp its fixup -//! too). -//! -//! So instead, when we generate an island, for now we create -//! 
veneers for *all* pending fixups, then if upgraded to a kind -//! that no longer supports veneers (is at "max range"), kick the -//! fixups off to a list that is *not* processed at islands except -//! for one last pass after emission. This allows us to skip the -//! work and avoids the quadratic behvior. We expect that this is -//! fine-ish for now: islands are relatively rare, and if they do -//! happen and generate unnecessary veneers (as will now happen for -//! the case above) we'll only get one unnecessary veneer per -//! branch (then they are at max range already). -//! -//! Longer-term, we could use a data structure that allows querying -//! by deadline, as long as we can properly chomp just-added fixups -//! when chomping branches. +//! To avoid this fixups are split into two lists: one "pending" list and one +//! final list. The pending list is kept around for handling fixups related to +//! branches so it can be edited/truncated. When an island is reached, which +//! starts processing fixups, all pending fixups are flushed into the final +//! list. The final list is a `BinaryHeap` which enables fixup processing to +//! only process those which are required during island emission, deferring +//! all longer-range fixups to later. use crate::binemit::{Addend, CodeOffset, Reloc, StackMap}; use crate::ir::{ExternalName, Opcode, RelSourceLoc, SourceLoc, TrapCode}; @@ -196,7 +180,9 @@ use crate::trace; use crate::{timing, VCodeConstantData}; use cranelift_control::ControlPlane; use cranelift_entity::{entity_impl, PrimaryMap}; -use smallvec::{smallvec, SmallVec}; +use smallvec::SmallVec; +use std::cmp::Ordering; +use std::collections::BinaryHeap; use std::convert::TryFrom; use std::mem; use std::string::String; @@ -242,12 +228,6 @@ enum ForceVeneers { No, } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum IsLastIsland { - Yes, - No, -} - /// A buffer of output to be produced, fixed up, and then emitted to a CodeSink /// in bulk. 
/// @@ -288,24 +268,18 @@ pub struct MachBuffer { label_aliases: SmallVec<[MachLabel; 16]>, /// Constants that must be emitted at some point. pending_constants: SmallVec<[VCodeConstant; 16]>, + /// Byte size of all constants in `pending_constants`. + pending_constants_size: CodeOffset, /// Traps that must be emitted at some point. pending_traps: SmallVec<[MachLabelTrap; 16]>, + /// Fixups that haven't yet been flushed into `fixup_records` below and may + /// be related to branches that are chomped. These all get added to + /// `fixup_records` during island emission. + pending_fixup_records: SmallVec<[MachLabelFixup; 16]>, + /// The nearest upcoming deadline for entries in `pending_fixup_records`. + pending_fixup_deadline: CodeOffset, /// Fixups that must be performed after all code is emitted. - fixup_records: SmallVec<[MachLabelFixup; 16]>, - /// Fixups whose labels are at maximum range already: these need - /// not be considered in island emission until we're done - /// emitting. - fixup_records_max_range: SmallVec<[MachLabelFixup; 16]>, - /// Current deadline at which all constants are flushed and all code labels - /// are extended by emitting long-range jumps in an island. This flush - /// should be rare (e.g., on AArch64, the shortest-range PC-rel references - /// are +/- 1MB for conditional jumps and load-literal instructions), so - /// it's acceptable to track a minimum and flush-all rather than doing more - /// detailed "current minimum" / sort-by-deadline trickery. - island_deadline: CodeOffset, - /// How many bytes are needed in the worst case for an island, given all - /// pending constants and fixups. - island_worst_case_size: CodeOffset, + fixup_records: BinaryHeap>, /// Latest branches, to facilitate in-place editing for better fallthrough /// behavior and empty-block removal. 
latest_branches: SmallVec<[MachBranch; 4]>, @@ -449,11 +423,11 @@ impl MachBuffer { label_offsets: SmallVec::new(), label_aliases: SmallVec::new(), pending_constants: SmallVec::new(), + pending_constants_size: 0, pending_traps: SmallVec::new(), - fixup_records: SmallVec::new(), - fixup_records_max_range: SmallVec::new(), - island_deadline: UNKNOWN_LABEL_OFFSET, - island_worst_case_size: 0, + pending_fixup_records: SmallVec::new(), + pending_fixup_deadline: u32::MAX, + fixup_records: Default::default(), latest_branches: SmallVec::new(), labels_at_tail: SmallVec::new(), labels_at_tail_off: 0, @@ -623,8 +597,8 @@ impl MachBuffer { "defer constant: eventually emit {size} bytes aligned \ to {align} at label {label:?}", ); - self.update_deadline(size, u32::MAX); self.pending_constants.push(constant); + self.pending_constants_size += size as u32; self.constants[constant].upcoming_label = Some(label); label } @@ -703,19 +677,13 @@ impl MachBuffer { // Add the fixup, and update the worst-case island size based on a // veneer for this label use. - self.fixup_records.push(MachLabelFixup { + let fixup = MachLabelFixup { label, offset, kind, - }); - if kind.supports_veneer() { - self.island_worst_case_size += kind.veneer_size(); - self.island_worst_case_size &= !(I::LabelUse::ALIGN - 1); - } - let deadline = offset.saturating_add(kind.max_pos_range()); - if deadline < self.island_deadline { - self.island_deadline = deadline; - } + }; + self.pending_fixup_deadline = self.pending_fixup_deadline.min(fixup.deadline()); + self.pending_fixup_records.push(fixup); // Post-invariant: no mutations to branches/labels data structures. 
} @@ -737,8 +705,8 @@ impl MachBuffer { pub fn add_uncond_branch(&mut self, start: CodeOffset, end: CodeOffset, target: MachLabel) { assert!(self.cur_offset() == start); debug_assert!(end > start); - assert!(!self.fixup_records.is_empty()); - let fixup = self.fixup_records.len() - 1; + assert!(!self.pending_fixup_records.is_empty()); + let fixup = self.pending_fixup_records.len() - 1; self.lazily_clear_labels_at_tail(); self.latest_branches.push(MachBranch { start, @@ -768,9 +736,9 @@ impl MachBuffer { ) { assert!(self.cur_offset() == start); debug_assert!(end > start); - assert!(!self.fixup_records.is_empty()); + assert!(!self.pending_fixup_records.is_empty()); debug_assert!(inverted.len() == (end - start) as usize); - let fixup = self.fixup_records.len() - 1; + let fixup = self.pending_fixup_records.len() - 1; let inverted = Some(SmallVec::from(inverted)); self.lazily_clear_labels_at_tail(); self.latest_branches.push(MachBranch { @@ -800,7 +768,7 @@ impl MachBuffer { // cur_off, self.labels_at_tail --> // (end of buffer) self.data.truncate(b.start as usize); - self.fixup_records.truncate(b.fixup); + self.pending_fixup_records.truncate(b.fixup); while let Some(last_srcloc) = self.srclocs.last_mut() { if last_srcloc.end <= b.start { break; @@ -892,7 +860,7 @@ impl MachBuffer { "enter optimize_branches:\n b = {:?}\n l = {:?}\n f = {:?}", self.latest_branches, self.labels_at_tail, - self.fixup_records + self.pending_fixup_records ); // We continue to munch on branches at the tail of the buffer until no @@ -1134,7 +1102,7 @@ impl MachBuffer { // inverted branch, in case we later edit this branch // again. 
prev_b.inverted = Some(not_inverted); - self.fixup_records[prev_b.fixup].label = target; + self.pending_fixup_records[prev_b.fixup].label = target; trace!(" -> reassigning target of condbr to {:?}", target); prev_b.target = target; debug_assert_eq!(off_before_edit, self.cur_offset()); @@ -1153,7 +1121,7 @@ impl MachBuffer { "leave optimize_branches:\n b = {:?}\n l = {:?}\n f = {:?}", self.latest_branches, self.labels_at_tail, - self.fixup_records + self.pending_fixup_records ); } @@ -1186,7 +1154,6 @@ impl MachBuffer { /// This will batch all traps into the end of the function. pub fn defer_trap(&mut self, code: TrapCode, stack_map: Option) -> MachLabel { let label = self.get_label(); - self.update_deadline(I::TRAP_OPCODE.len(), u32::MAX); self.pending_traps.push(MachLabelTrap { label, code, @@ -1196,20 +1163,13 @@ impl MachBuffer { label } - fn update_deadline(&mut self, len: usize, max_distance: CodeOffset) { - trace!("defer: eventually emit {} bytes", len); - let deadline = self.cur_offset().saturating_add(max_distance); - self.island_worst_case_size += len as CodeOffset; - self.island_worst_case_size = - (self.island_worst_case_size + I::LabelUse::ALIGN - 1) & !(I::LabelUse::ALIGN - 1); - if deadline < self.island_deadline { - self.island_deadline = deadline; - } - } - /// Is an island needed within the next N bytes? pub fn island_needed(&self, distance: CodeOffset) -> bool { - self.worst_case_end_of_island(distance) > self.island_deadline + let deadline = match self.fixup_records.peek() { + Some(fixup) => fixup.deadline().min(self.pending_fixup_deadline), + None => self.pending_fixup_deadline, + }; + deadline < u32::MAX && self.worst_case_end_of_island(distance) > deadline } /// Returns the maximal offset that islands can reach if `distance` more @@ -1218,9 +1178,18 @@ impl MachBuffer { /// This is used to determine if veneers need insertions since jumps that /// can't reach past this point must get a veneer of some form. 
fn worst_case_end_of_island(&self, distance: CodeOffset) -> CodeOffset { + // Assume that all fixups will require veneers and that the veneers are + // the worst-case size for each platform. This is an over-generalization + // to avoid iterating over the `fixup_records` list or maintaining + // information about it as we go along. + let island_worst_case_size = ((self.fixup_records.len() + self.pending_fixup_records.len()) + as u32) + * (I::LabelUse::worst_case_veneer_size()) + + self.pending_constants_size + + (self.pending_traps.len() * I::TRAP_OPCODE.len()) as u32; self.cur_offset() .saturating_add(distance) - .saturating_add(self.island_worst_case_size) + .saturating_add(island_worst_case_size) } /// Emit all pending constants and required pending veneers. @@ -1228,8 +1197,8 @@ impl MachBuffer { /// Should only be called if `island_needed()` returns true, i.e., if we /// actually reach a deadline. It's not necessarily a problem to do so /// otherwise but it may result in unnecessary work during emission. - pub fn emit_island(&mut self, ctrl_plane: &mut ControlPlane) { - self.emit_island_maybe_forced(ForceVeneers::No, IsLastIsland::No, ctrl_plane); + pub fn emit_island(&mut self, distance: CodeOffset, ctrl_plane: &mut ControlPlane) { + self.emit_island_maybe_forced(ForceVeneers::No, distance, ctrl_plane); } /// Same as `emit_island`, but an internal API with a `force_veneers` @@ -1237,18 +1206,13 @@ impl MachBuffer { fn emit_island_maybe_forced( &mut self, force_veneers: ForceVeneers, - last_island: IsLastIsland, + distance: CodeOffset, ctrl_plane: &mut ControlPlane, ) { // We're going to purge fixups, so no latest-branch editing can happen // anymore. self.latest_branches.clear(); - // Reset internal calculations about islands since we're going to - // change the calculus as we apply fixups. 
- self.island_deadline = UNKNOWN_LABEL_OFFSET; - self.island_worst_case_size = 0; - // End the current location tracking since anything emitted during this // function shouldn't be attributed to whatever the current source // location is. @@ -1260,6 +1224,8 @@ impl MachBuffer { self.end_srcloc(); } + let forced_threshold = self.worst_case_end_of_island(distance); + // First flush out all traps/constants so we have more labels in case // fixups are applied against these labels. // @@ -1300,112 +1266,107 @@ impl MachBuffer { self.get_appended_space(size); } - let last_island_fixups = match last_island { - IsLastIsland::Yes => mem::take(&mut self.fixup_records_max_range), - IsLastIsland::No => smallvec![], - }; - for fixup in mem::take(&mut self.fixup_records) - .into_iter() - .chain(last_island_fixups.into_iter()) - { - trace!("emit_island: fixup {:?}", fixup); - let MachLabelFixup { - label, - offset, - kind, - } = fixup; - let label_offset = self.resolve_label_offset(label); - let start = offset as usize; - let end = (offset + kind.patch_size()) as usize; - - if label_offset != UNKNOWN_LABEL_OFFSET { - // If the offset of the label for this fixup is known then - // we're going to do something here-and-now. We're either going - // to patch the original offset because it's an in-bounds jump, - // or we're going to generate a veneer, patch the fixup to jump - // to the veneer, and then keep going. - // - // If the label comes after the original fixup, then we should - // be guaranteed that the jump is in-bounds. Otherwise there's - // a bug somewhere because this method wasn't called soon - // enough. All forward-jumps are tracked and should get veneers - // before their deadline comes and they're unable to jump - // further. - // - // Otherwise if the label is before the fixup, then that's a - // backwards jump. If it's past the maximum negative range - // then we'll emit a veneer that to jump forward to which can - // then jump backwards. 
- let veneer_required = if label_offset >= offset { - assert!((label_offset - offset) <= kind.max_pos_range()); - false - } else { - (offset - label_offset) > kind.max_neg_range() - }; - trace!( - " -> label_offset = {}, known, required = {} (pos {} neg {})", - label_offset, - veneer_required, - kind.max_pos_range(), - kind.max_neg_range() - ); - - if (force_veneers == ForceVeneers::Yes && kind.supports_veneer()) || veneer_required - { - self.emit_veneer(label, offset, kind); - } else { - let slice = &mut self.data[start..end]; - trace!("patching in-range!"); - kind.patch(slice, offset, label_offset); - } + // Either handle all pending fixups because they're ready or move them + // onto the `BinaryHeap` tracking all pending fixups if they aren't + // ready. + assert!(self.latest_branches.is_empty()); + for fixup in mem::take(&mut self.pending_fixup_records) { + if self.should_apply_fixup(&fixup, forced_threshold) { + self.handle_fixup(fixup, force_veneers, forced_threshold); } else { - // If the offset of this label is not known at this time then - // there are three possibilities: - // - // 1. It's possible that the label is already a "max - // range" label: a veneer would not help us any, - // and so we need not consider the label during - // island emission any more until the very end (the - // last "island" pass). In this case we kick the - // label into a separate list to process once at - // the end, to avoid quadratic behavior (see - // "quadratic island behavior" above, and issue - // #6798). - // - // 2. Or, we may be about to exceed the maximum jump range of - // this fixup. In that case a veneer is inserted to buy some - // more budget for the forward-jump. It's guaranteed that the - // label will eventually come after where we're at, so we know - // that the forward jump is necessary. - // - // 3. Otherwise, we're still within range of the - // forward jump but the precise target isn't known - // yet. 
In that case, to avoid quadratic behavior - // (again, see above), we emit a veneer and if the - // resulting label-use fixup is then max-range, we - // put it in the max-range list. We could enqueue - // the fixup for processing later, and this would - // enable slightly fewer veneers, but islands are - // relatively rare and the cost of "upgrading" all - // forward label refs that cross an island should - // be relatively low. - if !kind.supports_veneer() { - self.fixup_records_max_range.push(MachLabelFixup { - label, - offset, - kind, - }); - } else { - self.emit_veneer(label, offset, kind); - } + self.fixup_records.push(fixup); } } + self.pending_fixup_deadline = u32::MAX; + while let Some(fixup) = self.fixup_records.peek() { + trace!("emit_island: fixup {:?}", fixup); + + // If this fixup shouldn't be applied, that means its label isn't + // defined yet and there'll be remaining space to apply a veneer if + // necessary in the future after this island. In that situation + // because `fixup_records` is sorted by deadline this loop can + // exit. 
+ if !self.should_apply_fixup(fixup, forced_threshold) { + break; + } + + let fixup = self.fixup_records.pop().unwrap(); + self.handle_fixup(fixup, force_veneers, forced_threshold); + } if let Some(loc) = cur_loc { self.start_srcloc(loc); } } + fn should_apply_fixup(&self, fixup: &MachLabelFixup, forced_threshold: CodeOffset) -> bool { + let label_offset = self.resolve_label_offset(fixup.label); + label_offset != UNKNOWN_LABEL_OFFSET || fixup.deadline() < forced_threshold + } + + fn handle_fixup( + &mut self, + fixup: MachLabelFixup, + force_veneers: ForceVeneers, + forced_threshold: CodeOffset, + ) { + let MachLabelFixup { + label, + offset, + kind, + } = fixup; + let start = offset as usize; + let end = (offset + kind.patch_size()) as usize; + let label_offset = self.resolve_label_offset(label); + + if label_offset != UNKNOWN_LABEL_OFFSET { + // If the offset of the label for this fixup is known then + // we're going to do something here-and-now. We're either going + // to patch the original offset because it's an in-bounds jump, + // or we're going to generate a veneer, patch the fixup to jump + // to the veneer, and then keep going. + // + // If the label comes after the original fixup, then we should + // be guaranteed that the jump is in-bounds. Otherwise there's + // a bug somewhere because this method wasn't called soon + // enough. All forward-jumps are tracked and should get veneers + // before their deadline comes and they're unable to jump + // further. + // + // Otherwise if the label is before the fixup, then that's a + // backwards jump. If it's past the maximum negative range + // then we'll emit a veneer that to jump forward to which can + // then jump backwards. 
+ let veneer_required = if label_offset >= offset { + assert!((label_offset - offset) <= kind.max_pos_range()); + false + } else { + (offset - label_offset) > kind.max_neg_range() + }; + trace!( + " -> label_offset = {}, known, required = {} (pos {} neg {})", + label_offset, + veneer_required, + kind.max_pos_range(), + kind.max_neg_range() + ); + + if (force_veneers == ForceVeneers::Yes && kind.supports_veneer()) || veneer_required { + self.emit_veneer(label, offset, kind); + } else { + let slice = &mut self.data[start..end]; + trace!("patching in-range!"); + kind.patch(slice, offset, label_offset); + } + } else { + // If the offset of this label is not known at this time then + // that means that a veneer is required because after this + // island the target can't be in range of the original target. + assert!(forced_threshold - offset > kind.max_pos_range()); + self.emit_veneer(label, offset, kind); + } + } + /// Emits a "veneer" the `kind` code at `offset` to jump to `label`. /// /// This will generate extra machine code, using `kind`, to get a @@ -1447,17 +1408,8 @@ impl MachBuffer { // Register a new use of `label` with our new veneer fixup and // offset. This'll recalculate deadlines accordingly and // enqueue this fixup to get processed at some later - // time. Note that if we now have a max-range, we instead skip - // the usual fixup list to avoid quadratic behavior. - if veneer_label_use.supports_veneer() { - self.use_label_at_offset(veneer_fixup_off, label, veneer_label_use); - } else { - self.fixup_records_max_range.push(MachLabelFixup { - label, - offset: veneer_fixup_off, - kind: veneer_label_use, - }); - } + // time. 
+ self.use_label_at_offset(veneer_fixup_off, label, veneer_label_use); } fn finish_emission_maybe_forcing_veneers( @@ -1468,18 +1420,19 @@ impl MachBuffer { while !self.pending_constants.is_empty() || !self.pending_traps.is_empty() || !self.fixup_records.is_empty() - || !self.fixup_records_max_range.is_empty() + || !self.pending_fixup_records.is_empty() { // `emit_island()` will emit any pending veneers and constants, and // as a side-effect, will also take care of any fixups with resolved // labels eagerly. - self.emit_island_maybe_forced(force_veneers, IsLastIsland::Yes, ctrl_plane); + self.emit_island_maybe_forced(force_veneers, u32::MAX, ctrl_plane); } // Ensure that all labels have been fixed up after the last island is emitted. This is a // full (release-mode) assert because an unresolved label means the emitted code is // incorrect. assert!(self.fixup_records.is_empty()); + assert!(self.pending_fixup_records.is_empty()); } /// Finish any deferred emissions and/or fixups. @@ -1732,6 +1685,32 @@ struct MachLabelFixup { kind: I::LabelUse, } +impl MachLabelFixup { + fn deadline(&self) -> CodeOffset { + self.offset.saturating_add(self.kind.max_pos_range()) + } +} + +impl PartialEq for MachLabelFixup { + fn eq(&self, other: &Self) -> bool { + self.deadline() == other.deadline() + } +} + +impl Eq for MachLabelFixup {} + +impl PartialOrd for MachLabelFixup { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for MachLabelFixup { + fn cmp(&self, other: &Self) -> Ordering { + other.deadline().cmp(&self.deadline()) + } +} + /// A relocation resulting from a compilation. 
#[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "enable-serde", derive(serde::Serialize, serde::Deserialize))] @@ -1873,7 +1852,7 @@ impl TextSectionBuilder for MachTextSectionBuilder { let size = func.len() as u32; if self.force_veneers == ForceVeneers::Yes || self.buf.island_needed(size) { self.buf - .emit_island_maybe_forced(self.force_veneers, IsLastIsland::No, ctrl_plane); + .emit_island_maybe_forced(self.force_veneers, size, ctrl_plane); } self.buf.align_to(align); @@ -2055,7 +2034,7 @@ mod test { buf.bind_label(label(1), state.ctrl_plane_mut()); while buf.cur_offset() < 2000000 { if buf.island_needed(0) { - buf.emit_island(state.ctrl_plane_mut()); + buf.emit_island(0, state.ctrl_plane_mut()); } let inst = Inst::Nop4; inst.emit(&[], &mut buf, &info, &mut state); @@ -2087,20 +2066,15 @@ mod test { // one for this 19-bit jump and one for the unconditional 26-bit // jump below. A 19-bit veneer is 4 bytes large and the 26-bit // veneer is 20 bytes large, which means that pessimistically - // assuming we'll need two veneers we need 24 bytes of extra - // space, meaning that the actual island should come 24-bytes - // before the deadline. - taken: BranchTarget::ResolvedOffset((1 << 20) - 4 - 20), - - // This branch is in-range so no veneers are technically - // be needed; however because we resolve *all* pending - // fixups that cross an island when that island occurs, it - // will have a veneer as well. This veneer comes just - // after the one above. (Note that because the CondBr has - // two instructions, the conditinoal and unconditional, - // this offset is the same, though the veneer is four - // bytes later.) - not_taken: BranchTarget::ResolvedOffset((1 << 20) - 4 - 20), + // assuming we'll need two veneers. Currently each veneer is + // pessimistically assumed to be the maximal size which means we + // need 40 bytes of extra space, meaning that the actual island + // should come 40-bytes before the deadline. 
+ taken: BranchTarget::ResolvedOffset((1 << 20) - 20 - 20), + + // This branch is in-range so no veneers should be needed, it should + // go directly to the target. + not_taken: BranchTarget::ResolvedOffset(2000000 + 4 - 4), }; inst.emit(&[], &mut buf2, &info, &mut state); diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index e08384c30ab2..5bff571782b3 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -234,6 +234,8 @@ pub trait MachInstLabelUse: Clone + Copy + Debug + Eq { fn supports_veneer(self) -> bool; /// How many bytes are needed for a veneer? fn veneer_size(self) -> CodeOffset; + /// What's the largest possible veneer that may be generated? + fn worst_case_veneer_size() -> CodeOffset; /// Generate a veneer. The given code-buffer slice is `self.veneer_size()` /// bytes long at offset `veneer_offset` in the buffer. The original /// label-use will be patched to refer to this veneer's offset. A new diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 59c7328c3aa8..6b05b38c8bd3 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -1050,7 +1050,7 @@ impl VCode { bb_padding.len() as u32 + I::LabelUse::ALIGN - 1 }; if buffer.island_needed(padding + worst_case_next_bb) { - buffer.emit_island(ctrl_plane); + buffer.emit_island(padding + worst_case_next_bb, ctrl_plane); } // Insert padding, if configured, to stress the `MachBuffer`'s