Commit d61e4e0

Merge pull request #3709 from cfallin/cold-blocks-dead-code-bug

Cranelift: Fix cold-blocks-related lowering bug.

cfallin authored Jan 21, 2022
2 parents 78ff829 + ef1b2d2
Showing 3 changed files with 143 additions and 17 deletions.
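The fix has two halves, visible in the files below: blockorder.rs stops sorting cold blocks to the end of the lowering order (lowering must see reverse postorder, so defs precede uses), and vcode.rs instead sinks cold blocks at the last moment, during emission. The user-facing API is unchanged; a block is marked cold on the function layout, as the new test does. A minimal usage fragment (assuming an existing `func: Function` and `block: Block`; this mirrors the `set_cold` call in the test below, not a complete program):

// Mark `block` as cold: it keeps its place in the lowering order,
// but its machine code is emitted after all hot blocks.
func.layout.set_cold(block);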
103 changes: 103 additions & 0 deletions cranelift/codegen/src/isa/x64/mod.rs
@@ -179,3 +179,106 @@ fn isa_constructor(
let backend = X64Backend::new_with_flags(triple, shared_flags, isa_flags);
Box::new(backend)
}

#[cfg(test)]
mod test {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::ir::types::*;
use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
use crate::isa::CallConv;
use crate::settings;
use crate::settings::Configurable;
use core::str::FromStr;
use target_lexicon::Triple;

/// We have to test cold blocks by observing final machine code,
/// rather than VCode, because the VCode orders blocks in lowering
/// order, not emission order. (The exact difference between the
/// two is that cold blocks are sunk in the latter.) We might as
/// well do the test here, where we have a backend to use.
#[test]
fn test_cold_blocks() {
let name = ExternalName::testcase("test0");
let mut sig = Signature::new(CallConv::SystemV);
sig.params.push(AbiParam::new(I32));
sig.returns.push(AbiParam::new(I32));
let mut func = Function::with_name_signature(name, sig);

let bb0 = func.dfg.make_block();
let arg0 = func.dfg.append_block_param(bb0, I32);
let bb1 = func.dfg.make_block();
let bb2 = func.dfg.make_block();
let bb3 = func.dfg.make_block();
let bb1_param = func.dfg.append_block_param(bb1, I32);
let bb3_param = func.dfg.append_block_param(bb3, I32);

let mut pos = FuncCursor::new(&mut func);

pos.insert_block(bb0);
let v0 = pos.ins().iconst(I32, 0x1234);
let v1 = pos.ins().iadd(arg0, v0);
pos.ins().brnz(v1, bb1, &[v1]);
pos.ins().jump(bb2, &[]);

pos.insert_block(bb1);
let v2 = pos.ins().isub(v1, v0);
let v3 = pos.ins().iadd(v2, bb1_param);
pos.ins().brnz(v1, bb2, &[]);
pos.ins().jump(bb3, &[v3]);

pos.func.layout.set_cold(bb2);
pos.insert_block(bb2);
let v4 = pos.ins().iadd(v1, v0);
pos.ins().brnz(v4, bb2, &[]);
pos.ins().jump(bb1, &[v4]);

pos.insert_block(bb3);
pos.ins().return_(&[bb3_param]);

let mut shared_flags_builder = settings::builder();
shared_flags_builder.set("opt_level", "none").unwrap();
shared_flags_builder.set("enable_verifier", "true").unwrap();
let shared_flags = settings::Flags::new(shared_flags_builder);
let isa_flags = x64_settings::Flags::new(&shared_flags, x64_settings::builder());
let backend = X64Backend::new_with_flags(
Triple::from_str("x86_64").unwrap(),
shared_flags,
isa_flags,
);
let result = backend
.compile_function(&mut func, /* want_disasm = */ false)
.unwrap();
let code = result.buffer.data();

// 00000000 55 push rbp
// 00000001 4889E5 mov rbp,rsp
// 00000004 4889FE mov rsi,rdi
// 00000007 81C634120000 add esi,0x1234
// 0000000D 85F6 test esi,esi
// 0000000F 0F841B000000 jz near 0x30
// 00000015 4889F7 mov rdi,rsi
// 00000018 4889F0 mov rax,rsi
// 0000001B 81E834120000 sub eax,0x1234
// 00000021 01F8 add eax,edi
// 00000023 85F6 test esi,esi
// 00000025 0F8505000000 jnz near 0x30
// 0000002B 4889EC mov rsp,rbp
// 0000002E 5D pop rbp
// 0000002F C3 ret
// 00000030 4889F7 mov rdi,rsi <--- cold block
// 00000033 81C734120000 add edi,0x1234
// 00000039 85FF test edi,edi
// 0000003B 0F85EFFFFFFF jnz near 0x30
// 00000041 E9D2FFFFFF jmp 0x18

let golden = vec![
85, 72, 137, 229, 72, 137, 254, 129, 198, 52, 18, 0, 0, 133, 246, 15, 132, 27, 0, 0, 0,
72, 137, 247, 72, 137, 240, 129, 232, 52, 18, 0, 0, 1, 248, 133, 246, 15, 133, 5, 0, 0,
0, 72, 137, 236, 93, 195, 72, 137, 247, 129, 199, 52, 18, 0, 0, 133, 255, 15, 133, 239,
255, 255, 255, 233, 210, 255, 255, 255,
];

assert_eq!(code, &golden[..]);
}
}
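A note on maintaining this test: the golden vector must track any codegen change. One plausible way to regenerate it is a sketch under the assumption that the result of `compile_function` carries an optional disassembly string when `want_disasm` is true; that field name is an assumption, not confirmed by this diff:

// Sketch for refreshing `golden` (assumes a `disasm: Option<String>`
// field on the compile result; verify against the actual API).
let result = backend
    .compile_function(&mut func, /* want_disasm = */ true)
    .unwrap();
if let Some(d) = &result.disasm {
    println!("{}", d); // compare with the annotated listing above
}
println!("{:?}", result.buffer.data()); // paste as the new golden bytes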
40 changes: 25 additions & 15 deletions cranelift/codegen/src/machinst/blockorder.rs
@@ -99,6 +99,13 @@ pub struct BlockLoweringOrder {
/// blocks.
#[allow(dead_code)]
orig_map: SecondaryMap<Block, Option<BlockIndex>>,
+/// Cold blocks. These blocks are not reordered in the
+/// `lowered_order` above; the lowered order must respect RPO
+/// (uses after defs) in order for lowering to be
+/// correct. Instead, this set is used to provide `is_cold()`,
+/// which is used by VCode emission to sink the blocks at the last
+/// moment (when we actually emit bytes into the MachBuffer).
+cold_blocks: FxHashSet<BlockIndex>,
}

/// The origin of a block in the lowered block-order: either an original CLIF
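The doc comment added above leans on the invariant that reverse postorder places defs before uses, and reverse postorder is exactly what the hunk below computes (`postorder.reverse()`). A self-contained sketch of the idea, with hypothetical types rather than Cranelift's:

/// Reverse postorder of a small CFG given as adjacency lists: visit
/// successors depth-first, record each node after its successors,
/// then reverse. In the resulting order, a block that defines a value
/// appears before the blocks that use it (ignoring back edges).
fn rpo(succs: &[Vec<usize>], entry: usize) -> Vec<usize> {
    fn dfs(n: usize, succs: &[Vec<usize>], seen: &mut [bool], post: &mut Vec<usize>) {
        seen[n] = true;
        for &s in &succs[n] {
            if !seen[s] {
                dfs(s, succs, seen, post);
            }
        }
        post.push(n);
    }
    let mut seen = vec![false; succs.len()];
    let mut post = Vec::new();
    dfs(entry, succs, &mut seen, &mut post);
    post.reverse();
    post
}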
@@ -378,31 +385,28 @@ impl BlockLoweringOrder {

postorder.reverse();
let mut rpo = postorder;
-
-// Step 3: sink any cold blocks to the end of the
-// function. Put the "deferred last" block truly at the end;
-// this is a correctness requirement (for fallthrough
-// returns).
-rpo.sort_by_key(|block| {
-block
-.0
-.orig_block()
-.map(|block| f.layout.is_cold(block))
-.unwrap_or(false)
-});
-
if let Some(d) = deferred_last {
rpo.push(d);
}

-// Step 4: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
+// Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
let mut lowered_order = vec![];
+let mut cold_blocks = FxHashSet::default();
let mut lowered_succ_ranges = vec![];
let mut lb_to_bindex = FxHashMap::default();
for (block, succ_range) in rpo.into_iter() {
-lb_to_bindex.insert(block, lowered_order.len() as BlockIndex);
+let index = lowered_order.len() as BlockIndex;
+lb_to_bindex.insert(block, index);
lowered_order.push(block);
lowered_succ_ranges.push(succ_range);
+
+if block
+.orig_block()
+.map(|b| f.layout.is_cold(b))
+.unwrap_or(false)
+{
+cold_blocks.insert(index);
+}
}

let lowered_succ_indices = lowered_succs
@@ -424,6 +428,7 @@ impl BlockLoweringOrder {
lowered_succ_indices,
lowered_succ_ranges,
orig_map,
+cold_blocks,
};
log::trace!("BlockLoweringOrder: {:?}", result);
result
@@ -439,6 +444,11 @@ impl BlockLoweringOrder {
let range = self.lowered_succ_ranges[block as usize];
&self.lowered_succ_indices[range.0..range.1]
}
+
+/// Determine whether the given lowered-block index is cold.
+pub fn is_cold(&self, block: BlockIndex) -> bool {
+self.cold_blocks.contains(&block)
+}
}

#[cfg(test)]
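The consumer of `is_cold()` is the emission loop in vcode.rs below. In isolation, the reordering it performs is a stable partition: hot blocks first, cold blocks appended at the end, relative order preserved within each group. A standalone sketch (hypothetical function, not the Cranelift API):

/// Stable partition of lowered-block indices: hot blocks keep their
/// relative order, cold blocks are appended at the end in theirs.
fn emission_order(num_blocks: u32, is_cold: impl Fn(u32) -> bool) -> Vec<u32> {
    let (hot, cold): (Vec<u32>, Vec<u32>) = (0..num_blocks).partition(|&b| !is_cold(b));
    hot.into_iter().chain(cold).collect()
}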
17 changes: 15 additions & 2 deletions cranelift/codegen/src/machinst/vcode.rs
@@ -470,11 +470,24 @@ impl<I: VCodeInst> VCode<I> {
let mut inst_ends = vec![0; self.insts.len()];
let mut label_insn_iix = vec![0; self.num_blocks()];

+// Construct the final order we emit code in: cold blocks at the end.
+let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
+let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
+for block in 0..self.num_blocks() {
+let block = block as BlockIndex;
+if self.block_order.is_cold(block) {
+cold_blocks.push(block);
+} else {
+final_order.push(block);
+}
+}
+final_order.extend(cold_blocks);
+
+// Emit blocks.
let mut safepoint_idx = 0;
let mut cur_srcloc = None;
let mut last_offset = None;
-for block in 0..self.num_blocks() {
-let block = block as BlockIndex;
+for block in final_order {
let new_offset = I::align_basic_block(buffer.cur_offset());
while new_offset > buffer.cur_offset() {
// Pad with NOPs up to the aligned block offset.
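The NOP-padding loop above rounds each block's start offset up to `I::align_basic_block` before filling the gap. The rounding itself is the usual power-of-two alignment trick; a self-contained sketch (hypothetical helper, not the MachBuffer API):

/// Round `offset` up to the next multiple of `align` (a power of two),
/// as block alignment does before NOP padding fills the gap.
fn align_to(offset: u32, align: u32) -> u32 {
    debug_assert!(align.is_power_of_two());
    (offset + align - 1) & !(align - 1)
}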
