Cranelift: Fix cold-blocks-related lowering bug. #3709

Merged: 1 commit, Jan 21, 2022

cranelift/codegen/src/isa/x64/mod.rs (103 additions, 0 deletions)

@@ -179,3 +179,106 @@ fn isa_constructor(
    let backend = X64Backend::new_with_flags(triple, shared_flags, isa_flags);
    Box::new(backend)
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::types::*;
    use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
    use crate::isa::CallConv;
    use crate::settings;
    use crate::settings::Configurable;
    use core::str::FromStr;
    use target_lexicon::Triple;

    /// We have to test cold blocks by observing final machine code,
    /// rather than VCode, because the VCode orders blocks in lowering
    /// order, not emission order. (The exact difference between the
    /// two is that cold blocks are sunk in the latter.) We might as
    /// well do the test here, where we have a backend to use.
    #[test]
    fn test_cold_blocks() {
        let name = ExternalName::testcase("test0");
        let mut sig = Signature::new(CallConv::SystemV);
        sig.params.push(AbiParam::new(I32));
        sig.returns.push(AbiParam::new(I32));
        let mut func = Function::with_name_signature(name, sig);

        let bb0 = func.dfg.make_block();
        let arg0 = func.dfg.append_block_param(bb0, I32);
        let bb1 = func.dfg.make_block();
        let bb2 = func.dfg.make_block();
        let bb3 = func.dfg.make_block();
        let bb1_param = func.dfg.append_block_param(bb1, I32);
        let bb3_param = func.dfg.append_block_param(bb3, I32);

        let mut pos = FuncCursor::new(&mut func);

        pos.insert_block(bb0);
        let v0 = pos.ins().iconst(I32, 0x1234);
        let v1 = pos.ins().iadd(arg0, v0);
        pos.ins().brnz(v1, bb1, &[v1]);
        pos.ins().jump(bb2, &[]);

        pos.insert_block(bb1);
        let v2 = pos.ins().isub(v1, v0);
        let v3 = pos.ins().iadd(v2, bb1_param);
        pos.ins().brnz(v1, bb2, &[]);
        pos.ins().jump(bb3, &[v3]);

        pos.func.layout.set_cold(bb2);
        pos.insert_block(bb2);
        let v4 = pos.ins().iadd(v1, v0);
        pos.ins().brnz(v4, bb2, &[]);
        pos.ins().jump(bb1, &[v4]);

        pos.insert_block(bb3);
        pos.ins().return_(&[bb3_param]);
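
        // Shape of the function built above, roughly (names refer to the
        // Rust locals above, not CLIF value numbers; bb2 is marked cold):
        //
        //   bb0: v1 = arg0 + 0x1234;             brnz v1 -> bb1(v1), else -> bb2
        //   bb1: v3 = (v1 - 0x1234) + bb1_param; brnz v1 -> bb2, else -> bb3(v3)
        //   bb2: v4 = v1 + 0x1234;               brnz v4 -> bb2, else -> bb1(v4)  (cold)
        //   bb3: return bb3_param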

        let mut shared_flags_builder = settings::builder();
        shared_flags_builder.set("opt_level", "none").unwrap();
        shared_flags_builder.set("enable_verifier", "true").unwrap();
        let shared_flags = settings::Flags::new(shared_flags_builder);
        let isa_flags = x64_settings::Flags::new(&shared_flags, x64_settings::builder());
        let backend = X64Backend::new_with_flags(
            Triple::from_str("x86_64").unwrap(),
            shared_flags,
            isa_flags,
        );
        let result = backend
            .compile_function(&mut func, /* want_disasm = */ false)
            .unwrap();
        let code = result.buffer.data();

        // 00000000 55           push rbp
        // 00000001 4889E5       mov rbp,rsp
        // 00000004 4889FE       mov rsi,rdi
        // 00000007 81C634120000 add esi,0x1234
        // 0000000D 85F6         test esi,esi
        // 0000000F 0F841B000000 jz near 0x30
        // 00000015 4889F7       mov rdi,rsi
        // 00000018 4889F0       mov rax,rsi
        // 0000001B 81E834120000 sub eax,0x1234
        // 00000021 01F8         add eax,edi
        // 00000023 85F6         test esi,esi
        // 00000025 0F8505000000 jnz near 0x30
        // 0000002B 4889EC       mov rsp,rbp
        // 0000002E 5D           pop rbp
        // 0000002F C3           ret
        // 00000030 4889F7       mov rdi,rsi   <--- cold block
        // 00000033 81C734120000 add edi,0x1234
        // 00000039 85FF         test edi,edi
        // 0000003B 0F85EFFFFFFF jnz near 0x30
        // 00000041 E9D2FFFFFF   jmp 0x18

        let golden = vec![
            85, 72, 137, 229, 72, 137, 254, 129, 198, 52, 18, 0, 0, 133, 246, 15, 132, 27, 0, 0, 0,
            72, 137, 247, 72, 137, 240, 129, 232, 52, 18, 0, 0, 1, 248, 133, 246, 15, 133, 5, 0, 0,
            0, 72, 137, 236, 93, 195, 72, 137, 247, 129, 199, 52, 18, 0, 0, 133, 255, 15, 133, 239,
            255, 255, 255, 233, 210, 255, 255, 255,
        ];

        assert_eq!(code, &golden[..]);
    }
}
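
Aside (not part of the diff): the `golden` vector must be regenerated by hand whenever codegen changes. A minimal sketch of a helper for that, assuming only the `code: &[u8]` slice produced by `compile_function` above; the helper name is hypothetical:

    /// Hypothetical helper: render compiled bytes as the decimal `vec![...]`
    /// literal format used by the `golden` vector in the test above.
    fn format_golden(code: &[u8]) -> String {
        let bytes: Vec<String> = code.iter().map(|b| b.to_string()).collect();
        format!("let golden = vec![{}];", bytes.join(", "))
    }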

cranelift/codegen/src/machinst/blockorder.rs (25 additions, 15 deletions)

@@ -99,6 +99,13 @@ pub struct BlockLoweringOrder {
     /// blocks.
     #[allow(dead_code)]
     orig_map: SecondaryMap<Block, Option<BlockIndex>>,
+    /// Cold blocks. These blocks are not reordered in the
+    /// `lowered_order` above; the lowered order must respect RPO
+    /// (uses after defs) in order for lowering to be
+    /// correct. Instead, this set is used to provide `is_cold()`,
+    /// which is used by VCode emission to sink the blocks at the last
+    /// moment (when we actually emit bytes into the MachBuffer).
+    cold_blocks: FxHashSet<BlockIndex>,
 }

 /// The origin of a block in the lowered block-order: either an original CLIF

@@ -378,31 +385,28 @@ impl BlockLoweringOrder {

         postorder.reverse();
         let mut rpo = postorder;
-
-        // Step 3: sink any cold blocks to the end of the
-        // function. Put the "deferred last" block truly at the end;
-        // this is a correctness requirement (for fallthrough
-        // returns).
-        rpo.sort_by_key(|block| {
-            block
-                .0
-                .orig_block()
-                .map(|block| f.layout.is_cold(block))
-                .unwrap_or(false)
-        });
-
         if let Some(d) = deferred_last {
             rpo.push(d);
         }

-        // Step 4: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
+        // Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
         let mut lowered_order = vec![];
+        let mut cold_blocks = FxHashSet::default();
         let mut lowered_succ_ranges = vec![];
         let mut lb_to_bindex = FxHashMap::default();
         for (block, succ_range) in rpo.into_iter() {
-            lb_to_bindex.insert(block, lowered_order.len() as BlockIndex);
+            let index = lowered_order.len() as BlockIndex;
+            lb_to_bindex.insert(block, index);
             lowered_order.push(block);
             lowered_succ_ranges.push(succ_range);
+
+            if block
+                .orig_block()
+                .map(|b| f.layout.is_cold(b))
+                .unwrap_or(false)
+            {
+                cold_blocks.insert(index);
+            }
         }

         let lowered_succ_indices = lowered_succs

@@ -424,6 +428,7 @@ impl BlockLoweringOrder {
             lowered_succ_indices,
             lowered_succ_ranges,
             orig_map,
+            cold_blocks,
         };
         log::trace!("BlockLoweringOrder: {:?}", result);
         result
@@ -439,6 +444,11 @@ impl BlockLoweringOrder {
         let range = self.lowered_succ_ranges[block as usize];
         &self.lowered_succ_indices[range.0..range.1]
     }
+
+    /// Determine whether the given lowered-block index is cold.
+    pub fn is_cold(&self, block: BlockIndex) -> bool {
+        self.cold_blocks.contains(&block)
+    }
 }

#[cfg(test)]
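For intuition, the emission-time reordering that consumes `is_cold()` (the `vcode.rs` hunk below) is a stable partition: hot blocks keep their lowered (RPO) order, and cold blocks are appended at the end in their own relative order. A minimal standalone sketch of that computation, with illustrative names rather than the actual Cranelift types:

    /// Illustration only (not Cranelift's API): compute an emission order
    /// that keeps hot blocks in lowered order and sinks cold blocks to the
    /// end, preserving each group's relative order.
    fn emission_order(num_blocks: u32, is_cold: impl Fn(u32) -> bool) -> Vec<u32> {
        let (hot, cold): (Vec<u32>, Vec<u32>) = (0..num_blocks).partition(|&b| !is_cold(b));
        hot.into_iter().chain(cold).collect()
    }

Doing this only at emission, rather than in the lowering order itself, is exactly what the `cold_blocks` doc comment above requires: lowering must still see blocks in RPO (defs before uses), so only the final byte emission may move them.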

cranelift/codegen/src/machinst/vcode.rs (15 additions, 2 deletions)

@@ -470,11 +470,24 @@ impl<I: VCodeInst> VCode<I> {
         let mut inst_ends = vec![0; self.insts.len()];
         let mut label_insn_iix = vec![0; self.num_blocks()];

+        // Construct the final order we emit code in: cold blocks at the end.
+        let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
+        let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
+        for block in 0..self.num_blocks() {
+            let block = block as BlockIndex;
+            if self.block_order.is_cold(block) {
+                cold_blocks.push(block);
+            } else {
+                final_order.push(block);
+            }
+        }
+        final_order.extend(cold_blocks);
+
         // Emit blocks.
         let mut safepoint_idx = 0;
         let mut cur_srcloc = None;
         let mut last_offset = None;
-        for block in 0..self.num_blocks() {
-            let block = block as BlockIndex;
+        for block in final_order {
             let new_offset = I::align_basic_block(buffer.cur_offset());
             while new_offset > buffer.cur_offset() {
                 // Pad with NOPs up to the aligned block offset.