Cranelift: Refactor return_call[_indirect] lowering (#6666)
Commons up some code paths and sets the stage for other architectures. This
should mean fewer calls back and forth between architecture-specific and
architecture-independent bits of code, which I have found hard to keep track
of. Now, lowering tail calls is done in architecture-specific code that can
call out to architecture-independent helpers as needed. Before, it was
architecture-independent code calling architecture-specific hooks that would
in turn call architecture-independent helpers: too much split across too many
layers. The new approach removes at least one layer of indirection and an
unnecessarily confusing abstraction.
fitzgen authored Jun 28, 2023
1 parent 8f8fbf8 commit 1191091
Showing 7 changed files with 147 additions and 244 deletions.
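To make the shape of the refactor concrete before diving into the diff, here
is a small self-contained Rust sketch of the inversion described above. All
types, bodies, and helper names below are toy stand-ins, not Cranelift's
actual API; only the before/after control flow is meant to match this commit.

    // Before: shared, architecture-independent code drove return-call
    // lowering and called into a per-architecture hook
    // (ABIMachineSpec::gen_return_call), which called back into shared
    // helpers again.
    trait MachineDeps {
        fn gen_return_call(&self) -> Vec<&'static str>;
    }

    fn lower_return_call_old(deps: &impl MachineDeps) -> Vec<&'static str> {
        let mut insts = vec![allocate_tail_call_frame()]; // shared code
        insts.extend(deps.gen_return_call()); // per-arch hook
        insts
    }

    // Most backends only had a stub here, like the todo!() hooks this
    // commit deletes below.
    struct Aarch64Stub;

    impl MachineDeps for Aarch64Stub {
        fn gen_return_call(&self) -> Vec<&'static str> {
            vec!["todo!() stub in the old world"]
        }
    }

    // After: the architecture-specific call site drives the whole lowering
    // and reaches into shared helpers (like frame allocation) only where
    // it needs them.
    struct X64CallSiteSketch;

    impl X64CallSiteSketch {
        fn emit_return_call(&self) -> Vec<&'static str> {
            vec![
                allocate_tail_call_frame(), // shared helper, called directly
                "x64: copy the new frame over the current one and jump",
            ]
        }
    }

    fn allocate_tail_call_frame() -> &'static str {
        "allocate space for the new stack frame"
    }

    fn main() {
        println!("old: {:?}", lower_return_call_old(&Aarch64Stub));
        println!("new: {:?}", X64CallSiteSketch.emit_return_call());
    }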
13 changes: 0 additions & 13 deletions cranelift/codegen/src/isa/aarch64/abi.rs
@@ -1046,19 +1046,6 @@ impl ABIMachineSpec for AArch64MachineDeps {
insts
}

fn gen_return_call(
_callee: CallDest,
_new_stack_arg_size: u32,
_old_stack_arg_size: u32,
_ret_addr: Option<Reg>,
_fp: Reg,
_tmp: Writable<Reg>,
_tmp2: Writable<Reg>,
_uses: abi::CallArgList,
) -> SmallVec<[Self::I; 2]> {
todo!();
}

fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
call_conv: isa::CallConv,
dst: Reg,
13 changes: 0 additions & 13 deletions cranelift/codegen/src/isa/riscv64/abi.rs
@@ -565,19 +565,6 @@ impl ABIMachineSpec for Riscv64MachineDeps {
insts
}

fn gen_return_call(
_callee: CallDest,
_new_stack_arg_size: u32,
_old_stack_arg_size: u32,
_ret_addr: Option<Reg>,
_fp: Reg,
_tmp: Writable<Reg>,
_tmp2: Writable<Reg>,
_uses: abi::CallArgList,
) -> SmallVec<[Self::I; 2]> {
todo!();
}

fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
call_conv: isa::CallConv,
dst: Reg,
13 changes: 0 additions & 13 deletions cranelift/codegen/src/isa/s390x/abi.rs
@@ -748,19 +748,6 @@ impl ABIMachineSpec for S390xMachineDeps {
unreachable!();
}

fn gen_return_call(
_callee: CallDest,
_new_stack_arg_size: u32,
_old_stack_arg_size: u32,
_ret_addr: Option<Reg>,
_fp: Reg,
_tmp: Writable<Reg>,
_tmp2: Writable<Reg>,
_uses: abi::CallArgList,
) -> SmallVec<[Self::I; 2]> {
todo!();
}

fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
_call_conv: isa::CallConv,
_dst: Reg,
133 changes: 86 additions & 47 deletions cranelift/codegen/src/isa/x64/abi.rs
@@ -11,7 +11,7 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use args::*;
use regalloc2::{PRegSet, VReg};
use regalloc2::{PReg, PRegSet, VReg};
use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom;

@@ -713,52 +713,6 @@ impl ABIMachineSpec for X64ABIMachineSpec {
insts
}

fn gen_return_call(
callee: CallDest,
new_stack_arg_size: u32,
old_stack_arg_size: u32,
ret_addr: Option<Reg>,
fp: Reg,
tmp: Writable<Reg>,
tmp2: Writable<Reg>,
uses: abi::CallArgList,
) -> SmallVec<[Self::I; 2]> {
let ret_addr = ret_addr.map(|r| Gpr::new(r).unwrap());
let fp = Gpr::new(fp).unwrap();
let tmp = WritableGpr::from_writable_reg(tmp).unwrap();
let info = Box::new(ReturnCallInfo {
new_stack_arg_size,
old_stack_arg_size,
ret_addr,
fp,
tmp,
uses,
});
match callee {
CallDest::ExtName(callee, RelocDistance::Near) => {
smallvec![Inst::ReturnCallKnown { callee, info }]
}
CallDest::ExtName(callee, RelocDistance::Far) => {
smallvec![
Inst::LoadExtName {
dst: tmp2,
name: Box::new(callee.clone()),
offset: 0,
distance: RelocDistance::Far,
},
Inst::ReturnCallUnknown {
callee: tmp2.into(),
info,
}
]
}
CallDest::Reg(callee) => smallvec![Inst::ReturnCallUnknown {
callee: callee.into(),
info,
}],
}
}

fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
call_conv: isa::CallConv,
dst: Reg,
@@ -886,6 +840,91 @@ impl ABIMachineSpec for X64ABIMachineSpec {
}
}

impl X64CallSite {
pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
// Allocate additional stack space for the new stack frame. We will
// build it in the newly allocated space, but then copy it over our
// current frame at the last moment.
let new_stack_arg_size = self.emit_allocate_tail_call_frame(ctx);
let old_stack_arg_size = ctx.abi().stack_args_size(ctx.sigs());

// Make a copy of the frame pointer, since we use it when copying down
// the new stack frame.
let fp = ctx.temp_writable_gpr();
let rbp = PReg::from(regs::rbp().to_real_reg().unwrap());
ctx.emit(Inst::MovFromPReg { src: rbp, dst: fp });

// Load the return address, because copying our new stack frame
// over our current stack frame might overwrite it, and we'll need to
// place it in the correct location after we do that copy.
//
// But we only need to actually move the return address if the size of
// stack arguments changes.
let ret_addr = if new_stack_arg_size != old_stack_arg_size {
let ret_addr = ctx.temp_writable_gpr();
ctx.emit(Inst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: 8,
base: *fp.to_reg(),
flags: MemFlags::trusted(),
}),
dst: ret_addr,
});
Some(ret_addr.to_reg())
} else {
None
};

// Put all arguments in registers and stack slots (within that newly
// allocated stack space).
self.emit_args(ctx, args);
if let Some(i) = ctx.sigs()[self.sig()].stack_ret_arg() {
let ret_area_ptr = ctx.abi().ret_area_ptr().expect(
"if the tail callee has a return pointer, then the tail caller \
must as well",
);
for inst in self.gen_arg(ctx, i.into(), ValueRegs::one(ret_area_ptr.to_reg())) {
ctx.emit(inst);
}
}

// Finally, emit the macro instruction to copy the new stack frame over
// our current one and do the actual tail call!

let dest = self.dest().clone();
let info = Box::new(ReturnCallInfo {
new_stack_arg_size,
old_stack_arg_size,
ret_addr,
fp: fp.to_reg(),
tmp: ctx.temp_writable_gpr(),
uses: self.take_uses(),
});
match dest {
CallDest::ExtName(callee, RelocDistance::Near) => {
ctx.emit(Inst::ReturnCallKnown { callee, info });
}
CallDest::ExtName(callee, RelocDistance::Far) => {
let tmp2 = ctx.temp_writable_gpr();
ctx.emit(Inst::LoadExtName {
dst: tmp2.to_writable_reg(),
name: Box::new(callee),
offset: 0,
distance: RelocDistance::Far,
});
ctx.emit(Inst::ReturnCallUnknown {
callee: tmp2.to_writable_reg().into(),
info,
});
}
CallDest::Reg(callee) => ctx.emit(Inst::ReturnCallUnknown {
callee: callee.into(),
info,
}),
}
}
}

impl From<StackAMode> for SyntheticAmode {
fn from(amode: StackAMode) -> Self {
// We enforce a 128 MB stack-frame size limit above, so these
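A note on the `[fp + 8]` load in `emit_return_call` above: with the standard
x64 frame layout (`call` pushes the return address, then the prologue runs
`push rbp; mov rbp, rsp`), the return address sits one word above the saved
frame pointer. The sketch below is illustrative only; the helper names are
made up, and it just mirrors the two facts the code above relies on.

    // Illustrative only; these helpers are hypothetical, not Cranelift APIs.

    // Offset of the return address from the frame pointer under the standard
    // x64 frame layout: [rbp + 0] holds the caller's saved rbp, [rbp + 8]
    // the return address. This matches the Amode::ImmReg { simm32: 8,
    // base: fp } load in emit_return_call.
    fn return_addr_offset_from_fp() -> i32 {
        8
    }

    // The return address only has to be reloaded and re-stored when the
    // stack-argument area changes size: only then does its slot move when
    // the new frame is copied over the current one (otherwise the code above
    // takes the ret_addr = None fast path).
    fn must_move_return_addr(new_stack_arg_size: u32, old_stack_arg_size: u32) -> bool {
        new_stack_arg_size != old_stack_arg_size
    }

    fn main() {
        assert_eq!(return_addr_offset_from_fp(), 8);
        assert!(must_move_return_addr(32, 16));
        assert!(!must_move_return_addr(16, 16));
    }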
12 changes: 12 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.rs
@@ -19,6 +19,18 @@ use target_lexicon::Triple;
//=============================================================================
// Helpers for instruction lowering.

impl Lower<'_, Inst> {
#[inline]
pub fn temp_writable_gpr(&mut self) -> WritableGpr {
WritableGpr::from_writable_reg(self.alloc_tmp(types::I64).only_reg().unwrap()).unwrap()
}

#[inline]
pub fn temp_writable_xmm(&mut self) -> WritableXmm {
WritableXmm::from_writable_reg(self.alloc_tmp(types::F64).only_reg().unwrap()).unwrap()
}
}

fn is_int_or_ref_ty(ty: Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
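The two helpers above centralize temporary allocation on `Lower` so that
non-ISLE code, such as `X64CallSite::emit_return_call` in abi.rs, can get
typed temporaries too. A toy sketch of the pattern follows; `Lower`,
`WritableGpr`, and `alloc_tmp` are only mimicked here, not the real types.

    // Toy stand-ins for the real lowering context and register newtypes.
    #[derive(Debug)]
    struct WritableGprSketch(u32); // newtype: "this vreg is a general-purpose reg"

    struct LowerSketch {
        next_vreg: u32,
    }

    impl LowerSketch {
        // Mirrors temp_writable_gpr: allocate a fresh virtual register and
        // wrap it in the GPR newtype so callers cannot confuse it with an
        // XMM temporary.
        fn temp_writable_gpr(&mut self) -> WritableGprSketch {
            let v = self.next_vreg; // stands in for alloc_tmp(types::I64)
            self.next_vreg += 1;
            WritableGprSketch(v)
        }
    }

    fn main() {
        let mut ctx = LowerSketch { next_vreg: 0 };
        let fp = ctx.temp_writable_gpr();
        let tmp = ctx.temp_writable_gpr();
        println!("fp = {fp:?}, tmp = {tmp:?}");
    }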
114 changes: 6 additions & 108 deletions cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -95,66 +95,15 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {

let callee = self.put_in_reg(callee);

let mut call_site = X64CallSite::from_ptr(
let call_site = X64CallSite::from_ptr(
self.lower_ctx.sigs(),
callee_sig,
callee,
Opcode::ReturnCallIndirect,
caller_conv,
self.backend.flags().clone(),
);

// Allocate additional stack space for the new stack frame. We will
// build it in the newly allocated space, but then copy it over our
// current frame at the last moment.
let new_stack_arg_size = call_site.emit_allocate_tail_call_frame(self.lower_ctx);
let old_stack_arg_size = self.lower_ctx.abi().stack_args_size(self.lower_ctx.sigs());

// Make a copy of the frame pointer, since we use it when copying down
// the new stack frame.
let fp = self.temp_writable_gpr();
let rbp = self.preg_rbp();
self.lower_ctx
.emit(MInst::MovFromPReg { src: rbp, dst: fp });

// Load the return address, because copying our new stack frame
// over our current stack frame might overwrite it, and we'll need to
// place it in the correct location after we do that copy.
//
// But we only need to actually move the return address if the size of
// stack arguments changes.
let ret_addr = if new_stack_arg_size != old_stack_arg_size {
let ret_addr = self.temp_writable_gpr();
self.lower_ctx.emit(MInst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: 8,
base: fp.to_reg().to_reg(),
flags: MemFlags::trusted(),
}),
dst: ret_addr,
});
Some(ret_addr)
} else {
None
};

// Put all arguments in registers and stack slots (within that newly
// allocated stack space).
self.gen_call_common_args(&mut call_site, args);

// Finally, emit the macro instruction to copy the new stack frame over
// our current one and do the actual tail call!
let tmp = self.temp_writable_gpr();
let tmp2 = self.temp_writable_gpr();
call_site.emit_return_call(
self.lower_ctx,
new_stack_arg_size,
old_stack_arg_size,
ret_addr.map(|r| r.to_reg().to_reg()),
fp.to_reg().to_reg(),
tmp.to_writable_reg(),
tmp2.to_writable_reg(),
);
call_site.emit_return_call(self.lower_ctx, args);

InstOutput::new()
}
@@ -173,66 +122,15 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
"Can only do `return_call`s from within a `tail` calling convention function"
);

let mut call_site = X64CallSite::from_func(
let call_site = X64CallSite::from_func(
self.lower_ctx.sigs(),
callee_sig,
&callee,
distance,
caller_conv,
self.backend.flags().clone(),
);

// Allocate additional stack space for the new stack frame. We will
// build it in the newly allocated space, but then copy it over our
// current frame at the last moment.
let new_stack_arg_size = call_site.emit_allocate_tail_call_frame(self.lower_ctx);
let old_stack_arg_size = self.lower_ctx.abi().stack_args_size(self.lower_ctx.sigs());

// Make a copy of the frame pointer, since we use it when copying down
// the new stack frame.
let fp = self.temp_writable_gpr();
let rbp = self.preg_rbp();
self.lower_ctx
.emit(MInst::MovFromPReg { src: rbp, dst: fp });

// Load the return address, because copying our new stack frame
// over our current stack frame might overwrite it, and we'll need to
// place it in the correct location after we do that copy.
//
// But we only need to actually move the return address if the size of
// stack arguments changes.
let ret_addr = if new_stack_arg_size != old_stack_arg_size {
let ret_addr = self.temp_writable_gpr();
self.lower_ctx.emit(MInst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: 8,
base: fp.to_reg().to_reg(),
flags: MemFlags::trusted(),
}),
dst: ret_addr,
});
Some(ret_addr)
} else {
None
};

// Put all arguments in registers and stack slots (within that newly
// allocated stack space).
self.gen_call_common_args(&mut call_site, args);

// Finally, emit the macro instruction to copy the new stack frame over
// our current one and do the actual tail call!
let tmp = self.temp_writable_gpr();
let tmp2 = self.temp_writable_gpr();
call_site.emit_return_call(
self.lower_ctx,
new_stack_arg_size,
old_stack_arg_size,
ret_addr.map(|r| r.to_reg().to_reg()),
fp.to_reg().to_reg(),
tmp.to_writable_reg(),
tmp2.to_writable_reg(),
);
call_site.emit_return_call(self.lower_ctx, args);

InstOutput::new()
}
@@ -599,12 +497,12 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {

#[inline]
fn temp_writable_gpr(&mut self) -> WritableGpr {
Writable::from_reg(Gpr::new(self.temp_writable_reg(I64).to_reg()).unwrap())
self.lower_ctx.temp_writable_gpr()
}

#[inline]
fn temp_writable_xmm(&mut self) -> WritableXmm {
Writable::from_reg(Xmm::new(self.temp_writable_reg(I8X16).to_reg()).unwrap())
self.lower_ctx.temp_writable_xmm()
}

#[inline]
(diff for the seventh changed file not rendered)
