Skip to content

Commit

Permalink
Auto merge of #72205 - ecstatic-morse:nrvo, r=oli-obk
Browse files Browse the repository at this point in the history
Dumb NRVO

This is a very simple version of an NRVO pass, which scans backwards from the `return` terminator to see if there is an an assignment like `_0 = _1`. If a basic block with two or more predecessors is encountered during this scan without first seeing an assignment to the return place, we bail out. This avoids running a full "reaching definitions" dataflow analysis.

I wanted to see how much `rustc` would benefit from even a very limited version of this optimization. We should be able to use this as a point of comparison for more advanced versions that are based on live ranges.

r? @ghost
  • Loading branch information
bors committed May 21, 2020
2 parents 963bf52 + f509862 commit 7f79e98
Show file tree
Hide file tree
Showing 13 changed files with 350 additions and 48 deletions.
3 changes: 2 additions & 1 deletion src/librustc_codegen_ssa/mir/debuginfo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
let full_debug_info = bx.sess().opts.debuginfo == DebugInfo::Full;

// FIXME(eddyb) maybe name the return place as `_0` or `return`?
if local == mir::RETURN_PLACE {
if local == mir::RETURN_PLACE && !self.mir.local_decls[mir::RETURN_PLACE].is_user_variable()
{
return;
}

Expand Down
2 changes: 2 additions & 0 deletions src/librustc_mir/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pub mod generator;
pub mod inline;
pub mod instcombine;
pub mod no_landing_pads;
pub mod nrvo;
pub mod promote_consts;
pub mod qualify_min_const_fn;
pub mod remove_noop_landing_pads;
Expand Down Expand Up @@ -324,6 +325,7 @@ fn run_optimization_passes<'tcx>(
&remove_noop_landing_pads::RemoveNoopLandingPads,
&simplify::SimplifyCfg::new("after-remove-noop-landing-pads"),
&simplify::SimplifyCfg::new("final"),
&nrvo::RenameReturnPlace,
&simplify::SimplifyLocals,
];

Expand Down
238 changes: 238 additions & 0 deletions src/librustc_mir/transform/nrvo.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
use rustc_hir::Mutability;
use rustc_index::bit_set::HybridBitSet;
use rustc_middle::mir::visit::{MutVisitor, PlaceContext, Visitor};
use rustc_middle::mir::{self, BasicBlock, Local, Location};
use rustc_middle::ty::TyCtxt;

use crate::transform::{MirPass, MirSource};

/// This pass looks for MIR that always copies the same local into the return place and eliminates
/// the copy by renaming all uses of that local to `_0`.
///
/// This allows LLVM to perform an optimization similar to the named return value optimization
/// (NRVO) that is guaranteed in C++. This avoids a stack allocation and `memcpy` for the
/// relatively common pattern of allocating a buffer on the stack, mutating it, and returning it by
/// value like so:
///
/// ```rust
/// fn foo(init: fn(&mut [u8; 1024])) -> [u8; 1024] {
/// let mut buf = [0; 1024];
/// init(&mut buf);
/// buf
/// }
/// ```
///
/// For now, this pass is very simple and only capable of eliminating a single copy. A more general
/// version of copy propagation, such as the one based on non-overlapping live ranges in [#47954] and
/// [#71003], could yield even more benefits.
///
/// [#47954]: https://github.com/rust-lang/rust/pull/47954
/// [#71003]: https://github.com/rust-lang/rust/pull/71003
pub struct RenameReturnPlace;

impl<'tcx> MirPass<'tcx> for RenameReturnPlace {
fn run_pass(&self, tcx: TyCtxt<'tcx>, src: MirSource<'tcx>, body: &mut mir::Body<'tcx>) {
if tcx.sess.opts.debugging_opts.mir_opt_level == 0 {
return;
}

let returned_local = match local_eligible_for_nrvo(body) {
Some(l) => l,
None => {
debug!("`{:?}` was ineligible for NRVO", src.def_id());
return;
}
};

// Sometimes, the return place is assigned a local of a different but coercable type, for
// example `&T` instead of `&mut T`. Overwriting the `LocalInfo` for the return place would
// result in it having an incorrect type. Although this doesn't seem to cause a problem in
// codegen, bail out anyways since it happens so rarely.
let ret_ty = body.local_decls[mir::RETURN_PLACE].ty;
let assigned_ty = body.local_decls[returned_local].ty;
if ret_ty != assigned_ty {
debug!("`{:?}` was eligible for NRVO but for type mismatch", src.def_id());
debug!("typeof(_0) != typeof({:?}); {:?} != {:?}", returned_local, ret_ty, assigned_ty);
return;
}

debug!(
"`{:?}` was eligible for NRVO, making {:?} the return place",
src.def_id(),
returned_local
);

RenameToReturnPlace { tcx, to_rename: returned_local }.visit_body(body);

// Clean up the `NOP`s we inserted for statements made useless by our renaming.
for block_data in body.basic_blocks_mut() {
block_data.statements.retain(|stmt| stmt.kind != mir::StatementKind::Nop);
}

// Overwrite the debuginfo of `_0` with that of the renamed local.
let (renamed_decl, ret_decl) =
body.local_decls.pick2_mut(returned_local, mir::RETURN_PLACE);
ret_decl.clone_from(renamed_decl);

// The return place is always mutable.
ret_decl.mutability = Mutability::Mut;
}
}

/// MIR that is eligible for the NRVO must fulfill two conditions:
/// 1. The return place must not be read prior to the `Return` terminator.
/// 2. A simple assignment of a whole local to the return place (e.g., `_0 = _1`) must be the
/// only definition of the return place reaching the `Return` terminator.
///
/// If the MIR fulfills both these conditions, this function returns the `Local` that is assigned
/// to the return place along all possible paths through the control-flow graph.
fn local_eligible_for_nrvo(body: &mut mir::Body<'_>) -> Option<Local> {
if IsReturnPlaceRead::run(body) {
return None;
}

let mut copied_to_return_place = None;
for block in body.basic_blocks().indices() {
// Look for blocks with a `Return` terminator.
if !matches!(body[block].terminator().kind, mir::TerminatorKind::Return) {
continue;
}

// Look for an assignment of a single local to the return place prior to the `Return`.
let returned_local = find_local_assigned_to_return_place(block, body)?;
match body.local_kind(returned_local) {
// FIXME: Can we do this for arguments as well?
mir::LocalKind::Arg => return None,

mir::LocalKind::ReturnPointer => bug!("Return place was assigned to itself?"),
mir::LocalKind::Var | mir::LocalKind::Temp => {}
}

// If multiple different locals are copied to the return place. We can't pick a
// single one to rename.
if copied_to_return_place.map_or(false, |old| old != returned_local) {
return None;
}

copied_to_return_place = Some(returned_local);
}

return copied_to_return_place;
}

fn find_local_assigned_to_return_place(
start: BasicBlock,
body: &mut mir::Body<'_>,
) -> Option<Local> {
let mut block = start;
let mut seen = HybridBitSet::new_empty(body.basic_blocks().len());

// Iterate as long as `block` has exactly one predecessor that we have not yet visited.
while seen.insert(block) {
trace!("Looking for assignments to `_0` in {:?}", block);

let local = body[block].statements.iter().rev().find_map(as_local_assigned_to_return_place);
if local.is_some() {
return local;
}

match body.predecessors()[block].as_slice() {
&[pred] => block = pred,
_ => return None,
}
}

return None;
}

// If this statement is an assignment of an unprojected local to the return place,
// return that local.
fn as_local_assigned_to_return_place(stmt: &mir::Statement<'_>) -> Option<Local> {
if let mir::StatementKind::Assign(box (lhs, rhs)) = &stmt.kind {
if lhs.as_local() == Some(mir::RETURN_PLACE) {
if let mir::Rvalue::Use(mir::Operand::Copy(rhs) | mir::Operand::Move(rhs)) = rhs {
return rhs.as_local();
}
}
}

None
}

struct RenameToReturnPlace<'tcx> {
to_rename: Local,
tcx: TyCtxt<'tcx>,
}

/// Replaces all uses of `self.to_rename` with `_0`.
impl MutVisitor<'tcx> for RenameToReturnPlace<'tcx> {
fn tcx(&self) -> TyCtxt<'tcx> {
self.tcx
}

fn visit_statement(&mut self, stmt: &mut mir::Statement<'tcx>, loc: Location) {
// Remove assignments of the local being replaced to the return place, since it is now the
// return place:
// _0 = _1
if as_local_assigned_to_return_place(stmt) == Some(self.to_rename) {
stmt.kind = mir::StatementKind::Nop;
return;
}

// Remove storage annotations for the local being replaced:
// StorageLive(_1)
if let mir::StatementKind::StorageLive(local) | mir::StatementKind::StorageDead(local) =
stmt.kind
{
if local == self.to_rename {
stmt.kind = mir::StatementKind::Nop;
return;
}
}

self.super_statement(stmt, loc)
}

fn visit_terminator(&mut self, terminator: &mut mir::Terminator<'tcx>, loc: Location) {
// Ignore the implicit "use" of the return place in a `Return` statement.
if let mir::TerminatorKind::Return = terminator.kind {
return;
}

self.super_terminator(terminator, loc);
}

fn visit_local(&mut self, l: &mut Local, _: PlaceContext, _: Location) {
assert_ne!(*l, mir::RETURN_PLACE);
if *l == self.to_rename {
*l = mir::RETURN_PLACE;
}
}
}

struct IsReturnPlaceRead(bool);

impl IsReturnPlaceRead {
fn run(body: &mir::Body<'_>) -> bool {
let mut vis = IsReturnPlaceRead(false);
vis.visit_body(body);
vis.0
}
}

impl Visitor<'tcx> for IsReturnPlaceRead {
fn visit_local(&mut self, &l: &Local, ctxt: PlaceContext, _: Location) {
if l == mir::RETURN_PLACE && ctxt.is_use() && !ctxt.is_place_assignment() {
self.0 = true;
}
}

fn visit_terminator(&mut self, terminator: &mir::Terminator<'tcx>, loc: Location) {
// Ignore the implicit "use" of the return place in a `Return` statement.
if let mir::TerminatorKind::Return = terminator.kind {
return;
}

self.super_terminator(terminator, loc);
}
}
2 changes: 1 addition & 1 deletion src/test/codegen/align-enum.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// compile-flags: -C no-prepopulate-passes
// compile-flags: -C no-prepopulate-passes -Z mir-opt-level=0
// ignore-tidy-linelength

#![crate_type = "lib"]
Expand Down
2 changes: 1 addition & 1 deletion src/test/codegen/align-struct.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// compile-flags: -C no-prepopulate-passes
// compile-flags: -C no-prepopulate-passes -Z mir-opt-level=0
// ignore-tidy-linelength

#![crate_type = "lib"]
Expand Down
17 changes: 17 additions & 0 deletions src/test/codegen/nrvo.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// compile-flags: -O

#![crate_type = "lib"]

// Ensure that we do not call `memcpy` for the following function.
// `memset` and `init` should be called directly on the return pointer.
#[no_mangle]
pub fn nrvo(init: fn(&mut [u8; 4096])) -> [u8; 4096] {
// CHECK-LABEL: nrvo
// CHECK: @llvm.memset
// CHECK-NOT: @llvm.memcpy
// CHECK: ret
// CHECK-EMPTY
let mut buf = [0; 4096];
init(&mut buf);
buf
}
39 changes: 9 additions & 30 deletions src/test/debuginfo/generic-function.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// ignore-tidy-linelength

// min-lldb-version: 310

// compile-flags:-g
Expand All @@ -12,31 +10,21 @@
// gdb-check:$1 = 1
// gdb-command:print *t1
// gdb-check:$2 = 2.5
// gdb-command:print ret
// gdbg-check:$3 = {__0 = {__0 = 1, __1 = 2.5}, __1 = {__0 = 2.5, __1 = 1}}
// gdbr-check:$3 = ((1, 2.5), (2.5, 1))
// gdb-command:continue

// gdb-command:print *t0
// gdb-check:$4 = 3.5
// gdb-check:$3 = 3.5
// gdb-command:print *t1
// gdb-check:$5 = 4
// gdb-command:print ret
// gdbg-check:$6 = {__0 = {__0 = 3.5, __1 = 4}, __1 = {__0 = 4, __1 = 3.5}}
// gdbr-check:$6 = ((3.5, 4), (4, 3.5))
// gdb-check:$4 = 4
// gdb-command:continue

// gdb-command:print *t0
// gdb-check:$7 = 5
// gdb-check:$5 = 5
// gdb-command:print *t1
// gdbg-check:$8 = {a = 6, b = 7.5}
// gdbr-check:$8 = generic_function::Struct {a: 6, b: 7.5}
// gdb-command:print ret
// gdbg-check:$9 = {__0 = {__0 = 5, __1 = {a = 6, b = 7.5}}, __1 = {__0 = {a = 6, b = 7.5}, __1 = 5}}
// gdbr-check:$9 = ((5, generic_function::Struct {a: 6, b: 7.5}), (generic_function::Struct {a: 6, b: 7.5}, 5))
// gdbg-check:$6 = {a = 6, b = 7.5}
// gdbr-check:$6 = generic_function::Struct {a: 6, b: 7.5}
// gdb-command:continue


// === LLDB TESTS ==================================================================================

// lldb-command:run
Expand All @@ -47,31 +35,22 @@
// lldb-command:print *t1
// lldbg-check:[...]$1 = 2.5
// lldbr-check:(f64) *t1 = 2.5
// lldb-command:print ret
// lldbg-check:[...]$2 = ((1, 2.5), (2.5, 1))
// lldbr-check:(((i32, f64), (f64, i32))) ret = { = { = 1 = 2.5 } = { = 2.5 = 1 } }
// lldb-command:continue

// lldb-command:print *t0
// lldbg-check:[...]$3 = 3.5
// lldbg-check:[...]$2 = 3.5
// lldbr-check:(f64) *t0 = 3.5
// lldb-command:print *t1
// lldbg-check:[...]$4 = 4
// lldbg-check:[...]$3 = 4
// lldbr-check:(u16) *t1 = 4
// lldb-command:print ret
// lldbg-check:[...]$5 = ((3.5, 4), (4, 3.5))
// lldbr-check:(((f64, u16), (u16, f64))) ret = { = { = 3.5 = 4 } = { = 4 = 3.5 } }
// lldb-command:continue

// lldb-command:print *t0
// lldbg-check:[...]$6 = 5
// lldbg-check:[...]$4 = 5
// lldbr-check:(i32) *t0 = 5
// lldb-command:print *t1
// lldbg-check:[...]$7 = Struct { a: 6, b: 7.5 }
// lldbg-check:[...]$5 = Struct { a: 6, b: 7.5 }
// lldbr-check:(generic_function::Struct) *t1 = Struct { a: 6, b: 7.5 }
// lldb-command:print ret
// lldbg-check:[...]$8 = ((5, Struct { a: 6, b: 7.5 }), (Struct { a: 6, b: 7.5 }, 5))
// lldbr-check:(((i32, generic_function::Struct), (generic_function::Struct, i32))) ret = { = { = 5 = Struct { a: 6, b: 7.5 } } = { = Struct { a: 6, b: 7.5 } = 5 } }
// lldb-command:continue

#![feature(omit_gdb_pretty_printer_section)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,14 @@ fn a(_1: &mut [T]) -> &mut [T] {
let mut _4: &mut [T]; // in scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:6
scope 1 {
debug self => _4; // in scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
let mut _5: &mut [T]; // in scope 1 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:15
}

bb0: {
StorageLive(_2); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:15
StorageLive(_3); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:15
StorageLive(_4); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:6
_4 = &mut (*_1); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:6
StorageLive(_5); // scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
_5 = _4; // scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
_3 = _5; // scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
StorageDead(_5); // scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
_3 = _4; // scope 1 at $SRC_DIR/libcore/convert/mod.rs:LL:COL
_2 = &mut (*_3); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:15
StorageDead(_4); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:14: 3:15
_0 = &mut (*_2); // scope 0 at $DIR/issue-58867-inline-as-ref-as-mut.rs:3:5: 3:15
Expand Down
Loading

0 comments on commit 7f79e98

Please sign in to comment.