Skip to content

Commit

Permalink
Auto merge of #8192 - alexcrichton:lto-optimizations, r=ehuss
Browse files Browse the repository at this point in the history
Don't force rustc to do codegen for LTO builds

This commit updates Cargo's implementation of LTO builds to do less work
and hopefully be faster when doing a cold build. Additionaly this should
save space on disk! The general idea is that the compiler does not need
object files if it's only going to perform LTO with some artifacts. In
this case all rustc needs to do is load bitcode from dependencies. This
means that if you're doing an LTO build generating object code for
intermediate dependencies is just wasted time!

Here Cargo is updated with more intrusive knowledge about LTO. Cargo
will now analyze the dependency graph to figure out which crates are
being compiled with LTO, and then it will figure out which dependencies
only need to have bitcode in them. Pure-bitcode artifacts are emitted
with the `-Clinker-plugin-lto` flag. Some artifacts are still used in
multiple scenarios (such as those shared between build scripts and final
artifacts), so those are not compiled with `-Clinker-plugin-lto` since
the linker is not guaranteed to know how to perform LTO.

This functionality was recently implemented in rust-lang/rust#71528
where rustc is now capable of reading bitcode from `-Clinker-plugin-lto`
rlibs. Previously rustc would only read its own format of bitcode, but
this has now been extended! This support is now on nightly, hence this
PR.
  • Loading branch information
bors committed May 4, 2020
2 parents 893db8c + e221925 commit 91dc1e1
Show file tree
Hide file tree
Showing 6 changed files with 410 additions and 19 deletions.
2 changes: 1 addition & 1 deletion src/cargo/core/compiler/build_context/target_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ pub struct TargetInfo {
pub rustflags: Vec<String>,
/// Extra flags to pass to `rustdoc`, see `env_args`.
pub rustdocflags: Vec<String>,
/// Remove this when it hits stable (1.44)
/// Remove this when it hits stable (1.45)
pub supports_embed_bitcode: Option<bool>,
}

Expand Down
8 changes: 8 additions & 0 deletions src/cargo/core/compiler/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use super::custom_build::{self, BuildDeps, BuildScriptOutputs, BuildScripts};
use super::fingerprint::Fingerprint;
use super::job_queue::JobQueue;
use super::layout::Layout;
use super::lto::Lto;
use super::unit_graph::UnitDep;
use super::{BuildContext, Compilation, CompileKind, CompileMode, Executor, FileFlavor};

Expand Down Expand Up @@ -72,6 +73,11 @@ pub struct Context<'a, 'cfg> {
/// jobserver clients for each Unit (which eventually becomes a rustc
/// process).
pub rustc_clients: HashMap<Unit, Client>,

/// Map of the LTO-status of each unit. This indicates what sort of
/// compilation is happening (only object, only bitcode, both, etc), and is
/// precalculated early on.
pub lto: HashMap<Unit, Lto>,
}

impl<'a, 'cfg> Context<'a, 'cfg> {
Expand Down Expand Up @@ -111,6 +117,7 @@ impl<'a, 'cfg> Context<'a, 'cfg> {
rmeta_required: HashSet::new(),
rustc_clients: HashMap::new(),
pipelining,
lto: HashMap::new(),
})
}

Expand All @@ -123,6 +130,7 @@ impl<'a, 'cfg> Context<'a, 'cfg> {
self.prepare_units()?;
self.prepare()?;
custom_build::build_map(&mut self)?;
super::lto::generate(&mut self)?;
self.check_collistions()?;

for unit in &self.bcx.roots {
Expand Down
8 changes: 7 additions & 1 deletion src/cargo/core/compiler/fingerprint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
//! -C incremental=… flag | ✓ |
//! mtime of sources | ✓[^3] |
//! RUSTFLAGS/RUSTDOCFLAGS | ✓ |
//! LTO flags | ✓ |
//! is_std | | ✓
//!
//! [^1]: Build script and bin dependencies are not included.
Expand Down Expand Up @@ -1241,7 +1242,12 @@ fn calculate_normal(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Finger
}
.to_vec();

let profile_hash = util::hash_u64((&unit.profile, unit.mode, cx.bcx.extra_args_for(unit)));
let profile_hash = util::hash_u64((
&unit.profile,
unit.mode,
cx.bcx.extra_args_for(unit),
cx.lto[unit],
));
// Include metadata since it is exposed as environment variables.
let m = unit.pkg.manifest().metadata();
let metadata = util::hash_u64((&m.authors, &m.description, &m.homepage, &m.repository));
Expand Down
116 changes: 116 additions & 0 deletions src/cargo/core/compiler/lto.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
use crate::core::compiler::{Context, Unit};
use crate::core::interning::InternedString;
use crate::core::profiles;
use crate::util::errors::CargoResult;
use std::collections::hash_map::{Entry, HashMap};

/// Possible ways to run rustc and request various parts of LTO.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub enum Lto {
/// LTO is run for this rustc, and it's `-Clto=foo` where `foo` is optional.
Run(Option<InternedString>),

/// This rustc invocation only needs to produce bitcode, there's no need to
/// produce object files, so we can pass `-Clinker-plugin-lto`
OnlyBitcode,

/// This rustc invocation needs to embed bitcode in object files. This means
/// that object files may be used for a normal link, and the crate may be
/// loaded for LTO later, so both are required.
EmbedBitcode,

/// Nothing related to LTO is required of this compilation.
None,
}

pub fn generate(cx: &mut Context<'_, '_>) -> CargoResult<()> {
let mut map = HashMap::new();
for unit in cx.bcx.roots.iter() {
calculate(cx, &mut map, unit, false)?;
}
cx.lto = map;
Ok(())
}

fn calculate(
cx: &Context<'_, '_>,
map: &mut HashMap<Unit, Lto>,
unit: &Unit,
require_bitcode: bool,
) -> CargoResult<()> {
let (lto, require_bitcode_for_deps) = if unit.target.for_host() {
// Disable LTO for host builds since we only really want to perform LTO
// for the final binary, and LTO on plugins/build scripts/proc macros is
// largely not desired.
(Lto::None, false)
} else if unit.target.can_lto() {
// Otherwise if this target can perform LTO then we're going to read the
// LTO value out of the profile.
assert!(!require_bitcode); // can't depend on binaries/staticlib/etc
match unit.profile.lto {
profiles::Lto::Named(s) => match s.as_str() {
"n" | "no" | "off" => (Lto::Run(Some(s)), false),
_ => (Lto::Run(Some(s)), true),
},
profiles::Lto::Bool(true) => (Lto::Run(None), true),
profiles::Lto::Bool(false) => (Lto::None, false),
}
} else if require_bitcode {
// Otherwise we're a dependency of something, an rlib. This means that
// if our parent required bitcode of some kind then we need to generate
// bitcode.
(Lto::OnlyBitcode, true)
} else {
(Lto::None, false)
};

match map.entry(unit.clone()) {
// If we haven't seen this unit before then insert our value and keep
// going.
Entry::Vacant(v) => {
v.insert(lto);
}

Entry::Occupied(mut v) => {
let result = match (lto, v.get()) {
// Targets which execute LTO cannot be depended on, so these
// units should only show up once in the dependency graph, so we
// should never hit this case.
(Lto::Run(_), _) | (_, Lto::Run(_)) => {
unreachable!("lto-able targets shouldn't show up twice")
}

// If we calculated the same thing as before then we can bail
// out quickly.
(Lto::OnlyBitcode, Lto::OnlyBitcode) | (Lto::None, Lto::None) => return Ok(()),

// This is where the trickiness happens. This unit needs
// bitcode and the previously calculated value for this unit
// says it didn't need bitcode (or vice versa). This means that
// we're a shared dependency between some targets which require
// LTO and some which don't. This means that instead of being
// either only-objects or only-bitcode we have to embed both in
// rlibs (used for different compilations), so we switch to
// embedding bitcode.
(Lto::OnlyBitcode, Lto::None)
| (Lto::OnlyBitcode, Lto::EmbedBitcode)
| (Lto::None, Lto::OnlyBitcode)
| (Lto::None, Lto::EmbedBitcode) => Lto::EmbedBitcode,

// Currently this variant is never calculated above, so no need
// to handle this case.
(Lto::EmbedBitcode, _) => unreachable!(),
};
// No need to recurse if we calculated the same value as before.
if result == *v.get() {
return Ok(());
}
v.insert(result);
}
}

for dep in cx.unit_deps(unit) {
calculate(cx, map, &dep.unit, require_bitcode_for_deps)?;
}
Ok(())
}
41 changes: 24 additions & 17 deletions src/cargo/core/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ mod job;
mod job_queue;
mod layout;
mod links;
mod lto;
mod output_depinfo;
pub mod standard_lib;
mod timings;
Expand Down Expand Up @@ -42,7 +43,7 @@ use self::output_depinfo::output_depinfo;
use self::unit_graph::UnitDep;
pub use crate::core::compiler::unit::{Unit, UnitInterner};
use crate::core::manifest::TargetSourcePath;
use crate::core::profiles::{Lto, PanicStrategy, Profile};
use crate::core::profiles::{PanicStrategy, Profile};
use crate::core::{Edition, Feature, InternedString, PackageId, Target};
use crate::util::errors::{self, CargoResult, CargoResultExt, ProcessError, VerboseError};
use crate::util::machine_message::Message;
Expand Down Expand Up @@ -740,7 +741,6 @@ fn build_base_args(
let bcx = cx.bcx;
let Profile {
ref opt_level,
ref lto,
codegen_units,
debuginfo,
debug_assertions,
Expand Down Expand Up @@ -793,24 +793,31 @@ fn build_base_args(
cmd.arg("-C").arg(format!("panic={}", panic));
}

// Disable LTO for host builds as prefer_dynamic and it are mutually
// exclusive.
let lto_possible = unit.target.can_lto() && !unit.target.for_host();
match lto {
Lto::Bool(true) => {
if lto_possible {
cmd.args(&["-C", "lto"]);
}
match cx.lto[&unit] {
lto::Lto::Run(None) => {
cmd.arg("-C").arg("lto");
}
lto::Lto::Run(Some(s)) => {
cmd.arg("-C").arg(format!("lto={}", s));
}
Lto::Named(s) => {
if lto_possible {
cmd.arg("-C").arg(format!("lto={}", s));
lto::Lto::EmbedBitcode => {} // this is rustc's default
lto::Lto::OnlyBitcode => {
// Note that this compiler flag, like the one below, is just an
// optimization in terms of build time. If we don't pass it then
// both object code and bitcode will show up. This is lagely just
// compat until the feature lands on stable and we can remove the
// conditional branch.
if cx
.bcx
.target_data
.info(CompileKind::Host)
.supports_embed_bitcode
.unwrap()
{
cmd.arg("-Clinker-plugin-lto");
}
}
// If LTO isn't being enabled then there's no need for bitcode to be
// present in the intermediate artifacts, so shave off some build time
// by removing it.
Lto::Bool(false) => {
lto::Lto::None => {
if cx
.bcx
.target_data
Expand Down
Loading

0 comments on commit 91dc1e1

Please sign in to comment.