From 855f6d1483e023cea3b7988db294ed9767e15359 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Sat, 25 Nov 2017 11:13:58 -0800 Subject: [PATCH] rustc: Prepare to enable ThinLTO by default This commit prepares to enable ThinLTO and multiple codegen units in release mode by default. We've still got a debuginfo bug or two to sort out before actually turning it on by default. --- src/librustc/session/config.rs | 18 ++- src/librustc/session/mod.rs | 103 ++++++++++++++---- src/librustc_trans/back/write.rs | 5 +- src/librustc_trans/base.rs | 2 +- src/libstd/sys_common/backtrace.rs | 22 +++- .../run-fail/mir_trans_no_landing_pads.rs | 2 +- .../mir_trans_no_landing_pads_diverging.rs | 2 +- src/test/run-pass/no-landing-pads.rs | 2 +- 8 files changed, 124 insertions(+), 32 deletions(-) diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs index 630832372704a..0efd24ae132f0 100644 --- a/src/librustc/session/config.rs +++ b/src/librustc/session/config.rs @@ -383,8 +383,13 @@ top_level_options!( // try to not rely on this too much. actually_rustdoc: bool [TRACKED], - // Number of object files/codegen units to produce on the backend + // Specifications of codegen units / ThinLTO which are forced as a + // result of parsing command line options. These are not necessarily + // what rustc was invoked with, but massaged a bit to agree with + // commands like `--emit llvm-ir` which they're often incompatible with + // if we otherwise use the defaults of rustc. cli_forced_codegen_units: Option<usize> [UNTRACKED], + cli_forced_thinlto: Option<bool> [UNTRACKED], } ); @@ -566,6 +571,7 @@ pub fn basic_options() -> Options { debug_assertions: true, actually_rustdoc: false, cli_forced_codegen_units: None, + cli_forced_thinlto: None, } } @@ -1165,7 +1171,7 @@ options! 
{DebuggingOptions, DebuggingSetter, basic_debugging_options, "run the non-lexical lifetimes MIR pass"), trans_time_graph: bool = (false, parse_bool, [UNTRACKED], "generate a graphical HTML report of time spent in trans and LLVM"), - thinlto: bool = (false, parse_bool, [TRACKED], + thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED], "enable ThinLTO when possible"), inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED], "control whether #[inline] functions are in all cgus"), @@ -1601,6 +1607,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) let mut cg = build_codegen_options(matches, error_format); let mut codegen_units = cg.codegen_units; + let mut thinlto = None; // Issue #30063: if user requests llvm-related output to one // particular path, disable codegen-units. @@ -1622,9 +1629,13 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) } early_warn(error_format, "resetting to default -C codegen-units=1"); codegen_units = Some(1); + thinlto = Some(false); } } - _ => codegen_units = Some(1), + _ => { + codegen_units = Some(1); + thinlto = Some(false); + } } } @@ -1834,6 +1845,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches) debug_assertions, actually_rustdoc: false, cli_forced_codegen_units: codegen_units, + cli_forced_thinlto: thinlto, }, cfg) } diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs index 227efcf4d6e21..df5805bacd41a 100644 --- a/src/librustc/session/mod.rs +++ b/src/librustc/session/mod.rs @@ -656,30 +656,91 @@ impl Session { return n as usize } + // Why is 16 codegen units the default all the time? + // + // The main reason for enabling multiple codegen units by default is to + // leverage the ability for the trans backend to do translation and + // codegen in parallel. This allows us, especially for large crates, to + // make good use of all available resources on the machine once we've + // hit that stage of compilation. 
Large crates especially then often + // take a long time in trans/codegen and this helps us amortize that + // cost. + // + // Note that a high number here doesn't mean that we'll be spawning a + // large number of threads in parallel. The backend of rustc contains + // global rate limiting through the `jobserver` crate so we'll never + // overload the system with too much work, but rather we'll only be + // optimizing when we're otherwise cooperating with other instances of + // rustc. + // + // Rather a high number here means that we should be able to keep a lot + // of idle cpus busy. By ensuring that no codegen unit takes *too* long + // to build we'll be guaranteed that all cpus will finish pretty closely + // to one another and we should make relatively optimal use of system + // resources + // + // Note that the main cost of codegen units is that it prevents LLVM + // from inlining across codegen units. Users in general don't have a lot + // of control over how codegen units are split up so it's our job in the + // compiler to ensure that undue performance isn't lost when using + // codegen units (aka we can't require everyone to slap `#[inline]` on + // everything). + // + // If we're compiling at `-O0` then the number doesn't really matter too + // much because performance doesn't matter and inlining is ok to lose. + // In debug mode we just want to try to guarantee that no cpu is stuck + // doing work that could otherwise be farmed to others. + // + // In release mode, however (O1 and above) performance does indeed + // matter! To recover the loss in performance due to inlining we'll be + // enabling ThinLTO by default (the function for which is just below). + // This will ensure that we recover any inlining wins we otherwise lost + // through codegen unit partitioning. + // + // --- + // + // Ok that's a lot of words but the basic tl;dr; is that we want a high + // number here -- but not too high. 
Additionally we're "safe" to have it + // always at the same number at all optimization levels. + // + // As a result 16 was chosen here! Mostly because it was a power of 2 + // and most benchmarks agreed it was roughly a local optimum. Not very + // scientific. match self.opts.optimize { - // If we're compiling at `-O0` then default to 16 codegen units. - // The number here shouldn't matter too too much as debug mode - // builds don't rely on performance at all, meaning that lost - // opportunities for inlining through multiple codegen units is - // a non-issue. - // - // Note that the high number here doesn't mean that we'll be - // spawning a large number of threads in parallel. The backend - // of rustc contains global rate limiting through the - // `jobserver` crate so we'll never overload the system with too - // much work, but rather we'll only be optimizing when we're - // otherwise cooperating with other instances of rustc. - // - // Rather the high number here means that we should be able to - // keep a lot of idle cpus busy. By ensuring that no codegen - // unit takes *too* long to build we'll be guaranteed that all - // cpus will finish pretty closely to one another and we should - // make relatively optimal use of system resources config::OptLevel::No => 16, + _ => 1, // FIXME(#46346) this should be 16 + } + } - // All other optimization levels default use one codegen unit, - // the historical default in Rust for a Long Time. - _ => 1, + /// Returns whether ThinLTO is enabled for this compilation + pub fn thinlto(&self) -> bool { + // If processing command line options determined that we're incompatible + // with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option. 
+ if let Some(enabled) = self.opts.cli_forced_thinlto { + return enabled + } + + // If explicitly specified, use that with the next highest priority + if let Some(enabled) = self.opts.debugging_opts.thinlto { + return enabled + } + + // If there's only one codegen unit and LTO isn't enabled then there's + // no need for ThinLTO so just return false. + if self.codegen_units() == 1 && !self.lto() { + return false + } + + // Right now ThinLTO isn't compatible with incremental compilation. + if self.opts.incremental.is_some() { + return false + } + + // Now we're in "defaults" territory. By default we enable ThinLTO for + // optimized compiles (anything greater than O0). + match self.opts.optimize { + config::OptLevel::No => false, + _ => true, } } } diff --git a/src/librustc_trans/back/write.rs b/src/librustc_trans/back/write.rs index da67940abcb77..cb883e0349f31 100644 --- a/src/librustc_trans/back/write.rs +++ b/src/librustc_trans/back/write.rs @@ -1402,8 +1402,9 @@ fn start_executing_work(tcx: TyCtxt, // for doesn't require full LTO. Some targets require one LLVM module // (they effectively don't have a linker) so it's up to us to use LTO to // link everything together. 
- thinlto: sess.opts.debugging_opts.thinlto && - !sess.target.target.options.requires_lto, + thinlto: sess.thinlto() && + !sess.target.target.options.requires_lto && + unsafe { llvm::LLVMRustThinLTOAvailable() }, no_landing_pads: sess.no_landing_pads(), save_temps: sess.opts.cg.save_temps, diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs index b7408681ed0c8..03c1e4368c9a3 100644 --- a/src/librustc_trans/base.rs +++ b/src/librustc_trans/base.rs @@ -706,7 +706,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, check_for_rustc_errors_attr(tcx); - if tcx.sess.opts.debugging_opts.thinlto { + if let Some(true) = tcx.sess.opts.debugging_opts.thinlto { if unsafe { !llvm::LLVMRustThinLTOAvailable() } { tcx.sess.fatal("this compiler's LLVM does not support ThinLTO"); } diff --git a/src/libstd/sys_common/backtrace.rs b/src/libstd/sys_common/backtrace.rs index 9f0214f5f0510..b5cf6d7d34fcc 100644 --- a/src/libstd/sys_common/backtrace.rs +++ b/src/libstd/sys_common/backtrace.rs @@ -252,8 +252,26 @@ fn output_fileline(w: &mut Write, // Note that this demangler isn't quite as fancy as it could be. We have lots // of other information in our symbols like hashes, version, type information, // etc. Additionally, this doesn't handle glue symbols at all. -pub fn demangle(writer: &mut Write, s: &str, format: PrintFormat) -> io::Result<()> { - // First validate the symbol. If it doesn't look like anything we're +pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> { + // During ThinLTO LLVM may import and rename internal symbols, so strip out + // those endings first as they're one of the last manglings applied to + // symbol names. + let llvm = ".llvm."; + if let Some(i) = s.find(llvm) { + let candidate = &s[i + llvm.len()..]; + let all_hex = candidate.chars().all(|c| { + match c { + 'A' ... 'F' | '0' ... '9' => true, + _ => false, + } + }); + + if all_hex { + s = &s[..i]; + } + } + + // Validate the symbol. 
If it doesn't look like anything we're // expecting, we just print it literally. Note that we must handle non-rust // symbols because we could have any function in the backtrace. let mut valid = true; diff --git a/src/test/run-fail/mir_trans_no_landing_pads.rs b/src/test/run-fail/mir_trans_no_landing_pads.rs index dacb039d89dc5..bafb78fc213e3 100644 --- a/src/test/run-fail/mir_trans_no_landing_pads.rs +++ b/src/test/run-fail/mir_trans_no_landing_pads.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// compile-flags: -Z no-landing-pads +// compile-flags: -Z no-landing-pads -C codegen-units=1 // error-pattern:converging_fn called use std::io::{self, Write}; diff --git a/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs b/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs index 87037c1efed9e..998ee7470bbe9 100644 --- a/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs +++ b/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// compile-flags: -Z no-landing-pads +// compile-flags: -Z no-landing-pads -C codegen-units=1 // error-pattern:diverging_fn called use std::io::{self, Write}; diff --git a/src/test/run-pass/no-landing-pads.rs b/src/test/run-pass/no-landing-pads.rs index e718046ebbcd7..73f123045d249 100644 --- a/src/test/run-pass/no-landing-pads.rs +++ b/src/test/run-pass/no-landing-pads.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// compile-flags: -Z no-landing-pads +// compile-flags: -Z no-landing-pads -C codegen-units=1 // ignore-emscripten no threads support use std::thread;