Skip to content

Commit

Permalink
rustc: Wait for all codegen threads to exit
Browse files Browse the repository at this point in the history
This commit updates rustc to wait for all codegen threads to exit before
allowing the main thread to exit. This is a stab in the dark to fix the
mysterious segfaults appearing on #55238, and hopefully we'll see
whether this actually fixes things in practice...
  • Loading branch information
alexcrichton committed Nov 2, 2018
1 parent 016eaf8 commit 14c6835
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 22 deletions.
12 changes: 0 additions & 12 deletions src/Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,6 @@ dependencies = [
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "alloc_jemalloc"
version = "0.0.0"
dependencies = [
"build_helper 0.1.0",
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
"compiler_builtins 0.0.0",
"core 0.0.0",
"libc 0.0.0",
]

[[package]]
name = "alloc_system"
version = "0.0.0"
Expand Down Expand Up @@ -2696,7 +2685,6 @@ name = "std"
version = "0.0.0"
dependencies = [
"alloc 0.0.0",
"alloc_jemalloc 0.0.0",
"alloc_system 0.0.0",
"build_helper 0.1.0",
"cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)",
Expand Down
57 changes: 51 additions & 6 deletions src/librustc_codegen_llvm/back/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1508,6 +1508,7 @@ enum Message {
},
CodegenComplete,
CodegenItem,
CodegenAborted,
}

struct Diagnostic {
Expand Down Expand Up @@ -1788,6 +1789,7 @@ fn start_executing_work(tcx: TyCtxt,
let mut needs_lto = Vec::new();
let mut lto_import_only_modules = Vec::new();
let mut started_lto = false;
let mut codegen_aborted = false;

// This flag tracks whether all items have gone through codegens
let mut codegen_done = false;
Expand All @@ -1805,13 +1807,19 @@ fn start_executing_work(tcx: TyCtxt,
let mut llvm_start_time = None;

// Run the message loop while there's still anything that needs message
// processing:
// processing. Note that as soon as codegen is aborted we simply want to
// wait for all existing work to finish, so many of the conditions here
// only apply if codegen hasn't been aborted as they represent pending
// work to be done.
while !codegen_done ||
work_items.len() > 0 ||
running > 0 ||
needs_lto.len() > 0 ||
lto_import_only_modules.len() > 0 ||
main_thread_worker_state != MainThreadWorkerState::Idle {
(!codegen_aborted && (
work_items.len() > 0 ||
needs_lto.len() > 0 ||
lto_import_only_modules.len() > 0 ||
main_thread_worker_state != MainThreadWorkerState::Idle
))
{

// While there are still CGUs to be codegened, the coordinator has
// to decide how to utilize the compiler processes implicit Token:
Expand Down Expand Up @@ -1840,6 +1848,9 @@ fn start_executing_work(tcx: TyCtxt,
spawn_work(cgcx, item);
}
}
} else if codegen_aborted {
// don't queue up any more work if codegen was aborted, we're
// just waiting for our existing children to finish
} else {
// If we've finished everything related to normal codegen
// then it must be the case that we've got some LTO work to do.
Expand Down Expand Up @@ -1904,7 +1915,7 @@ fn start_executing_work(tcx: TyCtxt,

// Spin up what work we can, only doing this while we've got available
// parallelism slots and work left to spawn.
while work_items.len() > 0 && running < tokens.len() {
while !codegen_aborted && work_items.len() > 0 && running < tokens.len() {
let (item, _) = work_items.pop().unwrap();

maybe_start_llvm_timer(cgcx.config(item.module_kind()),
Expand Down Expand Up @@ -1969,18 +1980,34 @@ fn start_executing_work(tcx: TyCtxt,
if !cgcx.opts.debugging_opts.no_parallel_llvm {
helper.request_token();
}
assert!(!codegen_aborted);
assert_eq!(main_thread_worker_state,
MainThreadWorkerState::Codegenning);
main_thread_worker_state = MainThreadWorkerState::Idle;
}

Message::CodegenComplete => {
codegen_done = true;
assert!(!codegen_aborted);
assert_eq!(main_thread_worker_state,
MainThreadWorkerState::Codegenning);
main_thread_worker_state = MainThreadWorkerState::Idle;
}

// If codegen is aborted that means translation was aborted due
// to some normal-ish compiler error. In this situation we want
// to exit as soon as possible, but we want to make sure all
// existing work has finished. Flag codegen as being done, and
// then conditions above will ensure no more work is spawned but
// we'll keep executing this loop until `running` hits 0.
Message::CodegenAborted => {
assert!(!codegen_aborted);
codegen_done = true;
codegen_aborted = true;
assert_eq!(main_thread_worker_state,
MainThreadWorkerState::Codegenning);
}

// If a thread exits successfully then we drop a token associated
// with that worker and update our `running` count. We may later
// re-acquire a token to continue running more work. We may also not
Expand Down Expand Up @@ -2446,6 +2473,19 @@ impl OngoingCodegen {
drop(self.coordinator_send.send(Box::new(Message::CodegenComplete)));
}

/// Consumes this context to signal that codegen has been aborted and the
/// compiler needs to wind down as fast as it can.
///
/// The calling thread is blocked until `self.future` has been joined, i.e.
/// until the coordinator has observed the abort and its outstanding work has
/// drained. By the time this is called the workers are expected to have
/// already exited, or to be just about to.
pub fn codegen_aborted(self) {
    // Ask the coordinator to stop spawning work and begin shutting down.
    // A failed send is deliberately ignored: this is best-effort teardown.
    let abort_request = Box::new(Message::CodegenAborted);
    let _ = self.coordinator_send.send(abort_request);

    // Block until the coordinator is finished; its result is intentionally
    // discarded since we are aborting anyway.
    let _ = self.future.join();
}

/// Forwards to the shared emitter's `check` for the given session.
pub fn check_for_errors(&self, sess: &Session) {
    // NOTE(review): the `false` flag presumably selects a non-blocking
    // check -- confirm against the definition of `check`.
    let emitter = &self.shared_emitter_main;
    emitter.check(sess, false);
}
Expand All @@ -2464,6 +2504,11 @@ impl OngoingCodegen {
}
}

// impl Drop for OngoingCodegen {
// fn drop(&mut self) {
// }
// }

pub(crate) fn submit_codegened_module_to_llvm(tcx: TyCtxt,
module: ModuleCodegen,
cost: u64) {
Expand Down
57 changes: 53 additions & 4 deletions src/librustc_codegen_llvm/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,13 @@ use rustc_data_structures::small_c_str::SmallCStr;
use rustc_data_structures::sync::Lrc;

use std::any::Any;
use std::cmp;
use std::ffi::CString;
use std::sync::Arc;
use std::time::{Instant, Duration};
use std::i32;
use std::cmp;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use std::sync::mpsc;
use std::time::{Instant, Duration};
use syntax_pos::Span;
use syntax_pos::symbol::InternedString;
use syntax::attr;
Expand Down Expand Up @@ -820,6 +821,7 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
metadata,
rx,
codegen_units.len());
let ongoing_codegen = AbortCodegenOnDrop(Some(ongoing_codegen));

// Codegen an allocator shim, if necessary.
//
Expand Down Expand Up @@ -949,7 +951,54 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
ongoing_codegen.check_for_errors(tcx.sess);

assert_and_save_dep_graph(tcx);
ongoing_codegen
ongoing_codegen.into_inner()
}

/// A curious wrapper structure whose only purpose is to call `codegen_aborted`
/// when it's dropped abnormally.
///
/// In the process of working on rust-lang/rust#55238 a mysterious segfault was
/// stumbled upon. The segfault was never reproduced locally, but it was
/// suspected to be related to the fact that codegen worker threads were
/// sticking around by the time the main thread was exiting, causing issues.
///
/// This structure is an attempt to fix that issue where the `codegen_aborted`
/// message will block until all workers have finished. This should ensure that
/// even if the main codegen thread panics we'll wait for pending work to
/// complete before returning from the main thread, hopefully avoiding
/// segfaults.
///
/// The inner value is wrapped in an `Option` so that `into_inner` can take it
/// out on the normal path, leaving `None` behind and turning the `Drop` impl
/// into a no-op.
///
/// If you see this comment in the code, then it means that this workaround
/// worked! We may yet one day track down the mysterious cause of that
/// segfault...
struct AbortCodegenOnDrop(Option<OngoingCodegen>);

impl AbortCodegenOnDrop {
    /// Unwraps the guard on the normal (non-aborting) path and hands back the
    /// wrapped `OngoingCodegen`.
    ///
    /// The slot is left as `None`, so when `self` is dropped at the end of
    /// this function the `Drop` impl finds nothing to abort.
    ///
    /// # Panics
    ///
    /// Panics if the inner value has already been taken.
    fn into_inner(mut self) -> OngoingCodegen {
        let taken = self.0.take();
        taken.unwrap()
    }
}

/// Gives shared access to the wrapped `OngoingCodegen` so the guard can be
/// used in place of the value it protects.
impl Deref for AbortCodegenOnDrop {
    type Target = OngoingCodegen;

    /// Borrows the wrapped value; panics if it has already been taken.
    fn deref(&self) -> &Self::Target {
        let slot: &Option<OngoingCodegen> = &self.0;
        slot.as_ref().unwrap()
    }
}

/// Gives exclusive access to the wrapped `OngoingCodegen`.
impl DerefMut for AbortCodegenOnDrop {
    /// Mutably borrows the wrapped value; panics if it has already been taken.
    fn deref_mut(&mut self) -> &mut Self::Target {
        let slot: &mut Option<OngoingCodegen> = &mut self.0;
        slot.as_mut().unwrap()
    }
}

/// Aborts codegen if the guard is dropped while it still owns the
/// `OngoingCodegen`, i.e. when `into_inner` was never reached.
impl Drop for AbortCodegenOnDrop {
    fn drop(&mut self) {
        match self.0.take() {
            // Still armed: signal the abort, which blocks until shutdown
            // has completed.
            Some(pending) => pending.codegen_aborted(),
            // Already disarmed by `into_inner`; nothing to do.
            None => {}
        }
    }
}

fn assert_and_save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>) {
Expand Down

0 comments on commit 14c6835

Please sign in to comment.