From ae0521dd4b3f5f6ed455ee2942a6b392dbb0bfdb Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Thu, 14 Dec 2023 17:52:59 -0500 Subject: [PATCH] (crudely) implement MIR-only rlibs --- .../src/back/symbol_export.rs | 31 +++++++- compiler/rustc_interface/src/tests.rs | 1 + .../src/rmeta/decoder/cstore_impl.rs | 8 ++ compiler/rustc_metadata/src/rmeta/encoder.rs | 10 ++- compiler/rustc_metadata/src/rmeta/mod.rs | 1 + compiler/rustc_middle/src/mir/mono.rs | 11 +++ compiler/rustc_middle/src/query/mod.rs | 9 +++ compiler/rustc_middle/src/ty/context.rs | 4 + compiler/rustc_monomorphize/src/collector.rs | 79 +++++++++++++++++-- .../rustc_monomorphize/src/partitioning.rs | 27 ++++++- compiler/rustc_session/src/options.rs | 2 + 11 files changed, 165 insertions(+), 18 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs index 850d36872dd57..dbd17920566d3 100644 --- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs +++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs @@ -209,8 +209,14 @@ fn exported_symbols_provider_local( if allocator_kind_for_codegen(tcx).is_some() { for symbol_name in ALLOCATOR_METHODS .iter() - .map(|method| format!("__rust_{}", method.name)) - .chain(["__rust_alloc_error_handler".to_string(), OomStrategy::SYMBOL.to_string()]) + .flat_map(|method| { + [format!("__rust_{}", method.name), format!("__rdl_{}", method.name)] + }) + .chain([ + "__rust_alloc_error_handler".to_string(), + OomStrategy::SYMBOL.to_string(), + "__rg_oom".to_string(), + ]) { let exported_symbol = ExportedSymbol::NoDefId(SymbolName::new(tcx, &symbol_name)); @@ -349,6 +355,27 @@ fn exported_symbols_provider_local( } } + if tcx.building_mir_only_rlib() { + for def_id in tcx.mir_keys(()) { + if !matches!(tcx.def_kind(def_id.to_def_id()), DefKind::Static { .. 
}) { + continue; + } + if tcx.is_reachable_non_generic(def_id.to_def_id()) { + continue; + } + let codegen_attrs = tcx.codegen_fn_attrs(def_id.to_def_id()); + symbols.push((ExportedSymbol::NonGeneric(def_id.to_def_id()), SymbolExportInfo { + level: symbol_export_level(tcx, def_id.to_def_id()), + kind: if codegen_attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) { + SymbolExportKind::Tls + } else { + SymbolExportKind::Data + }, + used: true, + })); + } + } + // Sort so we get a stable incr. comp. hash. symbols.sort_by_cached_key(|s| s.0.symbol_name_for_local_instance(tcx)); diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 2361231b3fba1..bece7ccfe6522 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -807,6 +807,7 @@ fn test_unstable_options_tracking_hash() { tracked!(mir_emit_retag, true); tracked!(mir_enable_passes, vec![("DestProp".to_string(), false)]); tracked!(mir_keep_place_mention, true); + tracked!(mir_only_rlibs, true); tracked!(mir_opt_level, Some(4)); tracked!(move_size_limit, Some(4096)); tracked!(mutable_noalias, false); diff --git a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs index 045fd0565ba0d..8647f3c6a7837 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs @@ -573,6 +573,14 @@ pub(in crate::rmeta) fn provide(providers: &mut Providers) { .filter_map(|(cnum, data)| data.used().then_some(cnum)), ) }, + mir_only_crates: |tcx, ()| { + tcx.untracked().cstore.freeze(); + let store = CStore::from_tcx(tcx); + let crates = store + .iter_crate_data() + .filter_map(|(cnum, data)| if data.root.is_mir_only { Some(cnum) } else { None }); + tcx.arena.alloc_from_iter(crates) + }, ..providers.queries }; provide_extern(&mut providers.extern_queries); diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs 
b/compiler/rustc_metadata/src/rmeta/encoder.rs index b5391247cea54..310b4049f8ffd 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -736,6 +736,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { impls, incoherent_impls, exported_symbols, + is_mir_only: tcx.building_mir_only_rlib(), interpret_alloc_index, tables, syntax_contexts, @@ -1058,12 +1059,13 @@ fn should_encode_mir( reachable_set: &LocalDefIdSet, def_id: LocalDefId, ) -> (bool, bool) { + let opts = &tcx.sess.opts; + let mir_required = opts.unstable_opts.always_encode_mir || tcx.building_mir_only_rlib(); match tcx.def_kind(def_id) { // Constructors DefKind::Ctor(_, _) => { - let mir_opt_base = tcx.sess.opts.output_types.should_codegen() - || tcx.sess.opts.unstable_opts.always_encode_mir; - (true, mir_opt_base) + let opt = mir_required || opts.output_types.should_codegen(); + (true, opt) } // Constants DefKind::AnonConst | DefKind::InlineConst | DefKind::AssocConst | DefKind::Const => { @@ -1075,7 +1077,7 @@ fn should_encode_mir( // Full-fledged functions + closures DefKind::AssocFn | DefKind::Fn | DefKind::Closure => { let generics = tcx.generics_of(def_id); - let opt = tcx.sess.opts.unstable_opts.always_encode_mir + let opt = mir_required || (tcx.sess.opts.output_types.should_codegen() && reachable_set.contains(&def_id) && (generics.requires_monomorphization(tcx) diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index 58f58efb116fe..a9d492f1a6e2b 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -275,6 +275,7 @@ pub(crate) struct CrateRoot { debugger_visualizers: LazyArray, exported_symbols: LazyArray<(ExportedSymbol<'static>, SymbolExportInfo)>, + is_mir_only: bool, syntax_contexts: SyntaxContextTable, expn_data: ExpnDataTable, diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs index d8d99deeb2c95..f11c54a13e092 
100644 --- a/compiler/rustc_middle/src/mir/mono.rs +++ b/compiler/rustc_middle/src/mir/mono.rs @@ -104,6 +104,17 @@ impl<'tcx> MonoItem<'tcx> { } pub fn instantiation_mode(&self, tcx: TyCtxt<'tcx>) -> InstantiationMode { + // Always do LocalCopy codegen when building a MIR-only rlib + if tcx.building_mir_only_rlib() { + return InstantiationMode::LocalCopy; + } + // If this is a monomorphization from a MIR-only rlib and we are building another lib, do + // local codegen. + if tcx.mir_only_crates(()).iter().any(|c| *c == self.def_id().krate) + && tcx.crate_types() == &[rustc_session::config::CrateType::Rlib] + { + return InstantiationMode::LocalCopy; + } let generate_cgu_internal_copies = tcx .sess .opts diff --git a/compiler/rustc_middle/src/query/mod.rs b/compiler/rustc_middle/src/query/mod.rs index 54ead9a7a7595..4a7958d6fe6d5 100644 --- a/compiler/rustc_middle/src/query/mod.rs +++ b/compiler/rustc_middle/src/query/mod.rs @@ -2333,6 +2333,15 @@ rustc_queries! { desc { "check for feature-dependent ABI" } cache_on_disk_if { true } } + + query find_field((def_id, ident): (DefId, rustc_span::symbol::Ident)) -> Option { + desc { |tcx| "find the index of maybe nested field `{ident}` in `{}`", tcx.def_path_str(def_id) } + } + + query mir_only_crates(_: ()) -> &'tcx [CrateNum] { + eval_always + desc { "fetching all foreign crates built in mir-only mode" } + } } rustc_query_append! { define_callbacks! 
} diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index c55733da7b3a1..0cdb57ece6f4f 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -1786,6 +1786,10 @@ impl<'tcx> TyCtxt<'tcx> { pub fn dcx(self) -> DiagCtxtHandle<'tcx> { self.sess.dcx() } + + pub fn building_mir_only_rlib(self) -> bool { + self.sess.opts.unstable_opts.mir_only_rlibs && self.crate_types() == &[CrateType::Rlib] + } } impl<'tcx> TyCtxtAt<'tcx> { diff --git a/compiler/rustc_monomorphize/src/collector.rs b/compiler/rustc_monomorphize/src/collector.rs index 85151e5f09300..c76ae4e271835 100644 --- a/compiler/rustc_monomorphize/src/collector.rs +++ b/compiler/rustc_monomorphize/src/collector.rs @@ -219,6 +219,7 @@ use rustc_hir::def::DefKind; use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId}; use rustc_hir::lang_items::LangItem; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; +use rustc_middle::middle::exported_symbols::ExportedSymbol; use rustc_middle::mir::interpret::{AllocId, ErrorHandled, GlobalAlloc, Scalar}; use rustc_middle::mir::mono::{InstantiationMode, MonoItem}; use rustc_middle::mir::visit::Visitor as MirVisitor; @@ -234,7 +235,7 @@ use rustc_middle::ty::{ use rustc_middle::util::Providers; use rustc_middle::{bug, span_bug}; use rustc_session::Limit; -use rustc_session::config::EntryFnType; +use rustc_session::config::{CrateType, EntryFnType}; use rustc_span::source_map::{Spanned, dummy_spanned, respan}; use rustc_span::symbol::{Ident, sym}; use rustc_span::{DUMMY_SP, Span}; @@ -941,9 +942,24 @@ fn should_codegen_locally<'tcx>(tcx: TyCtxtAt<'tcx>, instance: Instance<'tcx>) - return true; }; + let def_is_for_mir_only_rlib = if def_id.krate == rustc_hir::def_id::LOCAL_CRATE { + tcx.building_mir_only_rlib() + } else { + tcx.mir_only_crates(()).iter().any(|c| *c == def_id.krate) + }; + if tcx.is_foreign_item(def_id) { - // Foreign items are always linked against, there's no way 
of instantiating them. - return false; + if def_is_for_mir_only_rlib { + return tcx.is_mir_available(instance.def_id()); + } else { + // Foreign items are always linked against, there's no way of instantiating them. + return false; + } + } + + if def_is_for_mir_only_rlib { + let has_mir = tcx.is_mir_available(instance.def_id()); + return has_mir || matches!(tcx.def_kind(instance.def_id()), DefKind::Static { .. }); } if def_id.is_local() { @@ -951,6 +967,13 @@ fn should_codegen_locally<'tcx>(tcx: TyCtxtAt<'tcx>, instance: Instance<'tcx>) - return true; } + if !def_is_for_mir_only_rlib { + if let DefKind::Static { .. } = tcx.def_kind(def_id) { + // We cannot monomorphize statics from upstream crates. + return false; + } + } + if tcx.is_reachable_non_generic(def_id) || instance.polymorphize(*tcx).upstream_monomorphization(*tcx).is_some() { @@ -958,11 +981,6 @@ fn should_codegen_locally<'tcx>(tcx: TyCtxtAt<'tcx>, instance: Instance<'tcx>) - return false; } - if let DefKind::Static { .. } = tcx.def_kind(def_id) { - // We cannot monomorphize statics from upstream crates. - return false; - } - if !tcx.is_mir_available(def_id) { tcx.dcx().emit_fatal(NoOptimizedMir { span: tcx.def_span(def_id), @@ -1354,6 +1372,7 @@ fn collect_roots(tcx: TyCtxt<'_>, mode: MonoItemCollectionStrategy) -> Vec RootCollector<'_, 'v> { self.output.push(create_fn_mono_item(self.tcx, start_instance, DUMMY_SP)); } + + fn push_extra_roots_from_mir_only_rlibs(&mut self) { + // An upstream extern function may be used anywhere in the dependency tree, so we + // cannot do any reachability analysis on them. We blindly monomorphize every + // extern function declared anywhere in our dependency tree. We must give them + // GloballyShared codegen because we don't know if the only call to an upstream + // extern function is also upstream: We don't have reachability information. All we + // can do is codegen all extern functions and pray for the linker to delete the + // ones that are unreachable.
+ if !self.tcx.crate_types().iter().any(|c| !matches!(c, CrateType::Rlib)) { + return; + } + + for (symbol, _info) in self + .tcx + .mir_only_crates(()) + .into_iter() + .flat_map(|krate| self.tcx.exported_symbols(*krate)) + { + let def_id = match symbol { + ExportedSymbol::NonGeneric(def_id) => def_id, + ExportedSymbol::ThreadLocalShim(def_id) => { + let item = MonoItem::Fn(Instance { + def: InstanceKind::ThreadLocalShim(*def_id), + args: GenericArgs::empty(), + }); + self.output.push(dummy_spanned(item)); + continue; + } + _ => continue, + }; + match self.tcx.def_kind(def_id) { + DefKind::Fn | DefKind::AssocFn => { + let instance = Instance::mono(self.tcx, *def_id); + let item = create_fn_mono_item(self.tcx, instance, DUMMY_SP); + self.output.push(item); + } + DefKind::Static { .. } => { + self.output.push(dummy_spanned(MonoItem::Static(*def_id))); + } + _ => {} + } + } + } } #[instrument(level = "debug", skip(tcx, output))] diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index e2a6d392ca093..11771563561ef 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -146,6 +146,12 @@ fn partition<'tcx, I>( where I: Iterator>, { + if tcx.building_mir_only_rlib() { + let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx); + let cgu_name = fallback_cgu_name(cgu_name_builder); + return vec![CodegenUnit::new(cgu_name)]; + } + let _prof_timer = tcx.prof.generic_activity("cgu_partitioning"); let cx = &PartitioningCx { tcx, usage_map }; @@ -170,6 +176,10 @@ where debug_dump(tcx, "MERGE", &codegen_units); } + if !codegen_units.is_sorted_by(|a, b| a.name().as_str() < b.name().as_str()) { + bug!("unsorted CGUs"); + } + // Make as many symbols "internal" as possible, so LLVM has more freedom to // optimize. 
if !tcx.sess.link_dead_code() { @@ -190,7 +200,12 @@ where for cgu in codegen_units.iter() { names += &format!("- {}\n", cgu.name()); } - bug!("unsorted CGUs:\n{names}"); + codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str())); + let mut sorted_names = String::new(); + for cgu in codegen_units.iter() { + sorted_names += &format!("- {}\n", cgu.name()); + } + bug!("unsorted CGUs:\n{names}\n{sorted_names}"); } codegen_units @@ -214,6 +229,9 @@ where let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx); let cgu_name_cache = &mut UnordMap::default(); + let start_fn = cx.tcx.lang_items().start_fn(); + let entry_fn = cx.tcx.entry_fn(()).map(|(id, _)| id); + for mono_item in mono_items { // Handle only root (GloballyShared) items directly here. Inlined (LocalCopy) items // are handled at the bottom of the loop based on reachability, with one exception. @@ -222,7 +240,8 @@ where match mono_item.instantiation_mode(cx.tcx) { InstantiationMode::GloballyShared { .. } => {} InstantiationMode::LocalCopy => { - if Some(mono_item.def_id()) != cx.tcx.lang_items().start_fn() { + let def_id = mono_item.def_id(); + if ![start_fn, entry_fn].contains(&Some(def_id)) { continue; } } @@ -244,7 +263,7 @@ where let cgu = codegen_units.entry(cgu_name).or_insert_with(|| CodegenUnit::new(cgu_name)); - let mut can_be_internalized = true; + let mut can_be_internalized = false; let (linkage, visibility) = mono_item_linkage_and_visibility( cx.tcx, &mono_item, @@ -486,7 +505,7 @@ fn merge_codegen_units<'tcx>( // If we didn't zero-pad the sorted-by-name order would be `XYZ-cgu.0`, // `XYZ-cgu.1`, `XYZ-cgu.10`, `XYZ-cgu.11`, ..., `XYZ-cgu.2`, etc. 
codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate())); - let num_digits = codegen_units.len().ilog10() as usize + 1; + let num_digits = std::hint::black_box(codegen_units.len().ilog10() as usize + 1); for (index, cgu) in codegen_units.iter_mut().enumerate() { // Note: `WorkItem::short_description` depends on this name ending // with `-cgu.` followed by a numeric suffix. Please keep it in diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 087ba0522ebe2..f061f5b0cd2b0 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1904,6 +1904,8 @@ options! { mir_keep_place_mention: bool = (false, parse_bool, [TRACKED], "keep place mention MIR statements, interpreted e.g., by miri; implies -Zmir-opt-level=0 \ (default: no)"), + mir_only_rlibs: bool = (false, parse_bool, [TRACKED], + "only generate MIR when building rlibs (default: no)"), #[rustc_lint_opt_deny_field_access("use `Session::mir_opt_level` instead of this field")] mir_opt_level: Option = (None, parse_opt_number, [TRACKED], "MIR optimization level (0-4; default: 1 in non optimized builds and 2 in optimized builds)"),