From 5702d83ba9c97e9d32412a779d2ef1fdf7e2bf8e Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sat, 30 Mar 2024 15:53:24 -0400 Subject: [PATCH] Enable -Zshare-generics for inline(never) functions This avoids inlining cross-crate generic items when possible that are already marked inline(never), implying that the author is not intending for the function to be inlined by callers. As such, having a local copy may make it easier for LLVM to optimize but mostly just adds to binary bloat and codegen time (in theory, TBD on in practice). It might also make sense it expand this with other heuristics (e.g., #[cold]). --- Cargo.lock | 1 + compiler/rustc_codegen_llvm/src/callee.rs | 4 ++- .../src/back/symbol_export.rs | 12 ++++++- compiler/rustc_middle/src/mir/mono.rs | 7 ++++ compiler/rustc_middle/src/ty/context.rs | 2 -- compiler/rustc_middle/src/ty/instance.rs | 18 ++++++---- compiler/rustc_monomorphize/Cargo.toml | 1 + .../rustc_monomorphize/src/partitioning.rs | 34 ++++++++++++++----- .../auxiliary/cgu_generic_function.rs | 11 ++++++ 9 files changed, 71 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c4501d6e574f0..5e5db2680fd23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4361,6 +4361,7 @@ dependencies = [ name = "rustc_monomorphize" version = "0.0.0" dependencies = [ + "rustc_attr", "rustc_data_structures", "rustc_errors", "rustc_fluent_macro", diff --git a/compiler/rustc_codegen_llvm/src/callee.rs b/compiler/rustc_codegen_llvm/src/callee.rs index e675362ac338c..c9a321530c9b3 100644 --- a/compiler/rustc_codegen_llvm/src/callee.rs +++ b/compiler/rustc_codegen_llvm/src/callee.rs @@ -112,7 +112,9 @@ pub fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'tcx>) -> // This is a monomorphization. Its expected visibility depends // on whether we are in share-generics mode. - if cx.tcx.sess.opts.share_generics() { + if cx.tcx.sess.opts.share_generics() + || tcx.codegen_fn_attrs(instance_def_id).inline == rustc_attr::InlineAttr::Never + { // We are in share_generics mode. if let Some(instance_def_id) = instance_def_id.as_local() { diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs index b19f52182b650..26658747070af 100644 --- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs +++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs @@ -306,7 +306,7 @@ fn exported_symbols_provider_local( )); } - if tcx.sess.opts.share_generics() && tcx.local_crate_exports_generics() { + if tcx.local_crate_exports_generics() { use rustc_middle::mir::mono::{Linkage, MonoItem, Visibility}; use rustc_middle::ty::InstanceDef; @@ -334,6 +334,16 @@ fn exported_symbols_provider_local( continue; } + if !tcx.sess.opts.share_generics() { + if tcx.codegen_fn_attrs(mono_item.def_id()).inline == rustc_attr::InlineAttr::Never + { + // this is OK, we explicitly allow sharing inline(never) across crates even + // without share-generics. + } else { + continue; + } + } + match *mono_item { MonoItem::Fn(Instance { def: InstanceDef::Item(def), args }) => { if args.non_erasable_generics(tcx, def).next().is_some() { diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs index 43e1318a75ab1..1010fa359498d 100644 --- a/compiler/rustc_middle/src/mir/mono.rs +++ b/compiler/rustc_middle/src/mir/mono.rs @@ -119,6 +119,13 @@ impl<'tcx> MonoItem<'tcx> { return InstantiationMode::GloballyShared { may_conflict: false }; } + if let InlineAttr::Never = tcx.codegen_fn_attrs(instance.def_id()).inline + && self.is_generic_fn(tcx) + { + // Upgrade inline(never) to a globally shared instance. + return InstantiationMode::GloballyShared { may_conflict: true }; + } + // At this point we don't have explicit linkage and we're an // inlined function. If we're inlining into all CGUs then we'll // be creating a local copy per CGU. diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index 188cb50849dc1..78f13d781bb72 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -1301,8 +1301,6 @@ impl<'tcx> TyCtxt<'tcx> { #[inline] pub fn local_crate_exports_generics(self) -> bool { - debug_assert!(self.sess.opts.share_generics()); - self.crate_types().iter().any(|crate_type| { match crate_type { CrateType::Executable diff --git a/compiler/rustc_middle/src/ty/instance.rs b/compiler/rustc_middle/src/ty/instance.rs index 4fec5653a798c..5f86bddec8e66 100644 --- a/compiler/rustc_middle/src/ty/instance.rs +++ b/compiler/rustc_middle/src/ty/instance.rs @@ -157,19 +157,23 @@ impl<'tcx> Instance<'tcx> { /// This method already takes into account the global `-Zshare-generics` /// setting, always returning `None` if `share-generics` is off. pub fn upstream_monomorphization(&self, tcx: TyCtxt<'tcx>) -> Option { - // If we are not in share generics mode, we don't link to upstream - // monomorphizations but always instantiate our own internal versions - // instead. - if !tcx.sess.opts.share_generics() { - return None; - } - // If this is an item that is defined in the local crate, no upstream // crate can know about it/provide a monomorphization. if self.def_id().is_local() { return None; } + // If we are not in share generics mode, we don't link to upstream + // monomorphizations but always instantiate our own internal versions + // instead. + if !tcx.sess.opts.share_generics() + // However, if the def_id is marked inline(never), then it's fine to just reuse the + // upstream monomorphization. + && tcx.codegen_fn_attrs(self.def_id()).inline != rustc_attr::InlineAttr::Never + { + return None; + } + // If this a non-generic instance, it cannot be a shared monomorphization. self.args.non_erasable_generics(tcx, self.def_id()).next()?; diff --git a/compiler/rustc_monomorphize/Cargo.toml b/compiler/rustc_monomorphize/Cargo.toml index c7f1b9fa78454..54306a68db2e7 100644 --- a/compiler/rustc_monomorphize/Cargo.toml +++ b/compiler/rustc_monomorphize/Cargo.toml @@ -9,6 +9,7 @@ rustc_data_structures = { path = "../rustc_data_structures" } rustc_errors = { path = "../rustc_errors" } rustc_fluent_macro = { path = "../rustc_fluent_macro" } rustc_hir = { path = "../rustc_hir" } +rustc_attr = { path = "../rustc_attr" } rustc_macros = { path = "../rustc_macros" } rustc_middle = { path = "../rustc_middle" } rustc_session = { path = "../rustc_session" } diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index 5a92657cb40d7..0bcfec14fbe09 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -203,8 +203,8 @@ where // available to downstream crates. This depends on whether we are in // share-generics mode and whether the current crate can even have // downstream crates. - let export_generics = - cx.tcx.sess.opts.share_generics() && cx.tcx.local_crate_exports_generics(); + let can_export_generics = cx.tcx.local_crate_exports_generics(); + let always_export_generics = can_export_generics && cx.tcx.sess.opts.share_generics(); let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx); let cgu_name_cache = &mut FxHashMap::default(); @@ -244,7 +244,8 @@ where cx.tcx, &mono_item, &mut can_be_internalized, - export_generics, + can_export_generics, + always_export_generics, ); if visibility == Visibility::Hidden && can_be_internalized { internalization_candidates.insert(mono_item); @@ -727,12 +728,19 @@ fn mono_item_linkage_and_visibility<'tcx>( tcx: TyCtxt<'tcx>, mono_item: &MonoItem<'tcx>, can_be_internalized: &mut bool, - export_generics: bool, + can_export_generics: bool, + always_export_generics: bool, ) -> (Linkage, Visibility) { if let Some(explicit_linkage) = mono_item.explicit_linkage(tcx) { return (explicit_linkage, Visibility::Default); } - let vis = mono_item_visibility(tcx, mono_item, can_be_internalized, export_generics); + let vis = mono_item_visibility( + tcx, + mono_item, + can_be_internalized, + can_export_generics, + always_export_generics, + ); (Linkage::External, vis) } @@ -755,7 +763,8 @@ fn mono_item_visibility<'tcx>( tcx: TyCtxt<'tcx>, mono_item: &MonoItem<'tcx>, can_be_internalized: &mut bool, - export_generics: bool, + can_export_generics: bool, + always_export_generics: bool, ) -> Visibility { let instance = match mono_item { // This is pretty complicated; see below. @@ -812,7 +821,11 @@ fn mono_item_visibility<'tcx>( // Upstream `DefId` instances get different handling than local ones. let Some(def_id) = def_id.as_local() else { - return if export_generics && is_generic { + return if is_generic + && (always_export_generics + || (can_export_generics + && tcx.codegen_fn_attrs(def_id).inline == rustc_attr::InlineAttr::Never)) + { // If it is an upstream monomorphization and we export generics, we must make // it available to downstream crates. *can_be_internalized = false; @@ -823,7 +836,12 @@ fn mono_item_visibility<'tcx>( }; if is_generic { - if export_generics { + // An inline(never) function won't get inlined in upstream crates anyway, so we might as + // well export it. + if always_export_generics + || (can_export_generics + && tcx.codegen_fn_attrs(def_id).inline == rustc_attr::InlineAttr::Never) + { if tcx.is_unreachable_local_definition(def_id) { // This instance cannot be used from another crate. Visibility::Hidden diff --git a/tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs b/tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs index 3926f295742bc..8bb78eb788a22 100644 --- a/tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs +++ b/tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs @@ -11,10 +11,21 @@ pub fn foo(x: T) -> (T, u32, i8) { #[inline(never)] fn bar(x: T) -> (T, Struct) { let _ = not_exported_and_not_generic(0); + exported_and_generic::(0); (x, Struct(1)) } +pub static F: fn(u32) -> u32 = exported_and_generic::; + // These should not contribute to the codegen items of other crates. + +// This is generic, but it's only instantiated with a u32 argument and that instantiation is present +// in the local crate (see F above). +#[inline(never)] +pub fn exported_and_generic(x: T) -> T { + x +} + #[inline(never)] pub fn exported_but_not_generic(x: i32) -> i64 { x as i64