From 9488cacc52a851833be51c3f9d1555e9f9c9f586 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 2 Dec 2021 12:24:25 +0100 Subject: [PATCH] Use object crate for .rustc metadata generation We already use the object crate for generating uncompressed .rmeta metadata object files. This switches the generation of compressed .rustc object files to use the object crate as well. These have slightly different requirements in that .rmeta should be completely excluded from any final compilation artifacts, while .rustc should be part of shared objects, but not loaded into memory. The primary motivation for this change is #90326: In LLVM 14, the current way of setting section flags (and in particular, preventing the setting of SHF_ALLOC) will no longer work. There are other ways we could work around this, but switching to the object crate seems like the most elegant approach, as we already use it for .rmeta, and as it makes this independent of the codegen backend. In particular, we don't need separate handling in codegen_llvm and codegen_gcc. codegen_cranelift should be able to reuse the implementation as well, though I have omitted that here, as it is not based on codegen_ssa. This change mostly extracts the existing code for .rmeta handling to allow using it for .rustc as well, and adjusts the codegen infrastructure to handle the metadata object file separately: We no longer create a backend-specific module for it, and directly produce the compiled module instead. This does not fix #90326 by itself yet, as .llvmbc will need to be handled separately. 
--- Cargo.lock | 2 +- compiler/rustc_codegen_gcc/src/base.rs | 39 ---- compiler/rustc_codegen_gcc/src/lib.rs | 5 - compiler/rustc_codegen_llvm/Cargo.toml | 1 - compiler/rustc_codegen_llvm/src/base.rs | 53 +---- compiler/rustc_codegen_llvm/src/lib.rs | 8 - compiler/rustc_codegen_ssa/Cargo.toml | 1 + compiler/rustc_codegen_ssa/src/back/link.rs | 137 +----------- .../rustc_codegen_ssa/src/back/metadata.rs | 197 +++++++++++++++++- compiler/rustc_codegen_ssa/src/back/write.rs | 19 +- compiler/rustc_codegen_ssa/src/base.rs | 66 +++--- .../rustc_codegen_ssa/src/traits/backend.rs | 6 - 12 files changed, 246 insertions(+), 288 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2233162be3b6b..29dcfe440af4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3728,7 +3728,6 @@ dependencies = [ "rustc_span", "rustc_target", "smallvec", - "snap", "tracing", ] @@ -3763,6 +3762,7 @@ dependencies = [ "rustc_symbol_mangling", "rustc_target", "smallvec", + "snap", "tempfile", "tracing", ] diff --git a/compiler/rustc_codegen_gcc/src/base.rs b/compiler/rustc_codegen_gcc/src/base.rs index a3b8d328388e0..dee70bf75369d 100644 --- a/compiler/rustc_codegen_gcc/src/base.rs +++ b/compiler/rustc_codegen_gcc/src/base.rs @@ -7,14 +7,12 @@ use gccjit::{ GlobalKind, }; use rustc_middle::dep_graph; -use rustc_middle::middle::exported_symbols; use rustc_middle::ty::TyCtxt; use rustc_middle::mir::mono::Linkage; use rustc_codegen_ssa::{ModuleCodegen, ModuleKind}; use rustc_codegen_ssa::base::maybe_create_entry_wrapper; use rustc_codegen_ssa::mono_item::MonoItemExt; use rustc_codegen_ssa::traits::DebugInfoMethods; -use rustc_metadata::EncodedMetadata; use rustc_session::config::DebugInfo; use rustc_span::Symbol; @@ -132,40 +130,3 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol) -> (Modul (module, cost) } - -pub fn write_compressed_metadata<'tcx>(tcx: TyCtxt<'tcx>, metadata: &EncodedMetadata, gcc_module: &mut GccContext) { - use snap::write::FrameEncoder; - use std::io::Write; - - 
// Historical note: - // - // When using link.exe it was seen that the section name `.note.rustc` - // was getting shortened to `.note.ru`, and according to the PE and COFF - // specification: - // - // > Executable images do not use a string table and do not support - // > section names longer than 8 characters - // - // https://docs.microsoft.com/en-us/windows/win32/debug/pe-format - // - // As a result, we choose a slightly shorter name! As to why - // `.note.rustc` works on MinGW, see - // https://github.com/llvm/llvm-project/blob/llvmorg-12.0.0/lld/COFF/Writer.cpp#L1190-L1197 - let section_name = if tcx.sess.target.is_like_osx { "__DATA,.rustc" } else { ".rustc" }; - - let context = &gcc_module.context; - let mut compressed = rustc_metadata::METADATA_HEADER.to_vec(); - FrameEncoder::new(&mut compressed).write_all(&metadata.raw_data()).unwrap(); - - let name = exported_symbols::metadata_symbol_name(tcx); - let typ = context.new_array_type(None, context.new_type::(), compressed.len() as i32); - let global = context.new_global(None, GlobalKind::Exported, typ, name); - global.global_set_initializer(&compressed); - global.set_link_section(section_name); - - // Also generate a .section directive to force no - // flags, at least for ELF outputs, so that the - // metadata doesn't get loaded into memory. - let directive = format!(".section {}", section_name); - context.add_top_level_asm(None, &directive); -} diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs index 629003d7982b9..a549bcbd93106 100644 --- a/compiler/rustc_codegen_gcc/src/lib.rs +++ b/compiler/rustc_codegen_gcc/src/lib.rs @@ -22,7 +22,6 @@ extern crate rustc_session; extern crate rustc_span; extern crate rustc_symbol_mangling; extern crate rustc_target; -extern crate snap; // This prevents duplicating functions and statics that are already part of the host rustc process. 
#[allow(unused_extern_crates)] @@ -128,10 +127,6 @@ impl ExtraBackendMethods for GccCodegenBackend { } } - fn write_compressed_metadata<'tcx>(&self, tcx: TyCtxt<'tcx>, metadata: &EncodedMetadata, gcc_module: &mut Self::Module) { - base::write_compressed_metadata(tcx, metadata, gcc_module) - } - fn codegen_allocator<'tcx>(&self, tcx: TyCtxt<'tcx>, mods: &mut Self::Module, module_name: &str, kind: AllocatorKind, has_alloc_error_handler: bool) { unsafe { allocator::codegen(tcx, mods, module_name, kind, has_alloc_error_handler) } } diff --git a/compiler/rustc_codegen_llvm/Cargo.toml b/compiler/rustc_codegen_llvm/Cargo.toml index 5f3f533447532..f77b0bc8cc9a2 100644 --- a/compiler/rustc_codegen_llvm/Cargo.toml +++ b/compiler/rustc_codegen_llvm/Cargo.toml @@ -12,7 +12,6 @@ bitflags = "1.0" cstr = "0.2" libc = "0.2" measureme = "10.0.0" -snap = "1" tracing = "0.1" rustc_middle = { path = "../rustc_middle" } rustc-demangle = "0.1.21" diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs index 8766caef6e379..7a3e11e32bc1a 100644 --- a/compiler/rustc_codegen_llvm/src/base.rs +++ b/compiler/rustc_codegen_llvm/src/base.rs @@ -9,13 +9,12 @@ //! int)` and `rec(x=int, y=int, z=int)` will have the same [`llvm::Type`]. //! //! [`Ty`]: rustc_middle::ty::Ty -//! [`val_ty`]: common::val_ty +//! 
[`val_ty`]: crate::common::val_ty use super::ModuleLlvm; use crate::attributes; use crate::builder::Builder; -use crate::common; use crate::context::CodegenCx; use crate::llvm; use crate::value::Value; @@ -25,66 +24,16 @@ use rustc_codegen_ssa::mono_item::MonoItemExt; use rustc_codegen_ssa::traits::*; use rustc_codegen_ssa::{ModuleCodegen, ModuleKind}; use rustc_data_structures::small_c_str::SmallCStr; -use rustc_metadata::EncodedMetadata; use rustc_middle::dep_graph; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; -use rustc_middle::middle::exported_symbols; use rustc_middle::mir::mono::{Linkage, Visibility}; use rustc_middle::ty::TyCtxt; use rustc_session::config::DebugInfo; use rustc_span::symbol::Symbol; use rustc_target::spec::SanitizerSet; -use std::ffi::CString; use std::time::Instant; -pub fn write_compressed_metadata<'tcx>( - tcx: TyCtxt<'tcx>, - metadata: &EncodedMetadata, - llvm_module: &mut ModuleLlvm, -) { - use snap::write::FrameEncoder; - use std::io::Write; - - // Historical note: - // - // When using link.exe it was seen that the section name `.note.rustc` - // was getting shortened to `.note.ru`, and according to the PE and COFF - // specification: - // - // > Executable images do not use a string table and do not support - // > section names longer than 8 characters - // - // https://docs.microsoft.com/en-us/windows/win32/debug/pe-format - // - // As a result, we choose a slightly shorter name! 
As to why - // `.note.rustc` works on MinGW, see - // https://github.com/llvm/llvm-project/blob/llvmorg-12.0.0/lld/COFF/Writer.cpp#L1190-L1197 - let section_name = if tcx.sess.target.is_like_osx { "__DATA,.rustc" } else { ".rustc" }; - - let (metadata_llcx, metadata_llmod) = (&*llvm_module.llcx, llvm_module.llmod()); - let mut compressed = rustc_metadata::METADATA_HEADER.to_vec(); - FrameEncoder::new(&mut compressed).write_all(metadata.raw_data()).unwrap(); - - let llmeta = common::bytes_in_context(metadata_llcx, &compressed); - let llconst = common::struct_in_context(metadata_llcx, &[llmeta], false); - let name = exported_symbols::metadata_symbol_name(tcx); - let buf = CString::new(name).unwrap(); - let llglobal = - unsafe { llvm::LLVMAddGlobal(metadata_llmod, common::val_ty(llconst), buf.as_ptr()) }; - unsafe { - llvm::LLVMSetInitializer(llglobal, llconst); - let name = SmallCStr::new(section_name); - llvm::LLVMSetSection(llglobal, name.as_ptr()); - - // Also generate a .section directive to force no - // flags, at least for ELF outputs, so that the - // metadata doesn't get loaded into memory. 
- let directive = format!(".section {}", section_name); - llvm::LLVMSetModuleInlineAsm2(metadata_llmod, directive.as_ptr().cast(), directive.len()) - } -} - pub struct ValueIter<'ll> { cur: Option<&'ll Value>, step: unsafe extern "C" fn(&'ll Value) -> Option<&'ll Value>, diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index c66d7d872c915..62c17e6a10f17 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -102,14 +102,6 @@ impl ExtraBackendMethods for LlvmCodegenBackend { ModuleLlvm::new_metadata(tcx, mod_name) } - fn write_compressed_metadata<'tcx>( - &self, - tcx: TyCtxt<'tcx>, - metadata: &EncodedMetadata, - llvm_module: &mut ModuleLlvm, - ) { - base::write_compressed_metadata(tcx, metadata, llvm_module) - } fn codegen_allocator<'tcx>( &self, tcx: TyCtxt<'tcx>, diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml index 83dd625611057..18dbcd8e52da8 100644 --- a/compiler/rustc_codegen_ssa/Cargo.toml +++ b/compiler/rustc_codegen_ssa/Cargo.toml @@ -15,6 +15,7 @@ libc = "0.2.50" jobserver = "0.1.22" tempfile = "3.2" pathdiff = "0.2.0" +snap = "1" smallvec = { version = "1.6.1", features = ["union", "may_dangle"] } regex = "1.4" diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index 638b2a7b5a9f2..bf45810de7708 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -14,7 +14,6 @@ use rustc_session::utils::NativeLibKind; /// need out of the shared crate context before we get rid of it. 
use rustc_session::{filesearch, Session}; use rustc_span::symbol::Symbol; -use rustc_target::abi::Endian; use rustc_target::spec::crt_objects::{CrtObjects, CrtObjectsFallback}; use rustc_target::spec::{LinkOutputKind, LinkerFlavor, LldFlavor, SplitDebuginfo}; use rustc_target::spec::{PanicStrategy, RelocModel, RelroLevel, SanitizerSet, Target}; @@ -22,6 +21,7 @@ use rustc_target::spec::{PanicStrategy, RelocModel, RelroLevel, SanitizerSet, Ta use super::archive::{find_library, ArchiveBuilder}; use super::command::Command; use super::linker::{self, Linker}; +use super::metadata::create_rmeta_file; use super::rpath::{self, RPathConfig}; use crate::{ looks_like_rust_object_file, CodegenResults, CompiledModule, CrateInfo, NativeLib, @@ -29,9 +29,6 @@ use crate::{ }; use cc::windows_registry; -use object::elf; -use object::write::Object; -use object::{Architecture, BinaryFormat, Endianness, FileFlags, SectionFlags, SectionKind}; use regex::Regex; use tempfile::Builder as TempFileBuilder; @@ -339,7 +336,7 @@ fn link_rlib<'a, B: ArchiveBuilder<'a>>( // metadata in rlib files is wrapped in a "dummy" object file for // the target platform so the rlib can be processed entirely by // normal linkers for the platform. - let metadata = create_metadata_file(sess, codegen_results.metadata.raw_data()); + let metadata = create_rmeta_file(sess, codegen_results.metadata.raw_data()); ab.add_file(&emit_metadata(sess, &metadata, tmpdir)); // After adding all files to the archive, we need to update the @@ -358,136 +355,6 @@ fn link_rlib<'a, B: ArchiveBuilder<'a>>( } } return Ok(ab); - - // For rlibs we "pack" rustc metadata into a dummy object file. When rustc - // creates a dylib crate type it will pass `--whole-archive` (or the - // platform equivalent) to include all object files from an rlib into the - // final dylib itself. 
This causes linkers to iterate and try to include all - // files located in an archive, so if metadata is stored in an archive then - // it needs to be of a form that the linker will be able to process. - // - // Note, though, that we don't actually want this metadata to show up in any - // final output of the compiler. Instead this is purely for rustc's own - // metadata tracking purposes. - // - // With the above in mind, each "flavor" of object format gets special - // handling here depending on the target: - // - // * MachO - macos-like targets will insert the metadata into a section that - // is sort of fake dwarf debug info. Inspecting the source of the macos - // linker this causes these sections to be skipped automatically because - // it's not in an allowlist of otherwise well known dwarf section names to - // go into the final artifact. - // - // * WebAssembly - we actually don't have any container format for this - // target. WebAssembly doesn't support the `dylib` crate type anyway so - // there's no need for us to support this at this time. Consequently the - // metadata bytes are simply stored as-is into an rlib. - // - // * COFF - Windows-like targets create an object with a section that has - // the `IMAGE_SCN_LNK_REMOVE` flag set which ensures that if the linker - // ever sees the section it doesn't process it and it's removed. - // - // * ELF - All other targets are similar to Windows in that there's a - // `SHF_EXCLUDE` flag we can set on sections in an object file to get - // automatically removed from the final output. - // - // Note that this metdata format is kept in sync with - // `rustc_codegen_ssa/src/back/metadata.rs`. - fn create_metadata_file(sess: &Session, metadata: &[u8]) -> Vec { - let endianness = match sess.target.options.endian { - Endian::Little => Endianness::Little, - Endian::Big => Endianness::Big, - }; - let architecture = match &sess.target.arch[..] 
{ - "arm" => Architecture::Arm, - "aarch64" => Architecture::Aarch64, - "x86" => Architecture::I386, - "s390x" => Architecture::S390x, - "mips" => Architecture::Mips, - "mips64" => Architecture::Mips64, - "x86_64" => { - if sess.target.pointer_width == 32 { - Architecture::X86_64_X32 - } else { - Architecture::X86_64 - } - } - "powerpc" => Architecture::PowerPc, - "powerpc64" => Architecture::PowerPc64, - "riscv32" => Architecture::Riscv32, - "riscv64" => Architecture::Riscv64, - "sparc64" => Architecture::Sparc64, - - // This is used to handle all "other" targets. This includes targets - // in two categories: - // - // * Some targets don't have support in the `object` crate just yet - // to write an object file. These targets are likely to get filled - // out over time. - // - // * Targets like WebAssembly don't support dylibs, so the purpose - // of putting metadata in object files, to support linking rlibs - // into dylibs, is moot. - // - // In both of these cases it means that linking into dylibs will - // not be supported by rustc. This doesn't matter for targets like - // WebAssembly and for targets not supported by the `object` crate - // yet it means that work will need to be done in the `object` crate - // to add a case above. 
- _ => return metadata.to_vec(), - }; - - if sess.target.is_like_osx { - let mut file = Object::new(BinaryFormat::MachO, architecture, endianness); - - let section = - file.add_section(b"__DWARF".to_vec(), b".rmeta".to_vec(), SectionKind::Debug); - file.append_section_data(section, metadata, 1); - file.write().unwrap() - } else if sess.target.is_like_windows { - const IMAGE_SCN_LNK_REMOVE: u32 = 0; - let mut file = Object::new(BinaryFormat::Coff, architecture, endianness); - - let section = file.add_section(Vec::new(), b".rmeta".to_vec(), SectionKind::Debug); - file.section_mut(section).flags = - SectionFlags::Coff { characteristics: IMAGE_SCN_LNK_REMOVE }; - file.append_section_data(section, metadata, 1); - file.write().unwrap() - } else { - const SHF_EXCLUDE: u64 = 0x80000000; - let mut file = Object::new(BinaryFormat::Elf, architecture, endianness); - - match &sess.target.arch[..] { - // copied from `mipsel-linux-gnu-gcc foo.c -c` and - // inspecting the resulting `e_flags` field. - "mips" => { - let e_flags = elf::EF_MIPS_ARCH_32R2 | elf::EF_MIPS_CPIC | elf::EF_MIPS_PIC; - file.flags = FileFlags::Elf { e_flags }; - } - // copied from `mips64el-linux-gnuabi64-gcc foo.c -c` - "mips64" => { - let e_flags = elf::EF_MIPS_ARCH_64R2 | elf::EF_MIPS_CPIC | elf::EF_MIPS_PIC; - file.flags = FileFlags::Elf { e_flags }; - } - - // copied from `riscv64-linux-gnu-gcc foo.c -c`, note though - // that the `+d` target feature represents whether the double - // float abi is enabled. 
- "riscv64" if sess.target.options.features.contains("+d") => { - let e_flags = elf::EF_RISCV_RVC | elf::EF_RISCV_FLOAT_ABI_DOUBLE; - file.flags = FileFlags::Elf { e_flags }; - } - - _ => {} - } - - let section = file.add_section(Vec::new(), b".rmeta".to_vec(), SectionKind::Debug); - file.section_mut(section).flags = SectionFlags::Elf { sh_flags: SHF_EXCLUDE }; - file.append_section_data(section, metadata, 1); - file.write().unwrap() - } - } } /// Extract all symbols defined in raw-dylib libraries, collated by library name. diff --git a/compiler/rustc_codegen_ssa/src/back/metadata.rs b/compiler/rustc_codegen_ssa/src/back/metadata.rs index ffeb926e648e5..1df5540e3b840 100644 --- a/compiler/rustc_codegen_ssa/src/back/metadata.rs +++ b/compiler/rustc_codegen_ssa/src/back/metadata.rs @@ -1,14 +1,25 @@ //! Reading of the rustc metadata for rlibs and dylibs use std::fs::File; +use std::io::Write; use std::path::Path; -use object::{Object, ObjectSection}; +use object::write::{self, StandardSegment, Symbol, SymbolSection}; +use object::{ + elf, Architecture, BinaryFormat, Endianness, FileFlags, Object, ObjectSection, SectionFlags, + SectionKind, SymbolFlags, SymbolKind, SymbolScope, +}; + +use snap::write::FrameEncoder; + use rustc_data_structures::memmap::Mmap; use rustc_data_structures::owning_ref::OwningRef; use rustc_data_structures::rustc_erase_owner; use rustc_data_structures::sync::MetadataRef; +use rustc_metadata::EncodedMetadata; use rustc_session::cstore::MetadataLoader; +use rustc_session::Session; +use rustc_target::abi::Endian; use rustc_target::spec::Target; use crate::METADATA_FILENAME; @@ -83,3 +94,187 @@ fn search_for_metadata<'a>( .data() .map_err(|e| format!("failed to read {} section in '{}': {}", section, path.display(), e)) } + +fn create_object_file(sess: &Session) -> Option { + let endianness = match sess.target.options.endian { + Endian::Little => Endianness::Little, + Endian::Big => Endianness::Big, + }; + let architecture = match 
&sess.target.arch[..] { + "arm" => Architecture::Arm, + "aarch64" => Architecture::Aarch64, + "x86" => Architecture::I386, + "s390x" => Architecture::S390x, + "mips" => Architecture::Mips, + "mips64" => Architecture::Mips64, + "x86_64" => { + if sess.target.pointer_width == 32 { + Architecture::X86_64_X32 + } else { + Architecture::X86_64 + } + } + "powerpc" => Architecture::PowerPc, + "powerpc64" => Architecture::PowerPc64, + "riscv32" => Architecture::Riscv32, + "riscv64" => Architecture::Riscv64, + "sparc64" => Architecture::Sparc64, + // Unsupported architecture. + _ => return None, + }; + let binary_format = if sess.target.is_like_osx { + BinaryFormat::MachO + } else if sess.target.is_like_windows { + BinaryFormat::Coff + } else { + BinaryFormat::Elf + }; + + let mut file = write::Object::new(binary_format, architecture, endianness); + match architecture { + Architecture::Mips => { + // copied from `mipsel-linux-gnu-gcc foo.c -c` and + // inspecting the resulting `e_flags` field. + let e_flags = elf::EF_MIPS_ARCH_32R2 | elf::EF_MIPS_CPIC | elf::EF_MIPS_PIC; + file.flags = FileFlags::Elf { e_flags }; + } + Architecture::Mips64 => { + // copied from `mips64el-linux-gnuabi64-gcc foo.c -c` + let e_flags = elf::EF_MIPS_ARCH_64R2 | elf::EF_MIPS_CPIC | elf::EF_MIPS_PIC; + file.flags = FileFlags::Elf { e_flags }; + } + Architecture::Riscv64 if sess.target.options.features.contains("+d") => { + // copied from `riscv64-linux-gnu-gcc foo.c -c`, note though + // that the `+d` target feature represents whether the double + // float abi is enabled. + let e_flags = elf::EF_RISCV_RVC | elf::EF_RISCV_FLOAT_ABI_DOUBLE; + file.flags = FileFlags::Elf { e_flags }; + } + _ => {} + }; + Some(file) +} + +// For rlibs we "pack" rustc metadata into a dummy object file. When rustc +// creates a dylib crate type it will pass `--whole-archive` (or the +// platform equivalent) to include all object files from an rlib into the +// final dylib itself. 
This causes linkers to iterate and try to include all +// files located in an archive, so if metadata is stored in an archive then +// it needs to be of a form that the linker will be able to process. +// +// Note, though, that we don't actually want this metadata to show up in any +// final output of the compiler. Instead this is purely for rustc's own +// metadata tracking purposes. +// +// With the above in mind, each "flavor" of object format gets special +// handling here depending on the target: +// +// * MachO - macos-like targets will insert the metadata into a section that +// is sort of fake dwarf debug info. Inspecting the source of the macos +// linker this causes these sections to be skipped automatically because +// it's not in an allowlist of otherwise well known dwarf section names to +// go into the final artifact. +// +// * WebAssembly - we actually don't have any container format for this +// target. WebAssembly doesn't support the `dylib` crate type anyway so +// there's no need for us to support this at this time. Consequently the +// metadata bytes are simply stored as-is into an rlib. +// +// * COFF - Windows-like targets create an object with a section that has +// the `IMAGE_SCN_LNK_REMOVE` flag set which ensures that if the linker +// ever sees the section it doesn't process it and it's removed. +// +// * ELF - All other targets are similar to Windows in that there's a +// `SHF_EXCLUDE` flag we can set on sections in an object file to get +// automatically removed from the final output. +pub fn create_rmeta_file(sess: &Session, metadata: &[u8]) -> Vec { + let mut file = if let Some(file) = create_object_file(sess) { + file + } else { + // This is used to handle all "other" targets. This includes targets + // in two categories: + // + // * Some targets don't have support in the `object` crate just yet + // to write an object file. These targets are likely to get filled + // out over time. 
+ // + // * Targets like WebAssembly don't support dylibs, so the purpose + // of putting metadata in object files, to support linking rlibs + // into dylibs, is moot. + // + // In both of these cases it means that linking into dylibs will + // not be supported by rustc. This doesn't matter for targets like + // WebAssembly and for targets not supported by the `object` crate + // yet it means that work will need to be done in the `object` crate + // to add a case above. + return metadata.to_vec(); + }; + let section = file.add_section( + file.segment_name(StandardSegment::Debug).to_vec(), + b".rmeta".to_vec(), + SectionKind::Debug, + ); + match file.format() { + BinaryFormat::Coff => { + const IMAGE_SCN_LNK_REMOVE: u32 = 0; + file.section_mut(section).flags = + SectionFlags::Coff { characteristics: IMAGE_SCN_LNK_REMOVE }; + } + BinaryFormat::Elf => { + const SHF_EXCLUDE: u64 = 0x80000000; + file.section_mut(section).flags = SectionFlags::Elf { sh_flags: SHF_EXCLUDE }; + } + _ => {} + }; + file.append_section_data(section, metadata, 1); + file.write().unwrap() +} + +// Historical note: +// +// When using link.exe it was seen that the section name `.note.rustc` +// was getting shortened to `.note.ru`, and according to the PE and COFF +// specification: +// +// > Executable images do not use a string table and do not support +// > section names longer than 8 characters +// +// https://docs.microsoft.com/en-us/windows/win32/debug/pe-format +// +// As a result, we choose a slightly shorter name! 
As to why +// `.note.rustc` works on MinGW, see +// https://github.com/llvm/llvm-project/blob/llvmorg-12.0.0/lld/COFF/Writer.cpp#L1190-L1197 +pub fn create_compressed_metadata_file( + sess: &Session, + metadata: &EncodedMetadata, + symbol_name: &str, +) -> Vec { + let mut compressed = rustc_metadata::METADATA_HEADER.to_vec(); + FrameEncoder::new(&mut compressed).write_all(metadata.raw_data()).unwrap(); + let mut file = if let Some(file) = create_object_file(sess) { + file + } else { + return compressed.to_vec(); + }; + let section = file.add_section( + file.segment_name(StandardSegment::Data).to_vec(), + b".rustc".to_vec(), + SectionKind::Data, + ); + let offset = file.append_section_data(section, &compressed, 1); + + // For MachO and probably PE this is necessary to prevent the linker from throwing away the + // .rustc section. For ELF this isn't necessary, but it also doesn't harm. + file.add_symbol(Symbol { + name: symbol_name.as_bytes().to_vec(), + value: offset, + size: compressed.len() as u64, + kind: SymbolKind::Data, + scope: SymbolScope::Dynamic, + weak: false, + section: SymbolSection::Section(section), + flags: SymbolFlags::None, + }); + + file.write().unwrap() +} diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 85d51ea9a207b..2d6904af207d1 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -397,7 +397,6 @@ fn generate_lto_work( pub struct CompiledModules { pub modules: Vec, - pub metadata_module: Option, pub allocator_module: Option, } @@ -425,6 +424,7 @@ pub fn start_async_codegen( tcx: TyCtxt<'_>, target_cpu: String, metadata: EncodedMetadata, + metadata_module: Option, total_cgus: usize, ) -> OngoingCodegen { let (coordinator_send, coordinator_receive) = channel(); @@ -464,6 +464,7 @@ pub fn start_async_codegen( OngoingCodegen { backend, metadata, + metadata_module, crate_info, coordinator_send, @@ -640,12 +641,6 @@ fn 
produce_final_output_artifacts( } if !user_wants_bitcode { - if let Some(ref metadata_module) = compiled_modules.metadata_module { - if let Some(ref path) = metadata_module.bytecode { - ensure_removed(sess.diagnostic(), &path); - } - } - if let Some(ref allocator_module) = compiled_modules.allocator_module { if let Some(ref path) = allocator_module.bytecode { ensure_removed(sess.diagnostic(), path); @@ -1216,7 +1211,6 @@ fn start_executing_work( // This is where we collect codegen units that have gone all the way // through codegen and LLVM. let mut compiled_modules = vec![]; - let mut compiled_metadata_module = None; let mut compiled_allocator_module = None; let mut needs_link = Vec::new(); let mut needs_fat_lto = Vec::new(); @@ -1475,14 +1469,11 @@ fn start_executing_work( ModuleKind::Regular => { compiled_modules.push(compiled_module); } - ModuleKind::Metadata => { - assert!(compiled_metadata_module.is_none()); - compiled_metadata_module = Some(compiled_module); - } ModuleKind::Allocator => { assert!(compiled_allocator_module.is_none()); compiled_allocator_module = Some(compiled_module); } + ModuleKind::Metadata => bug!("Should be handled separately"), } } Message::NeedsLink { module, worker_id } => { @@ -1539,7 +1530,6 @@ fn start_executing_work( Ok(CompiledModules { modules: compiled_modules, - metadata_module: compiled_metadata_module, allocator_module: compiled_allocator_module, }) }); @@ -1800,6 +1790,7 @@ impl SharedEmitterMain { pub struct OngoingCodegen { pub backend: B, pub metadata: EncodedMetadata, + pub metadata_module: Option, pub crate_info: CrateInfo, pub coordinator_send: Sender>, pub codegen_worker_receive: Receiver>, @@ -1846,7 +1837,7 @@ impl OngoingCodegen { modules: compiled_modules.modules, allocator_module: compiled_modules.allocator_module, - metadata_module: compiled_modules.metadata_module, + metadata_module: self.metadata_module, }, work_products, ) diff --git a/compiler/rustc_codegen_ssa/src/base.rs 
b/compiler/rustc_codegen_ssa/src/base.rs index 9bb4982754c20..d82aa6915452b 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -1,3 +1,4 @@ +use crate::back::metadata::create_compressed_metadata_file; use crate::back::write::{ compute_per_cgu_lto_type, start_async_codegen, submit_codegened_module_to_llvm, submit_post_lto_module_to_llvm, submit_pre_lto_module_to_llvm, ComputedLtoType, OngoingCodegen, @@ -8,7 +9,7 @@ use crate::mir; use crate::mir::operand::OperandValue; use crate::mir::place::PlaceRef; use crate::traits::*; -use crate::{CachedModuleCodegen, CrateInfo, MemFlags, ModuleCodegen, ModuleKind}; +use crate::{CachedModuleCodegen, CompiledModule, CrateInfo, MemFlags, ModuleCodegen, ModuleKind}; use rustc_attr as attr; use rustc_data_structures::fx::FxHashMap; @@ -20,13 +21,14 @@ use rustc_hir::lang_items::LangItem; use rustc_index::vec::Idx; use rustc_metadata::EncodedMetadata; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; +use rustc_middle::middle::exported_symbols; use rustc_middle::middle::lang_items; use rustc_middle::mir::mono::{CodegenUnit, CodegenUnitNameBuilder, MonoItem}; use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf, TyAndLayout}; use rustc_middle::ty::query::Providers; use rustc_middle::ty::{self, Instance, Ty, TyCtxt}; use rustc_session::cgu_reuse_tracker::CguReuse; -use rustc_session::config::{self, EntryFnType}; +use rustc_session::config::{self, EntryFnType, OutputType}; use rustc_session::Session; use rustc_span::symbol::sym; use rustc_target::abi::{Align, VariantIdx}; @@ -491,7 +493,7 @@ pub fn codegen_crate( ) -> OngoingCodegen { // Skip crate items and just output metadata in -Z no-codegen mode. 
if tcx.sess.opts.debugging_opts.no_codegen || !tcx.sess.opts.output_types.should_codegen() { - let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu, metadata, 1); + let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu, metadata, None, 1); ongoing_codegen.codegen_finished(tcx); @@ -517,8 +519,41 @@ pub fn codegen_crate( } } - let ongoing_codegen = - start_async_codegen(backend.clone(), tcx, target_cpu, metadata, codegen_units.len()); + let metadata_module = if need_metadata_module { + // Emit compressed metadata object. + let metadata_cgu_name = + cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("metadata")).to_string(); + tcx.sess.time("write_compressed_metadata", || { + let file_name = + tcx.output_filenames(()).temp_path(OutputType::Metadata, Some(&metadata_cgu_name)); + let data = create_compressed_metadata_file( + tcx.sess, + &metadata, + &exported_symbols::metadata_symbol_name(tcx), + ); + if let Err(err) = std::fs::write(&file_name, data) { + tcx.sess.fatal(&format!("error writing metadata object file: {}", err)); + } + Some(CompiledModule { + name: metadata_cgu_name, + kind: ModuleKind::Metadata, + object: Some(file_name), + dwarf_object: None, + bytecode: None, + }) + }) + } else { + None + }; + + let ongoing_codegen = start_async_codegen( + backend.clone(), + tcx, + target_cpu, + metadata, + metadata_module, + codegen_units.len(), + ); let ongoing_codegen = AbortCodegenOnDrop::(Some(ongoing_codegen)); // Codegen an allocator shim, if necessary. @@ -558,27 +593,6 @@ pub fn codegen_crate( ongoing_codegen.submit_pre_codegened_module_to_llvm(tcx, allocator_module); } - if need_metadata_module { - // Codegen the encoded metadata. 
- let metadata_cgu_name = - cgu_name_builder.build_cgu_name(LOCAL_CRATE, &["crate"], Some("metadata")).to_string(); - let mut metadata_llvm_module = backend.new_metadata(tcx, &metadata_cgu_name); - tcx.sess.time("write_compressed_metadata", || { - backend.write_compressed_metadata( - tcx, - &ongoing_codegen.metadata, - &mut metadata_llvm_module, - ); - }); - - let metadata_module = ModuleCodegen { - name: metadata_cgu_name, - module_llvm: metadata_llvm_module, - kind: ModuleKind::Metadata, - }; - ongoing_codegen.submit_pre_codegened_module_to_llvm(tcx, metadata_module); - } - // For better throughput during parallel processing by LLVM, we used to sort // CGUs largest to smallest. This would lead to better thread utilization // by, for example, preventing a large CGU from being processed last and diff --git a/compiler/rustc_codegen_ssa/src/traits/backend.rs b/compiler/rustc_codegen_ssa/src/traits/backend.rs index 9c8bc3b210988..707561f5ebd0f 100644 --- a/compiler/rustc_codegen_ssa/src/traits/backend.rs +++ b/compiler/rustc_codegen_ssa/src/traits/backend.rs @@ -114,12 +114,6 @@ pub trait CodegenBackend { pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Send + Sync { fn new_metadata(&self, sess: TyCtxt<'_>, mod_name: &str) -> Self::Module; - fn write_compressed_metadata<'tcx>( - &self, - tcx: TyCtxt<'tcx>, - metadata: &EncodedMetadata, - llvm_module: &mut Self::Module, - ); fn codegen_allocator<'tcx>( &self, tcx: TyCtxt<'tcx>,