Skip to content

Commit

Permalink
Use object crate for .rustc metadata generation
Browse files Browse the repository at this point in the history
We already use the object crate for generating uncompressed .rmeta
metadata object files. This switches the generation of compressed
.rustc object files to use the object crate as well. These have
slightly different requirements in that .rmeta should be completely
excluded from any final compilation artifacts, while .rustc should
be part of shared objects, but not loaded into memory.

The primary motivation for this change is rust-lang#90326: In LLVM 14, the
current way of setting section flags (and in particular, preventing
the setting of SHF_ALLOC) will no longer work. There are other ways
we could work around this, but switching to the object crate seems
like the most elegant, as we already use it for .rmeta, and as it
makes this independent of the codegen backend. In particular, we
don't need separate handling in codegen_llvm and codegen_gcc.
codegen_cranelift should be able to reuse the implementation as
well, though I have omitted that here, as it is not based on
codegen_ssa.

This change mostly extracts the existing code for .rmeta handling
to allow using it for .rustc as well, and adjust the codegen
infrastructure to handle the metadata object file separately: We
no longer create a backend-specific module for it, and directly
produce the compiled module instead.

This does not fix rust-lang#90326 by itself yet, as .llvmbc will need to be
handled separately.
  • Loading branch information
nikic committed Dec 7, 2021
1 parent d9baa36 commit 9488cac
Show file tree
Hide file tree
Showing 12 changed files with 246 additions and 288 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3728,7 +3728,6 @@ dependencies = [
"rustc_span",
"rustc_target",
"smallvec",
"snap",
"tracing",
]

Expand Down Expand Up @@ -3763,6 +3762,7 @@ dependencies = [
"rustc_symbol_mangling",
"rustc_target",
"smallvec",
"snap",
"tempfile",
"tracing",
]
Expand Down
39 changes: 0 additions & 39 deletions compiler/rustc_codegen_gcc/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@ use gccjit::{
GlobalKind,
};
use rustc_middle::dep_graph;
use rustc_middle::middle::exported_symbols;
use rustc_middle::ty::TyCtxt;
use rustc_middle::mir::mono::Linkage;
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
use rustc_codegen_ssa::mono_item::MonoItemExt;
use rustc_codegen_ssa::traits::DebugInfoMethods;
use rustc_metadata::EncodedMetadata;
use rustc_session::config::DebugInfo;
use rustc_span::Symbol;

Expand Down Expand Up @@ -132,40 +130,3 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol) -> (Modul

(module, cost)
}

pub fn write_compressed_metadata<'tcx>(tcx: TyCtxt<'tcx>, metadata: &EncodedMetadata, gcc_module: &mut GccContext) {
use snap::write::FrameEncoder;
use std::io::Write;

// Historical note:
//
// When using link.exe it was seen that the section name `.note.rustc`
// was getting shortened to `.note.ru`, and according to the PE and COFF
// specification:
//
// > Executable images do not use a string table and do not support
// > section names longer than 8 characters
//
// https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
//
// As a result, we choose a slightly shorter name! As to why
// `.note.rustc` works on MinGW, see
// https://github.com/llvm/llvm-project/blob/llvmorg-12.0.0/lld/COFF/Writer.cpp#L1190-L1197
let section_name = if tcx.sess.target.is_like_osx { "__DATA,.rustc" } else { ".rustc" };

let context = &gcc_module.context;
let mut compressed = rustc_metadata::METADATA_HEADER.to_vec();
FrameEncoder::new(&mut compressed).write_all(&metadata.raw_data()).unwrap();

let name = exported_symbols::metadata_symbol_name(tcx);
let typ = context.new_array_type(None, context.new_type::<u8>(), compressed.len() as i32);
let global = context.new_global(None, GlobalKind::Exported, typ, name);
global.global_set_initializer(&compressed);
global.set_link_section(section_name);

// Also generate a .section directive to force no
// flags, at least for ELF outputs, so that the
// metadata doesn't get loaded into memory.
let directive = format!(".section {}", section_name);
context.add_top_level_asm(None, &directive);
}
5 changes: 0 additions & 5 deletions compiler/rustc_codegen_gcc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ extern crate rustc_session;
extern crate rustc_span;
extern crate rustc_symbol_mangling;
extern crate rustc_target;
extern crate snap;

// This prevents duplicating functions and statics that are already part of the host rustc process.
#[allow(unused_extern_crates)]
Expand Down Expand Up @@ -128,10 +127,6 @@ impl ExtraBackendMethods for GccCodegenBackend {
}
}

fn write_compressed_metadata<'tcx>(&self, tcx: TyCtxt<'tcx>, metadata: &EncodedMetadata, gcc_module: &mut Self::Module) {
base::write_compressed_metadata(tcx, metadata, gcc_module)
}

fn codegen_allocator<'tcx>(&self, tcx: TyCtxt<'tcx>, mods: &mut Self::Module, module_name: &str, kind: AllocatorKind, has_alloc_error_handler: bool) {
unsafe { allocator::codegen(tcx, mods, module_name, kind, has_alloc_error_handler) }
}
Expand Down
1 change: 0 additions & 1 deletion compiler/rustc_codegen_llvm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ bitflags = "1.0"
cstr = "0.2"
libc = "0.2"
measureme = "10.0.0"
snap = "1"
tracing = "0.1"
rustc_middle = { path = "../rustc_middle" }
rustc-demangle = "0.1.21"
Expand Down
53 changes: 1 addition & 52 deletions compiler/rustc_codegen_llvm/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@
//! int)` and `rec(x=int, y=int, z=int)` will have the same [`llvm::Type`].
//!
//! [`Ty`]: rustc_middle::ty::Ty
//! [`val_ty`]: common::val_ty
//! [`val_ty`]: crate::common::val_ty

use super::ModuleLlvm;

use crate::attributes;
use crate::builder::Builder;
use crate::common;
use crate::context::CodegenCx;
use crate::llvm;
use crate::value::Value;
Expand All @@ -25,66 +24,16 @@ use rustc_codegen_ssa::mono_item::MonoItemExt;
use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_metadata::EncodedMetadata;
use rustc_middle::dep_graph;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
use rustc_middle::middle::exported_symbols;
use rustc_middle::mir::mono::{Linkage, Visibility};
use rustc_middle::ty::TyCtxt;
use rustc_session::config::DebugInfo;
use rustc_span::symbol::Symbol;
use rustc_target::spec::SanitizerSet;

use std::ffi::CString;
use std::time::Instant;

pub fn write_compressed_metadata<'tcx>(
tcx: TyCtxt<'tcx>,
metadata: &EncodedMetadata,
llvm_module: &mut ModuleLlvm,
) {
use snap::write::FrameEncoder;
use std::io::Write;

// Historical note:
//
// When using link.exe it was seen that the section name `.note.rustc`
// was getting shortened to `.note.ru`, and according to the PE and COFF
// specification:
//
// > Executable images do not use a string table and do not support
// > section names longer than 8 characters
//
// https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
//
// As a result, we choose a slightly shorter name! As to why
// `.note.rustc` works on MinGW, see
// https://github.com/llvm/llvm-project/blob/llvmorg-12.0.0/lld/COFF/Writer.cpp#L1190-L1197
let section_name = if tcx.sess.target.is_like_osx { "__DATA,.rustc" } else { ".rustc" };

let (metadata_llcx, metadata_llmod) = (&*llvm_module.llcx, llvm_module.llmod());
let mut compressed = rustc_metadata::METADATA_HEADER.to_vec();
FrameEncoder::new(&mut compressed).write_all(metadata.raw_data()).unwrap();

let llmeta = common::bytes_in_context(metadata_llcx, &compressed);
let llconst = common::struct_in_context(metadata_llcx, &[llmeta], false);
let name = exported_symbols::metadata_symbol_name(tcx);
let buf = CString::new(name).unwrap();
let llglobal =
unsafe { llvm::LLVMAddGlobal(metadata_llmod, common::val_ty(llconst), buf.as_ptr()) };
unsafe {
llvm::LLVMSetInitializer(llglobal, llconst);
let name = SmallCStr::new(section_name);
llvm::LLVMSetSection(llglobal, name.as_ptr());

// Also generate a .section directive to force no
// flags, at least for ELF outputs, so that the
// metadata doesn't get loaded into memory.
let directive = format!(".section {}", section_name);
llvm::LLVMSetModuleInlineAsm2(metadata_llmod, directive.as_ptr().cast(), directive.len())
}
}

pub struct ValueIter<'ll> {
cur: Option<&'ll Value>,
step: unsafe extern "C" fn(&'ll Value) -> Option<&'ll Value>,
Expand Down
8 changes: 0 additions & 8 deletions compiler/rustc_codegen_llvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,6 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
ModuleLlvm::new_metadata(tcx, mod_name)
}

fn write_compressed_metadata<'tcx>(
&self,
tcx: TyCtxt<'tcx>,
metadata: &EncodedMetadata,
llvm_module: &mut ModuleLlvm,
) {
base::write_compressed_metadata(tcx, metadata, llvm_module)
}
fn codegen_allocator<'tcx>(
&self,
tcx: TyCtxt<'tcx>,
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_codegen_ssa/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ libc = "0.2.50"
jobserver = "0.1.22"
tempfile = "3.2"
pathdiff = "0.2.0"
snap = "1"
smallvec = { version = "1.6.1", features = ["union", "may_dangle"] }
regex = "1.4"

Expand Down
137 changes: 2 additions & 135 deletions compiler/rustc_codegen_ssa/src/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,21 @@ use rustc_session::utils::NativeLibKind;
/// need out of the shared crate context before we get rid of it.
use rustc_session::{filesearch, Session};
use rustc_span::symbol::Symbol;
use rustc_target::abi::Endian;
use rustc_target::spec::crt_objects::{CrtObjects, CrtObjectsFallback};
use rustc_target::spec::{LinkOutputKind, LinkerFlavor, LldFlavor, SplitDebuginfo};
use rustc_target::spec::{PanicStrategy, RelocModel, RelroLevel, SanitizerSet, Target};

use super::archive::{find_library, ArchiveBuilder};
use super::command::Command;
use super::linker::{self, Linker};
use super::metadata::create_rmeta_file;
use super::rpath::{self, RPathConfig};
use crate::{
looks_like_rust_object_file, CodegenResults, CompiledModule, CrateInfo, NativeLib,
METADATA_FILENAME,
};

use cc::windows_registry;
use object::elf;
use object::write::Object;
use object::{Architecture, BinaryFormat, Endianness, FileFlags, SectionFlags, SectionKind};
use regex::Regex;
use tempfile::Builder as TempFileBuilder;

Expand Down Expand Up @@ -339,7 +336,7 @@ fn link_rlib<'a, B: ArchiveBuilder<'a>>(
// metadata in rlib files is wrapped in a "dummy" object file for
// the target platform so the rlib can be processed entirely by
// normal linkers for the platform.
let metadata = create_metadata_file(sess, codegen_results.metadata.raw_data());
let metadata = create_rmeta_file(sess, codegen_results.metadata.raw_data());
ab.add_file(&emit_metadata(sess, &metadata, tmpdir));

// After adding all files to the archive, we need to update the
Expand All @@ -358,136 +355,6 @@ fn link_rlib<'a, B: ArchiveBuilder<'a>>(
}
}
return Ok(ab);

// For rlibs we "pack" rustc metadata into a dummy object file. When rustc
// creates a dylib crate type it will pass `--whole-archive` (or the
// platform equivalent) to include all object files from an rlib into the
// final dylib itself. This causes linkers to iterate and try to include all
// files located in an archive, so if metadata is stored in an archive then
// it needs to be of a form that the linker will be able to process.
//
// Note, though, that we don't actually want this metadata to show up in any
// final output of the compiler. Instead this is purely for rustc's own
// metadata tracking purposes.
//
// With the above in mind, each "flavor" of object format gets special
// handling here depending on the target:
//
// * MachO - macos-like targets will insert the metadata into a section that
// is sort of fake dwarf debug info. Inspecting the source of the macos
// linker this causes these sections to be skipped automatically because
// it's not in an allowlist of otherwise well known dwarf section names to
// go into the final artifact.
//
// * WebAssembly - we actually don't have any container format for this
// target. WebAssembly doesn't support the `dylib` crate type anyway so
// there's no need for us to support this at this time. Consequently the
// metadata bytes are simply stored as-is into an rlib.
//
// * COFF - Windows-like targets create an object with a section that has
// the `IMAGE_SCN_LNK_REMOVE` flag set which ensures that if the linker
// ever sees the section it doesn't process it and it's removed.
//
// * ELF - All other targets are similar to Windows in that there's a
// `SHF_EXCLUDE` flag we can set on sections in an object file to get
// automatically removed from the final output.
//
// Note that this metdata format is kept in sync with
// `rustc_codegen_ssa/src/back/metadata.rs`.
fn create_metadata_file(sess: &Session, metadata: &[u8]) -> Vec<u8> {
let endianness = match sess.target.options.endian {
Endian::Little => Endianness::Little,
Endian::Big => Endianness::Big,
};
let architecture = match &sess.target.arch[..] {
"arm" => Architecture::Arm,
"aarch64" => Architecture::Aarch64,
"x86" => Architecture::I386,
"s390x" => Architecture::S390x,
"mips" => Architecture::Mips,
"mips64" => Architecture::Mips64,
"x86_64" => {
if sess.target.pointer_width == 32 {
Architecture::X86_64_X32
} else {
Architecture::X86_64
}
}
"powerpc" => Architecture::PowerPc,
"powerpc64" => Architecture::PowerPc64,
"riscv32" => Architecture::Riscv32,
"riscv64" => Architecture::Riscv64,
"sparc64" => Architecture::Sparc64,

// This is used to handle all "other" targets. This includes targets
// in two categories:
//
// * Some targets don't have support in the `object` crate just yet
// to write an object file. These targets are likely to get filled
// out over time.
//
// * Targets like WebAssembly don't support dylibs, so the purpose
// of putting metadata in object files, to support linking rlibs
// into dylibs, is moot.
//
// In both of these cases it means that linking into dylibs will
// not be supported by rustc. This doesn't matter for targets like
// WebAssembly and for targets not supported by the `object` crate
// yet it means that work will need to be done in the `object` crate
// to add a case above.
_ => return metadata.to_vec(),
};

if sess.target.is_like_osx {
let mut file = Object::new(BinaryFormat::MachO, architecture, endianness);

let section =
file.add_section(b"__DWARF".to_vec(), b".rmeta".to_vec(), SectionKind::Debug);
file.append_section_data(section, metadata, 1);
file.write().unwrap()
} else if sess.target.is_like_windows {
const IMAGE_SCN_LNK_REMOVE: u32 = 0;
let mut file = Object::new(BinaryFormat::Coff, architecture, endianness);

let section = file.add_section(Vec::new(), b".rmeta".to_vec(), SectionKind::Debug);
file.section_mut(section).flags =
SectionFlags::Coff { characteristics: IMAGE_SCN_LNK_REMOVE };
file.append_section_data(section, metadata, 1);
file.write().unwrap()
} else {
const SHF_EXCLUDE: u64 = 0x80000000;
let mut file = Object::new(BinaryFormat::Elf, architecture, endianness);

match &sess.target.arch[..] {
// copied from `mipsel-linux-gnu-gcc foo.c -c` and
// inspecting the resulting `e_flags` field.
"mips" => {
let e_flags = elf::EF_MIPS_ARCH_32R2 | elf::EF_MIPS_CPIC | elf::EF_MIPS_PIC;
file.flags = FileFlags::Elf { e_flags };
}
// copied from `mips64el-linux-gnuabi64-gcc foo.c -c`
"mips64" => {
let e_flags = elf::EF_MIPS_ARCH_64R2 | elf::EF_MIPS_CPIC | elf::EF_MIPS_PIC;
file.flags = FileFlags::Elf { e_flags };
}

// copied from `riscv64-linux-gnu-gcc foo.c -c`, note though
// that the `+d` target feature represents whether the double
// float abi is enabled.
"riscv64" if sess.target.options.features.contains("+d") => {
let e_flags = elf::EF_RISCV_RVC | elf::EF_RISCV_FLOAT_ABI_DOUBLE;
file.flags = FileFlags::Elf { e_flags };
}

_ => {}
}

let section = file.add_section(Vec::new(), b".rmeta".to_vec(), SectionKind::Debug);
file.section_mut(section).flags = SectionFlags::Elf { sh_flags: SHF_EXCLUDE };
file.append_section_data(section, metadata, 1);
file.write().unwrap()
}
}
}

/// Extract all symbols defined in raw-dylib libraries, collated by library name.
Expand Down
Loading

0 comments on commit 9488cac

Please sign in to comment.