From 596ffa959acde21435b9626432a5f2ae151e49df Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Fri, 28 Jun 2024 19:16:19 +0900 Subject: [PATCH] Add --separate-debug-file This option separates debug info into a different file. The debug info file's filename is stored in the main output file's .gnu_debuglink section. gdb can read the section contents and follow the link to find the debug info in that other file. Fixes https://github.com/rui314/mold/issues/1294 --- CMakeLists.txt | 1 + common/common.h | 7 ++ common/crc32.cc | 197 ++++++++++++++++++++++++++++++++ elf/cmdline.cc | 16 +++ elf/main.cc | 23 ++-- elf/mold.h | 24 ++++ elf/output-chunks.cc | 15 +++ elf/passes.cc | 123 ++++++++++++++++++++ test/elf/separate-debug-file.sh | 26 +++++ test/elf/x86_64_note.sh | 2 +- test/elf/x86_64_note2.sh | 2 +- 11 files changed, 426 insertions(+), 10 deletions(-) create mode 100644 common/crc32.cc create mode 100755 test/elf/separate-debug-file.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index d2a7c53e38..9b3899c556 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -364,6 +364,7 @@ endforeach() # Add other non-template source files. target_sources(mold PRIVATE common/compress.cc + common/crc32.cc common/demangle.cc common/filepath.cc common/glob.cc diff --git a/common/common.h b/common/common.h index 986448e134..c5ddab0a50 100644 --- a/common/common.h +++ b/common/common.h @@ -899,6 +899,13 @@ std::optional demangle_rust(std::string_view name); void acquire_global_lock(); void release_global_lock(); +// +// crc32.cc +// + +u32 compute_crc32(u32 crc, u8 *buf, i64 len); +std::vector crc32_solve(i64 datalen, u32 current, u32 want); + // // compress.cc // diff --git a/common/crc32.cc b/common/crc32.cc new file mode 100644 index 0000000000..fe37196c2e --- /dev/null +++ b/common/crc32.cc @@ -0,0 +1,197 @@ +// This file contains a function to "forge" a CRC. 
That is, given a piece +// of data and a desired CRC32 value, crc32_solve() returns a binary blob +// to add to the end of the original data to yield the desired CRC32 +// value. Trailing garbage is ignored by many binary file formats, so +// you can create a file with a desired CRC using crc32_solve(). We need +// it for --separate-debug-file. +// +// The code in this file is based on Mark Adler's "spoof" program. You can +// obtain the original copy of it at the following URL: +// +// https://github.com/madler/spoof/blob/master/spoof.c +// +// Below is the original license: + +/* spoof.c -- modify a message to have a desired CRC + + Copyright (C) 2012, 2014, 2016, 2018, 2021 Mark Adler + + This software is provided 'as-is', without any express or implied warranty. + In no event will the authors be held liable for any damages arising from the + use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not claim + that you wrote the original software. If you use this software in a + product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Mark Adler + madler@alumni.caltech.edu + + */ + +#include "common.h" + +#include +#include + +namespace mold { + +static constexpr i64 deg = 32; +static constexpr u32 poly = 0xedb88320; + +using Mat = std::array; + +static constexpr u32 gf2_matrix_times(const Mat &mat, u32 vec) { + u32 n = 0; + for (i64 i = 0; vec; vec >>= 1, i++) + if (vec & 1) + n ^= mat[i]; + return n; +} + +static constexpr Mat gf2_matrix_square(const Mat &mat) { + Mat sq; + for (i64 i = 0; i < deg; i++) + sq[i] = gf2_matrix_times(mat, mat[i]); + return sq; +} + +static consteval std::array get_crc_zero_powers() { + std::array p; + + p[1][0] = poly; + for (i64 n = 1; n < deg; n++) + p[1][n] = 1 << (n - 1); + + p[0] = gf2_matrix_square(p[1]); + p[1] = gf2_matrix_square(p[0]); + p[0] = gf2_matrix_square(p[1]); + p[1] = gf2_matrix_square(p[0]); + + for (i64 i = 2; i < 64; i++) + p[i] = gf2_matrix_square(p[i - 1]); + return p; +} + +// Efficiently apply len zero bytes to crc, returning the resulting crc. +static u32 crc_zeros(u32 crc, i64 len) { + static constexpr std::array power = get_crc_zero_powers(); + + // apply len zeros to crc + if (crc) + for (i64 n = 0; len; len >>= 1, n++) + if (len & 1) + crc = gf2_matrix_times(power[n], crc); + return crc; +} + +// Solve M x = c for x +static std::vector gf2_matrix_solve(std::vector M, u32 c) { + i64 cols = M.size(); + i64 rows = deg; + + // create adjoining identity matrix + std::vector> inv(cols); + for (i64 i = 0; i < cols; i++) { + inv[i].resize(cols); + inv[i][i] = 1; + } + + for (i64 j = 0; j < rows; j++) { + u32 pos = 1 << j; + + if ((M[j] & pos) == 0) { + i64 k; + for (k = j + 1; k < cols; k++) + if (M[k] & pos) + break; + + if (k == cols) { + std::cerr << "mold: internal error: crc32_solve: no solution\n"; + exit(1); + } + + std::swap(M[j], M[k]); + std::swap(inv[j], inv[k]); + } + + for (i64 k = 0; k < cols; k++) { + if (k != j && (M[k] & pos)) { + M[k] ^= M[j]; + for (i64 i = 0; i < cols; i++) + inv[k][i] = inv[k][i] ^ inv[j][i]; + } + } 
+ } + + // multiply inverse by c to get result x + std::vector x(cols); + for (i64 j = 0; c; c >>= 1, j++) + if (c & 1) + for (i64 i = 0; i < cols; i++) + x[i] = x[i] ^ inv[j][i]; + return x; +} + +// Compute a CRC for given data in parallel +u32 compute_crc32(u32 crc, u8 *buf, i64 len) { + struct Shard { + u8 *buf; + i64 len; + u32 crc; + }; + + constexpr i64 shard_size = 1024 * 1024; // 1 MiB + std::vector shards; + + while (len > 0) { + i64 sz = std::min(len, shard_size); + shards.push_back({buf, sz, 0}); + buf += sz; + len -= sz; + } + + tbb::parallel_for_each(shards.begin(), shards.end(), [](Shard &shard) { + shard.crc = crc32_z(0, shard.buf, shard.len); + }); + + for (Shard &shard : shards) + crc = crc32_combine(crc, shard.crc, shard.len); + return crc; +} + +// Given input data and a desired CRC value, this function returns +// a binary blob such that if the blob is appended to the end of the +// input data, the entire data's CRC value becomes the desired CRC. +std::vector crc32_solve(i64 datalen, u32 current, u32 desired) { + // Compute the CRC for the given data and the all-zero trailer + constexpr i64 trailer_len = 16; + current = ~crc_zeros(~current, trailer_len); + + // Compute CRCs for all bits in the trailer + std::vector mat; + for (i64 i = 0; i < trailer_len * 8; i++) { + u8 buf[trailer_len] = {}; + buf[i / 8] = 1 << (i % 8); + mat.push_back(~crc32_z(~crc_zeros(0, datalen), buf, sizeof(buf))); + } + + // Find desired trailer data + std::vector sol = gf2_matrix_solve(mat, desired ^ current); + + std::vector out(trailer_len); + for (i64 i = 0; i < trailer_len * 8; i++) + if (sol[i]) + out[i / 8] |= 1 << (i % 8); + return out; +} + +} // namespace mold diff --git a/elf/cmdline.cc b/elf/cmdline.cc index fa63185023..5f2b666ad5 100644 --- a/elf/cmdline.cc +++ b/elf/cmdline.cc @@ -143,6 +143,8 @@ inline const char helpmsg[] = R"( --rpath-link DIR Ignored --run COMMAND ARG... 
Run COMMAND with mold as /usr/bin/ld --section-start=SECTION=ADDR Set address for section + --separate-debug-file[=FILE] Separate debug info to the specified file + --no-separate-debug-file --shared, --Bshareable Create a shared library --shuffle-sections[=SEED] Randomize the output by shuffling input sections --sort-common Ignored @@ -526,6 +528,7 @@ std::vector parse_nonpositional_args(Context &ctx) { std::optional z_separate_code; std::optional report_undefined; std::optional z_relro; + std::optional separate_debug_file; std::optional shuffle_sections_seed; std::unordered_set rpaths; @@ -1003,6 +1006,12 @@ std::vector parse_nonpositional_args(Context &ctx) { ctx.arg.z_origin = true; } else if (read_z_flag("nodefaultlib")) { ctx.arg.z_nodefaultlib = true; + } else if (read_eq("separate-debug-file")) { + separate_debug_file = arg; + } else if (read_flag("separate-debug-file")) { + separate_debug_file = ""; + } else if (read_flag("no-separate-debug-file")) { + separate_debug_file.reset(); } else if (read_z_flag("separate-loadable-segments")) { z_separate_code = SEPARATE_LOADABLE_SEGMENTS; } else if (read_z_flag("separate-code")) { @@ -1394,6 +1403,13 @@ std::vector parse_nonpositional_args(Context &ctx) { ctx.default_version = VER_NDX_LAST_RESERVED + 1; } + if (separate_debug_file) { + if (separate_debug_file->empty()) + ctx.arg.separate_debug_file = ctx.arg.output + ".dbg"; + else + ctx.arg.separate_debug_file = *separate_debug_file; + } + if (ctx.arg.shared && warn_shared_textrel) ctx.arg.warn_textrel = true; diff --git a/elf/main.cc b/elf/main.cc index 1dd4db0127..c9e3fb2ea3 100644 --- a/elf/main.cc +++ b/elf/main.cc @@ -559,14 +559,17 @@ int elf_main(int argc, char **argv) { // Compute the is_weak bit for each imported symbol. compute_imported_symbol_weakness(ctx); - // Compute sizes of output sections while assigning offsets - // within an output section to input sections. 
- compute_section_sizes(ctx); - // Sort sections by section attributes so that we'll have to // create as few segments as possible. sort_output_sections(ctx); + if (!ctx.arg.separate_debug_file.empty()) + separate_debug_sections(ctx); + + // Compute sizes of output sections while assigning offsets + // within an output section to input sections. + compute_section_sizes(ctx); + // If --packed_dyn_relocs=relr was given, base relocations are stored // to a .relr.dyn section in a compressed form. Construct a compressed // relocations now so that we can fix section sizes and file layout. @@ -659,9 +662,12 @@ int elf_main(int argc, char **argv) { // .gdb_index's contents cannot be constructed before applying // relocations to other debug sections. We have relocated debug // sections now, so write the .gdb_index section. - if (ctx.gdb_index) + if (ctx.gdb_index && ctx.arg.separate_debug_file.empty()) write_gdb_index(ctx); + if (!ctx.arg.separate_debug_file.empty()) + write_gnu_debuglink(ctx); + t_copy.stop(); ctx.checkpoint(); @@ -680,6 +686,9 @@ int elf_main(int argc, char **argv) { if (ctx.arg.print_map) print_map(ctx); + if (!ctx.arg.separate_debug_file.empty()) + write_separate_debug_file(ctx); + // Show stats numbers if (ctx.arg.stats) show_stats(ctx); @@ -690,9 +699,7 @@ int elf_main(int argc, char **argv) { std::cout << std::flush; std::cerr << std::flush; - if (ctx.arg.fork) - notify_parent(); - + notify_parent(); release_global_lock(); if (ctx.arg.quick_exit) diff --git a/elf/mold.h b/elf/mold.h index 4b610a495e..caa4eb9f2a 100644 --- a/elf/mold.h +++ b/elf/mold.h @@ -993,6 +993,22 @@ class NotePropertySection : public Chunk { std::map properties; }; +template +class GnuDebuglinkSection : public Chunk { +public: + GnuDebuglinkSection() { + this->name = ".gnu_debuglink"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_addralign = 4; + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + + std::string filename; + u32 crc32 = 
0; +}; + template class GdbIndexSection : public Chunk { public: @@ -1439,11 +1455,14 @@ template void apply_version_script(Context &); template void parse_symbol_version(Context &); template void compute_import_export(Context &); template void compute_address_significance(Context &); +template void separate_debug_sections(Context &); template void compute_section_headers(Context &); template i64 set_osec_offsets(Context &); template void fix_synthetic_symbols(Context &); template i64 compress_debug_sections(Context &); template void write_build_id(Context &); +template void write_gnu_debuglink(Context &); +template void write_separate_debug_file(Context &ctx); template void write_dependency_file(Context &); template void show_stats(Context &); @@ -1807,6 +1826,7 @@ struct Context { std::string package_metadata; std::string plugin; std::string rpaths; + std::string separate_debug_file; std::string soname; std::string sysroot; std::unique_ptr> retain_symbols_file; @@ -1885,6 +1905,9 @@ struct Context { tbb::concurrent_hash_map *, std::vector> undef_errors; + // For --separate-debug-file + std::vector *> debug_chunks; + // Output chunks OutputEhdr *ehdr = nullptr; OutputShdr *shdr = nullptr; @@ -1900,6 +1923,7 @@ struct Context { DynstrSection *dynstr = nullptr; HashSection *hash = nullptr; GnuHashSection *gnu_hash = nullptr; + GnuDebuglinkSection *gnu_debuglink = nullptr; ShstrtabSection *shstrtab = nullptr; PltSection *plt = nullptr; PltGotSection *pltgot = nullptr; diff --git a/elf/output-chunks.cc b/elf/output-chunks.cc index 9e87ea08aa..70f1a4f795 100644 --- a/elf/output-chunks.cc +++ b/elf/output-chunks.cc @@ -2948,6 +2948,20 @@ void ComdatGroupSection::copy_buf(Context &ctx) { *buf++ = chunk->shndx; } +template +void GnuDebuglinkSection::update_shdr(Context &ctx) { + filename = std::filesystem::path(ctx.arg.separate_debug_file).filename(); + this->shdr.sh_size = align_to(filename.size() + 1, 4) + 4; +} + +template +void GnuDebuglinkSection::copy_buf(Context 
&ctx) { + u8 *buf = ctx.buf + this->shdr.sh_offset; + memset(buf, 0, this->shdr.sh_size); + write_string(buf, filename); + *(U32 *)(buf + this->shdr.sh_size - 4) = crc32; +} + using E = MOLD_TARGET; template class Chunk; @@ -2986,6 +3000,7 @@ template class GdbIndexSection; template class CompressedSection; template class RelocSection; template class ComdatGroupSection; +template class GnuDebuglinkSection; template OutputSection *find_section(Context &, u32); template OutputSection *find_section(Context &, std::string_view); diff --git a/elf/passes.cc b/elf/passes.cc index 95707288e4..b4ac71fca8 100644 --- a/elf/passes.cc +++ b/elf/passes.cc @@ -156,6 +156,8 @@ void create_synthetic_sections(Context &ctx) { ctx.verdef = push(new VerdefSection); if (ctx.arg.emit_relocs) ctx.eh_frame_reloc = push(new EhFrameRelocSection); + if (!ctx.arg.separate_debug_file.empty()) + ctx.gnu_debuglink = push(new GnuDebuglinkSection); if (ctx.arg.shared || !ctx.dsos.empty() || ctx.arg.pie) { ctx.dynamic = push(new DynamicSection(ctx)); @@ -2602,6 +2604,24 @@ static i64 set_file_offsets(Context &ctx) { return fileoff; } +// Remove debug sections from ctx.chunks and save them to ctx.debug_chunks. +// This is for --separate-debug-file. +template +void separate_debug_sections(Context &ctx) { + auto is_debug_section = [&](Chunk *chunk) { + if (chunk->shdr.sh_flags & SHF_ALLOC) + return false; + return chunk == ctx.gdb_index || chunk == ctx.symtab || chunk == ctx.strtab || + chunk->name.starts_with(".debug_"); + }; + + auto mid = std::stable_partition(ctx.chunks.begin(), ctx.chunks.end(), + is_debug_section); + + ctx.debug_chunks = {ctx.chunks.begin(), mid}; + ctx.chunks.erase(ctx.chunks.begin(), mid); +} + template void compute_section_headers(Context &ctx) { // Update sh_size for each chunk. @@ -2993,6 +3013,106 @@ void write_build_id(Context &ctx) { ctx.buildid->copy_buf(ctx); } +// A .gnu_debuglink section contains a filename and a CRC32 checksum of a +// debug info file. 
When we are writing a .gnu_debuglink, we don't know +// its CRC32 checksum because we haven't created a debug info file. So we +// write a dummy value instead. +// +// We can't choose a random value as the dummy value because of build +// reproducibility. We also don't want to write a fixed value for all +// files because the CRC checksum is in this section to prevent using +// the wrong file when debugging. gdb rejects a debug info file if its CRC +// doesn't match the one in .gnu_debuglink. +// +// Therefore, we'll try to make our CRC checksum as unique as possible. +// We'll remember that checksum, and after creating a debug info file, add +// a few bytes of garbage at the end of it so that the debug info file's +// CRC checksum becomes the one that we have precomputed. +template +void write_gnu_debuglink(Context &ctx) { + Timer t(ctx, "write_gnu_debuglink"); + u32 crc32; + + if (ctx.buildid) { + crc32 = compute_crc32(0, ctx.buildid->contents.data(), + ctx.buildid->contents.size()); + } else { + std::vector> shards = get_shards(ctx); + std::vector> hashes(shards.size()); + + tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) { + hashes[i] = hash_string({(char *)shards[i].data(), shards[i].size()}); + }); + crc32 = compute_crc32(0, (u8 *)hashes.data(), hashes.size() * 8); + } + + ctx.gnu_debuglink->crc32 = crc32; + ctx.gnu_debuglink->copy_buf(ctx); +} + +// Write a separate debug file. This function is called after we finish +// writing to the usual output file. +template +void write_separate_debug_file(Context &ctx) { + Timer t(ctx, "write_separate_debug_file"); + + // We want to write to the debug info file in the background so that the + // user doesn't have to wait for it to complete. + if (!ctx.arg.stats && !ctx.arg.perf) + notify_parent(); + + // A debug info file contains the same sections as the original file, though + // most of them can be empty as if they were bss sections. We convert + // real sections into dummy sections here. 
+ for (i64 i = 0; i < ctx.chunks.size(); i++) { + Chunk *chunk = ctx.chunks[i]; + if (chunk != ctx.ehdr && chunk != ctx.shdr && chunk != ctx.shstrtab && + chunk->shdr.sh_type != SHT_NOTE) { + Chunk *sec = new OutputSection(chunk->name, SHT_NULL); + sec->shdr = chunk->shdr; + sec->shdr.sh_type = SHT_NOBITS; + + ctx.chunks[i] = sec; + ctx.chunk_pool.emplace_back(sec); + } + } + + // Restore debug info sections that had been set aside while we were + // creating the main file. + tbb::parallel_for_each(ctx.debug_chunks, [&](Chunk *chunk) { + chunk->compute_section_size(ctx); + }); + + append(ctx.chunks, ctx.debug_chunks); + + // Write to the debug info file as if it were a regular output file. + compute_section_headers(ctx); + i64 filesize = set_osec_offsets(ctx); + + ctx.output_file = + OutputFile>::open(ctx, ctx.arg.separate_debug_file, + filesize, 0666); + ctx.buf = ctx.output_file->buf; + + copy_chunks(ctx); + + if (ctx.gdb_index) + write_gdb_index(ctx); + + // Reverse-compute a CRC32 value so that the CRC32 checksum embedded to + // the .gnu_debuglink section in the main executable matches with the + // debug info file's CRC32 checksum. + std::vector &buf2 = ctx.output_file->buf2; + i64 datalen = filesize + buf2.size(); + + u32 crc = compute_crc32(0, ctx.buf, filesize); + crc = compute_crc32(crc, buf2.data(), buf2.size()); + + std::vector trailer = crc32_solve(datalen, crc, ctx.gnu_debuglink->crc32); + append(ctx.output_file->buf2, trailer); + ctx.output_file->close(ctx); +} + // Write Makefile-style dependency rules to a file specified by // --dependency-file. This is analogous to the compiler's -M flag. 
template @@ -3126,11 +3246,14 @@ template void apply_version_script(Context &); template void parse_symbol_version(Context &); template void compute_import_export(Context &); template void compute_address_significance(Context &); +template void separate_debug_sections(Context &); template void compute_section_headers(Context &); template i64 set_osec_offsets(Context &); template void fix_synthetic_symbols(Context &); template i64 compress_debug_sections(Context &); template void write_build_id(Context &); +template void write_gnu_debuglink(Context &); +template void write_separate_debug_file(Context &); template void write_dependency_file(Context &); template void show_stats(Context &); diff --git a/test/elf/separate-debug-file.sh b/test/elf/separate-debug-file.sh new file mode 100755 index 0000000000..33bf6c755e --- /dev/null +++ b/test/elf/separate-debug-file.sh @@ -0,0 +1,26 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +on_qemu && skip +command -v gdb >& /dev/null || skip +command -v flock >& /dev/null || skip + +cat < $t/a.c +#include +int main() { + printf("Hello world\n"); +} +EOF + +$CC -c -o $t/a.o $t/a.c -g +$CC -B. -o $t/exe1 $t/a.o -Wl,--separate-debug-file +readelf -SW $t/exe1 | grep -Fq .gnu_debuglink + +$CC -c -o $t/a.o $t/a.c -g +$CC -B. -o $t/exe2 $t/a.o -Wl,--separate-debug-file -Wl,--no-build-id +readelf -SW $t/exe2 | grep -Fq .gnu_debuglink + +sleep 1 + +gdb $t/exe1 -ex 'list main' -ex 'quit' | grep -Fq printf +gdb $t/exe2 -ex 'list main' -ex 'quit' | grep -Fq printf diff --git a/test/elf/x86_64_note.sh b/test/elf/x86_64_note.sh index cff814b48b..51aa68d4a1 100755 --- a/test/elf/x86_64_note.sh +++ b/test/elf/x86_64_note.sh @@ -37,5 +37,5 @@ grep -Eq '.note.baz\s+NOTE.+000008 00 A 0 0 8' $t/log grep -Eq '.note.nonalloc\s+NOTE.+000008 00 0 0 1' $t/log readelf --segments $t/exe > $t/log -grep -Fq '01 .note.baz .note.foo .note.bar' $t/log +grep -Fq '01 .note.bar .note.baz .note.foo' $t/log ! 
grep -q 'NOTE.*0x0000000000000000 0x0000000000000000' $t/log || false diff --git a/test/elf/x86_64_note2.sh b/test/elf/x86_64_note2.sh index 24ebef583a..e2bb303673 100755 --- a/test/elf/x86_64_note2.sh +++ b/test/elf/x86_64_note2.sh @@ -29,4 +29,4 @@ EOF ./mold -o $t/exe $t/a.o $t/b.o $t/c.o $t/d.o readelf --segments $t/exe > $t/log -grep -Fq '01 .note.a .note.c .note.b' $t/log +grep -Fq '01 .note.a .note.b .note.c' $t/log