Skip to content

Commit

Permalink
Add --separate-debug-info
Browse files Browse the repository at this point in the history
This option separates debug info into a different file. The debug
info file's filename is stored in the main output file's .gnu_debuglink
section. gdb can read the section contents and follow the link to
find debug info in another file.

Fixes #1294
  • Loading branch information
rui314 committed Jul 8, 2024
1 parent cd3b817 commit 596ffa9
Show file tree
Hide file tree
Showing 11 changed files with 426 additions and 10 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ endforeach()
# Add other non-template source files.
target_sources(mold PRIVATE
common/compress.cc
common/crc32.cc
common/demangle.cc
common/filepath.cc
common/glob.cc
Expand Down
7 changes: 7 additions & 0 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,13 @@ std::optional<std::string_view> demangle_rust(std::string_view name);
void acquire_global_lock();
void release_global_lock();

//
// crc32.cc
//

// Computes the CRC32 of the `len` bytes starting at `buf`, continuing
// from the running CRC value `crc`. The input is processed in parallel.
u32 compute_crc32(u32 crc, u8 *buf, i64 len);
// Returns a binary blob that, when appended to data whose CRC32 is
// `current`, makes the CRC32 of the whole data equal to `want`.
std::vector<u8> crc32_solve(i64 datalen, u32 current, u32 want);

//
// compress.cc
//
Expand Down
197 changes: 197 additions & 0 deletions common/crc32.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
// This file contains a function to "forge" a CRC. That is, given a piece
// of data and a desired CRC32 value, crc32_solve() returns a binary blob
// to add to the end of the original data to yield the desired CRC32
// value. Trailing garbage is ignored by many binary file formats, so
// you can create a file with a desired CRC using crc32_solve(). We need
// it for --separate-debug-file.
//
// The code in this file is based on Mark Adler's "spoof" program. You can
// obtain the original copy of it at the following URL:
//
// https://github.com/madler/spoof/blob/master/spoof.c
//
// Below is the original license:

/* spoof.c -- modify a message to have a desired CRC

Copyright (C) 2012, 2014, 2016, 2018, 2021 Mark Adler

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the
use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim
that you wrote the original software. If you use this software in a
product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.

Mark Adler
madler@alumni.caltech.edu

*/

#include "common.h"

#include <tbb/parallel_for_each.h>
#include <zlib.h>

namespace mold {

// Width of the CRC register in bits; the matrices below are deg x deg.
static constexpr i64 deg = 32;
// CRC-32 generator polynomial in bit-reflected form. It seeds the
// one-zero-bit operator built in get_crc_zero_powers().
static constexpr u32 poly = 0xedb88320;

// A square matrix over GF(2). Element i is the bit vector that
// gf2_matrix_times() XORs into the product when bit i of the input
// vector is set.
using Mat = std::array<u32, deg>;

// Multiply a GF(2) matrix by a bit vector.
//
// Each set bit i of `vec` selects mat[i]; the selected entries are
// XORed together (XOR is addition over GF(2)). The walk stops as soon
// as no set bits remain in `vec`.
static constexpr u32 gf2_matrix_times(const Mat &mat, u32 vec) {
  u32 product = 0;
  i64 col = 0;

  while (vec != 0) {
    if (vec & 1)
      product ^= mat[col];
    vec >>= 1;
    col++;
  }
  return product;
}

// Multiply a GF(2) matrix by itself.
//
// Entry i of the result is the matrix applied to its own entry i, so
// applying the result once is the same as applying `mat` twice.
static constexpr Mat gf2_matrix_square(const Mat &mat) {
  Mat squared;
  i64 col = 0;

  for (u32 column : mat)
    squared[col++] = gf2_matrix_times(mat, column);
  return squared;
}

// Build, at compile time, the table of operators used by crc_zeros().
//
// Entry 0 advances a CRC register over one zero byte, and every
// following entry is the square of the previous one, so entry n
// advances the register over 2^n zero bytes.
static consteval std::array<Mat, 64> get_crc_zero_powers() {
  // Operator for a single zero bit: bit 0 maps to the polynomial and
  // every other bit maps to the next lower bit.
  Mat op{};
  op[0] = poly;
  for (i64 n = 1; n < deg; n++)
    op[n] = 1 << (n - 1);

  // Squaring three times turns the 1-bit operator into the 8-bit
  // (one byte) operator: 1 -> 2 -> 4 -> 8 bits.
  for (i64 round = 0; round < 3; round++)
    op = gf2_matrix_square(op);

  std::array<Mat, 64> table;
  table[0] = op;
  for (i64 i = 1; i < 64; i++)
    table[i] = gf2_matrix_square(table[i - 1]);
  return table;
}

// Efficiently apply len zero bytes to crc, returning the resulting crc.
// Advance the CRC register `crc` as if `len` zero bytes had been fed
// into it, and return the new register value.
//
// The cost is proportional to the number of bits in `len` rather than
// to `len` itself: for every set bit n of `len`, the precomputed
// operator for 2^n zero bytes is applied once.
static u32 crc_zeros(u32 crc, i64 len) {
  static constexpr std::array<Mat, 64> power = get_crc_zero_powers();

  // An all-zero register is returned as is.
  if (crc == 0)
    return crc;

  for (i64 bit = 0; len != 0; bit++, len >>= 1)
    if (len & 1)
      crc = gf2_matrix_times(power[bit], crc);
  return crc;
}

// Solve M x = c for x
static std::vector<bool> gf2_matrix_solve(std::vector<u32> M, u32 c) {
i64 cols = M.size();
i64 rows = deg;

// create adjoining identity matrix
std::vector<std::vector<bool>> inv(cols);
for (i64 i = 0; i < cols; i++) {
inv[i].resize(cols);
inv[i][i] = 1;
}

for (i64 j = 0; j < rows; j++) {
u32 pos = 1 << j;

if ((M[j] & pos) == 0) {
i64 k;
for (k = j + 1; k < cols; k++)
if (M[k] & pos)
break;

if (k == cols) {
std::cerr << "mold: internal error: crc32_solve: no solution\n";
exit(1);
}

std::swap(M[j], M[k]);
std::swap(inv[j], inv[k]);
}

for (i64 k = 0; k < cols; k++) {
if (k != j && (M[k] & pos)) {
M[k] ^= M[j];
for (i64 i = 0; i < cols; i++)
inv[k][i] = inv[k][i] ^ inv[j][i];
}
}
}

// multiply inverse by c to get result x
std::vector<bool> x(cols);
for (i64 j = 0; c; c >>= 1, j++)
if (c & 1)
for (i64 i = 0; i < cols; i++)
x[i] = x[i] ^ inv[j][i];
return x;
}

// Compute a CRC for given data in parallel
// Compute the CRC32 of `len` bytes at `buf` in parallel, continuing
// from the running CRC `crc`.
//
// The input is cut into pieces of at most 1 MiB. Each piece gets its
// own CRC computed independently, and the per-piece CRCs are then
// folded into `crc` in input order with crc32_combine().
u32 compute_crc32(u32 crc, u8 *buf, i64 len) {
  struct Chunk {
    u8 *data;
    i64 size;
    u32 sum;
  };

  constexpr i64 chunk_size = 1024 * 1024; // 1 MiB
  std::vector<Chunk> chunks;

  u8 *cursor = buf;
  for (i64 remaining = len; remaining > 0;) {
    i64 n = std::min(remaining, chunk_size);
    chunks.push_back({cursor, n, 0});
    cursor += n;
    remaining -= n;
  }

  // Every chunk starts from a zero CRC; combining happens afterwards.
  tbb::parallel_for_each(chunks.begin(), chunks.end(), [](Chunk &chunk) {
    chunk.sum = crc32_z(0, chunk.data, chunk.size);
  });

  for (Chunk &chunk : chunks)
    crc = crc32_combine(crc, chunk.sum, chunk.size);
  return crc;
}

// Given input data and a desired CRC value, this function returns
// a binary blob such that if the blob is appended to the end of the
// input data, the entire data's CRC value becomes the desired CRC.
// Forge a trailer for a desired CRC32.
//
// `current` is the CRC32 of the existing data and `desired` is the
// CRC32 the data should have once the returned blob is appended to it.
// The blob is always 16 bytes long.
std::vector<u8> crc32_solve(i64 datalen, u32 current, u32 desired) {
  constexpr i64 trailer_len = 16;
  constexpr i64 trailer_bits = trailer_len * 8;

  // CRC of the data followed by an all-zero trailer
  current = ~crc_zeros(~current, trailer_len);

  // Starting value for the per-bit CRCs below. It is the same for every
  // bit, so it is evaluated once.
  u32 start = ~crc_zeros(0, datalen);

  // One CRC per trailer bit: the CRC of a trailer with only that bit set
  std::vector<u32> mat;
  for (i64 bit = 0; bit < trailer_bits; bit++) {
    u8 probe[trailer_len] = {};
    probe[bit / 8] = 1 << (bit % 8);
    mat.push_back(~crc32_z(start, probe, sizeof(probe)));
  }

  // Pick the trailer bits whose combined effect turns `current` into
  // `desired`
  std::vector<bool> sol = gf2_matrix_solve(mat, desired ^ current);

  // Pack the selected bits into bytes, least significant bit first
  std::vector<u8> out(trailer_len);
  for (i64 byte = 0; byte < trailer_len; byte++)
    for (i64 b = 0; b < 8; b++)
      if (sol[byte * 8 + b])
        out[byte] |= 1 << b;
  return out;
}

} // namespace mold
16 changes: 16 additions & 0 deletions elf/cmdline.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ inline const char helpmsg[] = R"(
--rpath-link DIR Ignored
--run COMMAND ARG... Run COMMAND with mold as /usr/bin/ld
--section-start=SECTION=ADDR Set address for section
--separate-debug-file[=FILE] Separate debug info to the specified file
--no-separate-debug-file
--shared, --Bshareable Create a shared library
--shuffle-sections[=SEED] Randomize the output by shuffling input sections
--sort-common Ignored
Expand Down Expand Up @@ -526,6 +528,7 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
std::optional<SeparateCodeKind> z_separate_code;
std::optional<bool> report_undefined;
std::optional<bool> z_relro;
std::optional<std::string> separate_debug_file;
std::optional<u64> shuffle_sections_seed;
std::unordered_set<std::string_view> rpaths;

Expand Down Expand Up @@ -1003,6 +1006,12 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
ctx.arg.z_origin = true;
} else if (read_z_flag("nodefaultlib")) {
ctx.arg.z_nodefaultlib = true;
} else if (read_eq("separate-debug-file")) {
separate_debug_file = arg;
} else if (read_flag("separate-debug-file")) {
separate_debug_file = "";
} else if (read_flag("no-separate-debug-file")) {
separate_debug_file.reset();
} else if (read_z_flag("separate-loadable-segments")) {
z_separate_code = SEPARATE_LOADABLE_SEGMENTS;
} else if (read_z_flag("separate-code")) {
Expand Down Expand Up @@ -1394,6 +1403,13 @@ std::vector<std::string> parse_nonpositional_args(Context<E> &ctx) {
ctx.default_version = VER_NDX_LAST_RESERVED + 1;
}

if (separate_debug_file) {
if (separate_debug_file->empty())
ctx.arg.separate_debug_file = ctx.arg.output + ".dbg";
else
ctx.arg.separate_debug_file = *separate_debug_file;
}

if (ctx.arg.shared && warn_shared_textrel)
ctx.arg.warn_textrel = true;

Expand Down
23 changes: 15 additions & 8 deletions elf/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -559,14 +559,17 @@ int elf_main(int argc, char **argv) {
// Compute the is_weak bit for each imported symbol.
compute_imported_symbol_weakness(ctx);

// Compute sizes of output sections while assigning offsets
// within an output section to input sections.
compute_section_sizes(ctx);

// Sort sections by section attributes so that we'll have to
// create as few segments as possible.
sort_output_sections(ctx);

if (!ctx.arg.separate_debug_file.empty())
separate_debug_sections(ctx);

// Compute sizes of output sections while assigning offsets
// within an output section to input sections.
compute_section_sizes(ctx);

// If --packed_dyn_relocs=relr was given, base relocations are stored
// to a .relr.dyn section in a compressed form. Construct a compressed
// relocations now so that we can fix section sizes and file layout.
Expand Down Expand Up @@ -659,9 +662,12 @@ int elf_main(int argc, char **argv) {
// .gdb_index's contents cannot be constructed before applying
// relocations to other debug sections. We have relocated debug
// sections now, so write the .gdb_index section.
if (ctx.gdb_index)
if (ctx.gdb_index && ctx.arg.separate_debug_file.empty())
write_gdb_index(ctx);

if (!ctx.arg.separate_debug_file.empty())
write_gnu_debuglink(ctx);

t_copy.stop();
ctx.checkpoint();

Expand All @@ -680,6 +686,9 @@ int elf_main(int argc, char **argv) {
if (ctx.arg.print_map)
print_map(ctx);

if (!ctx.arg.separate_debug_file.empty())
write_separate_debug_file(ctx);

// Show stats numbers
if (ctx.arg.stats)
show_stats(ctx);
Expand All @@ -690,9 +699,7 @@ int elf_main(int argc, char **argv) {
std::cout << std::flush;
std::cerr << std::flush;

if (ctx.arg.fork)
notify_parent();

notify_parent();
release_global_lock();

if (ctx.arg.quick_exit)
Expand Down
24 changes: 24 additions & 0 deletions elf/mold.h
Original file line number Diff line number Diff line change
Expand Up @@ -993,6 +993,22 @@ class NotePropertySection : public Chunk<E> {
std::map<u32, u32> properties;
};

// Output chunk for the .gnu_debuglink section, which is used by
// --separate-debug-file to point from the main output file to the file
// holding its debug info.
template <typename E>
class GnuDebuglinkSection : public Chunk<E> {
public:
// Sets up the section header fields that are known up front: the
// section name, its type (SHT_PROGBITS) and a 4-byte alignment.
GnuDebuglinkSection() {
this->name = ".gnu_debuglink";
this->shdr.sh_type = SHT_PROGBITS;
this->shdr.sh_addralign = 4;
}

// Both overrides are defined outside this header.
void update_shdr(Context<E> &ctx) override;
void copy_buf(Context<E> &ctx) override;

// Presumably the name of the separate debug info file that the section
// refers to -- confirm against copy_buf().
std::string filename;
// Presumably the CRC32 stored alongside the filename; defaults to 0
// until it is set -- confirm against copy_buf().
u32 crc32 = 0;
};

template <typename E>
class GdbIndexSection : public Chunk<E> {
public:
Expand Down Expand Up @@ -1439,11 +1455,14 @@ template <typename E> void apply_version_script(Context<E> &);
template <typename E> void parse_symbol_version(Context<E> &);
template <typename E> void compute_import_export(Context<E> &);
template <typename E> void compute_address_significance(Context<E> &);
template <typename E> void separate_debug_sections(Context<E> &);
template <typename E> void compute_section_headers(Context<E> &);
template <typename E> i64 set_osec_offsets(Context<E> &);
template <typename E> void fix_synthetic_symbols(Context<E> &);
template <typename E> i64 compress_debug_sections(Context<E> &);
template <typename E> void write_build_id(Context<E> &);
template <typename E> void write_gnu_debuglink(Context<E> &);
template <typename E> void write_separate_debug_file(Context<E> &ctx);
template <typename E> void write_dependency_file(Context<E> &);
template <typename E> void show_stats(Context<E> &);

Expand Down Expand Up @@ -1807,6 +1826,7 @@ struct Context {
std::string package_metadata;
std::string plugin;
std::string rpaths;
std::string separate_debug_file;
std::string soname;
std::string sysroot;
std::unique_ptr<std::unordered_set<std::string_view>> retain_symbols_file;
Expand Down Expand Up @@ -1885,6 +1905,9 @@ struct Context {

tbb::concurrent_hash_map<Symbol<E> *, std::vector<std::string>> undef_errors;

// For --separate-debug-file
std::vector<Chunk<E> *> debug_chunks;

// Output chunks
OutputEhdr<E> *ehdr = nullptr;
OutputShdr<E> *shdr = nullptr;
Expand All @@ -1900,6 +1923,7 @@ struct Context {
DynstrSection<E> *dynstr = nullptr;
HashSection<E> *hash = nullptr;
GnuHashSection<E> *gnu_hash = nullptr;
GnuDebuglinkSection<E> *gnu_debuglink = nullptr;
ShstrtabSection<E> *shstrtab = nullptr;
PltSection<E> *plt = nullptr;
PltGotSection<E> *pltgot = nullptr;
Expand Down
Loading

0 comments on commit 596ffa9

Please sign in to comment.