Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[lld][WebAssembly] Match the ELF linker in transitioning away from archive indexes. #78658

Merged
merged 1 commit into from
Jan 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lld/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,9 @@ MachO Improvements
WebAssembly Improvements
------------------------

* Indexes are no longer required on archive files. Instead, symbol information
is read from object files within the archive. This matches the behaviour of
the ELF linker.

Fixes
#####
14 changes: 0 additions & 14 deletions lld/test/wasm/archive-no-index.s

This file was deleted.

2 changes: 1 addition & 1 deletion lld/test/wasm/bad-archive-member.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux -o %t.dir/elf.o %s
# RUN: llvm-ar rcs %t.dir/libfoo.a %t.dir/elf.o
# RUN: not wasm-ld %t.dir/libfoo.a -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: unknown file type: {{.*}}libfoo.a(elf.o)
# CHECK: warning: {{.*}}libfoo.a: archive member 'elf.o' is neither Wasm object file nor LLVM bitcode

.globl _start
_start:
29 changes: 14 additions & 15 deletions lld/wasm/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,11 @@ void LinkerDriver::addFile(StringRef path) {
if (fs::exists(importFile))
readImportFile(importFile.str());

auto members = getArchiveMembers(mbref);

// Handle -whole-archive.
if (inWholeArchive) {
for (const auto &[m, offset] : getArchiveMembers(mbref)) {
for (const auto &[m, offset] : members) {
auto *object = createObjectFile(m, path, offset);
// Mark object as live; object members are normally not
// live by default but -whole-archive is designed to treat
Expand All @@ -289,12 +291,15 @@ void LinkerDriver::addFile(StringRef path) {
std::unique_ptr<Archive> file =
CHECK(Archive::create(mbref), path + ": failed to parse archive");

if (!file->isEmpty() && !file->hasSymbolTable()) {
error(mbref.getBufferIdentifier() +
": archive has no index; run ranlib to add one");
for (const auto &[m, offset] : members) {
auto magic = identify_magic(m.getBuffer());
if (magic == file_magic::wasm_object || magic == file_magic::bitcode)
files.push_back(createObjectFile(m, path, offset, true));
else
warn(path + ": archive member '" + m.getBufferIdentifier() +
"' is neither Wasm object file nor LLVM bitcode");
}

files.push_back(make<ArchiveFile>(mbref));
return;
}
case file_magic::bitcode:
Expand Down Expand Up @@ -732,16 +737,10 @@ static Symbol *handleUndefined(StringRef name, const char *option) {

static void handleLibcall(StringRef name) {
Symbol *sym = symtab->find(name);
if (!sym)
return;

if (auto *lazySym = dyn_cast<LazySymbol>(sym)) {
MemoryBufferRef mb = lazySym->getMemberBuffer();
if (isBitcode(mb)) {
if (!config->whyExtract.empty())
ctx.whyExtractRecords.emplace_back("<libcall>", sym->getFile(), *sym);
lazySym->extract();
}
if (sym && sym->isLazy() && isa<BitcodeFile>(sym->getFile())) {
if (!config->whyExtract.empty())
ctx.whyExtractRecords.emplace_back("<libcall>", sym->getFile(), *sym);
cast<LazySymbol>(sym)->extract();
}
}

Expand Down
101 changes: 49 additions & 52 deletions lld/wasm/InputFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,26 +75,19 @@ std::optional<MemoryBufferRef> readFile(StringRef path) {
}

InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive) {
uint64_t offsetInArchive, bool lazy) {
file_magic magic = identify_magic(mb.getBuffer());
if (magic == file_magic::wasm_object) {
std::unique_ptr<Binary> bin =
CHECK(createBinary(mb), mb.getBufferIdentifier());
auto *obj = cast<WasmObjectFile>(bin.get());
if (obj->isSharedObject())
return make<SharedFile>(mb);
return make<ObjFile>(mb, archiveName);
return make<ObjFile>(mb, archiveName, lazy);
}

if (magic == file_magic::bitcode)
return make<BitcodeFile>(mb, archiveName, offsetInArchive);

std::string name = mb.getBufferIdentifier().str();
if (!archiveName.empty()) {
name = archiveName.str() + "(" + name + ")";
}

fatal("unknown file type: " + name);
assert(magic == file_magic::bitcode);
return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy);
}

// Relocations contain either symbol or type indices. This function takes a
Expand Down Expand Up @@ -391,9 +384,30 @@ static bool shouldMerge(const WasmSegment &seg) {
return true;
}

void ObjFile::parse(bool ignoreComdats) {
// Parse a memory buffer as a wasm file.
LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
// Registers each symbol that this not-yet-extracted object file defines as a
// lazy symbol in the global symbol table.
void ObjFile::parseLazy() {
  LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << "\n");
  for (const SymbolRef &symRef : wasmObj->symbols()) {
    const WasmSymbol &candidate =
        wasmObj->getWasmSymbol(symRef.getRawDataRefImpl());
    // Only symbols defined by this file are registered; skip the rest.
    if (candidate.isDefined()) {
      symtab->addLazy(candidate.Info.Name, this);
      // addLazy() may trigger this->extract() when the existing symbol is
      // undefined. In that case this function has done its job, so stop
      // scanning as soon as the file is no longer lazy.
      if (!lazy)
        return;
    }
  }
}

ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy)
: InputFile(ObjectKind, m) {
this->lazy = lazy;
this->archiveName = std::string(archiveName);

// If this isn't part of an archive, it's eagerly linked, so mark it live.
if (archiveName.empty())
markLive();

std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this));

auto *obj = dyn_cast<WasmObjectFile>(bin.get());
Expand All @@ -406,6 +420,11 @@ void ObjFile::parse(bool ignoreComdats) {
wasmObj.reset(obj);

checkArch(obj->getArch());
}

void ObjFile::parse(bool ignoreComdats) {
// Parse a memory buffer as a wasm file.
LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n");

// Build up a map of function indices to table indices for use when
// verifying the existing table index relocations
Expand Down Expand Up @@ -717,43 +736,6 @@ void StubFile::parse() {
}
}

void ArchiveFile::parse() {
// Parse a MemoryBufferRef as an archive file.
LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
file = CHECK(Archive::create(mb), toString(this));

// Read the symbol table to construct Lazy symbols.
int count = 0;
for (const Archive::Symbol &sym : file->symbols()) {
symtab->addLazy(this, &sym);
++count;
}
LLVM_DEBUG(dbgs() << "Read " << count << " symbols\n");
(void) count;
}

// Loads the archive member that defines `sym` and adds it to the symbol
// table. Each member is loaded at most once.
void ArchiveFile::addMember(const Archive::Symbol *sym) {
  const Archive::Child &member =
      CHECK(sym->getMember(),
            "could not get the member for symbol " + sym->getName());
  const uint64_t memberOffset = member.getChildOffset();

  // Members are identified by their offset within the archive. A repeated
  // request for the same member (e.g. when members reference each other) is
  // ignored.
  const bool firstVisit = seen.insert(memberOffset).second;
  if (!firstVisit)
    return;

  LLVM_DEBUG(dbgs() << "loading lazy: " << sym->getName() << "\n"
                    << "from archive: " << toString(this) << "\n");

  MemoryBufferRef memberBuffer =
      CHECK(member.getMemoryBufferRef(),
            "could not get the buffer for the member defining symbol " +
                sym->getName());

  InputFile *extracted =
      createObjectFile(memberBuffer, getName(), memberOffset);
  symtab->addFile(extracted, sym->getName());
}

static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
switch (gvVisibility) {
case GlobalValue::DefaultVisibility:
Expand Down Expand Up @@ -790,8 +772,9 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats,
}

BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
uint64_t offsetInArchive)
uint64_t offsetInArchive, bool lazy)
: InputFile(BitcodeKind, m) {
this->lazy = lazy;
this->archiveName = std::string(archiveName);

std::string path = mb.getBufferIdentifier().str();
Expand All @@ -817,6 +800,20 @@ BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,

bool BitcodeFile::doneLTO = false;

// Registers each symbol that this not-yet-extracted bitcode file defines as a
// lazy symbol in the global symbol table.
void BitcodeFile::parseLazy() {
  // The symbol index is not needed here, so iterate the symbols directly
  // rather than through llvm::enumerate().
  for (const auto &irSym : obj->symbols()) {
    // Undefined symbols are references, not definitions; skip them.
    if (irSym.isUndefined())
      continue;
    // NOTE(review): the name is copied through saver(), presumably so the
    // StringRef handed to the symbol table has stable storage -- confirm.
    StringRef name = saver().save(irSym.getName());
    symtab->addLazy(name, this);
    // addLazy() may trigger this->extract() if an existing symbol is an
    // undefined symbol. If that happens, this function has served its purpose,
    // and we can exit from the loop early.
    if (!lazy)
      break;
  }
}

void BitcodeFile::parse(StringRef symName) {
if (doneLTO) {
error(toString(this) + ": attempt to add bitcode file after LTO (" + symName + ")");
Expand Down
41 changes: 10 additions & 31 deletions lld/wasm/InputFiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/TargetParser/Triple.h"
Expand Down Expand Up @@ -45,7 +44,6 @@ class InputFile {
enum Kind {
ObjectKind,
SharedKind,
ArchiveKind,
BitcodeKind,
StubKind,
};
Expand All @@ -69,6 +67,11 @@ class InputFile {
void markLive() { live = true; }
bool isLive() const { return live; }

// True if this file exists as a member of an archive file and has not yet
// been extracted.
// TODO(sbc): Use this to implement --start-lib/--end-lib.
bool lazy = false;

protected:
InputFile(Kind k, MemoryBufferRef m)
: mb(m), fileKind(k), live(!config->gcSections) {}
Expand All @@ -85,35 +88,14 @@ class InputFile {
bool live;
};

// .a file (ar archive).
//
// Wraps an archive whose symbol table is turned into Lazy symbols by parse();
// individual members are loaded on demand through addMember().
class ArchiveFile : public InputFile {
public:
// Wraps the buffer `m`; the archive itself is not parsed until parse().
explicit ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {}
// LLVM-style RTTI hook: true when `f` is an ArchiveFile.
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }

// Loads the archive member that defines `sym` and adds it to the symbol
// table. A member that has already been loaded is skipped.
void addMember(const llvm::object::Archive::Symbol *sym);

// Parses the buffer as an archive and adds a Lazy symbol for each entry in
// its symbol table.
void parse();

private:
// The parsed archive; assigned by parse().
std::unique_ptr<llvm::object::Archive> file;
// Child offsets of the members already loaded by addMember().
llvm::DenseSet<uint64_t> seen;
};

// .o file (wasm object file)
class ObjFile : public InputFile {
public:
explicit ObjFile(MemoryBufferRef m, StringRef archiveName)
: InputFile(ObjectKind, m) {
this->archiveName = std::string(archiveName);

// If this isn't part of an archive, it's eagerly linked, so mark it live.
if (archiveName.empty())
markLive();
}
ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy = false);
static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }

void parse(bool ignoreComdats = false);
void parseLazy();

// Returns the underlying wasm file.
const WasmObjectFile *getWasmObj() const { return wasmObj.get(); }
Expand Down Expand Up @@ -173,10 +155,11 @@ class SharedFile : public InputFile {
class BitcodeFile : public InputFile {
public:
BitcodeFile(MemoryBufferRef m, StringRef archiveName,
uint64_t offsetInArchive);
uint64_t offsetInArchive, bool lazy);
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }

void parse(StringRef symName);
void parseLazy();
std::unique_ptr<llvm::lto::InputFile> obj;

// Set to true once LTO is complete in order to prevent further bitcode objects
Expand All @@ -196,14 +179,10 @@ class StubFile : public InputFile {
llvm::DenseMap<StringRef, std::vector<StringRef>> symbolDependencies;
};

// Reports whether the magic bytes of `mb` identify it as LLVM bitcode.
inline bool isBitcode(MemoryBufferRef mb) {
  const llvm::file_magic detected = identify_magic(mb.getBuffer());
  return detected == llvm::file_magic::bitcode;
}

// Will report a fatal() error if the input buffer is not a valid bitcode
// or wasm object file.
InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "",
uint64_t offsetInArchive = 0);
uint64_t offsetInArchive = 0, bool lazy = false);

// Opens a given file.
std::optional<MemoryBufferRef> readFile(StringRef path);
Expand Down
23 changes: 13 additions & 10 deletions lld/wasm/SymbolTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,13 @@ SymbolTable *symtab;
void SymbolTable::addFile(InputFile *file, StringRef symName) {
log("Processing: " + toString(file));

// .a file
if (auto *f = dyn_cast<ArchiveFile>(file)) {
f->parse();
// Lazy object file
if (file->lazy) {
if (auto *f = dyn_cast<BitcodeFile>(file)) {
f->parseLazy();
} else {
cast<ObjFile>(file)->parseLazy();
}
return;
}

Expand Down Expand Up @@ -737,16 +741,15 @@ TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) {
return nullptr;
}

void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
LLVM_DEBUG(dbgs() << "addLazy: " << sym->getName() << "\n");
StringRef name = sym->getName();
void SymbolTable::addLazy(StringRef name, InputFile *file) {
LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n");

Symbol *s;
bool wasInserted;
std::tie(s, wasInserted) = insertName(name);

if (wasInserted) {
replaceSymbol<LazySymbol>(s, name, 0, file, *sym);
replaceSymbol<LazySymbol>(s, name, 0, file);
return;
}

Expand All @@ -763,15 +766,15 @@ void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) {
if (auto *f = dyn_cast<UndefinedFunction>(s))
oldSig = f->signature;
LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n");
auto newSym = replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK,
file, *sym);
auto newSym =
replaceSymbol<LazySymbol>(s, name, WASM_SYMBOL_BINDING_WEAK, file);
newSym->signature = oldSig;
return;
}

LLVM_DEBUG(dbgs() << "replacing existing undefined\n");
const InputFile *oldFile = s->getFile();
file->addMember(sym);
replaceSymbol<LazySymbol>(s, name, 0, file)->extract();
if (!config->whyExtract.empty())
ctx.whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s);
}
Expand Down
Loading
Loading