diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index c322b776ff58f6..01669543cd50ca 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -50,5 +50,9 @@ MachO Improvements WebAssembly Improvements ------------------------ +* Indexes are no longer required on archive files. Instead symbol information + is read from object files within the archive. This matches the behaviour of + the ELF linker. + Fixes ##### diff --git a/lld/test/wasm/archive-no-index.s b/lld/test/wasm/archive-no-index.s deleted file mode 100644 index 99ca5a367d3c6d..00000000000000 --- a/lld/test/wasm/archive-no-index.s +++ /dev/null @@ -1,14 +0,0 @@ -# Tests error on archive file without a symbol table -# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: llvm-as -o %t.archive.o %S/Inputs/archive1.ll -# RUN: rm -f %t.a -# RUN: llvm-ar crS %t.a %t.archive.o - -# RUN: not wasm-ld -o out.wasm %t.o %t.a 2>&1 | FileCheck %s - - .globl _start -_start: - .functype _start () -> () - end_function - -# CHECK: archive has no index; run ranlib to add one diff --git a/lld/test/wasm/bad-archive-member.s b/lld/test/wasm/bad-archive-member.s index 029027a8517a36..77bf16871ca5b5 100644 --- a/lld/test/wasm/bad-archive-member.s +++ b/lld/test/wasm/bad-archive-member.s @@ -5,7 +5,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux -o %t.dir/elf.o %s # RUN: llvm-ar rcs %t.dir/libfoo.a %t.dir/elf.o # RUN: not wasm-ld %t.dir/libfoo.a -o /dev/null 2>&1 | FileCheck %s -# CHECK: error: unknown file type: {{.*}}libfoo.a(elf.o) +# CHECK: warning: {{.*}}libfoo.a: archive member 'elf.o' is neither Wasm object file nor LLVM bitcode .globl _start _start: diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 32c042b5695a4b..edf1979c1d3025 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -272,9 +272,11 @@ void LinkerDriver::addFile(StringRef path) { if (fs::exists(importFile)) readImportFile(importFile.str()); + auto members = 
getArchiveMembers(mbref); + // Handle -whole-archive. if (inWholeArchive) { - for (const auto &[m, offset] : getArchiveMembers(mbref)) { + for (const auto &[m, offset] : members) { auto *object = createObjectFile(m, path, offset); // Mark object as live; object members are normally not // live by default but -whole-archive is designed to treat @@ -289,12 +291,15 @@ void LinkerDriver::addFile(StringRef path) { std::unique_ptr file = CHECK(Archive::create(mbref), path + ": failed to parse archive"); - if (!file->isEmpty() && !file->hasSymbolTable()) { - error(mbref.getBufferIdentifier() + - ": archive has no index; run ranlib to add one"); + for (const auto &[m, offset] : members) { + auto magic = identify_magic(m.getBuffer()); + if (magic == file_magic::wasm_object || magic == file_magic::bitcode) + files.push_back(createObjectFile(m, path, offset, true)); + else + warn(path + ": archive member '" + m.getBufferIdentifier() + + "' is neither Wasm object file nor LLVM bitcode"); } - files.push_back(make(mbref)); return; } case file_magic::bitcode: @@ -732,16 +737,10 @@ static Symbol *handleUndefined(StringRef name, const char *option) { static void handleLibcall(StringRef name) { Symbol *sym = symtab->find(name); - if (!sym) - return; - - if (auto *lazySym = dyn_cast(sym)) { - MemoryBufferRef mb = lazySym->getMemberBuffer(); - if (isBitcode(mb)) { - if (!config->whyExtract.empty()) - ctx.whyExtractRecords.emplace_back("", sym->getFile(), *sym); - lazySym->extract(); - } + if (sym && sym->isLazy() && isa(sym->getFile())) { + if (!config->whyExtract.empty()) + ctx.whyExtractRecords.emplace_back("", sym->getFile(), *sym); + cast(sym)->extract(); } } diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp index 19c76e49027896..f5e946aca8b2a8 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -75,7 +75,7 @@ std::optional readFile(StringRef path) { } InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive) 
{ + uint64_t offsetInArchive, bool lazy) { file_magic magic = identify_magic(mb.getBuffer()); if (magic == file_magic::wasm_object) { std::unique_ptr bin = @@ -83,18 +83,11 @@ InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName, auto *obj = cast(bin.get()); if (obj->isSharedObject()) return make(mb); - return make(mb, archiveName); + return make(mb, archiveName, lazy); } - if (magic == file_magic::bitcode) - return make(mb, archiveName, offsetInArchive); - - std::string name = mb.getBufferIdentifier().str(); - if (!archiveName.empty()) { - name = archiveName.str() + "(" + name + ")"; - } - - fatal("unknown file type: " + name); + assert(magic == file_magic::bitcode); + return make(mb, archiveName, offsetInArchive, lazy); } // Relocations contain either symbol or type indices. This function takes a @@ -391,9 +384,30 @@ static bool shouldMerge(const WasmSegment &seg) { return true; } -void ObjFile::parse(bool ignoreComdats) { - // Parse a memory buffer as a wasm file. - LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); +void ObjFile::parseLazy() { + LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << "\n"); + for (const SymbolRef &sym : wasmObj->symbols()) { + const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl()); + if (!wasmSym.isDefined()) + continue; + symtab->addLazy(wasmSym.Info.Name, this); + // addLazy() may trigger this->extract() if an existing symbol is an + // undefined symbol. If that happens, this function has served its purpose, + // and we can exit from the loop early. + if (!lazy) + break; + } +} + +ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy) + : InputFile(ObjectKind, m) { + this->lazy = lazy; + this->archiveName = std::string(archiveName); + + // If this isn't part of an archive, it's eagerly linked, so mark it live. 
+ if (archiveName.empty()) + markLive(); + std::unique_ptr bin = CHECK(createBinary(mb), toString(this)); auto *obj = dyn_cast(bin.get()); @@ -406,6 +420,11 @@ void ObjFile::parse(bool ignoreComdats) { wasmObj.reset(obj); checkArch(obj->getArch()); +} + +void ObjFile::parse(bool ignoreComdats) { + // Parse a memory buffer as a wasm file. + LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n"); // Build up a map of function indices to table indices for use when // verifying the existing table index relocations @@ -717,43 +736,6 @@ void StubFile::parse() { } } -void ArchiveFile::parse() { - // Parse a MemoryBufferRef as an archive file. - LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); - file = CHECK(Archive::create(mb), toString(this)); - - // Read the symbol table to construct Lazy symbols. - int count = 0; - for (const Archive::Symbol &sym : file->symbols()) { - symtab->addLazy(this, &sym); - ++count; - } - LLVM_DEBUG(dbgs() << "Read " << count << " symbols\n"); - (void) count; -} - -void ArchiveFile::addMember(const Archive::Symbol *sym) { - const Archive::Child &c = - CHECK(sym->getMember(), - "could not get the member for symbol " + sym->getName()); - - // Don't try to load the same member twice (this can happen when members - // mutually reference each other). 
- if (!seen.insert(c.getChildOffset()).second) - return; - - LLVM_DEBUG(dbgs() << "loading lazy: " << sym->getName() << "\n"); - LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n"); - - MemoryBufferRef mb = - CHECK(c.getMemoryBufferRef(), - "could not get the buffer for the member defining symbol " + - sym->getName()); - - InputFile *obj = createObjectFile(mb, getName(), c.getChildOffset()); - symtab->addFile(obj, sym->getName()); -} - static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { switch (gvVisibility) { case GlobalValue::DefaultVisibility: @@ -790,8 +772,9 @@ static Symbol *createBitcodeSymbol(const std::vector &keptComdats, } BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName, - uint64_t offsetInArchive) + uint64_t offsetInArchive, bool lazy) : InputFile(BitcodeKind, m) { + this->lazy = lazy; this->archiveName = std::string(archiveName); std::string path = mb.getBufferIdentifier().str(); @@ -817,6 +800,20 @@ BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName, bool BitcodeFile::doneLTO = false; +void BitcodeFile::parseLazy() { + for (auto [i, irSym] : llvm::enumerate(obj->symbols())) { + if (irSym.isUndefined()) + continue; + StringRef name = saver().save(irSym.getName()); + symtab->addLazy(name, this); + // addLazy() may trigger this->extract() if an existing symbol is an + // undefined symbol. If that happens, this function has served its purpose, + // and we can exit from the loop early. 
+ if (!lazy) + break; + } +} + void BitcodeFile::parse(StringRef symName) { if (doneLTO) { error(toString(this) + ": attempt to add bitcode file after LTO (" + symName + ")"); diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h index d9a8b530660324..fd3d5e5ef47967 100644 --- a/lld/wasm/InputFiles.h +++ b/lld/wasm/InputFiles.h @@ -14,7 +14,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/LTO/LTO.h" -#include "llvm/Object/Archive.h" #include "llvm/Object/Wasm.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/TargetParser/Triple.h" @@ -45,7 +44,6 @@ class InputFile { enum Kind { ObjectKind, SharedKind, - ArchiveKind, BitcodeKind, StubKind, }; @@ -69,6 +67,11 @@ class InputFile { void markLive() { live = true; } bool isLive() const { return live; } + // True if this file exists as a member of an archive file and has not yet + // been extracted. + // TODO(sbc): Use this to implement --start-lib/--end-lib. + bool lazy = false; + protected: InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k), live(!config->gcSections) {} @@ -85,35 +88,14 @@ class InputFile { bool live; }; -// .a file (ar archive) -class ArchiveFile : public InputFile { -public: - explicit ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {} - static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; } - - void addMember(const llvm::object::Archive::Symbol *sym); - - void parse(); - -private: - std::unique_ptr file; - llvm::DenseSet seen; -}; - // .o file (wasm object file) class ObjFile : public InputFile { public: - explicit ObjFile(MemoryBufferRef m, StringRef archiveName) - : InputFile(ObjectKind, m) { - this->archiveName = std::string(archiveName); - - // If this isn't part of an archive, it's eagerly linked, so mark it live. 
- if (archiveName.empty()) - markLive(); - } + ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy = false); static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } void parse(bool ignoreComdats = false); + void parseLazy(); // Returns the underlying wasm file. const WasmObjectFile *getWasmObj() const { return wasmObj.get(); } @@ -173,10 +155,11 @@ class SharedFile : public InputFile { class BitcodeFile : public InputFile { public: BitcodeFile(MemoryBufferRef m, StringRef archiveName, - uint64_t offsetInArchive); + uint64_t offsetInArchive, bool lazy); static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } void parse(StringRef symName); + void parseLazy(); std::unique_ptr obj; // Set to true once LTO is complete in order prevent further bitcode objects @@ -196,14 +179,10 @@ class StubFile : public InputFile { llvm::DenseMap> symbolDependencies; }; -inline bool isBitcode(MemoryBufferRef mb) { - return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; -} - // Will report a fatal() error if the input buffer is not a valid bitcode // or wasm object file. InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", - uint64_t offsetInArchive = 0); + uint64_t offsetInArchive = 0, bool lazy = false); // Opens a given file. 
std::optional readFile(StringRef path); diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp index 9988490e14b0bc..c98aa3ee3a7a32 100644 --- a/lld/wasm/SymbolTable.cpp +++ b/lld/wasm/SymbolTable.cpp @@ -26,9 +26,13 @@ SymbolTable *symtab; void SymbolTable::addFile(InputFile *file, StringRef symName) { log("Processing: " + toString(file)); - // .a file - if (auto *f = dyn_cast(file)) { - f->parse(); + // Lazy object file + if (file->lazy) { + if (auto *f = dyn_cast(file)) { + f->parseLazy(); + } else { + cast(file)->parseLazy(); + } return; } @@ -737,16 +741,15 @@ TableSymbol *SymbolTable::resolveIndirectFunctionTable(bool required) { return nullptr; } -void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) { - LLVM_DEBUG(dbgs() << "addLazy: " << sym->getName() << "\n"); - StringRef name = sym->getName(); +void SymbolTable::addLazy(StringRef name, InputFile *file) { + LLVM_DEBUG(dbgs() << "addLazy: " << name << "\n"); Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insertName(name); if (wasInserted) { - replaceSymbol(s, name, 0, file, *sym); + replaceSymbol(s, name, 0, file); return; } @@ -763,15 +766,15 @@ void SymbolTable::addLazy(ArchiveFile *file, const Archive::Symbol *sym) { if (auto *f = dyn_cast(s)) oldSig = f->signature; LLVM_DEBUG(dbgs() << "replacing existing weak undefined symbol\n"); - auto newSym = replaceSymbol(s, name, WASM_SYMBOL_BINDING_WEAK, - file, *sym); + auto newSym = + replaceSymbol(s, name, WASM_SYMBOL_BINDING_WEAK, file); newSym->signature = oldSig; return; } LLVM_DEBUG(dbgs() << "replacing existing undefined\n"); const InputFile *oldFile = s->getFile(); - file->addMember(sym); + replaceSymbol(s, name, 0, file)->extract(); if (!config->whyExtract.empty()) ctx.whyExtractRecords.emplace_back(toString(oldFile), s->getFile(), *s); } diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h index c5518ee23da26d..42ebb8be8eb3f8 100644 --- a/lld/wasm/SymbolTable.h +++ b/lld/wasm/SymbolTable.h @@ -83,7 +83,7 
@@ class SymbolTable { TableSymbol *resolveIndirectFunctionTable(bool required); - void addLazy(ArchiveFile *f, const llvm::object::Archive::Symbol *sym); + void addLazy(StringRef name, InputFile *f); bool addComdat(StringRef name); diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index 47d8d09ab1bd42..ace6bade02d436 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -13,6 +13,7 @@ #include "InputFiles.h" #include "OutputSections.h" #include "OutputSegment.h" +#include "SymbolTable.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/Demangle/Demangle.h" @@ -426,23 +427,16 @@ const OutputSectionSymbol *SectionSymbol::getOutputSectionSymbol() const { } void LazySymbol::extract() { - cast(file)->addMember(&archiveSymbol); + if (file->lazy) { + file->lazy = false; + symtab->addFile(file, name); + } } void LazySymbol::setWeak() { flags |= (flags & ~WASM_SYMBOL_BINDING_MASK) | WASM_SYMBOL_BINDING_WEAK; } -MemoryBufferRef LazySymbol::getMemberBuffer() { - Archive::Child c = - CHECK(archiveSymbol.getMember(), - "could not get the member for symbol " + toString(*this)); - - return CHECK(c.getMemoryBufferRef(), - "could not get the buffer for the member defining symbol " + - toString(*this)); -} - void printTraceSymbolUndefined(StringRef name, const InputFile* file) { message(toString(file) + ": reference to " + name); } diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index 69ebfdb5bb356e..de52c92d34e78b 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -497,14 +497,12 @@ class UndefinedTag : public TagSymbol { // symbols into consideration. 
class LazySymbol : public Symbol { public: - LazySymbol(StringRef name, uint32_t flags, InputFile *file, - const llvm::object::Archive::Symbol &sym) - : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {} + LazySymbol(StringRef name, uint32_t flags, InputFile *file) + : Symbol(name, LazyKind, flags, file) {} static bool classof(const Symbol *s) { return s->kind() == LazyKind; } void extract(); void setWeak(); - MemoryBufferRef getMemberBuffer(); // Lazy symbols can have a signature because they can replace an // UndefinedFunction in which case we need to be able to preserve the @@ -512,9 +510,6 @@ class LazySymbol : public Symbol { // TODO(sbc): This repetition of the signature field is inelegant. Revisit // the use of class hierarchy to represent symbol taxonomy. const WasmSignature *signature = nullptr; - -private: - llvm::object::Archive::Symbol archiveSymbol; }; // linker-generated symbols