-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[lld][ELF] Add --debug-names to create merged .debug_names. #86508
Conversation
@llvm/pr-subscribers-lld Author: None (cmtice) ChangesUpdate LLD to be able to generate a single merged .debug_names section (rather than a set of appended sections). Option is controlled by a new flag, --debug-names. Does not handle type units yet (coming soon). Patch is 147.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86508.diff 19 Files Affected:
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index bb3608da80b21f..1a7c6dafe17e18 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -228,6 +228,7 @@ struct Config {
bool cref;
llvm::SmallVector<std::pair<llvm::GlobPattern, uint64_t>, 0>
deadRelocInNonAlloc;
+ bool debugNames;
bool demangle = true;
bool dependentLibraries;
bool disableVerify;
diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp
index ac28aa8c7ca6e0..03fac65754f020 100644
--- a/lld/ELF/DWARF.cpp
+++ b/lld/ELF/DWARF.cpp
@@ -40,6 +40,7 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
.Case(".debug_gnu_pubtypes", &gnuPubtypesSection)
.Case(".debug_line", &lineSection)
.Case(".debug_loclists", &loclistsSection)
+ .Case(".debug_names", &debugNamesSection)
.Case(".debug_ranges", &rangesSection)
.Case(".debug_rnglists", &rnglistsSection)
.Case(".debug_str_offsets", &strOffsetsSection)
diff --git a/lld/ELF/DWARF.h b/lld/ELF/DWARF.h
index 1b9a3e3f77943b..e1a830279b54d0 100644
--- a/lld/ELF/DWARF.h
+++ b/lld/ELF/DWARF.h
@@ -68,6 +68,10 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
return gnuPubtypesSection;
}
+ const LLDDWARFSection &getNamesSection() const override {
+ return debugNamesSection;
+ }
+
StringRef getFileName() const override { return ""; }
StringRef getAbbrevSection() const override { return abbrevSection; }
StringRef getStrSection() const override { return strSection; }
@@ -95,6 +99,7 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
LLDDWARFSection strOffsetsSection;
LLDDWARFSection lineSection;
LLDDWARFSection addrSection;
+ LLDDWARFSection debugNamesSection;
StringRef abbrevSection;
StringRef strSection;
StringRef lineStrSection;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 7257ebd0fac994..9a300a54e122bd 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -441,6 +441,8 @@ static void checkOptions() {
error("-r and -pie may not be used together");
if (config->exportDynamic)
error("-r and --export-dynamic may not be used together");
+ if (config->debugNames)
+ error("-r and --debug-names may not be used together");
}
if (config->executeOnly) {
@@ -1231,6 +1233,7 @@ static void readConfigs(opt::InputArgList &args) {
config->cref = args.hasArg(OPT_cref);
config->optimizeBBJumps =
args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false);
+ config->debugNames = args.hasFlag(OPT_debug_names, OPT_no_debug_names, false);
config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true);
config->dependencyFile = args.getLastArgValue(OPT_dependency_file);
config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c5e95d0d25c1ae..d470646ed0eeef 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -153,6 +153,10 @@ def : Flag<["--"], "no-color-diagnostics">, Alias<color_diagnostics>, AliasArgs<
def cref: FF<"cref">,
HelpText<"Output cross reference table. If -Map is specified, print to the map file">;
+defm debug_names: BB<"debug-names",
+ "Generate a merged .debug_names section",
+ "Do not generate a merged .debug_names section (default)">;
+
defm demangle: B<"demangle",
"Demangle symbol names (default)",
"Do not demangle symbol names">;
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index f924756ddddfcd..6b84f4d806f356 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "SyntheticSections.h"
+
#include "Config.h"
#include "DWARF.h"
#include "EhFrame.h"
@@ -34,7 +35,9 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/Support/DJB.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
@@ -2690,6 +2693,759 @@ static uint32_t computeGdbHash(StringRef s) {
return h;
}
+DebugNamesSection::DebugNamesSection()
+ : SyntheticSection(0, SHT_PROGBITS, 1, ".debug_names") {}
+
+template <class ELFT, class RelTy>
+void DebugNamesSection::getNameRelocsImpl(
+ InputSection *sec, ArrayRef<RelTy> rels,
+ llvm::DenseMap<uint32_t, uint32_t> &relocs) {
+ for (auto &rel : rels) {
+ Symbol &sym = sec->getFile<ELFT>()->getRelocTargetSym(rel);
+ relocs[rel.r_offset] = sym.getVA(getAddend<ELFT>(rel));
+ }
+}
+
+template <class ELFT>
+void DebugNamesSection::getNameRelocs(
+ InputSectionBase *base, llvm::DenseMap<uint32_t, uint32_t> &relocs) {
+ auto *sec = cast<InputSection>(base);
+ const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ getNameRelocsImpl<ELFT>(sec, rels.rels, relocs);
+ else
+ getNameRelocsImpl<ELFT>(sec, rels.relas, relocs);
+}
+
+void DebugNamesSection::writeTo(uint8_t *buf) { invokeELFT(writeToImpl, buf); }
+
+template <class ELFT> void DebugNamesSection::writeToImpl(uint8_t *buf) {
+ SmallVector<uint32_t, 0> mergedCuOffsets;
+ SmallVector<uint32_t, 0> mergedTuOffsets;
+ llvm::DenseMap<uint32_t, uint32_t> strOffsets;
+ SmallVector<llvm::DenseMap<uint32_t, uint32_t>, 0> chunksRelocs;
+ chunksRelocs.reserve(numChunks);
+
+ for (size_t i = 0; i < numChunks; ++i) {
+ DebugNamesOutputChunk &chunk = outputChunks[i];
+ InputSectionBase *base = inputDebugNamesSections[i];
+ llvm::DenseMap<uint32_t, uint32_t> relocs;
+ getNameRelocs<ELFT>(base, relocs);
+ chunksRelocs.push_back(std::move(relocs));
+
+ // Update CuOffsets list with new data
+ for (uint32_t cuOffset : chunk.compilationUnits)
+ mergedCuOffsets.push_back(chunk.sec->outSecOff + cuOffset);
+
+ // TODO: Update TuOffsets list with new data
+ }
+
+ // Update the entries with the relocated string offsets.
+ for (auto &stringEntry : mergedEntries) {
+ uint32_t oldOffset = stringEntry.stringOffsetOffset;
+ uint32_t idx = stringEntry.chunkIdx;
+ stringEntry.relocatedEntryOffset = chunksRelocs[idx][oldOffset];
+ }
+
+ // Write out bytes for merged section.
+
+ // Write the header.
+ endian::write32<ELFT::TargetEndianness>(buf + 0, mergedHdr.UnitLength);
+ endian::write16<ELFT::TargetEndianness>(buf + 4, mergedHdr.Version);
+ endian::write32<ELFT::TargetEndianness>(buf + 8, mergedHdr.CompUnitCount);
+ endian::write32<ELFT::TargetEndianness>(buf + 12,
+ mergedHdr.LocalTypeUnitCount);
+ endian::write32<ELFT::TargetEndianness>(buf + 16,
+ mergedHdr.ForeignTypeUnitCount);
+ endian::write32<ELFT::TargetEndianness>(buf + 20, mergedHdr.BucketCount);
+ endian::write32<ELFT::TargetEndianness>(buf + 24, mergedHdr.NameCount);
+ endian::write32<ELFT::TargetEndianness>(buf + 28, mergedHdr.AbbrevTableSize);
+ endian::write32<ELFT::TargetEndianness>(buf + 32,
+ mergedHdr.AugmentationStringSize);
+ buf += 36;
+ memcpy(buf, mergedHdr.AugmentationString.c_str(), 8);
+ buf += 8;
+
+ // Write the CU list.
+ for (uint32_t offset : mergedCuOffsets) {
+ endian::write32<ELFT::TargetEndianness>(buf + 0, offset);
+ buf += 4;
+ }
+
+ // Write the local TU list.
+ // TODO: Fix this, once we get everything working without TUs.
+ if (mergedHdr.LocalTypeUnitCount != 0)
+ warn(".debug_names: type units are not handled in merged index");
+
+ // Write the foreign TU list.
+ // Currently LLVM does not emit foreign type units, so there should
+ // be nothing here to emit.
+ // TODO: Fix this, once we get everything working wtihout TUs.
+ if (mergedHdr.ForeignTypeUnitCount != 0)
+ warn(".debug_names: type units are not handled in merged index");
+
+ // Write the hash table.
+ // ... Write the buckets
+ uint32_t idx = 1;
+ for (const auto &bucket : bucketList) {
+ if (!bucket.empty())
+ endian::write32<ELFT::TargetEndianness>(buf + 0, idx);
+ idx += bucket.size();
+ buf += 4;
+ }
+
+ // ...Write the hashes
+ for (const auto &bucket : bucketList) {
+ for (const auto &entry : bucket) {
+ uint32_t hashValue = entry->hashValue;
+ endian::write32<ELFT::TargetEndianness>(buf + 0, hashValue);
+ buf += 4;
+ }
+ }
+
+ // Write the string offsets.
+ for (const auto &entry : mergedEntries) {
+ endian::write32<ELFT::TargetEndianness>(buf + 0,
+ entry.relocatedEntryOffset);
+ buf += 4;
+ }
+
+ // Write the entry offsets.
+ for (const auto &entry : mergedEntries) {
+ endian::write32<ELFT::TargetEndianness>(buf + 0, entry.entryOffset);
+ buf += 4;
+ }
+
+ // Write the abbrev table.
+ for (const auto *abbrev : mergedAbbrevTable) {
+ size_t uleb_size = encodeULEB128(abbrev->code, buf);
+ buf += uleb_size;
+ uleb_size = encodeULEB128(abbrev->tag, buf);
+ buf += uleb_size;
+ for (auto attr : abbrev->attributes) {
+ uleb_size = encodeULEB128(attr.Index, buf);
+ buf += uleb_size;
+ uleb_size = encodeULEB128(attr.Form, buf);
+ buf += uleb_size;
+ }
+ endian::write16<ELFT::TargetEndianness>(buf + 0, 0); // attribute sentinels.
+ buf += 2;
+ }
+ *buf++ = 0; // abbrev table sentinel
+
+ // Write the entry pool.
+ for (const auto &stringEntry : mergedEntries) {
+ // Write all the entries for the string.
+ size_t uleb_size;
+ for (const auto &entry : stringEntry.indexEntries) {
+ uleb_size = encodeULEB128(entry->abbrevCode, buf);
+ buf += uleb_size;
+ for (const auto &value : entry->attrValues) {
+ if (value.attrSize > 0) {
+ endian::write32<ELFT::TargetEndianness>(buf + 0, value.attrValue);
+ buf += value.attrSize;
+ }
+ }
+ }
+ *buf++ = 0; // Entry sentinel
+ }
+}
+
+bool DebugNamesSection::isNeeded() const { return numChunks > 0; }
+
+template <class ELFT>
+static void
+readCompileUnitOffsets(struct DebugNamesSection::DebugNamesSectionData &secData,
+ DebugNamesSection::DebugNamesInputChunk &inputChunk,
+ DebugNamesSection::DebugNamesOutputChunk &outputChunk,
+ llvm::DWARFDataExtractor &namesExtractor) {
+ uint64_t offset = secData.locs.CUsBase;
+ uint64_t *offsetPtr = &offset;
+ for (size_t i = 0; i < secData.hdr.CompUnitCount; ++i) {
+ llvm::Error err = llvm::Error::success();
+ uint32_t value = namesExtractor.getU32(offsetPtr, &err);
+ if (err)
+ errorOrWarn(toString(inputChunk.namesSection->sec) +
+ ": error reading CU offsets: " + toString(std::move(err)));
+ else
+ outputChunk.compilationUnits.push_back(value);
+ }
+}
+
+template <class ELFT>
+static void
+readEntryOffsets(struct DebugNamesSection::DebugNamesSectionData &secData,
+ DebugNamesSection::DebugNamesInputChunk &chunk,
+ llvm::DWARFDataExtractor &namesExtractor) {
+ secData.entryOffsets = std::make_unique<uint32_t[]>(secData.hdr.NameCount);
+ if (secData.locs.EntryOffsetsBase >= namesExtractor.getData().size())
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ ": invalid entry offsets input");
+
+ uint64_t offset = secData.locs.EntryOffsetsBase;
+ uint64_t *offsetPtr = &offset;
+ for (size_t i = 0; i < secData.hdr.NameCount; ++i) {
+ llvm::Error err = llvm::Error::success();
+ uint32_t value = namesExtractor.getU32(offsetPtr, &err);
+ if (err)
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ ": error reading entry offsets: " + toString(std::move(err)));
+ else
+ secData.entryOffsets.get()[i] = value;
+ }
+}
+
+template <class ELFT>
+static void readAttributeValues(
+ SmallVector<DebugNamesSection::AttrValueData, 3> &values,
+ DebugNamesSection::DebugNamesInputChunk &chunk, uint64_t &offset,
+ struct DebugNamesSection::DebugNamesSectionData &secData,
+ int32_t &parentOffset, llvm::DWARFDataExtractor &namesExtractor,
+ const llvm::DWARFDebugNames::Abbrev &abbrev) {
+ const LLDDWARFSection &namesSection = *chunk.namesSection;
+ uint64_t *offsetPtr = &offset;
+ DebugNamesSection::AttrValueData cuOrTuAttr = {0, 0};
+ for (auto attr : abbrev.Attributes) {
+ llvm::Error err = llvm::Error::success();
+ DebugNamesSection::AttrValueData newAttr;
+ uint32_t value;
+ switch (attr.Form) {
+ case DW_FORM_flag_present: {
+ // Currently only DW_IDX_parent attributes (in .debug_names) can
+ // have this form. This form does not have a real value (nothing is
+ // emitted for it).
+ break;
+ }
+ case DW_FORM_data1:
+ case DW_FORM_ref1: {
+ newAttr.attrValue = namesExtractor.getU8(offsetPtr, &err);
+ newAttr.attrSize = 1;
+ break;
+ }
+ case DW_FORM_data2:
+ case DW_FORM_ref2: {
+ value = namesExtractor.getU16(offsetPtr, &err);
+ newAttr.attrValue = value;
+ newAttr.attrSize = 2;
+ break;
+ }
+ case DW_FORM_data4:
+ case DW_FORM_ref4: {
+ value = namesExtractor.getU32(offsetPtr, &err);
+ newAttr.attrValue = value;
+ newAttr.attrSize = 4;
+ if (attr.Index == dwarf::DW_IDX_parent)
+ parentOffset = value + secData.locs.EntriesBase;
+ break;
+ }
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_sig8: {
+ value = namesExtractor.getU64(offsetPtr, &err);
+ newAttr.attrValue = value;
+ newAttr.attrSize = 8;
+ break;
+ }
+ default: {
+ errorOrWarn(toString(namesSection.sec) +
+ Twine(": unrecognized form encoding ") + Twine(attr.Form) +
+ " in .debug_names abbrev table");
+ break;
+ }
+ }
+ if (err)
+ errorOrWarn(
+ toString(namesSection.sec) +
+ ": error while reading attributes: " + toString(std::move(err)));
+ if (attr.Index == DW_IDX_compile_unit || attr.Index == DW_IDX_type_unit)
+ // Save it to put it at the end of the attributes list.
+ cuOrTuAttr = newAttr;
+ else if (attr.Form != DW_FORM_flag_present)
+ values.push_back(newAttr);
+ }
+
+ // Make sure the CU or TU index attribute is the last one in the list.
+ values.push_back(cuOrTuAttr);
+}
+
+template <class ELFT>
+static void readEntry(DebugNamesSection::NamedEntry &stringEntry,
+ DebugNamesSection::DebugNamesInputChunk &chunk,
+ DebugNamesSection::DebugNamesSectionData &secData,
+ llvm::DWARFDataExtractor &namesExtractor,
+ const llvm::DWARFDebugNames::NameIndex &ni) {
+ std::unique_ptr<LLDDWARFSection> &namesSection = chunk.namesSection;
+ uint64_t offset = stringEntry.entryOffset;
+ // Each entry ends with a zero 'sentinel' value
+ while (offset < namesSection->Data.size() &&
+ namesSection->Data[offset] != 0) {
+ // Read & store all entries (for the same string)
+ auto entry = std::make_unique<DebugNamesSection::IndexEntry>();
+ entry->poolOffset = offset;
+ llvm::Error err = llvm::Error::success();
+ uint64_t ulebValue = namesExtractor.getULEB128(&offset, &err);
+ if (err)
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ ": error reading entry: " + toString(std::move(err)));
+ entry->abbrevCode = static_cast<uint32_t>(ulebValue);
+ const auto &abbrevs = ni.getAbbrevs();
+ auto abbrevIt = abbrevs.find_as(entry->abbrevCode);
+ if (abbrevIt != abbrevs.end()) {
+ const llvm::DWARFDebugNames::Abbrev &abbrev = *abbrevIt;
+ readAttributeValues<ELFT>(entry->attrValues, chunk, offset, secData,
+ entry->parentOffset, namesExtractor, abbrev);
+ stringEntry.indexEntries.push_back(std::move(entry));
+ }
+ }
+ if (offset >= namesSection->Data.size())
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ ": encountered unexpected end of section while reading entry");
+}
+
+template <class ELFT>
+static void
+readEntries(struct DebugNamesSection::DebugNamesSectionData &secData,
+ DebugNamesSection::DebugNamesInputChunk &chunk,
+ llvm::DWARFDataExtractor &namesExtractor,
+ llvm::DataExtractor &strExtractor,
+ const llvm::DWARFDebugNames::NameIndex &ni) {
+ // Temporary map from entry offsets to address (pointer) of entry with that
+ // offset; used to find parent entries quickly.
+ DenseMap<uint32_t, DebugNamesSection::IndexEntry *> offsetMap;
+ // Reserve sizes for this chunk's hashes & namedEntries.
+ chunk.hashValues.reserve(secData.hdr.NameCount);
+ secData.namedEntries.reserve(secData.hdr.NameCount);
+ // Calculate the Entry Offsets, create initial records.
+ for (uint32_t i = 0; i < secData.hdr.NameCount; ++i) {
+ // Get string value
+ DebugNamesSection::NamedEntry stringEntry;
+ stringEntry.entryOffset =
+ secData.locs.EntriesBase + secData.entryOffsets[i];
+ uint64_t strOffsetOffset =
+ secData.locs.StringOffsetsBase + i * secData.dwarfSize;
+ stringEntry.stringOffsetOffset = strOffsetOffset;
+ uint64_t strOffset =
+ namesExtractor.getRelocatedValue(secData.dwarfSize, &strOffsetOffset);
+ StringRef name = strExtractor.getCStrRef(&strOffset);
+ stringEntry.name = name.data();
+ // Calculate hash
+ stringEntry.hashValue = caseFoldingDjbHash(name);
+ chunk.hashValues.push_back(stringEntry.hashValue);
+ // Read String Entry
+ readEntry<ELFT>(stringEntry, chunk, secData, namesExtractor, ni);
+ // Add index entries & offsets to our temporary map
+ for (const auto &entry : stringEntry.indexEntries)
+ offsetMap[entry->poolOffset] = entry.get();
+ secData.namedEntries.push_back(std::move(stringEntry));
+ }
+ // Find/assign pointers to any 'real' parent entries (needed to find correct
+ // parent entry offsets in merged data).
+ for (auto &stringEntry : secData.namedEntries)
+ for (auto &entry : stringEntry.indexEntries)
+ if (entry->parentOffset != -1)
+ entry->parentEntry = offsetMap[entry->parentOffset];
+}
+
+static uint16_t computeDebugNamesHeaderSize() {
+ // Size of the .debug_names section header, in bytes, for DWARF32:
+ uint16_t size = /* unit length */ 4 +
+ /* version */ 2 +
+ /* padding */ 2 +
+ /* CU count */ 4 +
+ /* TU count */ 4 +
+ /* Foreign TU count */ 4 +
+ /* Bucket Count */ 4 +
+ /* Name Count */ 4 +
+ /* Abbrev table size */ 4 +
+ /* Augmentation string size */ 4 +
+ /* augmentation string */ 8;
+ return size;
+}
+
+template <class ELFT>
+static void collectDebugNamesSectionData(
+ DebugNamesSection::DebugNamesInputChunk &chunk,
+ DebugNamesSection::DebugNamesOutputChunk &outputChunk,
+ llvm::DWARFDataExtractor &namesExtractor,
+ llvm::DataExtractor &strExtractor) {
+ for (const auto &ni : *chunk.debugNamesData) {
+ DebugNamesSection::DebugNamesSectionData secData;
+ secData.hdr = ni.getHeader();
+ if (secData.hdr.Format != DwarfFormat::DWARF32)
+ errorOrWarn(toString(chunk.namesSection->sec) + ": unsupported DWARF64");
+ secData.dwarfSize = dwarf::getDwarfOffsetByteSize(DwarfFormat::DWARF32);
+ secData.hdrSize = computeDebugNamesHeaderSize();
+ if (secData.hdr.Version != 5)
+ errorOrWarn(toString(chunk.namesSection->sec) + ": unsupported version");
+ findDebugNamesOffsets(secData.locs, secData.hdrSize, secData.hdr.Format,
+ secData.hdr);
+ readCompileUnitOffsets<ELFT>(secData, chunk, outputChunk, namesExtractor);
+ readEntryOffsets<ELFT>(secData, chunk, namesExtractor);
+ readEntries<ELFT>(secData, chunk, namesExtractor, strExtractor, ni);
+ chunk.sectionsData.push_back(std::move(secData));
+ }
+}
+
+void DebugNamesSection::collectMergedCounts(
+ MutableArrayRef<DebugNamesInputChunk> &inputChunks) {
+ SmallVector<uint32_t, 0> tmpMergedCuOffsets;
+
+ mergedHdr.CompUnitCount = 0;
+ mergedHdr.LocalTypeUnitCount = 0;
+ mergedHdr.For...
[truncated]
|
@llvm/pr-subscribers-lld-elf Author: None (cmtice) ChangesUpdate LLD to be able to generate a single merged .debug_names section (rather than a set of appended sections). Option is controlled by a new flag, --debug-names. Does not handle type units yet (coming soon). Patch is 147.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86508.diff 19 Files Affected:
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index bb3608da80b21f..1a7c6dafe17e18 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -228,6 +228,7 @@ struct Config {
bool cref;
llvm::SmallVector<std::pair<llvm::GlobPattern, uint64_t>, 0>
deadRelocInNonAlloc;
+ bool debugNames;
bool demangle = true;
bool dependentLibraries;
bool disableVerify;
diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp
index ac28aa8c7ca6e0..03fac65754f020 100644
--- a/lld/ELF/DWARF.cpp
+++ b/lld/ELF/DWARF.cpp
@@ -40,6 +40,7 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
.Case(".debug_gnu_pubtypes", &gnuPubtypesSection)
.Case(".debug_line", &lineSection)
.Case(".debug_loclists", &loclistsSection)
+ .Case(".debug_names", &debugNamesSection)
.Case(".debug_ranges", &rangesSection)
.Case(".debug_rnglists", &rnglistsSection)
.Case(".debug_str_offsets", &strOffsetsSection)
diff --git a/lld/ELF/DWARF.h b/lld/ELF/DWARF.h
index 1b9a3e3f77943b..e1a830279b54d0 100644
--- a/lld/ELF/DWARF.h
+++ b/lld/ELF/DWARF.h
@@ -68,6 +68,10 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
return gnuPubtypesSection;
}
+ const LLDDWARFSection &getNamesSection() const override {
+ return debugNamesSection;
+ }
+
StringRef getFileName() const override { return ""; }
StringRef getAbbrevSection() const override { return abbrevSection; }
StringRef getStrSection() const override { return strSection; }
@@ -95,6 +99,7 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
LLDDWARFSection strOffsetsSection;
LLDDWARFSection lineSection;
LLDDWARFSection addrSection;
+ LLDDWARFSection debugNamesSection;
StringRef abbrevSection;
StringRef strSection;
StringRef lineStrSection;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 7257ebd0fac994..9a300a54e122bd 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -441,6 +441,8 @@ static void checkOptions() {
error("-r and -pie may not be used together");
if (config->exportDynamic)
error("-r and --export-dynamic may not be used together");
+ if (config->debugNames)
+ error("-r and --debug-names may not be used together");
}
if (config->executeOnly) {
@@ -1231,6 +1233,7 @@ static void readConfigs(opt::InputArgList &args) {
config->cref = args.hasArg(OPT_cref);
config->optimizeBBJumps =
args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false);
+ config->debugNames = args.hasFlag(OPT_debug_names, OPT_no_debug_names, false);
config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true);
config->dependencyFile = args.getLastArgValue(OPT_dependency_file);
config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c5e95d0d25c1ae..d470646ed0eeef 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -153,6 +153,10 @@ def : Flag<["--"], "no-color-diagnostics">, Alias<color_diagnostics>, AliasArgs<
def cref: FF<"cref">,
HelpText<"Output cross reference table. If -Map is specified, print to the map file">;
+defm debug_names: BB<"debug-names",
+ "Generate a merged .debug_names section",
+ "Do not generate a merged .debug_names section (default)">;
+
defm demangle: B<"demangle",
"Demangle symbol names (default)",
"Do not demangle symbol names">;
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index f924756ddddfcd..6b84f4d806f356 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "SyntheticSections.h"
+
#include "Config.h"
#include "DWARF.h"
#include "EhFrame.h"
@@ -34,7 +35,9 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/Support/DJB.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
@@ -2690,6 +2693,759 @@ static uint32_t computeGdbHash(StringRef s) {
return h;
}
+DebugNamesSection::DebugNamesSection()
+ : SyntheticSection(0, SHT_PROGBITS, 1, ".debug_names") {}
+
+template <class ELFT, class RelTy>
+void DebugNamesSection::getNameRelocsImpl(
+ InputSection *sec, ArrayRef<RelTy> rels,
+ llvm::DenseMap<uint32_t, uint32_t> &relocs) {
+ for (auto &rel : rels) {
+ Symbol &sym = sec->getFile<ELFT>()->getRelocTargetSym(rel);
+ relocs[rel.r_offset] = sym.getVA(getAddend<ELFT>(rel));
+ }
+}
+
+template <class ELFT>
+void DebugNamesSection::getNameRelocs(
+ InputSectionBase *base, llvm::DenseMap<uint32_t, uint32_t> &relocs) {
+ auto *sec = cast<InputSection>(base);
+ const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ getNameRelocsImpl<ELFT>(sec, rels.rels, relocs);
+ else
+ getNameRelocsImpl<ELFT>(sec, rels.relas, relocs);
+}
+
+void DebugNamesSection::writeTo(uint8_t *buf) { invokeELFT(writeToImpl, buf); }
+
+template <class ELFT> void DebugNamesSection::writeToImpl(uint8_t *buf) {
+ SmallVector<uint32_t, 0> mergedCuOffsets;
+ SmallVector<uint32_t, 0> mergedTuOffsets;
+ llvm::DenseMap<uint32_t, uint32_t> strOffsets;
+ SmallVector<llvm::DenseMap<uint32_t, uint32_t>, 0> chunksRelocs;
+ chunksRelocs.reserve(numChunks);
+
+ for (size_t i = 0; i < numChunks; ++i) {
+ DebugNamesOutputChunk &chunk = outputChunks[i];
+ InputSectionBase *base = inputDebugNamesSections[i];
+ llvm::DenseMap<uint32_t, uint32_t> relocs;
+ getNameRelocs<ELFT>(base, relocs);
+ chunksRelocs.push_back(std::move(relocs));
+
+ // Update CuOffsets list with new data
+ for (uint32_t cuOffset : chunk.compilationUnits)
+ mergedCuOffsets.push_back(chunk.sec->outSecOff + cuOffset);
+
+ // TODO: Update TuOffsets list with new data
+ }
+
+ // Update the entries with the relocated string offsets.
+ for (auto &stringEntry : mergedEntries) {
+ uint32_t oldOffset = stringEntry.stringOffsetOffset;
+ uint32_t idx = stringEntry.chunkIdx;
+ stringEntry.relocatedEntryOffset = chunksRelocs[idx][oldOffset];
+ }
+
+ // Write out bytes for merged section.
+
+ // Write the header.
+ endian::write32<ELFT::TargetEndianness>(buf + 0, mergedHdr.UnitLength);
+ endian::write16<ELFT::TargetEndianness>(buf + 4, mergedHdr.Version);
+ endian::write32<ELFT::TargetEndianness>(buf + 8, mergedHdr.CompUnitCount);
+ endian::write32<ELFT::TargetEndianness>(buf + 12,
+ mergedHdr.LocalTypeUnitCount);
+ endian::write32<ELFT::TargetEndianness>(buf + 16,
+ mergedHdr.ForeignTypeUnitCount);
+ endian::write32<ELFT::TargetEndianness>(buf + 20, mergedHdr.BucketCount);
+ endian::write32<ELFT::TargetEndianness>(buf + 24, mergedHdr.NameCount);
+ endian::write32<ELFT::TargetEndianness>(buf + 28, mergedHdr.AbbrevTableSize);
+ endian::write32<ELFT::TargetEndianness>(buf + 32,
+ mergedHdr.AugmentationStringSize);
+ buf += 36;
+ memcpy(buf, mergedHdr.AugmentationString.c_str(), 8);
+ buf += 8;
+
+ // Write the CU list.
+ for (uint32_t offset : mergedCuOffsets) {
+ endian::write32<ELFT::TargetEndianness>(buf + 0, offset);
+ buf += 4;
+ }
+
+ // Write the local TU list.
+ // TODO: Fix this, once we get everything working without TUs.
+ if (mergedHdr.LocalTypeUnitCount != 0)
+ warn(".debug_names: type units are not handled in merged index");
+
+ // Write the foreign TU list.
+ // Currently LLVM does not emit foreign type units, so there should
+ // be nothing here to emit.
+ // TODO: Fix this, once we get everything working wtihout TUs.
+ if (mergedHdr.ForeignTypeUnitCount != 0)
+ warn(".debug_names: type units are not handled in merged index");
+
+ // Write the hash table.
+ // ... Write the buckets
+ uint32_t idx = 1;
+ for (const auto &bucket : bucketList) {
+ if (!bucket.empty())
+ endian::write32<ELFT::TargetEndianness>(buf + 0, idx);
+ idx += bucket.size();
+ buf += 4;
+ }
+
+ // ...Write the hashes
+ for (const auto &bucket : bucketList) {
+ for (const auto &entry : bucket) {
+ uint32_t hashValue = entry->hashValue;
+ endian::write32<ELFT::TargetEndianness>(buf + 0, hashValue);
+ buf += 4;
+ }
+ }
+
+ // Write the string offsets.
+ for (const auto &entry : mergedEntries) {
+ endian::write32<ELFT::TargetEndianness>(buf + 0,
+ entry.relocatedEntryOffset);
+ buf += 4;
+ }
+
+ // Write the entry offsets.
+ for (const auto &entry : mergedEntries) {
+ endian::write32<ELFT::TargetEndianness>(buf + 0, entry.entryOffset);
+ buf += 4;
+ }
+
+ // Write the abbrev table.
+ for (const auto *abbrev : mergedAbbrevTable) {
+ size_t uleb_size = encodeULEB128(abbrev->code, buf);
+ buf += uleb_size;
+ uleb_size = encodeULEB128(abbrev->tag, buf);
+ buf += uleb_size;
+ for (auto attr : abbrev->attributes) {
+ uleb_size = encodeULEB128(attr.Index, buf);
+ buf += uleb_size;
+ uleb_size = encodeULEB128(attr.Form, buf);
+ buf += uleb_size;
+ }
+ endian::write16<ELFT::TargetEndianness>(buf + 0, 0); // attribute sentinels.
+ buf += 2;
+ }
+ *buf++ = 0; // abbrev table sentinel
+
+ // Write the entry pool.
+ for (const auto &stringEntry : mergedEntries) {
+ // Write all the entries for the string.
+ size_t uleb_size;
+ for (const auto &entry : stringEntry.indexEntries) {
+ uleb_size = encodeULEB128(entry->abbrevCode, buf);
+ buf += uleb_size;
+ for (const auto &value : entry->attrValues) {
+ if (value.attrSize > 0) {
+ endian::write32<ELFT::TargetEndianness>(buf + 0, value.attrValue);
+ buf += value.attrSize;
+ }
+ }
+ }
+ *buf++ = 0; // Entry sentinel
+ }
+}
+
+bool DebugNamesSection::isNeeded() const { return numChunks > 0; }
+
+template <class ELFT>
+static void
+readCompileUnitOffsets(struct DebugNamesSection::DebugNamesSectionData &secData,
+ DebugNamesSection::DebugNamesInputChunk &inputChunk,
+ DebugNamesSection::DebugNamesOutputChunk &outputChunk,
+ llvm::DWARFDataExtractor &namesExtractor) {
+ uint64_t offset = secData.locs.CUsBase;
+ uint64_t *offsetPtr = &offset;
+ for (size_t i = 0; i < secData.hdr.CompUnitCount; ++i) {
+ llvm::Error err = llvm::Error::success();
+ uint32_t value = namesExtractor.getU32(offsetPtr, &err);
+ if (err)
+ errorOrWarn(toString(inputChunk.namesSection->sec) +
+ ": error reading CU offsets: " + toString(std::move(err)));
+ else
+ outputChunk.compilationUnits.push_back(value);
+ }
+}
+
+template <class ELFT>
+static void
+readEntryOffsets(struct DebugNamesSection::DebugNamesSectionData &secData,
+ DebugNamesSection::DebugNamesInputChunk &chunk,
+ llvm::DWARFDataExtractor &namesExtractor) {
+ secData.entryOffsets = std::make_unique<uint32_t[]>(secData.hdr.NameCount);
+ if (secData.locs.EntryOffsetsBase >= namesExtractor.getData().size())
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ ": invalid entry offsets input");
+
+ uint64_t offset = secData.locs.EntryOffsetsBase;
+ uint64_t *offsetPtr = &offset;
+ for (size_t i = 0; i < secData.hdr.NameCount; ++i) {
+ llvm::Error err = llvm::Error::success();
+ uint32_t value = namesExtractor.getU32(offsetPtr, &err);
+ if (err)
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ ": error reading entry offsets: " + toString(std::move(err)));
+ else
+ secData.entryOffsets.get()[i] = value;
+ }
+}
+
+template <class ELFT>
+static void readAttributeValues(
+ SmallVector<DebugNamesSection::AttrValueData, 3> &values,
+ DebugNamesSection::DebugNamesInputChunk &chunk, uint64_t &offset,
+ struct DebugNamesSection::DebugNamesSectionData &secData,
+ int32_t &parentOffset, llvm::DWARFDataExtractor &namesExtractor,
+ const llvm::DWARFDebugNames::Abbrev &abbrev) {
+ const LLDDWARFSection &namesSection = *chunk.namesSection;
+ uint64_t *offsetPtr = &offset;
+ DebugNamesSection::AttrValueData cuOrTuAttr = {0, 0};
+ for (auto attr : abbrev.Attributes) {
+ llvm::Error err = llvm::Error::success();
+ DebugNamesSection::AttrValueData newAttr;
+ uint32_t value;
+ switch (attr.Form) {
+ case DW_FORM_flag_present: {
+ // Currently only DW_IDX_parent attributes (in .debug_names) can
+ // have this form. This form does not have a real value (nothing is
+ // emitted for it).
+ break;
+ }
+ case DW_FORM_data1:
+ case DW_FORM_ref1: {
+ newAttr.attrValue = namesExtractor.getU8(offsetPtr, &err);
+ newAttr.attrSize = 1;
+ break;
+ }
+ case DW_FORM_data2:
+ case DW_FORM_ref2: {
+ value = namesExtractor.getU16(offsetPtr, &err);
+ newAttr.attrValue = value;
+ newAttr.attrSize = 2;
+ break;
+ }
+ case DW_FORM_data4:
+ case DW_FORM_ref4: {
+ value = namesExtractor.getU32(offsetPtr, &err);
+ newAttr.attrValue = value;
+ newAttr.attrSize = 4;
+ if (attr.Index == dwarf::DW_IDX_parent)
+ parentOffset = value + secData.locs.EntriesBase;
+ break;
+ }
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_sig8: {
+ value = namesExtractor.getU64(offsetPtr, &err);
+ newAttr.attrValue = value;
+ newAttr.attrSize = 8;
+ break;
+ }
+ default: {
+ errorOrWarn(toString(namesSection.sec) +
+ Twine(": unrecognized form encoding ") + Twine(attr.Form) +
+ " in .debug_names abbrev table");
+ break;
+ }
+ }
+ if (err)
+ errorOrWarn(
+ toString(namesSection.sec) +
+ ": error while reading attributes: " + toString(std::move(err)));
+ if (attr.Index == DW_IDX_compile_unit || attr.Index == DW_IDX_type_unit)
+ // Save it to put it at the end of the attributes list.
+ cuOrTuAttr = newAttr;
+ else if (attr.Form != DW_FORM_flag_present)
+ values.push_back(newAttr);
+ }
+
+ // Make sure the CU or TU index attribute is the last one in the list.
+ values.push_back(cuOrTuAttr);
+}
+
+template <class ELFT>
+static void readEntry(DebugNamesSection::NamedEntry &stringEntry,
+ DebugNamesSection::DebugNamesInputChunk &chunk,
+ DebugNamesSection::DebugNamesSectionData &secData,
+ llvm::DWARFDataExtractor &namesExtractor,
+ const llvm::DWARFDebugNames::NameIndex &ni) {
+ std::unique_ptr<LLDDWARFSection> &namesSection = chunk.namesSection;
+ uint64_t offset = stringEntry.entryOffset;
+ // Each entry ends with a zero 'sentinel' value
+ while (offset < namesSection->Data.size() &&
+ namesSection->Data[offset] != 0) {
+ // Read & store all entries (for the same string)
+ auto entry = std::make_unique<DebugNamesSection::IndexEntry>();
+ entry->poolOffset = offset;
+ llvm::Error err = llvm::Error::success();
+ uint64_t ulebValue = namesExtractor.getULEB128(&offset, &err);
+ if (err)
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ ": error reading entry: " + toString(std::move(err)));
+ entry->abbrevCode = static_cast<uint32_t>(ulebValue);
+ const auto &abbrevs = ni.getAbbrevs();
+ auto abbrevIt = abbrevs.find_as(entry->abbrevCode);
+ if (abbrevIt != abbrevs.end()) {
+ const llvm::DWARFDebugNames::Abbrev &abbrev = *abbrevIt;
+ readAttributeValues<ELFT>(entry->attrValues, chunk, offset, secData,
+ entry->parentOffset, namesExtractor, abbrev);
+ stringEntry.indexEntries.push_back(std::move(entry));
+ }
+ }
+ if (offset >= namesSection->Data.size())
+ errorOrWarn(toString(chunk.namesSection->sec) +
+ ": encountered unexpected end of section while reading entry");
+}
+
+template <class ELFT>
+static void
+readEntries(struct DebugNamesSection::DebugNamesSectionData &secData,
+ DebugNamesSection::DebugNamesInputChunk &chunk,
+ llvm::DWARFDataExtractor &namesExtractor,
+ llvm::DataExtractor &strExtractor,
+ const llvm::DWARFDebugNames::NameIndex &ni) {
+ // Temporary map from entry offsets to address (pointer) of entry with that
+ // offset; used to find parent entries quickly.
+ DenseMap<uint32_t, DebugNamesSection::IndexEntry *> offsetMap;
+ // Reserve sizes for this chunk's hashes & namedEntries.
+ chunk.hashValues.reserve(secData.hdr.NameCount);
+ secData.namedEntries.reserve(secData.hdr.NameCount);
+ // Calculate the Entry Offsets, create initial records.
+ for (uint32_t i = 0; i < secData.hdr.NameCount; ++i) {
+ // Get string value
+ DebugNamesSection::NamedEntry stringEntry;
+ stringEntry.entryOffset =
+ secData.locs.EntriesBase + secData.entryOffsets[i];
+ uint64_t strOffsetOffset =
+ secData.locs.StringOffsetsBase + i * secData.dwarfSize;
+ stringEntry.stringOffsetOffset = strOffsetOffset;
+ uint64_t strOffset =
+ namesExtractor.getRelocatedValue(secData.dwarfSize, &strOffsetOffset);
+ StringRef name = strExtractor.getCStrRef(&strOffset);
+ stringEntry.name = name.data();
+ // Calculate hash
+ stringEntry.hashValue = caseFoldingDjbHash(name);
+ chunk.hashValues.push_back(stringEntry.hashValue);
+ // Read String Entry
+ readEntry<ELFT>(stringEntry, chunk, secData, namesExtractor, ni);
+ // Add index entries & offsets to our temporary map
+ for (const auto &entry : stringEntry.indexEntries)
+ offsetMap[entry->poolOffset] = entry.get();
+ secData.namedEntries.push_back(std::move(stringEntry));
+ }
+ // Find/assign pointers to any 'real' parent entries (needed to find correct
+ // parent entry offsets in merged data).
+ for (auto &stringEntry : secData.namedEntries)
+ for (auto &entry : stringEntry.indexEntries)
+ if (entry->parentOffset != -1)
+ entry->parentEntry = offsetMap[entry->parentOffset];
+}
+
+static uint16_t computeDebugNamesHeaderSize() {
+ // Size of the .debug_names section header, in bytes, for DWARF32:
+ uint16_t size = /* unit length */ 4 +
+ /* version */ 2 +
+ /* padding */ 2 +
+ /* CU count */ 4 +
+ /* TU count */ 4 +
+ /* Foreign TU count */ 4 +
+ /* Bucket Count */ 4 +
+ /* Name Count */ 4 +
+ /* Abbrev table size */ 4 +
+ /* Augmentation string size */ 4 +
+ /* augmentation string */ 8;
+ return size;
+}
+
+template <class ELFT>
+static void collectDebugNamesSectionData(
+ DebugNamesSection::DebugNamesInputChunk &chunk,
+ DebugNamesSection::DebugNamesOutputChunk &outputChunk,
+ llvm::DWARFDataExtractor &namesExtractor,
+ llvm::DataExtractor &strExtractor) {
+ for (const auto &ni : *chunk.debugNamesData) {
+ DebugNamesSection::DebugNamesSectionData secData;
+ secData.hdr = ni.getHeader();
+ if (secData.hdr.Format != DwarfFormat::DWARF32)
+ errorOrWarn(toString(chunk.namesSection->sec) + ": unsupported DWARF64");
+ secData.dwarfSize = dwarf::getDwarfOffsetByteSize(DwarfFormat::DWARF32);
+ secData.hdrSize = computeDebugNamesHeaderSize();
+ if (secData.hdr.Version != 5)
+ errorOrWarn(toString(chunk.namesSection->sec) + ": unsupported version");
+ findDebugNamesOffsets(secData.locs, secData.hdrSize, secData.hdr.Format,
+ secData.hdr);
+ readCompileUnitOffsets<ELFT>(secData, chunk, outputChunk, namesExtractor);
+ readEntryOffsets<ELFT>(secData, chunk, namesExtractor);
+ readEntries<ELFT>(secData, chunk, namesExtractor, strExtractor, ni);
+ chunk.sectionsData.push_back(std::move(secData));
+ }
+}
+
+void DebugNamesSection::collectMergedCounts(
+ MutableArrayRef<DebugNamesInputChunk> &inputChunks) {
+ SmallVector<uint32_t, 0> tmpMergedCuOffsets;
+
+ mergedHdr.CompUnitCount = 0;
+ mergedHdr.LocalTypeUnitCount = 0;
+ mergedHdr.For...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
✅ With the latest revision this PR passed the Python code formatter. |
How much does it add to linking time? |
I did some rough timing measurements on my workstation (no special quiescent environment), linking Clang. In both cases Clang was built with -gpubnames. In one case I passed --debug_names to the LLD, so it built the merged section; in the other case I didn't pass any special flags to LLD, so it appended the .debug_names sections together (current normal behavior). I linked 5 times in each case and took the average. Linking Clang without --debug-names flag took, on average, 8.8 seconds. Linking Clang with --debug-names took, on average, 13.5 seconds. The .debug_names section without merging is 0x99f8280 (161448576) bytes. So creating a merged section cost us ~ 5 seconds and saved us ~41.6 MB. It also saves us ~ 6 seconds in LLDB startup times on Clang. |
(Reading. I find |
|
lld/test/ELF/debug-names.s
Outdated
|
||
# DISASM: Disassembly of section .text: | ||
# DISASM-EMPTY: | ||
# DISASM: <_Z2f12t1>: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The disassembly is unrelated to the functionality to be tested
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. (removed the disassembly).
lld/test/ELF/debug-names.s
Outdated
# DWARF-NEXT: DW_IDX_parent: <parent not indexed> | ||
# DWARF-NEXT: DW_IDX_compile_unit: 0x01 | ||
|
||
# READELF: .debug_names PROGBITS 0000000000000000 0003eb 0000d0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
s/0003eb/[[#%x,]]/
so that the test is less sensitive to section offset changes.
The size, while sensitive, is useful as a change detector.
I's just one line and displays the section size. It's better to move it before # DWARF
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
# REQUIRES: x86 | ||
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t1.o | ||
# RUN: llvm-mc -filetype=obj -triple=x86_64 %p/Inputs/debug-names-2.s -o %t2.o | ||
# |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't add ^#$
or ^##
lines. Please remove here and elsewhere.
You can check the convention of newer tests added to this directory git whatchanged -p --diff-filter=A --after=2024-01-01 lld/test/ELF
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.h
Outdated
size_t getSize() const override { return sectionSize; } | ||
bool isNeeded() const override; | ||
|
||
void addSections(SmallVector<InputSectionBase *, 0> sec_list) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
expensive copy of SmallVector
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How would you recommend that I deal with this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just inline the simple function to the caller
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, done.
lld/ELF/SyntheticSections.h
Outdated
@@ -788,6 +792,136 @@ class RelroPaddingSection final : public SyntheticSection { | |||
void writeTo(uint8_t *buf) override {} | |||
}; | |||
|
|||
class DebugNamesSection final : public SyntheticSection { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are a lot of ELFT
in member functions. Seems better to make the class ELFT
, or follow RelocationBaseSection
: non-template base class with ELFT derived class.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Made the entire class ELFT.
lld/ELF/SyntheticSections.cpp
Outdated
getNameRelocsImpl<ELFT>(sec, rels.relas, relocs); | ||
} | ||
|
||
void DebugNamesSection::writeTo(uint8_t *buf) { invokeELFT(writeToImpl, buf); } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RelocationSection::writeTo avoids invokeELFT
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done (with making entire class ELFT).
@@ -0,0 +1,153 @@ | |||
# REQUIRES: x86 | |||
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t1.o | |||
# |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Delete ^#$
lines
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
# | ||
# RUN: not ld.lld --debug-names %t1.o -o /dev/null 2>&1 | FileCheck %s | ||
# | ||
# CHECK: error: {{.*}}:(.debug_names): encountered unexpected end of section while reading entry |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should test the filename to ensure it is correct.
You can use FileCheck -DFILE=%t1.o
to get the filename with [[FILE]]
. Recently, more tests have rm -rf %t && split-file %s %t && cd %t
so that we don't need to figure out slashes/backslashes for non-Windows/Windows.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can successfully add "FileCheck -DFILE=%t1.o" or "FileCheck -DFILE=%t2.o to my tests.
I don't understand what you want me to do with [[FILE]]; could you be more explicit please?
I tried adding "rm -rf %t && split-file %s %t && cd %t" to some of my tests, but then the tests fail with errors like: "split-file: error: /usr/local/google3/cmtice/LLD.llvm-project/lld/test/ELF/debug-names-parent-idx.s: no part separator was found". I'm not sure what to do here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can probably spend some time on existing tests. https://llvm.org/docs/TestingGuide.html#extra-files #---
serves as a separtor.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just verifying that I understand what you want me to do: For the three debug-names-*.s tests that have two input files, you want me to put both input files into the same .s file and use the #--- separator syntax (along with the split-file stuff). Is that correct?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've done this now.
# DWARF-NEXT: CU[0]: 0x00000000 | ||
# DWARF-NEXT: CU[1]: 0x00000041 | ||
|
||
## |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We typically place such file-level comments at the top. Don't add ^##$
lines.
Follow the convention of recent test files.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.cpp
Outdated
for (size_t i = 0; i < numChunks; ++i) { | ||
DebugNamesOutputChunk &chunk = outputChunks[i]; | ||
InputSectionBase *base = inputDebugNamesSections[i]; | ||
llvm::DenseMap<uint32_t, uint32_t> relocs; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
DenseMap<uint32_t, uint32_t> &relocs = chunksRelocs.emplace_back();
to save a move.
Please follow the convention: we normally don't use llvm::
in .cpp
files. llvm::DenseMap
is for headers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Both are done.
lld/ELF/SyntheticSections.cpp
Outdated
files.size()); | ||
auto outputChunks = std::make_unique<DebugNamesOutputChunk[]>(files.size()); | ||
parallelFor(0, files.size(), [&](size_t i) { | ||
ObjFile<ELFT> *file = cast<ObjFile<ELFT>>(files[i]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto *file
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.cpp
Outdated
// Extract llvm::DWARFDebugNames data from the .debug_names section. The | ||
// .debug_names section needs the .debug_str section, to get the actual | ||
// symbol names. | ||
const StringRef &strSection = dobj.getStrSection(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
avoid reference for small types like StringRef
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.cpp
Outdated
std::make_unique<LLDDWARFSection>(namesSection); | ||
if (llvm::Error E = inputChunks[i].debugNamesData->extract()) { | ||
// Report an error here. We were unable to extract the data. | ||
errorOrWarn(toString(dobj.getNamesSection().sec) + ": " + |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Twine(": ")
to avoid a temporary std::string
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done (in all the errorOrWarn calls).
lld/ELF/SyntheticSections.cpp
Outdated
inputChunks[i].namesSection = | ||
std::make_unique<LLDDWARFSection>(namesSection); | ||
if (llvm::Error E = inputChunks[i].debugNamesData->extract()) { | ||
// Report an error here. We were unable to extract the data. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code self explains. Remove the comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.cpp
Outdated
ret->getMergedAbbrevTable(inputChunks); | ||
ret->getMergedSymbols(inputChunks); | ||
ret->computeUniqueHashes(inputChunks); | ||
inputChunksPtr.reset(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If early reset
is to save memory. Add a comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.cpp
Outdated
|
||
template <class ELFT> | ||
static void | ||
readEntries(struct DebugNamesSection::DebugNamesSectionData &secData, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove struct
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.cpp
Outdated
chunk.hashValues.reserve(secData.hdr.NameCount); | ||
secData.namedEntries.reserve(secData.hdr.NameCount); | ||
// Calculate the Entry Offsets, create initial records. | ||
for (uint32_t i = 0; i < secData.hdr.NameCount; ++i) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://llvm.org/docs/CodingStandards.html#don-t-evaluate-end-every-time-through-a-loop
Save uint32_t i =0, e = secData.hdr.NameCount; i != e; ++i
) (!=
is more common)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can do this if you like, but secData.hdr.NameCount is an integer, not a function call...so it seems like I'd just be substituting one variable for another? Do you really want me to do that here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The compiler may not know secData.hdr.NameCount
is immutable. It may generate redundant instruction in each iteration to recompute the NameCount, more expensive than loading NameCount
once into a register.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation. Done.
lld/ELF/SyntheticSections.cpp
Outdated
errorOrWarn(toString(chunk.namesSection->sec) + ": unsupported version"); | ||
findDebugNamesOffsets(secData.locs, secData.hdrSize, secData.hdr.Format, | ||
secData.hdr); | ||
readCompileUnitOffsets<ELFT>(secData, chunk, outputChunk, namesExtractor); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The three functions do bound checking in a very inefficient way.
EntriesBase
used by readEntries has the largest data structure offset. If it doesn't overflow, other data structures don't overflow. However, readCompileUnitOffsets/readEntryOffsets waste time in the loop body to do unnecessary bound checking.
Remove the needless use of DataExtractor.
More inefficiency is going on. entryOffsets
is read in readEntryOffsets
just to be used once and abandoned in readEntries
. It's much better to read the offset in readEntries
instead
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Working on this one...(not done yet).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see what you're talking about here -- I see no bounds-checking happening in readCompileUnitOffsets nor in readEntryOffsets. Nor do I see any needless uses of data extractors. What are you considering to be bounds-checking code, and where to you see a needless use of a data extractor?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Every DataExtractor call does bounds checking (that's the error handling it does/can do - if you ask it to read 1 byte, all byte values are valid, so long as there's a byte there - so the only reason it could fail is if there wasn't a byte to read - if the underlying buffer was too short).
I think Ray's suggesting doing some up-front bounds checking of the total bounds (at least within a list - for instance the suggestion to use the getU32
that takes an output buffer and a count - reading in NU32 in one go, and so only bounds checking once for the whole NU32s) rather than having bounds checking on every separate U32 (or whatever) read.
The index entries proper are probably trickier - entries aren't necessarily fixed size, and the header doesn't specify how many entries there are - so you just have to keep reading them one by one I think...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I've done was maskray was asking for. Please let me know if I misunderstood you.
lld/ELF/SyntheticSections.cpp
Outdated
void DebugNamesSection::collectMergedCounts( | ||
MutableArrayRef<DebugNamesInputChunk> &inputChunks) { | ||
SmallVector<uint32_t, 0> tmpMergedCuOffsets; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
delete blank line
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.cpp
Outdated
data.hdr.AugmentationString)) { | ||
// There are conflicting augmentation strings, so it's best for the | ||
// merged index to not use an augmentation string. | ||
StringRef emptyString = " "; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
inline the used-once constant variable
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be more suitable to keep the variable and use it twice - once to initialize the AugmentationStringSize (= emptyString.size()
) and then to initialized AugmentationString.
(or skip the separate named variable, but still initialize the StringSize from the String (after initializing AugmentationString
, mergedHdr.AugmentationStringSize = mergedHdr.AugmentationString.size()
), rather than hardcoding it to 8 - which risks the size diverging from the string and something else breaking later on)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've updated it to get the size directly from the string now.
lld/ELF/SyntheticSections.cpp
Outdated
|
||
// Write the abbrev table. | ||
for (const auto *abbrev : mergedAbbrevTable) { | ||
size_t uleb_size = encodeULEB128(abbrev->code, buf); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use encodeULEB128AndInc and remove uleb_size
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unable to use 'encodeULEB128AndInc' because the function doesn't exist (only decodeULEB128AndInc exists). But Was able to remove uleb_size variable anyway.
lld/ELF/SyntheticSections.cpp
Outdated
errorOrWarn( | ||
toString(namesSection.sec) + | ||
": error while reading attributes: " + toString(std::move(err))); | ||
if (attr.Index == DW_IDX_compile_unit || attr.Index == DW_IDX_type_unit) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this patch does not support DW_IDX_type_unit
, best to leave it for the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I disagree. There are several places where I've put in comments about type units. Also I plan on adding the type_units work as soon as this CL goes in. I would strongly prefer to keep this code as is; it's not hurting anything and will make future work much easier.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One reason I'm often a bit apprehensive about including half-implemented code, is it's harder to know whether code is tested/correct, when it's only partially implemented and unreachable or not coherent in the commit it's landed in.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I will remove all mentions of DW_IDX_type_unit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.h
Outdated
uint32_t abbrevCode; | ||
uint32_t poolOffset; | ||
union { | ||
int32_t parentOffset = -1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we use uint32_t parentOffset = 0
since the minimum valid parentOffset
value is at least EntriesOffset
, which is much larger than 0?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, the offsets are measured from the start of the entries section, so zero is in fact a valid offset. I need to initialize this to an invalid offset value.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The header has a larger size, so offset 0 cannot really a valid entry offset.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You would be correct, if the offsets were counting from the start of the .debug_names section, but they are not. The offsets are measuring from the start of the Entry Pool (the bit in .debug_names section that comes after the abbrev table). So zero is a valid entry offset.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep entry offsets are relative, with first one being zero for the purposes of parent. Discovered that when implementing support in BOLT.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We compute the parent offset as ie->parentOffset = locs.EntriesBase + attr.attrValue
. EntriesBase
is larger than the header size. Here we can use 0 as a sentinel number.
lld/ELF/SyntheticSections.cpp
Outdated
for (auto &entry : stringEntry.indexEntries) { | ||
uint32_t entrySize = 0; | ||
entry->poolOffset = offset; | ||
uint32_t ulebSize = getULEB128Size(entry->abbrevCode); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
entrySize += getULEB128Size(entry->abbrevCode)
Please check whether ulebSize
/uleb_size
elsewhere can be removed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have removed all ulebSize and uleb_size variables.
lld/ELF/SyntheticSections.cpp
Outdated
|
||
template <class ELFT> | ||
static void | ||
readCompileUnitOffsets(struct DebugNamesSection::DebugNamesSectionData &secData, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove struct
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
Done (see 'debug-names-invalid-flags.s). |
When I suggested add the test there, I am very clear that we should reuse an existing test, instead of disobeying the convention and adding a new file. There are values in reusing a small set of files instead of spreading tests everywhere. I can take a stab at providing an update on top of this branch so that we don't need to spend too much time debating detailed changes, and you can then consider cherry picking my commits. I believe my providing commits directly might expedite this patch. |
I apologize! I mis-read your original request. I have fixed it now (added the test to driver.test and removed debug-names-invalid-flags.s). |
lld/ELF/SyntheticSections.cpp
Outdated
warn(".debug_names: type units are not handled in merged index"); | ||
|
||
// Write the foreign TU list. | ||
// Currently LLVM does not emit foreign type units, so there should |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure what this means. llvm does emit TUs for split dwarf.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point; that comment was correct when written, but it now obsolete (sorry about that!). I will remove it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.cpp
Outdated
for (const auto &entry : stringEntry.indexEntries) { | ||
buf += encodeULEB128(entry->abbrevCode, buf); | ||
for (const auto &value : entry->attrValues) { | ||
if (value.attrSize > 0) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When will this be zero? Does this imply that Entry only has parent attribute of flag?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Another piece of obsolete code (nice catch!). Leftover from creating attribute for DW_IDX_parent with form DW_FORM_flag_present. I will clean this up as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
lld/ELF/SyntheticSections.cpp
Outdated
for (auto &stringEntry : secData.namedEntries) | ||
for (auto &entry : stringEntry.indexEntries) | ||
if (entry->parentOffset != -1) | ||
entry->parentEntry = offsetMap[entry->parentOffset]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How will this handle when
module 0
Entry0 0xOffset1 {...}
....
EntryX {
...
ParrentOffst 0xOffset1
}
module 1
Entry0 0xOffset1 {...}
....
EntryX {
...
ParrentOffst 0xOffset1
}
So parent offsets are the same in module 0 and 1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is being assigned to entry->parentEntry is not the offset, but the pointer to the actual IndexEntry of the parent.
Does that answer your question? If not, I can go into more detail...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I missed the part where offsetMap gets re-created for each module. Never mind.
We ended up with |
What does everyone thinks about refactoring the debug names code out of SythenticSections.cpp to DebugNamesSyntheticSection.cpp or something like that? (followup commit is fine) |
Done: I've switched to using ELFT::Endianness. |
Except for maskray's request re using 'rm -rf %t && split-file %s %t && cd %t' in tests (still looking into that), I think I have addressed all the review request/comments. So it's ready for any additional reviewing. If I've overlooked something please let me know. Re @ayermolo 's suggestion to pull the DebugNames work out of SyntheticSections.cpp into another file: That's a decision I would want to leave to @MaskRay . I don't know how much doing so would really help, however. The DebugNamesSection work in SyntheticSections.cpp is only ~750 lines, and it's all contiguous... |
Adding a new file might require to make some internal functions ( |
lGTM. |
- Update comment about CompUnits in OutputChunk. - Replace two uses of 'seq' in for-loops. - Replace use of memcpy with llvm::copy.
Fix typo, add 'const', replace one use of 'seq'/
- Keep type unit counts in output header at zero, regardless of input, to avoid crashing LLD, since we're not actually handling type units yet. - Move the type units warning to the point where we can check the input counts. - Add a type units test (that LLD doesn't crash and outputs the warning).
Remove tools/generate-content.py. We will use a generic tool intended for llvm/utils: https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408
Remove tools/generate-content.py. We will use a generic tool intended for llvm/utils: https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408
lld/ELF/SyntheticSections.cpp
Outdated
numCu += chunks[i].compUnits.size(); | ||
for (const NameData &nd : inputChunk.nameData) { | ||
hdr.CompUnitCount += nd.hdr.CompUnitCount; | ||
// We are not actually handling or emitting type units yet, so |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
typo: so so
I'd remove
//hdr.LocalTypeUnitCount += nd.hdr.LocalTypeUnitCount;
//hdr.ForeignTypeUnitCount += nd.hdr.ForeignTypeUnitCount;
and non-zero type unit counts will crash LLD.
(it's an assertion failure which can be avoided if we write code slightly differently.
Just
// TODO: We don't handle type units yet and LocalTypeUnitCount/ForeignTypeUnitCount are left as 0.
is sufficient.
Thanks for chiming in. Type units are not handled yet in this patch. This patch focuses on compile units with an opt-in option It seems that the remaining issues are minor style issues. LGTM to make it clear I am happy once these issues are resolved or possibly defer some later if @dwblaikie is fine with that. |
- Pulled body of loop that reads/parses IndexEntries into a separate function (readEntry) - Removed unnecessary 'const' qualifiers on local vars - Updated comment about handling type units - Created getChunks accessor function; used it to eliminate use of 'seq' in for-loop. - Enhanced some error messages - Update debug-names-bad.s test to handle udpated error messages
I believe I have addressed all requests and this is ready for approval. :-) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good, after a few minor cleanups after the readEntry
refactoring
lld/ELF/SyntheticSections.cpp
Outdated
attr.attrSize = 4; | ||
ie->parentOffset = entriesBase + attr.attrValue; | ||
} else if (a.Form != DW_FORM_flag_present) { | ||
errMsg = ": invalid form for DW_IDX_parent"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Presumably should just return immediately here, with the string error?
And then there's no reason for errMsg
to have such a wide scope (or possibly to exist at all)
lld/ELF/SyntheticSections.cpp
Outdated
errMsg = ": entry abbrev code not found in abbrev table: "; | ||
errMsg.append(std::to_string(ie->abbrevCode)); | ||
return createStringError(inconvertibleErrorCode(), errMsg.c_str()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use the format version of createStringError
:
return createStringError(inconvertibleErrorCode(), ": entry abbrev code too large for DWARF32: %d", ie->abbrevCode);
here and elsewhere - then errMsg
can be removed, most likely.
lld/ELF/SyntheticSections.cpp
Outdated
if (offset >= namesSec.Data.size()) | ||
errMsg = ": index entry is out of bounds"; | ||
if (errMsg) | ||
errorOrWarn(toString(namesSec.sec) + Twine(errMsg)); | ||
if (!errMsg.empty()) | ||
errorOrWarn(toString(namesSec.sec) + Twine(errMsg.c_str())); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
drop errMsg
here (& remove its declaration above) - since it's only used in this immediate scope, simplify this code to:
if (offset >= namesSec.Data.size())
errorOrWarn(toString(namesSec.sec) + ": index entry is out of bounds");
lld/ELF/SyntheticSections.cpp
Outdated
errorOrWarn(toString(namesSec.sec) + | ||
Twine(toString(ieOrErr.takeError()))); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider removing the ": " from the error messages inside readEntry
(they don't need to know they're being concatenated with something else later) and adding it here instead:
errorOrWarn(toString(namesSec.sec) + ": " + toString(ieOrErr.takeError()));
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using Expected<DebugNamesBaseSection::IndexEntry *>
and inconvertibleErrorCode()
look unnecessary to me.
We have other places that we return different types of errors and we simply return a std::string.
lld/ELF/SyntheticSections.cpp
Outdated
errMsg.append(toString(std::move(err))); | ||
return createStringError(inconvertibleErrorCode(), errMsg.c_str()); | ||
} | ||
if (ulebVal < UINT32_MAX) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be <=
Or use isUInt<32>(ulebVal)
lld/ELF/SyntheticSections.h
Outdated
SmallVector<Abbrev *, 0> abbrevTable; | ||
SmallVector<char, 0> abbrevTableBuf; | ||
|
||
ArrayRef<OutputChunk> getChunks() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getChunks() const
lld/ELF/SyntheticSections.cpp
Outdated
// Collect and de-duplicate all the names (preserving all the entries). | ||
// Speed it up using multithreading, as the number of symbols can be in the | ||
// order of millions. | ||
size_t concurrency = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
254ceed removed const
here and in other places.
I think we should keep const size_t concurrency
. Using const for integer type variables is conventional and avoiding const
is not. @dwblaikie
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair enough gfor now.
Though I find that to be a confusing convention & makes the code harder to read for me, because that's not a logical distinction I tend to think of when reading code - so then it's confusing seeing one local const and one local non-const-but-not-modified & not understanding why one would be const and another wouldn't be...
- Clean up the error messages in readEntry function - Restore 'const' qualifier to size_t vars in computeEntryPool - Add 'const' qualifier to getChunks function.
lld/ELF/SyntheticSections.cpp
Outdated
|
||
// Canonicalize abbrev by placing the CU/TU index at the end. | ||
ie->attrValues.push_back(cuAttr); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Our convention does not add a blank line here
lld/ELF/SyntheticSections.cpp
Outdated
auto it = ni.getAbbrevs().find_as(ie->abbrevCode); | ||
if (it == ni.getAbbrevs().end()) | ||
return createStringError(inconvertibleErrorCode(), | ||
"entry abbrev code not found in abbrev table: %d", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
%d => %u since this is an unsigned int.
lld/ELF/SyntheticSections.cpp
Outdated
ie->abbrevCode = static_cast<uint32_t>(ulebVal); | ||
else | ||
return createStringError(inconvertibleErrorCode(), | ||
"abbrev code in entry too large for DWARF32: %d", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be "%" PRIu64
lld/ELF/SyntheticSections.cpp
Outdated
const size_t concurrency = | ||
bit_floor(std::min<size_t>(config->threadCount, numShards)); | ||
const size_t shift = 32 - countr_zero(numShards); | ||
uint8_t cuAttrSize = getMergedCuCountForm(hdr.CompUnitCount).first; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
const uint8_t cuAttrSize
Remove one blank line & add one 'const' qualifier.
Test that ld.lld --debug-names (llvm#86508) built per-module index can be consumed by lldb. This has uncovered a bug during the development of the lld feature.
Test that ld.lld --debug-names (#86508) built per-module index can be consumed by lldb. This has uncovered a bug during the development of the lld feature.
Generally, IR and assembly test files benefit from being cleaned to remove unnecessary details. However, for tests requiring elaborate IR or assembly files where cleanup is less practical (e.g., large amount of debug information output from Clang), the current practice is to include the C/C++ source file and the generation instructions as comments. This is inconvenient when regeneration is needed. This patch adds `llvm/utils/update_test_body.py` to allow easier regeneration. `ld.lld --debug-names` tests (#86508) utilize this script for Clang-generated assembly tests. Note: `-o pipefail` is standard (since https://www.austingroupbugs.net/view.php?id=789) but not supported by dash. Link: https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408
clang -g -gpubnames
(with optional -gsplit-dwarf) creates the.debug_names
section ("per-CU" index). By default lld concatenates input.debug_names
sections into an output.debug_names
section. LLDB can consume the concatenated section but the lookup performance is not good.This patch adds --debug-names to create a per-module index by combining the per-CU indexes into a single index that covers the entire load module. The produced
.debug_names
is a replacement for.gdb_index
.Type units (-fdebug-types-section) are not handled yet.
Co-authored-by: Fangrui Song i@maskray.me