[lld][ELF] Add --debug-names to create merged .debug_names. #86508

cmtice · 2024-03-25T14:36:24Z

clang -g -gpubnames (with optional -gsplit-dwarf) creates the .debug_names section ("per-CU" index). By default lld concatenates input .debug_names sections into an output .debug_names section. LLDB can consume the concatenated section but the lookup performance is not good.

This patch adds --debug-names to create a per-module index by combining the per-CU indexes into a single index that covers the entire load module. The produced .debug_names is a replacement for .gdb_index.

Type units (-fdebug-types-section) are not handled yet.

Co-authored-by: Fangrui Song i@maskray.me

llvmbot · 2024-03-25T14:36:57Z

@llvm/pr-subscribers-lld

Author: None (cmtice)

Changes

Update LLD to be able to generate a single merged .debug_names section (rather than a set of appended sections). Option is controlled by a new flag, --debug-names. Does not handle type units yet (coming soon).

Patch is 147.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86508.diff

19 Files Affected:

(modified) lld/ELF/Config.h (+1)
(modified) lld/ELF/DWARF.cpp (+1)
(modified) lld/ELF/DWARF.h (+5)
(modified) lld/ELF/Driver.cpp (+3)
(modified) lld/ELF/Options.td (+4)
(modified) lld/ELF/SyntheticSections.cpp (+761)
(modified) lld/ELF/SyntheticSections.h (+134)
(modified) lld/ELF/Writer.cpp (+3)
(modified) lld/docs/ld.lld.1 (+4)
(added) lld/test/ELF/Inputs/debug-names-2.s (+191)
(added) lld/test/ELF/Inputs/debug-names-parent-idx-2.s (+347)
(added) lld/test/ELF/debug-names-bad-aug-string.s (+189)
(added) lld/test/ELF/debug-names-bad-die-idx-sizes.s (+151)
(added) lld/test/ELF/debug-names-bad-name-count.s (+154)
(added) lld/test/ELF/debug-names-bad-offsets-sizes.s (+153)
(added) lld/test/ELF/debug-names-bad-version.s (+173)
(added) lld/test/ELF/debug-names-invalid-attribute.s (+179)
(added) lld/test/ELF/debug-names-parent-idx.s (+549)
(added) lld/test/ELF/debug-names.s (+294)

diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index bb3608da80b21f..1a7c6dafe17e18 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -228,6 +228,7 @@ struct Config {
   bool cref;
   llvm::SmallVector<std::pair<llvm::GlobPattern, uint64_t>, 0>
       deadRelocInNonAlloc;
+  bool debugNames;
   bool demangle = true;
   bool dependentLibraries;
   bool disableVerify;
diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp
index ac28aa8c7ca6e0..03fac65754f020 100644
--- a/lld/ELF/DWARF.cpp
+++ b/lld/ELF/DWARF.cpp
@@ -40,6 +40,7 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
                 .Case(".debug_gnu_pubtypes", &gnuPubtypesSection)
                 .Case(".debug_line", &lineSection)
                 .Case(".debug_loclists", &loclistsSection)
+                .Case(".debug_names", &debugNamesSection)
                 .Case(".debug_ranges", &rangesSection)
                 .Case(".debug_rnglists", &rnglistsSection)
                 .Case(".debug_str_offsets", &strOffsetsSection)
diff --git a/lld/ELF/DWARF.h b/lld/ELF/DWARF.h
index 1b9a3e3f77943b..e1a830279b54d0 100644
--- a/lld/ELF/DWARF.h
+++ b/lld/ELF/DWARF.h
@@ -68,6 +68,10 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
     return gnuPubtypesSection;
   }
 
+  const LLDDWARFSection &getNamesSection() const override {
+    return debugNamesSection;
+  }
+
   StringRef getFileName() const override { return ""; }
   StringRef getAbbrevSection() const override { return abbrevSection; }
   StringRef getStrSection() const override { return strSection; }
@@ -95,6 +99,7 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
   LLDDWARFSection strOffsetsSection;
   LLDDWARFSection lineSection;
   LLDDWARFSection addrSection;
+  LLDDWARFSection debugNamesSection;
   StringRef abbrevSection;
   StringRef strSection;
   StringRef lineStrSection;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 7257ebd0fac994..9a300a54e122bd 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -441,6 +441,8 @@ static void checkOptions() {
       error("-r and -pie may not be used together");
     if (config->exportDynamic)
       error("-r and --export-dynamic may not be used together");
+    if (config->debugNames)
+      error("-r and --debug-names may not be used together");
   }
 
   if (config->executeOnly) {
@@ -1231,6 +1233,7 @@ static void readConfigs(opt::InputArgList &args) {
   config->cref = args.hasArg(OPT_cref);
   config->optimizeBBJumps =
       args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false);
+  config->debugNames = args.hasFlag(OPT_debug_names, OPT_no_debug_names, false);
   config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true);
   config->dependencyFile = args.getLastArgValue(OPT_dependency_file);
   config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c5e95d0d25c1ae..d470646ed0eeef 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -153,6 +153,10 @@ def : Flag<["--"], "no-color-diagnostics">, Alias<color_diagnostics>, AliasArgs<
 def cref: FF<"cref">,
   HelpText<"Output cross reference table. If -Map is specified, print to the map file">;
 
+defm debug_names: BB<"debug-names",
+    "Generate a merged .debug_names section",
+    "Do not generate a merged .debug_names section (default)">;
+
 defm demangle: B<"demangle",
     "Demangle symbol names (default)",
     "Do not demangle symbol names">;
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index f924756ddddfcd..6b84f4d806f356 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SyntheticSections.h"
+
 #include "Config.h"
 #include "DWARF.h"
 #include "EhFrame.h"
@@ -34,7 +35,9 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/Parallel.h"
@@ -2690,6 +2693,759 @@ static uint32_t computeGdbHash(StringRef s) {
   return h;
 }
 
+DebugNamesSection::DebugNamesSection()
+    : SyntheticSection(0, SHT_PROGBITS, 1, ".debug_names") {}
+
+template <class ELFT, class RelTy>
+void DebugNamesSection::getNameRelocsImpl(
+    InputSection *sec, ArrayRef<RelTy> rels,
+    llvm::DenseMap<uint32_t, uint32_t> &relocs) {
+  for (auto &rel : rels) {
+    Symbol &sym = sec->getFile<ELFT>()->getRelocTargetSym(rel);
+    relocs[rel.r_offset] = sym.getVA(getAddend<ELFT>(rel));
+  }
+}
+
+template <class ELFT>
+void DebugNamesSection::getNameRelocs(
+    InputSectionBase *base, llvm::DenseMap<uint32_t, uint32_t> &relocs) {
+  auto *sec = cast<InputSection>(base);
+  const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
+  if (rels.areRelocsRel())
+    getNameRelocsImpl<ELFT>(sec, rels.rels, relocs);
+  else
+    getNameRelocsImpl<ELFT>(sec, rels.relas, relocs);
+}
+
+void DebugNamesSection::writeTo(uint8_t *buf) { invokeELFT(writeToImpl, buf); }
+
+template <class ELFT> void DebugNamesSection::writeToImpl(uint8_t *buf) {
+  SmallVector<uint32_t, 0> mergedCuOffsets;
+  SmallVector<uint32_t, 0> mergedTuOffsets;
+  llvm::DenseMap<uint32_t, uint32_t> strOffsets;
+  SmallVector<llvm::DenseMap<uint32_t, uint32_t>, 0> chunksRelocs;
+  chunksRelocs.reserve(numChunks);
+
+  for (size_t i = 0; i < numChunks; ++i) {
+    DebugNamesOutputChunk &chunk = outputChunks[i];
+    InputSectionBase *base = inputDebugNamesSections[i];
+    llvm::DenseMap<uint32_t, uint32_t> relocs;
+    getNameRelocs<ELFT>(base, relocs);
+    chunksRelocs.push_back(std::move(relocs));
+
+    // Update CuOffsets list with new data
+    for (uint32_t cuOffset : chunk.compilationUnits)
+      mergedCuOffsets.push_back(chunk.sec->outSecOff + cuOffset);
+
+    // TODO: Update TuOffsets list with new data
+  }
+
+  // Update the entries with the relocated string offsets.
+  for (auto &stringEntry : mergedEntries) {
+    uint32_t oldOffset = stringEntry.stringOffsetOffset;
+    uint32_t idx = stringEntry.chunkIdx;
+    stringEntry.relocatedEntryOffset = chunksRelocs[idx][oldOffset];
+  }
+
+  // Write out bytes for merged section.
+
+  // Write the header.
+  endian::write32<ELFT::TargetEndianness>(buf + 0, mergedHdr.UnitLength);
+  endian::write16<ELFT::TargetEndianness>(buf + 4, mergedHdr.Version);
+  endian::write32<ELFT::TargetEndianness>(buf + 8, mergedHdr.CompUnitCount);
+  endian::write32<ELFT::TargetEndianness>(buf + 12,
+                                          mergedHdr.LocalTypeUnitCount);
+  endian::write32<ELFT::TargetEndianness>(buf + 16,
+                                          mergedHdr.ForeignTypeUnitCount);
+  endian::write32<ELFT::TargetEndianness>(buf + 20, mergedHdr.BucketCount);
+  endian::write32<ELFT::TargetEndianness>(buf + 24, mergedHdr.NameCount);
+  endian::write32<ELFT::TargetEndianness>(buf + 28, mergedHdr.AbbrevTableSize);
+  endian::write32<ELFT::TargetEndianness>(buf + 32,
+                                          mergedHdr.AugmentationStringSize);
+  buf += 36;
+  memcpy(buf, mergedHdr.AugmentationString.c_str(), 8);
+  buf += 8;
+
+  // Write the CU list.
+  for (uint32_t offset : mergedCuOffsets) {
+    endian::write32<ELFT::TargetEndianness>(buf + 0, offset);
+    buf += 4;
+  }
+
+  // Write the local TU list.
+  // TODO: Fix this, once we get everything working without TUs.
+  if (mergedHdr.LocalTypeUnitCount != 0)
+    warn(".debug_names: type units are not handled in merged index");
+
+  // Write the foreign TU list.
+  // Currently LLVM does not emit foreign type units, so there should
+  // be nothing here to emit.
+  // TODO: Fix this, once we get everything working wtihout TUs.
+  if (mergedHdr.ForeignTypeUnitCount != 0)
+    warn(".debug_names: type units are not handled in merged index");
+
+  // Write the hash table.
+  // ... Write the buckets
+  uint32_t idx = 1;
+  for (const auto &bucket : bucketList) {
+    if (!bucket.empty())
+      endian::write32<ELFT::TargetEndianness>(buf + 0, idx);
+    idx += bucket.size();
+    buf += 4;
+  }
+
+  // ...Write the hashes
+  for (const auto &bucket : bucketList) {
+    for (const auto &entry : bucket) {
+      uint32_t hashValue = entry->hashValue;
+      endian::write32<ELFT::TargetEndianness>(buf + 0, hashValue);
+      buf += 4;
+    }
+  }
+
+  // Write the string offsets.
+  for (const auto &entry : mergedEntries) {
+    endian::write32<ELFT::TargetEndianness>(buf + 0,
+                                            entry.relocatedEntryOffset);
+    buf += 4;
+  }
+
+  // Write the entry offsets.
+  for (const auto &entry : mergedEntries) {
+    endian::write32<ELFT::TargetEndianness>(buf + 0, entry.entryOffset);
+    buf += 4;
+  }
+
+  // Write the abbrev table.
+  for (const auto *abbrev : mergedAbbrevTable) {
+    size_t uleb_size = encodeULEB128(abbrev->code, buf);
+    buf += uleb_size;
+    uleb_size = encodeULEB128(abbrev->tag, buf);
+    buf += uleb_size;
+    for (auto attr : abbrev->attributes) {
+      uleb_size = encodeULEB128(attr.Index, buf);
+      buf += uleb_size;
+      uleb_size = encodeULEB128(attr.Form, buf);
+      buf += uleb_size;
+    }
+    endian::write16<ELFT::TargetEndianness>(buf + 0, 0); // attribute sentinels.
+    buf += 2;
+  }
+  *buf++ = 0; // abbrev table sentinel
+
+  // Write the entry pool.
+  for (const auto &stringEntry : mergedEntries) {
+    // Write all the entries for the string.
+    size_t uleb_size;
+    for (const auto &entry : stringEntry.indexEntries) {
+      uleb_size = encodeULEB128(entry->abbrevCode, buf);
+      buf += uleb_size;
+      for (const auto &value : entry->attrValues) {
+        if (value.attrSize > 0) {
+          endian::write32<ELFT::TargetEndianness>(buf + 0, value.attrValue);
+          buf += value.attrSize;
+        }
+      }
+    }
+    *buf++ = 0; // Entry sentinel
+  }
+}
+
+bool DebugNamesSection::isNeeded() const { return numChunks > 0; }
+
+template <class ELFT>
+static void
+readCompileUnitOffsets(struct DebugNamesSection::DebugNamesSectionData &secData,
+                       DebugNamesSection::DebugNamesInputChunk &inputChunk,
+                       DebugNamesSection::DebugNamesOutputChunk &outputChunk,
+                       llvm::DWARFDataExtractor &namesExtractor) {
+  uint64_t offset = secData.locs.CUsBase;
+  uint64_t *offsetPtr = &offset;
+  for (size_t i = 0; i < secData.hdr.CompUnitCount; ++i) {
+    llvm::Error err = llvm::Error::success();
+    uint32_t value = namesExtractor.getU32(offsetPtr, &err);
+    if (err)
+      errorOrWarn(toString(inputChunk.namesSection->sec) +
+                  ": error reading CU offsets: " + toString(std::move(err)));
+    else
+      outputChunk.compilationUnits.push_back(value);
+  }
+}
+
+template <class ELFT>
+static void
+readEntryOffsets(struct DebugNamesSection::DebugNamesSectionData &secData,
+                 DebugNamesSection::DebugNamesInputChunk &chunk,
+                 llvm::DWARFDataExtractor &namesExtractor) {
+  secData.entryOffsets = std::make_unique<uint32_t[]>(secData.hdr.NameCount);
+  if (secData.locs.EntryOffsetsBase >= namesExtractor.getData().size())
+    errorOrWarn(toString(chunk.namesSection->sec) +
+                ": invalid entry offsets input");
+
+  uint64_t offset = secData.locs.EntryOffsetsBase;
+  uint64_t *offsetPtr = &offset;
+  for (size_t i = 0; i < secData.hdr.NameCount; ++i) {
+    llvm::Error err = llvm::Error::success();
+    uint32_t value = namesExtractor.getU32(offsetPtr, &err);
+    if (err)
+      errorOrWarn(toString(chunk.namesSection->sec) +
+                  ": error reading entry offsets: " + toString(std::move(err)));
+    else
+      secData.entryOffsets.get()[i] = value;
+  }
+}
+
+template <class ELFT>
+static void readAttributeValues(
+    SmallVector<DebugNamesSection::AttrValueData, 3> &values,
+    DebugNamesSection::DebugNamesInputChunk &chunk, uint64_t &offset,
+    struct DebugNamesSection::DebugNamesSectionData &secData,
+    int32_t &parentOffset, llvm::DWARFDataExtractor &namesExtractor,
+    const llvm::DWARFDebugNames::Abbrev &abbrev) {
+  const LLDDWARFSection &namesSection = *chunk.namesSection;
+  uint64_t *offsetPtr = &offset;
+  DebugNamesSection::AttrValueData cuOrTuAttr = {0, 0};
+  for (auto attr : abbrev.Attributes) {
+    llvm::Error err = llvm::Error::success();
+    DebugNamesSection::AttrValueData newAttr;
+    uint32_t value;
+    switch (attr.Form) {
+    case DW_FORM_flag_present: {
+      // Currently only DW_IDX_parent attributes (in .debug_names) can
+      // have this form. This form does not have a real value (nothing is
+      // emitted for it).
+      break;
+    }
+    case DW_FORM_data1:
+    case DW_FORM_ref1: {
+      newAttr.attrValue = namesExtractor.getU8(offsetPtr, &err);
+      newAttr.attrSize = 1;
+      break;
+    }
+    case DW_FORM_data2:
+    case DW_FORM_ref2: {
+      value = namesExtractor.getU16(offsetPtr, &err);
+      newAttr.attrValue = value;
+      newAttr.attrSize = 2;
+      break;
+    }
+    case DW_FORM_data4:
+    case DW_FORM_ref4: {
+      value = namesExtractor.getU32(offsetPtr, &err);
+      newAttr.attrValue = value;
+      newAttr.attrSize = 4;
+      if (attr.Index == dwarf::DW_IDX_parent)
+        parentOffset = value + secData.locs.EntriesBase;
+      break;
+    }
+    case DW_FORM_data8:
+    case DW_FORM_ref8:
+    case DW_FORM_ref_sig8: {
+      value = namesExtractor.getU64(offsetPtr, &err);
+      newAttr.attrValue = value;
+      newAttr.attrSize = 8;
+      break;
+    }
+    default: {
+      errorOrWarn(toString(namesSection.sec) +
+                  Twine(": unrecognized form encoding ") + Twine(attr.Form) +
+                  " in .debug_names abbrev table");
+      break;
+    }
+    }
+    if (err)
+      errorOrWarn(
+          toString(namesSection.sec) +
+          ": error while reading attributes: " + toString(std::move(err)));
+    if (attr.Index == DW_IDX_compile_unit || attr.Index == DW_IDX_type_unit)
+      // Save it to put it at the end of the attributes list.
+      cuOrTuAttr = newAttr;
+    else if (attr.Form != DW_FORM_flag_present)
+      values.push_back(newAttr);
+  }
+
+  // Make sure the CU or TU index attribute is the last one in the list.
+  values.push_back(cuOrTuAttr);
+}
+
+template <class ELFT>
+static void readEntry(DebugNamesSection::NamedEntry &stringEntry,
+                      DebugNamesSection::DebugNamesInputChunk &chunk,
+                      DebugNamesSection::DebugNamesSectionData &secData,
+                      llvm::DWARFDataExtractor &namesExtractor,
+                      const llvm::DWARFDebugNames::NameIndex &ni) {
+  std::unique_ptr<LLDDWARFSection> &namesSection = chunk.namesSection;
+  uint64_t offset = stringEntry.entryOffset;
+  // Each entry ends with a zero 'sentinel' value
+  while (offset < namesSection->Data.size() &&
+         namesSection->Data[offset] != 0) {
+    // Read & store all entries (for the same string)
+    auto entry = std::make_unique<DebugNamesSection::IndexEntry>();
+    entry->poolOffset = offset;
+    llvm::Error err = llvm::Error::success();
+    uint64_t ulebValue = namesExtractor.getULEB128(&offset, &err);
+    if (err)
+      errorOrWarn(toString(chunk.namesSection->sec) +
+                  ": error reading entry: " + toString(std::move(err)));
+    entry->abbrevCode = static_cast<uint32_t>(ulebValue);
+    const auto &abbrevs = ni.getAbbrevs();
+    auto abbrevIt = abbrevs.find_as(entry->abbrevCode);
+    if (abbrevIt != abbrevs.end()) {
+      const llvm::DWARFDebugNames::Abbrev &abbrev = *abbrevIt;
+      readAttributeValues<ELFT>(entry->attrValues, chunk, offset, secData,
+                                entry->parentOffset, namesExtractor, abbrev);
+      stringEntry.indexEntries.push_back(std::move(entry));
+    }
+  }
+  if (offset >= namesSection->Data.size())
+    errorOrWarn(toString(chunk.namesSection->sec) +
+                ": encountered unexpected end of section while reading entry");
+}
+
+template <class ELFT>
+static void
+readEntries(struct DebugNamesSection::DebugNamesSectionData &secData,
+            DebugNamesSection::DebugNamesInputChunk &chunk,
+            llvm::DWARFDataExtractor &namesExtractor,
+            llvm::DataExtractor &strExtractor,
+            const llvm::DWARFDebugNames::NameIndex &ni) {
+  // Temporary map from entry offsets to address (pointer) of entry with that
+  // offset; used to find parent entries quickly.
+  DenseMap<uint32_t, DebugNamesSection::IndexEntry *> offsetMap;
+  // Reserve sizes for this chunk's hashes & namedEntries.
+  chunk.hashValues.reserve(secData.hdr.NameCount);
+  secData.namedEntries.reserve(secData.hdr.NameCount);
+  // Calculate the Entry Offsets, create initial records.
+  for (uint32_t i = 0; i < secData.hdr.NameCount; ++i) {
+    // Get string value
+    DebugNamesSection::NamedEntry stringEntry;
+    stringEntry.entryOffset =
+        secData.locs.EntriesBase + secData.entryOffsets[i];
+    uint64_t strOffsetOffset =
+        secData.locs.StringOffsetsBase + i * secData.dwarfSize;
+    stringEntry.stringOffsetOffset = strOffsetOffset;
+    uint64_t strOffset =
+        namesExtractor.getRelocatedValue(secData.dwarfSize, &strOffsetOffset);
+    StringRef name = strExtractor.getCStrRef(&strOffset);
+    stringEntry.name = name.data();
+    // Calculate hash
+    stringEntry.hashValue = caseFoldingDjbHash(name);
+    chunk.hashValues.push_back(stringEntry.hashValue);
+    // Read String Entry
+    readEntry<ELFT>(stringEntry, chunk, secData, namesExtractor, ni);
+    // Add index entries & offsets to our temporary map
+    for (const auto &entry : stringEntry.indexEntries)
+      offsetMap[entry->poolOffset] = entry.get();
+    secData.namedEntries.push_back(std::move(stringEntry));
+  }
+  // Find/assign pointers to any 'real' parent entries (needed to find correct
+  // parent entry offsets in merged data).
+  for (auto &stringEntry : secData.namedEntries)
+    for (auto &entry : stringEntry.indexEntries)
+      if (entry->parentOffset != -1)
+        entry->parentEntry = offsetMap[entry->parentOffset];
+}
+
+static uint16_t computeDebugNamesHeaderSize() {
+  // Size of the .debug_names section header, in bytes, for DWARF32:
+  uint16_t size = /* unit length */ 4 +
+                  /* version */ 2 +
+                  /* padding */ 2 +
+                  /* CU count */ 4 +
+                  /* TU count */ 4 +
+                  /* Foreign TU count */ 4 +
+                  /* Bucket Count */ 4 +
+                  /* Name Count */ 4 +
+                  /* Abbrev table size */ 4 +
+                  /* Augmentation string size */ 4 +
+                  /* augmentation string */ 8;
+  return size;
+}
+
+template <class ELFT>
+static void collectDebugNamesSectionData(
+    DebugNamesSection::DebugNamesInputChunk &chunk,
+    DebugNamesSection::DebugNamesOutputChunk &outputChunk,
+    llvm::DWARFDataExtractor &namesExtractor,
+    llvm::DataExtractor &strExtractor) {
+  for (const auto &ni : *chunk.debugNamesData) {
+    DebugNamesSection::DebugNamesSectionData secData;
+    secData.hdr = ni.getHeader();
+    if (secData.hdr.Format != DwarfFormat::DWARF32)
+      errorOrWarn(toString(chunk.namesSection->sec) + ": unsupported DWARF64");
+    secData.dwarfSize = dwarf::getDwarfOffsetByteSize(DwarfFormat::DWARF32);
+    secData.hdrSize = computeDebugNamesHeaderSize();
+    if (secData.hdr.Version != 5)
+      errorOrWarn(toString(chunk.namesSection->sec) + ": unsupported version");
+    findDebugNamesOffsets(secData.locs, secData.hdrSize, secData.hdr.Format,
+                          secData.hdr);
+    readCompileUnitOffsets<ELFT>(secData, chunk, outputChunk, namesExtractor);
+    readEntryOffsets<ELFT>(secData, chunk, namesExtractor);
+    readEntries<ELFT>(secData, chunk, namesExtractor, strExtractor, ni);
+    chunk.sectionsData.push_back(std::move(secData));
+  }
+}
+
+void DebugNamesSection::collectMergedCounts(
+    MutableArrayRef<DebugNamesInputChunk> &inputChunks) {
+  SmallVector<uint32_t, 0> tmpMergedCuOffsets;
+
+  mergedHdr.CompUnitCount = 0;
+  mergedHdr.LocalTypeUnitCount = 0;
+  mergedHdr.For...
[truncated]

llvmbot · 2024-03-25T14:36:57Z

@llvm/pr-subscribers-lld-elf

Author: None (cmtice)

Changes

Update LLD to be able to generate a single merged .debug_names section (rather than a set of appended sections). Option is controlled by a new flag, --debug-names. Does not handle type units yet (coming soon).

Patch is 147.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86508.diff

19 Files Affected:

(modified) lld/ELF/Config.h (+1)
(modified) lld/ELF/DWARF.cpp (+1)
(modified) lld/ELF/DWARF.h (+5)
(modified) lld/ELF/Driver.cpp (+3)
(modified) lld/ELF/Options.td (+4)
(modified) lld/ELF/SyntheticSections.cpp (+761)
(modified) lld/ELF/SyntheticSections.h (+134)
(modified) lld/ELF/Writer.cpp (+3)
(modified) lld/docs/ld.lld.1 (+4)
(added) lld/test/ELF/Inputs/debug-names-2.s (+191)
(added) lld/test/ELF/Inputs/debug-names-parent-idx-2.s (+347)
(added) lld/test/ELF/debug-names-bad-aug-string.s (+189)
(added) lld/test/ELF/debug-names-bad-die-idx-sizes.s (+151)
(added) lld/test/ELF/debug-names-bad-name-count.s (+154)
(added) lld/test/ELF/debug-names-bad-offsets-sizes.s (+153)
(added) lld/test/ELF/debug-names-bad-version.s (+173)
(added) lld/test/ELF/debug-names-invalid-attribute.s (+179)
(added) lld/test/ELF/debug-names-parent-idx.s (+549)
(added) lld/test/ELF/debug-names.s (+294)

diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index bb3608da80b21f..1a7c6dafe17e18 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -228,6 +228,7 @@ struct Config {
   bool cref;
   llvm::SmallVector<std::pair<llvm::GlobPattern, uint64_t>, 0>
       deadRelocInNonAlloc;
+  bool debugNames;
   bool demangle = true;
   bool dependentLibraries;
   bool disableVerify;
diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp
index ac28aa8c7ca6e0..03fac65754f020 100644
--- a/lld/ELF/DWARF.cpp
+++ b/lld/ELF/DWARF.cpp
@@ -40,6 +40,7 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
                 .Case(".debug_gnu_pubtypes", &gnuPubtypesSection)
                 .Case(".debug_line", &lineSection)
                 .Case(".debug_loclists", &loclistsSection)
+                .Case(".debug_names", &debugNamesSection)
                 .Case(".debug_ranges", &rangesSection)
                 .Case(".debug_rnglists", &rnglistsSection)
                 .Case(".debug_str_offsets", &strOffsetsSection)
diff --git a/lld/ELF/DWARF.h b/lld/ELF/DWARF.h
index 1b9a3e3f77943b..e1a830279b54d0 100644
--- a/lld/ELF/DWARF.h
+++ b/lld/ELF/DWARF.h
@@ -68,6 +68,10 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
     return gnuPubtypesSection;
   }
 
+  const LLDDWARFSection &getNamesSection() const override {
+    return debugNamesSection;
+  }
+
   StringRef getFileName() const override { return ""; }
   StringRef getAbbrevSection() const override { return abbrevSection; }
   StringRef getStrSection() const override { return strSection; }
@@ -95,6 +99,7 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
   LLDDWARFSection strOffsetsSection;
   LLDDWARFSection lineSection;
   LLDDWARFSection addrSection;
+  LLDDWARFSection debugNamesSection;
   StringRef abbrevSection;
   StringRef strSection;
   StringRef lineStrSection;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 7257ebd0fac994..9a300a54e122bd 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -441,6 +441,8 @@ static void checkOptions() {
       error("-r and -pie may not be used together");
     if (config->exportDynamic)
       error("-r and --export-dynamic may not be used together");
+    if (config->debugNames)
+      error("-r and --debug-names may not be used together");
   }
 
   if (config->executeOnly) {
@@ -1231,6 +1233,7 @@ static void readConfigs(opt::InputArgList &args) {
   config->cref = args.hasArg(OPT_cref);
   config->optimizeBBJumps =
       args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false);
+  config->debugNames = args.hasFlag(OPT_debug_names, OPT_no_debug_names, false);
   config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true);
   config->dependencyFile = args.getLastArgValue(OPT_dependency_file);
   config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c5e95d0d25c1ae..d470646ed0eeef 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -153,6 +153,10 @@ def : Flag<["--"], "no-color-diagnostics">, Alias<color_diagnostics>, AliasArgs<
 def cref: FF<"cref">,
   HelpText<"Output cross reference table. If -Map is specified, print to the map file">;
 
+defm debug_names: BB<"debug-names",
+    "Generate a merged .debug_names section",
+    "Do not generate a merged .debug_names section (default)">;
+
 defm demangle: B<"demangle",
     "Demangle symbol names (default)",
     "Do not demangle symbol names">;
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index f924756ddddfcd..6b84f4d806f356 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SyntheticSections.h"
+
 #include "Config.h"
 #include "DWARF.h"
 #include "EhFrame.h"
@@ -34,7 +35,9 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/Support/DJB.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/Parallel.h"
@@ -2690,6 +2693,759 @@ static uint32_t computeGdbHash(StringRef s) {
   return h;
 }
 
+DebugNamesSection::DebugNamesSection()
+    : SyntheticSection(0, SHT_PROGBITS, 1, ".debug_names") {}
+
+template <class ELFT, class RelTy>
+void DebugNamesSection::getNameRelocsImpl(
+    InputSection *sec, ArrayRef<RelTy> rels,
+    llvm::DenseMap<uint32_t, uint32_t> &relocs) {
+  for (auto &rel : rels) {
+    Symbol &sym = sec->getFile<ELFT>()->getRelocTargetSym(rel);
+    relocs[rel.r_offset] = sym.getVA(getAddend<ELFT>(rel));
+  }
+}
+
+template <class ELFT>
+void DebugNamesSection::getNameRelocs(
+    InputSectionBase *base, llvm::DenseMap<uint32_t, uint32_t> &relocs) {
+  auto *sec = cast<InputSection>(base);
+  const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
+  if (rels.areRelocsRel())
+    getNameRelocsImpl<ELFT>(sec, rels.rels, relocs);
+  else
+    getNameRelocsImpl<ELFT>(sec, rels.relas, relocs);
+}
+
+void DebugNamesSection::writeTo(uint8_t *buf) { invokeELFT(writeToImpl, buf); }
+
+template <class ELFT> void DebugNamesSection::writeToImpl(uint8_t *buf) {
+  SmallVector<uint32_t, 0> mergedCuOffsets;
+  SmallVector<uint32_t, 0> mergedTuOffsets;
+  llvm::DenseMap<uint32_t, uint32_t> strOffsets;
+  SmallVector<llvm::DenseMap<uint32_t, uint32_t>, 0> chunksRelocs;
+  chunksRelocs.reserve(numChunks);
+
+  for (size_t i = 0; i < numChunks; ++i) {
+    DebugNamesOutputChunk &chunk = outputChunks[i];
+    InputSectionBase *base = inputDebugNamesSections[i];
+    llvm::DenseMap<uint32_t, uint32_t> relocs;
+    getNameRelocs<ELFT>(base, relocs);
+    chunksRelocs.push_back(std::move(relocs));
+
+    // Update CuOffsets list with new data
+    for (uint32_t cuOffset : chunk.compilationUnits)
+      mergedCuOffsets.push_back(chunk.sec->outSecOff + cuOffset);
+
+    // TODO: Update TuOffsets list with new data
+  }
+
+  // Update the entries with the relocated string offsets.
+  for (auto &stringEntry : mergedEntries) {
+    uint32_t oldOffset = stringEntry.stringOffsetOffset;
+    uint32_t idx = stringEntry.chunkIdx;
+    stringEntry.relocatedEntryOffset = chunksRelocs[idx][oldOffset];
+  }
+
+  // Write out bytes for merged section.
+
+  // Write the header.
+  endian::write32<ELFT::TargetEndianness>(buf + 0, mergedHdr.UnitLength);
+  endian::write16<ELFT::TargetEndianness>(buf + 4, mergedHdr.Version);
+  endian::write32<ELFT::TargetEndianness>(buf + 8, mergedHdr.CompUnitCount);
+  endian::write32<ELFT::TargetEndianness>(buf + 12,
+                                          mergedHdr.LocalTypeUnitCount);
+  endian::write32<ELFT::TargetEndianness>(buf + 16,
+                                          mergedHdr.ForeignTypeUnitCount);
+  endian::write32<ELFT::TargetEndianness>(buf + 20, mergedHdr.BucketCount);
+  endian::write32<ELFT::TargetEndianness>(buf + 24, mergedHdr.NameCount);
+  endian::write32<ELFT::TargetEndianness>(buf + 28, mergedHdr.AbbrevTableSize);
+  endian::write32<ELFT::TargetEndianness>(buf + 32,
+                                          mergedHdr.AugmentationStringSize);
+  buf += 36;
+  memcpy(buf, mergedHdr.AugmentationString.c_str(), 8);
+  buf += 8;
+
+  // Write the CU list.
+  for (uint32_t offset : mergedCuOffsets) {
+    endian::write32<ELFT::TargetEndianness>(buf + 0, offset);
+    buf += 4;
+  }
+
+  // Write the local TU list.
+  // TODO: Fix this, once we get everything working without TUs.
+  if (mergedHdr.LocalTypeUnitCount != 0)
+    warn(".debug_names: type units are not handled in merged index");
+
+  // Write the foreign TU list.
+  // Currently LLVM does not emit foreign type units, so there should
+  // be nothing here to emit.
+  // TODO: Fix this, once we get everything working wtihout TUs.
+  if (mergedHdr.ForeignTypeUnitCount != 0)
+    warn(".debug_names: type units are not handled in merged index");
+
+  // Write the hash table.
+  // ... Write the buckets
+  uint32_t idx = 1;
+  for (const auto &bucket : bucketList) {
+    if (!bucket.empty())
+      endian::write32<ELFT::TargetEndianness>(buf + 0, idx);
+    idx += bucket.size();
+    buf += 4;
+  }
+
+  // ...Write the hashes
+  for (const auto &bucket : bucketList) {
+    for (const auto &entry : bucket) {
+      uint32_t hashValue = entry->hashValue;
+      endian::write32<ELFT::TargetEndianness>(buf + 0, hashValue);
+      buf += 4;
+    }
+  }
+
+  // Write the string offsets.
+  for (const auto &entry : mergedEntries) {
+    endian::write32<ELFT::TargetEndianness>(buf + 0,
+                                            entry.relocatedEntryOffset);
+    buf += 4;
+  }
+
+  // Write the entry offsets.
+  for (const auto &entry : mergedEntries) {
+    endian::write32<ELFT::TargetEndianness>(buf + 0, entry.entryOffset);
+    buf += 4;
+  }
+
+  // Write the abbrev table.
+  for (const auto *abbrev : mergedAbbrevTable) {
+    size_t uleb_size = encodeULEB128(abbrev->code, buf);
+    buf += uleb_size;
+    uleb_size = encodeULEB128(abbrev->tag, buf);
+    buf += uleb_size;
+    for (auto attr : abbrev->attributes) {
+      uleb_size = encodeULEB128(attr.Index, buf);
+      buf += uleb_size;
+      uleb_size = encodeULEB128(attr.Form, buf);
+      buf += uleb_size;
+    }
+    endian::write16<ELFT::TargetEndianness>(buf + 0, 0); // attribute sentinels.
+    buf += 2;
+  }
+  *buf++ = 0; // abbrev table sentinel
+
+  // Write the entry pool.
+  for (const auto &stringEntry : mergedEntries) {
+    // Write all the entries for the string.
+    size_t uleb_size;
+    for (const auto &entry : stringEntry.indexEntries) {
+      uleb_size = encodeULEB128(entry->abbrevCode, buf);
+      buf += uleb_size;
+      for (const auto &value : entry->attrValues) {
+        if (value.attrSize > 0) {
+          endian::write32<ELFT::TargetEndianness>(buf + 0, value.attrValue);
+          buf += value.attrSize;
+        }
+      }
+    }
+    *buf++ = 0; // Entry sentinel
+  }
+}
+
+bool DebugNamesSection::isNeeded() const { return numChunks > 0; }
+
+template <class ELFT>
+static void
+readCompileUnitOffsets(struct DebugNamesSection::DebugNamesSectionData &secData,
+                       DebugNamesSection::DebugNamesInputChunk &inputChunk,
+                       DebugNamesSection::DebugNamesOutputChunk &outputChunk,
+                       llvm::DWARFDataExtractor &namesExtractor) {
+  uint64_t offset = secData.locs.CUsBase;
+  uint64_t *offsetPtr = &offset;
+  for (size_t i = 0; i < secData.hdr.CompUnitCount; ++i) {
+    llvm::Error err = llvm::Error::success();
+    uint32_t value = namesExtractor.getU32(offsetPtr, &err);
+    if (err)
+      errorOrWarn(toString(inputChunk.namesSection->sec) +
+                  ": error reading CU offsets: " + toString(std::move(err)));
+    else
+      outputChunk.compilationUnits.push_back(value);
+  }
+}
+
+template <class ELFT>
+static void
+readEntryOffsets(struct DebugNamesSection::DebugNamesSectionData &secData,
+                 DebugNamesSection::DebugNamesInputChunk &chunk,
+                 llvm::DWARFDataExtractor &namesExtractor) {
+  secData.entryOffsets = std::make_unique<uint32_t[]>(secData.hdr.NameCount);
+  if (secData.locs.EntryOffsetsBase >= namesExtractor.getData().size())
+    errorOrWarn(toString(chunk.namesSection->sec) +
+                ": invalid entry offsets input");
+
+  uint64_t offset = secData.locs.EntryOffsetsBase;
+  uint64_t *offsetPtr = &offset;
+  for (size_t i = 0; i < secData.hdr.NameCount; ++i) {
+    llvm::Error err = llvm::Error::success();
+    uint32_t value = namesExtractor.getU32(offsetPtr, &err);
+    if (err)
+      errorOrWarn(toString(chunk.namesSection->sec) +
+                  ": error reading entry offsets: " + toString(std::move(err)));
+    else
+      secData.entryOffsets.get()[i] = value;
+  }
+}
+
+template <class ELFT>
+static void readAttributeValues(
+    SmallVector<DebugNamesSection::AttrValueData, 3> &values,
+    DebugNamesSection::DebugNamesInputChunk &chunk, uint64_t &offset,
+    struct DebugNamesSection::DebugNamesSectionData &secData,
+    int32_t &parentOffset, llvm::DWARFDataExtractor &namesExtractor,
+    const llvm::DWARFDebugNames::Abbrev &abbrev) {
+  const LLDDWARFSection &namesSection = *chunk.namesSection;
+  uint64_t *offsetPtr = &offset;
+  DebugNamesSection::AttrValueData cuOrTuAttr = {0, 0};
+  for (auto attr : abbrev.Attributes) {
+    llvm::Error err = llvm::Error::success();
+    DebugNamesSection::AttrValueData newAttr;
+    uint32_t value;
+    switch (attr.Form) {
+    case DW_FORM_flag_present: {
+      // Currently only DW_IDX_parent attributes (in .debug_names) can
+      // have this form. This form does not have a real value (nothing is
+      // emitted for it).
+      break;
+    }
+    case DW_FORM_data1:
+    case DW_FORM_ref1: {
+      newAttr.attrValue = namesExtractor.getU8(offsetPtr, &err);
+      newAttr.attrSize = 1;
+      break;
+    }
+    case DW_FORM_data2:
+    case DW_FORM_ref2: {
+      value = namesExtractor.getU16(offsetPtr, &err);
+      newAttr.attrValue = value;
+      newAttr.attrSize = 2;
+      break;
+    }
+    case DW_FORM_data4:
+    case DW_FORM_ref4: {
+      value = namesExtractor.getU32(offsetPtr, &err);
+      newAttr.attrValue = value;
+      newAttr.attrSize = 4;
+      if (attr.Index == dwarf::DW_IDX_parent)
+        parentOffset = value + secData.locs.EntriesBase;
+      break;
+    }
+    case DW_FORM_data8:
+    case DW_FORM_ref8:
+    case DW_FORM_ref_sig8: {
+      value = namesExtractor.getU64(offsetPtr, &err);
+      newAttr.attrValue = value;
+      newAttr.attrSize = 8;
+      break;
+    }
+    default: {
+      errorOrWarn(toString(namesSection.sec) +
+                  Twine(": unrecognized form encoding ") + Twine(attr.Form) +
+                  " in .debug_names abbrev table");
+      break;
+    }
+    }
+    if (err)
+      errorOrWarn(
+          toString(namesSection.sec) +
+          ": error while reading attributes: " + toString(std::move(err)));
+    if (attr.Index == DW_IDX_compile_unit || attr.Index == DW_IDX_type_unit)
+      // Save it to put it at the end of the attributes list.
+      cuOrTuAttr = newAttr;
+    else if (attr.Form != DW_FORM_flag_present)
+      values.push_back(newAttr);
+  }
+
+  // Make sure the CU or TU index attribute is the last one in the list.
+  values.push_back(cuOrTuAttr);
+}
+
+template <class ELFT>
+static void readEntry(DebugNamesSection::NamedEntry &stringEntry,
+                      DebugNamesSection::DebugNamesInputChunk &chunk,
+                      DebugNamesSection::DebugNamesSectionData &secData,
+                      llvm::DWARFDataExtractor &namesExtractor,
+                      const llvm::DWARFDebugNames::NameIndex &ni) {
+  std::unique_ptr<LLDDWARFSection> &namesSection = chunk.namesSection;
+  uint64_t offset = stringEntry.entryOffset;
+  // Each entry ends with a zero 'sentinel' value
+  while (offset < namesSection->Data.size() &&
+         namesSection->Data[offset] != 0) {
+    // Read & store all entries (for the same string)
+    auto entry = std::make_unique<DebugNamesSection::IndexEntry>();
+    entry->poolOffset = offset;
+    llvm::Error err = llvm::Error::success();
+    uint64_t ulebValue = namesExtractor.getULEB128(&offset, &err);
+    if (err)
+      errorOrWarn(toString(chunk.namesSection->sec) +
+                  ": error reading entry: " + toString(std::move(err)));
+    entry->abbrevCode = static_cast<uint32_t>(ulebValue);
+    const auto &abbrevs = ni.getAbbrevs();
+    auto abbrevIt = abbrevs.find_as(entry->abbrevCode);
+    if (abbrevIt != abbrevs.end()) {
+      const llvm::DWARFDebugNames::Abbrev &abbrev = *abbrevIt;
+      readAttributeValues<ELFT>(entry->attrValues, chunk, offset, secData,
+                                entry->parentOffset, namesExtractor, abbrev);
+      stringEntry.indexEntries.push_back(std::move(entry));
+    }
+  }
+  if (offset >= namesSection->Data.size())
+    errorOrWarn(toString(chunk.namesSection->sec) +
+                ": encountered unexpected end of section while reading entry");
+}
+
+template <class ELFT>
+static void
+readEntries(struct DebugNamesSection::DebugNamesSectionData &secData,
+            DebugNamesSection::DebugNamesInputChunk &chunk,
+            llvm::DWARFDataExtractor &namesExtractor,
+            llvm::DataExtractor &strExtractor,
+            const llvm::DWARFDebugNames::NameIndex &ni) {
+  // Temporary map from entry offsets to address (pointer) of entry with that
+  // offset; used to find parent entries quickly.
+  DenseMap<uint32_t, DebugNamesSection::IndexEntry *> offsetMap;
+  // Reserve sizes for this chunk's hashes & namedEntries.
+  chunk.hashValues.reserve(secData.hdr.NameCount);
+  secData.namedEntries.reserve(secData.hdr.NameCount);
+  // Calculate the Entry Offsets, create initial records.
+  for (uint32_t i = 0; i < secData.hdr.NameCount; ++i) {
+    // Get string value
+    DebugNamesSection::NamedEntry stringEntry;
+    stringEntry.entryOffset =
+        secData.locs.EntriesBase + secData.entryOffsets[i];
+    uint64_t strOffsetOffset =
+        secData.locs.StringOffsetsBase + i * secData.dwarfSize;
+    stringEntry.stringOffsetOffset = strOffsetOffset;
+    uint64_t strOffset =
+        namesExtractor.getRelocatedValue(secData.dwarfSize, &strOffsetOffset);
+    StringRef name = strExtractor.getCStrRef(&strOffset);
+    stringEntry.name = name.data();
+    // Calculate hash
+    stringEntry.hashValue = caseFoldingDjbHash(name);
+    chunk.hashValues.push_back(stringEntry.hashValue);
+    // Read String Entry
+    readEntry<ELFT>(stringEntry, chunk, secData, namesExtractor, ni);
+    // Add index entries & offsets to our temporary map
+    for (const auto &entry : stringEntry.indexEntries)
+      offsetMap[entry->poolOffset] = entry.get();
+    secData.namedEntries.push_back(std::move(stringEntry));
+  }
+  // Find/assign pointers to any 'real' parent entries (needed to find correct
+  // parent entry offsets in merged data).
+  for (auto &stringEntry : secData.namedEntries)
+    for (auto &entry : stringEntry.indexEntries)
+      if (entry->parentOffset != -1)
+        entry->parentEntry = offsetMap[entry->parentOffset];
+}
+
+static uint16_t computeDebugNamesHeaderSize() {
+  // Size of the .debug_names section header, in bytes, for DWARF32:
+  uint16_t size = /* unit length */ 4 +
+                  /* version */ 2 +
+                  /* padding */ 2 +
+                  /* CU count */ 4 +
+                  /* TU count */ 4 +
+                  /* Foreign TU count */ 4 +
+                  /* Bucket Count */ 4 +
+                  /* Name Count */ 4 +
+                  /* Abbrev table size */ 4 +
+                  /* Augmentation string size */ 4 +
+                  /* augmentation string */ 8;
+  return size;
+}
+
+template <class ELFT>
+static void collectDebugNamesSectionData(
+    DebugNamesSection::DebugNamesInputChunk &chunk,
+    DebugNamesSection::DebugNamesOutputChunk &outputChunk,
+    llvm::DWARFDataExtractor &namesExtractor,
+    llvm::DataExtractor &strExtractor) {
+  for (const auto &ni : *chunk.debugNamesData) {
+    DebugNamesSection::DebugNamesSectionData secData;
+    secData.hdr = ni.getHeader();
+    if (secData.hdr.Format != DwarfFormat::DWARF32)
+      errorOrWarn(toString(chunk.namesSection->sec) + ": unsupported DWARF64");
+    secData.dwarfSize = dwarf::getDwarfOffsetByteSize(DwarfFormat::DWARF32);
+    secData.hdrSize = computeDebugNamesHeaderSize();
+    if (secData.hdr.Version != 5)
+      errorOrWarn(toString(chunk.namesSection->sec) + ": unsupported version");
+    findDebugNamesOffsets(secData.locs, secData.hdrSize, secData.hdr.Format,
+                          secData.hdr);
+    readCompileUnitOffsets<ELFT>(secData, chunk, outputChunk, namesExtractor);
+    readEntryOffsets<ELFT>(secData, chunk, namesExtractor);
+    readEntries<ELFT>(secData, chunk, namesExtractor, strExtractor, ni);
+    chunk.sectionsData.push_back(std::move(secData));
+  }
+}
+
+void DebugNamesSection::collectMergedCounts(
+    MutableArrayRef<DebugNamesInputChunk> &inputChunks) {
+  SmallVector<uint32_t, 0> tmpMergedCuOffsets;
+
+  mergedHdr.CompUnitCount = 0;
+  mergedHdr.LocalTypeUnitCount = 0;
+  mergedHdr.For...
[truncated]

github-actions · 2024-03-25T14:39:10Z

✅ With the latest revision this PR passed the C/C++ code formatter.

github-actions · 2024-03-25T14:39:10Z

✅ With the latest revision this PR passed the Python code formatter.

ayermolo · 2024-03-25T14:57:09Z

How much does it add to linking time?
Also did you measure reduction in section size by any chance?

cmtice · 2024-03-25T15:49:34Z

How much does it add to linking time? Also did you measure reduction in section size by any chance?

I did some rough timing measurements on my workstation (no special quiescent environment), linking Clang. In both cases Clang was built with -gpubnames. In one case I passed --debug_names to the LLD, so it built the merged section; in the other case I didn't pass any special flags to LLD, so it appended the .debug_names sections together (current normal behavior). I linked 5 times in each case and took the average.

Linking Clang without --debug-names flag took, on average, 8.8 seconds. Linking Clang with --debug-names took, on average, 13.5 seconds.

The .debug_names section without merging is 0x99f8280 (161448576) bytes.
The .debug_names section WITH merging is 0x72483e0 (119833568) bytes.

So creating a merged section cost us ~ 5 seconds and saved us ~41.6 MB. It also saves us ~ 6 seconds in LLDB startup times on Clang.

MaskRay · 2024-03-26T00:08:38Z

(Reading. I find ELFT::TargetEndianness too long. Proposed #86604 to allow ELFT::Endian. No action needed yet)

MaskRay · 2024-03-26T04:47:27Z

driver.test has error: -r and --gdb-index may not be used together. Add --debug-names test there.

MaskRay · 2024-03-26T04:51:31Z

lld/test/ELF/debug-names.s

+
+# DISASM:       Disassembly of section .text:
+# DISASM-EMPTY:
+# DISASM:       <_Z2f12t1>:


The disassembly is unrelated to the functionality to be tested

Done. (removed the disassembly).

MaskRay · 2024-03-26T04:52:34Z

lld/test/ELF/debug-names.s

+# DWARF-NEXT:         DW_IDX_parent: <parent not indexed>
+# DWARF-NEXT:         DW_IDX_compile_unit: 0x01
+
+# READELF: .debug_names PROGBITS 0000000000000000 0003eb 0000d0


s/0003eb/[[#%x,]]/ so that the test is less sensitive to section offset changes.
The size, while sensitive, is useful as a change detector.

I's just one line and displays the section size. It's better to move it before # DWARF

MaskRay · 2024-03-26T04:55:13Z

lld/test/ELF/debug-names-bad-version.s

+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %p/Inputs/debug-names-2.s -o %t2.o
+#


We don't add ^#$ or ^## lines. Please remove here and elsewhere.

You can check the convention of newer tests added to this directory git whatchanged -p --diff-filter=A --after=2024-01-01 lld/test/ELF

MaskRay · 2024-03-26T04:57:17Z

lld/ELF/SyntheticSections.h

+  size_t getSize() const override { return sectionSize; }
+  bool isNeeded() const override;
+
+  void addSections(SmallVector<InputSectionBase *, 0> sec_list) {


expensive copy of SmallVector

How would you recommend that I deal with this?

Just inline the simple function to the caller

MaskRay · 2024-03-26T04:58:56Z

lld/ELF/SyntheticSections.h

@@ -788,6 +792,136 @@ class RelroPaddingSection final : public SyntheticSection {
  void writeTo(uint8_t *buf) override {}
 };

+class DebugNamesSection final : public SyntheticSection {


There are a lot of ELFT in member functions. Seems better to make the class ELFT, or follow RelocationBaseSection: non-template base class with ELFT derived class.

Done. Made the entire class ELFT.

MaskRay · 2024-03-26T05:00:04Z

lld/ELF/SyntheticSections.cpp

+    getNameRelocsImpl<ELFT>(sec, rels.relas, relocs);
+}
+
+void DebugNamesSection::writeTo(uint8_t *buf) { invokeELFT(writeToImpl, buf); }


RelocationSection::writeTo avoids invokeELFT

Done (with making entire class ELFT).

MaskRay · 2024-03-26T05:04:04Z

lld/test/ELF/debug-names-bad-offsets-sizes.s

@@ -0,0 +1,153 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t1.o
+#


Delete ^#$ lines

MaskRay · 2024-03-26T05:05:18Z

lld/test/ELF/debug-names-bad-offsets-sizes.s

+#
+# RUN: not ld.lld --debug-names %t1.o -o /dev/null 2>&1 | FileCheck %s
+#
+# CHECK: error: {{.*}}:(.debug_names): encountered unexpected end of section while reading entry


We should test the filename to ensure it is correct.

You can use FileCheck -DFILE=%t1.o to get the filename with [[FILE]]. Recently, more tests have rm -rf %t && split-file %s %t && cd %t so that we don't need to figure out slashes/backslashes for non-Windows/Windows.

I can successfully add "FileCheck -DFILE=%t1.o" or "FileCheck -DFILE=%t2.o to my tests.

I don't understand what you want me to do with [[FILE]]; could you be more explicit please?

I tried adding "rm -rf %t && split-file %s %t && cd %t" to some of my tests, but then the tests fail with errors like: "split-file: error: /usr/local/google3/cmtice/LLD.llvm-project/lld/test/ELF/debug-names-parent-idx.s: no part separator was found". I'm not sure what to do here?

You can probably spend some time on existing tests. https://llvm.org/docs/TestingGuide.html#extra-files #--- serves as a separtor.

Just verifying that I understand what you want me to do: For the three debug-names-*.s tests that have two input files, you want me to put both input files into the same .s file and use the #--- separator syntax (along with the split-file stuff). Is that correct?

I've done this now.

MaskRay · 2024-03-26T05:07:14Z

lld/test/ELF/debug-names-bad-aug-string.s

+# DWARF-NEXT:     CU[0]: 0x00000000
+# DWARF-NEXT:     CU[1]: 0x00000041
+
+##


We typically place such file-level comments at the top. Don't add ^##$ lines.
Follow the convention of recent test files.

MaskRay · 2024-03-26T05:08:59Z

lld/ELF/SyntheticSections.cpp

+  for (size_t i = 0; i < numChunks; ++i) {
+    DebugNamesOutputChunk &chunk = outputChunks[i];
+    InputSectionBase *base = inputDebugNamesSections[i];
+    llvm::DenseMap<uint32_t, uint32_t> relocs;


DenseMap<uint32_t, uint32_t> &relocs = chunksRelocs.emplace_back(); to save a move.

Please follow the convention: we normally don't use llvm:: in .cpp files. llvm::DenseMap is for headers.

Both are done.

MaskRay · 2024-03-26T05:19:33Z

lld/ELF/SyntheticSections.cpp

+                                                    files.size());
+  auto outputChunks = std::make_unique<DebugNamesOutputChunk[]>(files.size());
+  parallelFor(0, files.size(), [&](size_t i) {
+    ObjFile<ELFT> *file = cast<ObjFile<ELFT>>(files[i]);


auto *file

MaskRay · 2024-03-26T05:20:09Z

lld/ELF/SyntheticSections.cpp

+    // Extract llvm::DWARFDebugNames data from the .debug_names section. The
+    // .debug_names section needs the .debug_str section, to get the actual
+    // symbol names.
+    const StringRef &strSection = dobj.getStrSection();


avoid reference for small types like StringRef

MaskRay · 2024-03-26T05:22:11Z

lld/ELF/SyntheticSections.cpp

+        std::make_unique<LLDDWARFSection>(namesSection);
+    if (llvm::Error E = inputChunks[i].debugNamesData->extract()) {
+      // Report an error here. We were unable to extract the data.
+      errorOrWarn(toString(dobj.getNamesSection().sec) + ": " +


Twine(": ") to avoid a temporary std::string

Done (in all the errorOrWarn calls).

MaskRay · 2024-03-26T05:22:47Z

lld/ELF/SyntheticSections.cpp

+    inputChunks[i].namesSection =
+        std::make_unique<LLDDWARFSection>(namesSection);
+    if (llvm::Error E = inputChunks[i].debugNamesData->extract()) {
+      // Report an error here. We were unable to extract the data.


The code self explains. Remove the comment.

MaskRay · 2024-03-26T05:23:39Z

lld/ELF/SyntheticSections.cpp

+  ret->getMergedAbbrevTable(inputChunks);
+  ret->getMergedSymbols(inputChunks);
+  ret->computeUniqueHashes(inputChunks);
+  inputChunksPtr.reset();


If early reset is to save memory. Add a comment.

MaskRay · 2024-03-26T05:34:41Z

lld/ELF/SyntheticSections.cpp

+
+template <class ELFT>
+static void
+readEntries(struct DebugNamesSection::DebugNamesSectionData &secData,


remove struct

MaskRay · 2024-03-26T05:35:51Z

lld/ELF/SyntheticSections.cpp

+  chunk.hashValues.reserve(secData.hdr.NameCount);
+  secData.namedEntries.reserve(secData.hdr.NameCount);
+  // Calculate the Entry Offsets, create initial records.
+  for (uint32_t i = 0; i < secData.hdr.NameCount; ++i) {


https://llvm.org/docs/CodingStandards.html#don-t-evaluate-end-every-time-through-a-loop

Save uint32_t i =0, e = secData.hdr.NameCount; i != e; ++i) (!= is more common)

I can do this if you like, but secData.hdr.NameCount is an integer, not a function call...so it seems like I'd just be substituting one variable for another? Do you really want me to do that here?

The compiler may not know secData.hdr.NameCount is immutable. It may generate redundant instruction in each iteration to recompute the NameCount, more expensive than loading NameCount once into a register.

Thanks for the explanation. Done.

MaskRay · 2024-03-26T05:55:40Z

lld/ELF/SyntheticSections.cpp

+      errorOrWarn(toString(chunk.namesSection->sec) + ": unsupported version");
+    findDebugNamesOffsets(secData.locs, secData.hdrSize, secData.hdr.Format,
+                          secData.hdr);
+    readCompileUnitOffsets<ELFT>(secData, chunk, outputChunk, namesExtractor);


The three functions do bound checking in a very inefficient way.

EntriesBase used by readEntries has the largest data structure offset. If it doesn't overflow, other data structures don't overflow. However, readCompileUnitOffsets/readEntryOffsets waste time in the loop body to do unnecessary bound checking.

Remove the needless use of DataExtractor.

More inefficiency is going on. entryOffsets is read in readEntryOffsets just to be used once and abandoned in readEntries. It's much better to read the offset in readEntries instead

Working on this one...(not done yet).

I don't see what you're talking about here -- I see no bounds-checking happening in readCompileUnitOffsets nor in readEntryOffsets. Nor do I see any needless uses of data extractors. What are you considering to be bounds-checking code, and where to you see a needless use of a data extractor?

Every DataExtractor call does bounds checking (that's the error handling it does/can do - if you ask it to read 1 byte, all byte values are valid, so long as there's a byte there - so the only reason it could fail is if there wasn't a byte to read - if the underlying buffer was too short).

I think Ray's suggesting doing some up-front bounds checking of the total bounds (at least within a list - for instance the suggestion to use the getU32 that takes an output buffer and a count - reading in NU32 in one go, and so only bounds checking once for the whole NU32s) rather than having bounds checking on every separate U32 (or whatever) read.

The index entries proper are probably trickier - entries aren't necessarily fixed size, and the header doesn't specify how many entries there are - so you just have to keep reading them one by one I think...

I think I've done was maskray was asking for. Please let me know if I misunderstood you.

MaskRay · 2024-03-26T05:55:52Z

lld/ELF/SyntheticSections.cpp

+void DebugNamesSection::collectMergedCounts(
+    MutableArrayRef<DebugNamesInputChunk> &inputChunks) {
+  SmallVector<uint32_t, 0> tmpMergedCuOffsets;
+


delete blank line

MaskRay · 2024-03-26T05:56:16Z

lld/ELF/SyntheticSections.cpp

+                  data.hdr.AugmentationString)) {
+        // There are conflicting augmentation strings, so it's best for the
+        // merged index to not use an augmentation string.
+        StringRef emptyString = "        ";


inline the used-once constant variable

Might be more suitable to keep the variable and use it twice - once to initialize the AugmentationStringSize (= emptyString.size()) and then to initialized AugmentationString.

(or skip the separate named variable, but still initialize the StringSize from the String (after initializing AugmentationString, mergedHdr.AugmentationStringSize = mergedHdr.AugmentationString.size()), rather than hardcoding it to 8 - which risks the size diverging from the string and something else breaking later on)

I've updated it to get the size directly from the string now.

MaskRay · 2024-03-26T06:05:30Z

lld/ELF/SyntheticSections.cpp

+
+  // Write the abbrev table.
+  for (const auto *abbrev : mergedAbbrevTable) {
+    size_t uleb_size = encodeULEB128(abbrev->code, buf);


Use encodeULEB128AndInc and remove uleb_size

Unable to use 'encodeULEB128AndInc' because the function doesn't exist (only decodeULEB128AndInc exists). But Was able to remove uleb_size variable anyway.

MaskRay · 2024-03-26T06:16:24Z

lld/ELF/SyntheticSections.cpp

+      errorOrWarn(
+          toString(namesSection.sec) +
+          ": error while reading attributes: " + toString(std::move(err)));
+    if (attr.Index == DW_IDX_compile_unit || attr.Index == DW_IDX_type_unit)


If this patch does not support DW_IDX_type_unit, best to leave it for the future.

I disagree. There are several places where I've put in comments about type units. Also I plan on adding the type_units work as soon as this CL goes in. I would strongly prefer to keep this code as is; it's not hurting anything and will make future work much easier.

One reason I'm often a bit apprehensive about including half-implemented code, is it's harder to know whether code is tested/correct, when it's only partially implemented and unreachable or not coherent in the commit it's landed in.

Ok, I will remove all mentions of DW_IDX_type_unit.

MaskRay · 2024-03-26T06:26:17Z

lld/ELF/SyntheticSections.h

+    uint32_t abbrevCode;
+    uint32_t poolOffset;
+    union {
+      int32_t parentOffset = -1;


Can we use uint32_t parentOffset = 0 since the minimum valid parentOffset value is at least EntriesOffset, which is much larger than 0?

No, the offsets are measured from the start of the entries section, so zero is in fact a valid offset. I need to initialize this to an invalid offset value.

The header has a larger size, so offset 0 cannot really a valid entry offset.

You would be correct, if the offsets were counting from the start of the .debug_names section, but they are not. The offsets are measuring from the start of the Entry Pool (the bit in .debug_names section that comes after the abbrev table). So zero is a valid entry offset.

Yep entry offsets are relative, with first one being zero for the purposes of parent. Discovered that when implementing support in BOLT.

We compute the parent offset as ie->parentOffset = locs.EntriesBase + attr.attrValue. EntriesBase is larger than the header size. Here we can use 0 as a sentinel number.

MaskRay · 2024-03-26T06:37:19Z

lld/ELF/SyntheticSections.cpp

+    for (auto &entry : stringEntry.indexEntries) {
+      uint32_t entrySize = 0;
+      entry->poolOffset = offset;
+      uint32_t ulebSize = getULEB128Size(entry->abbrevCode);


entrySize += getULEB128Size(entry->abbrevCode)

Please check whether ulebSize/uleb_size elsewhere can be removed.

I have removed all ulebSize and uleb_size variables.

MaskRay · 2024-03-27T05:18:17Z

lld/ELF/SyntheticSections.cpp

+
+template <class ELFT>
+static void
+readCompileUnitOffsets(struct DebugNamesSection::DebugNamesSectionData &secData,


remove struct

cmtice · 2024-03-27T05:52:13Z

driver.test has error: -r and --gdb-index may not be used together. Add --debug-names test there.

Done (see 'debug-names-invalid-flags.s).

MaskRay · 2024-03-27T08:21:09Z

driver.test has error: -r and --gdb-index may not be used together. Add --debug-names test there.

Done (see 'debug-names-invalid-flags.s).

When I suggested add the test there, I am very clear that we should reuse an existing test, instead of disobeying the convention and adding a new file. There are values in reusing a small set of files instead of spreading tests everywhere.

I can take a stab at providing an update on top of this branch so that we don't need to spend too much time debating detailed changes, and you can then consider cherry picking my commits. I believe my providing commits directly might expedite this patch.

cmtice · 2024-03-27T14:55:29Z

driver.test has error: -r and --gdb-index may not be used together. Add --debug-names test there.

Done (see 'debug-names-invalid-flags.s).

When I suggested add the test there, I am very clear that we should reuse an existing test, instead of disobeying the convention and adding a new file. There are values in reusing a small set of files instead of spreading tests everywhere.

I can take a stab at providing an update on top of this branch so that we don't need to spend too much time debating detailed changes, and you can then consider cherry picking my commits. I believe my providing commits directly might expedite this patch.

I apologize! I mis-read your original request. I have fixed it now (added the test to driver.test and removed debug-names-invalid-flags.s).

lld/ELF/Driver.cpp

ayermolo · 2024-03-28T17:36:48Z

lld/ELF/SyntheticSections.cpp

+    warn(".debug_names: type units are not handled in merged index");
+
+  // Write the foreign TU list.
+  // Currently LLVM does not emit foreign type units, so there should


Not sure what this means. llvm does emit TUs for split dwarf.

Good point; that comment was correct when written, but it now obsolete (sorry about that!). I will remove it

ayermolo · 2024-03-28T17:51:52Z

lld/ELF/SyntheticSections.cpp

+    for (const auto &entry : stringEntry.indexEntries) {
+      buf += encodeULEB128(entry->abbrevCode, buf);
+      for (const auto &value : entry->attrValues) {
+        if (value.attrSize > 0) {


When will this be zero? Does this imply that Entry only has parent attribute of flag?

Another piece of obsolete code (nice catch!). Leftover from creating attribute for DW_IDX_parent with form DW_FORM_flag_present. I will clean this up as well.

lld/ELF/SyntheticSections.cpp

ayermolo · 2024-03-28T20:35:25Z

lld/ELF/SyntheticSections.cpp

+  for (auto &stringEntry : secData.namedEntries)
+    for (auto &entry : stringEntry.indexEntries)
+      if (entry->parentOffset != -1)
+        entry->parentEntry = offsetMap[entry->parentOffset];


How will this handle when
module 0
Entry0 0xOffset1 {...}
....
EntryX {
...
ParrentOffst 0xOffset1
}
module 1
Entry0 0xOffset1 {...}
....
EntryX {
...
ParrentOffst 0xOffset1
}
So parent offsets are the same in module 0 and 1

What is being assigned to entry->parentEntry is not the offset, but the pointer to the actual IndexEntry of the parent.

Does that answer your question? If not, I can go into more detail...

Ah I missed the part where offsetMap gets re-created for each module. Never mind.

MaskRay · 2024-03-28T20:46:15Z

(Reading. I find ELFT::TargetEndianness too long. Proposed #86604 to allow ELFT::Endian. No action needed yet)

We ended up with ELFT::Endianness. The next update can switch to ELFT::Endianness

ayermolo · 2024-03-29T18:47:10Z

What does everyone thinks about refactoring the debug names code out of SythenticSections.cpp to DebugNamesSyntheticSection.cpp or something like that? (followup commit is fine)
That file is almost 5k lines of code and is getting unwieldy.

cmtice · 2024-03-29T21:51:44Z

(Reading. I find ELFT::TargetEndianness too long. Proposed #86604 to allow ELFT::Endian. No action needed yet)

We ended up with ELFT::Endianness. The next update can switch to ELFT::Endianness

Done: I've switched to using ELFT::Endianness.

cmtice · 2024-03-29T22:41:02Z

Except for maskray's request re using 'rm -rf %t && split-file %s %t && cd %t' in tests (still looking into that), I think I have addressed all the review request/comments. So it's ready for any additional reviewing.

If I've overlooked something please let me know.

Re @ayermolo 's suggestion to pull the DebugNames work out of SyntheticSections.cpp into another file: That's a decision I would want to leave to @MaskRay . I don't know how much doing so would really help, however. The DebugNamesSection work in SyntheticSections.cpp is only ~750 lines, and it's all contiguous...

MaskRay · 2024-03-30T06:48:32Z

Except for maskray's request re using 'rm -rf %t && split-file %s %t && cd %t' in tests (still looking into that), I think I have addressed all the review request/comments. So it's ready for any additional reviewing.

If I've overlooked something please let me know.

Re @ayermolo 's suggestion to pull the DebugNames work out of SyntheticSections.cpp into another file: That's a decision I would want to leave to @MaskRay . I don't know how much doing so would really help, however. The DebugNamesSection work in SyntheticSections.cpp is only ~750 lines, and it's all contiguous...

Adding a new file might require to make some internal functions (static) external. There can be more complexity if the new file needs to call a template. I think we should avoid a new file for now.
That said, ~750 lines are a lot of complexity. I'll check if I can simplify the code.

lld/ELF/SyntheticSections.cpp

ayermolo · 2024-04-05T19:26:06Z

lGTM.
Personally I think it would be worth to see if the debug code can be in it's own file, but I can't really make a strong argument for it beyond putting everything into one file just seems messy. There is precedence of GDBIndex is part of SyntheticSections.cpp. I guess one argument would be that since LLD supports more and more DWARF related stuff it might be good to isolate DWARF related code in it's own directory, instead of scattered through various files.

- Update comment about CompUnits in OutputChunk. - Replace two uses of 'seq' in for-loops. - Replace use of memcpy with llvm::copy.

Fix typo, add 'const', replace one use of 'seq'/

- Keep type unit counts in output header at zero, regardless of input, to avoid crashing LLD, since we're not actually handling type units yet. - Move the type units warning to the point where we can check the input counts. - Add a type units test (that LLD doesn't crash and outputs the warning).

Remove tools/generate-content.py. We will use a generic tool intended for llvm/utils: https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408

MaskRay · 2024-04-18T02:33:19Z

lld/ELF/SyntheticSections.cpp

+    numCu += chunks[i].compUnits.size();
+    for (const NameData &nd : inputChunk.nameData) {
+      hdr.CompUnitCount += nd.hdr.CompUnitCount;
+      // We are not actually handling or emitting type units yet, so


typo: so so

I'd remove

//hdr.LocalTypeUnitCount += nd.hdr.LocalTypeUnitCount; //hdr.ForeignTypeUnitCount += nd.hdr.ForeignTypeUnitCount;

and non-zero type unit counts will crash LLD. (it's an assertion failure which can be avoided if we write code slightly differently.

Just

// TODO: We don't handle type units yet and LocalTypeUnitCount/ForeignTypeUnitCount are left as 0.

is sufficient.

MaskRay · 2024-04-18T02:38:09Z

ant to get in the way of that. My PR for addng support for foreign type units hasn't gone in yet either, so I am happy to change course here. We just need a way to be able ensure that .debug_names entries for foreign type units somehow know if they are valid or not. Our previous attempt at this was to add a DW_AT_dwo_name attribute to the DW_TAG_type_unit and then verify that this matched the one any only CU in the CU list of the .debug_names table, but happy to pivot if we can think of a

Thanks for chiming in. Type units are not handled yet in this patch. This patch focuses on compile units with an opt-in option --debug-names. I'll try study your lldb type units implementation:)

It seems that the remaining issues are minor style issues. LGTM to make it clear I am happy once these issues are resolved or possibly defer some later if @dwblaikie is fine with that.

- Pulled body of loop that reads/parses IndexEntries into a separate function (readEntry) - Removed unnecessary 'const' qualifiers on local vars - Updated comment about handling type units - Created getChunks accessor function; used it to eliminate use of 'seq' in for-loop. - Enhanced some error messages - Update debug-names-bad.s test to handle udpated error messages

cmtice · 2024-04-18T07:48:55Z

I believe I have addressed all requests and this is ready for approval. :-)

dwblaikie

Looks good, after a few minor cleanups after the readEntry refactoring

dwblaikie · 2024-04-18T15:47:19Z

lld/ELF/SyntheticSections.cpp

+        attr.attrSize = 4;
+        ie->parentOffset = entriesBase + attr.attrValue;
+      } else if (a.Form != DW_FORM_flag_present) {
+        errMsg = ": invalid form for DW_IDX_parent";


Presumably should just return immediately here, with the string error?

And then there's no reason for errMsg to have such a wide scope (or possibly to exist at all)

dwblaikie · 2024-04-18T15:49:22Z

lld/ELF/SyntheticSections.cpp

+    errMsg = ": entry abbrev code not found in abbrev table: ";
+    errMsg.append(std::to_string(ie->abbrevCode));
+    return createStringError(inconvertibleErrorCode(), errMsg.c_str());


Use the format version of createStringError:

return createStringError(inconvertibleErrorCode(), ": entry abbrev code too large for DWARF32: %d", ie->abbrevCode);

here and elsewhere - then errMsg can be removed, most likely.

dwblaikie · 2024-04-18T15:51:11Z

lld/ELF/SyntheticSections.cpp

      if (offset >= namesSec.Data.size())
        errMsg = ": index entry is out of bounds";
-      if (errMsg)
-        errorOrWarn(toString(namesSec.sec) + Twine(errMsg));
+      if (!errMsg.empty())
+        errorOrWarn(toString(namesSec.sec) + Twine(errMsg.c_str()));


drop errMsg here (& remove its declaration above) - since it's only used in this immediate scope, simplify this code to:

if (offset >= namesSec.Data.size()) errorOrWarn(toString(namesSec.sec) + ": index entry is out of bounds");

dwblaikie · 2024-04-18T15:52:51Z

lld/ELF/SyntheticSections.cpp

+          errorOrWarn(toString(namesSec.sec) +
+                      Twine(toString(ieOrErr.takeError())));


Consider removing the ": " from the error messages inside readEntry (they don't need to know they're being concatenated with something else later) and adding it here instead:

errorOrWarn(toString(namesSec.sec) + ": " + toString(ieOrErr.takeError()));

Using Expected<DebugNamesBaseSection::IndexEntry *> and inconvertibleErrorCode() look unnecessary to me.
We have other places that we return different types of errors and we simply return a std::string.

MaskRay · 2024-04-18T18:15:25Z

lld/ELF/SyntheticSections.cpp

+    errMsg.append(toString(std::move(err)));
+    return createStringError(inconvertibleErrorCode(), errMsg.c_str());
+  }
+  if (ulebVal < UINT32_MAX)


This should be <=

Or use isUInt<32>(ulebVal)

MaskRay · 2024-04-18T18:18:41Z

lld/ELF/SyntheticSections.h

+  SmallVector<Abbrev *, 0> abbrevTable;
+  SmallVector<char, 0> abbrevTableBuf;
+
+  ArrayRef<OutputChunk> getChunks() {


getChunks() const

MaskRay · 2024-04-18T18:20:10Z

lld/ELF/SyntheticSections.cpp

+  // Collect and de-duplicate all the names (preserving all the entries).
+  // Speed it up using multithreading, as the number of symbols can be in the
+  // order of millions.
+  size_t concurrency =


254ceed removed const here and in other places.

I think we should keep const size_t concurrency. Using const for integer type variables is conventional and avoiding const is not. @dwblaikie

Fair enough gfor now.

Though I find that to be a confusing convention & makes the code harder to read for me, because that's not a logical distinction I tend to think of when reading code - so then it's confusing seeing one local const and one local non-const-but-not-modified & not understanding why one would be const and another wouldn't be...

- Clean up the error messages in readEntry function - Restore 'const' qualifier to size_t vars in computeEntryPool - Add 'const' qualifier to getChunks function.

MaskRay · 2024-04-18T19:39:57Z

lld/ELF/SyntheticSections.cpp

+
+  // Canonicalize abbrev by placing the CU/TU index at the end.
+  ie->attrValues.push_back(cuAttr);
+


Our convention does not add a blank line here

MaskRay · 2024-04-18T19:40:48Z

lld/ELF/SyntheticSections.cpp

+  auto it = ni.getAbbrevs().find_as(ie->abbrevCode);
+  if (it == ni.getAbbrevs().end())
+    return createStringError(inconvertibleErrorCode(),
+                             "entry abbrev code not found in abbrev table: %d",


%d => %u since this is an unsigned int.

MaskRay · 2024-04-18T19:41:59Z

lld/ELF/SyntheticSections.cpp

+    ie->abbrevCode = static_cast<uint32_t>(ulebVal);
+  else
+    return createStringError(inconvertibleErrorCode(),
+                             "abbrev code in entry too large for DWARF32: %d",


This should be "%" PRIu64

MaskRay · 2024-04-18T19:43:41Z

lld/ELF/SyntheticSections.cpp

+  const size_t concurrency =
+      bit_floor(std::min<size_t>(config->threadCount, numShards));
+  const size_t shift = 32 - countr_zero(numShards);
+  uint8_t cuAttrSize = getMergedCuCountForm(hdr.CompUnitCount).first;


const uint8_t cuAttrSize

Remove one blank line & add one 'const' qualifier.

Test that ld.lld --debug-names (llvm#86508) built per-module index can be consumed by lldb. This has uncovered a bug during the development of the lld feature.

Test that ld.lld --debug-names (#86508) built per-module index can be consumed by lldb. This has uncovered a bug during the development of the lld feature.

Generally, IR and assembly test files benefit from being cleaned to remove unnecessary details. However, for tests requiring elaborate IR or assembly files where cleanup is less practical (e.g., large amount of debug information output from Clang), the current practice is to include the C/C++ source file and the generation instructions as comments. This is inconvenient when regeneration is needed. This patch adds `llvm/utils/update_test_body.py` to allow easier regeneration. `ld.lld --debug-names` tests (#86508) utilize this script for Clang-generated assembly tests. Note: `-o pipefail` is standard (since https://www.austingroupbugs.net/view.php?id=789) but not supported by dash. Link: https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408

llvmbot added lld lld:ELF labels Mar 25, 2024

cmtice requested a review from MaskRay March 25, 2024 14:36

cmtice requested a review from dwblaikie March 25, 2024 14:37

MaskRay reviewed Mar 26, 2024

View reviewed changes

MaskRay requested changes Mar 26, 2024

View reviewed changes

MaskRay reviewed Mar 26, 2024

View reviewed changes

MaskRay reviewed Mar 27, 2024

View reviewed changes

ayermolo reviewed Mar 28, 2024

View reviewed changes

ayermolo reviewed Apr 1, 2024

View reviewed changes

cmtice and others added 6 commits April 16, 2024 23:04

[lld][ELF] Minor fixes addressing review comments.

fe196b7

- Update comment about CompUnits in OutputChunk. - Replace two uses of 'seq' in for-loops. - Replace use of memcpy with llvm::copy.

[lld][ELF] Fixes to previous fixes.

cee8c29

Fix typo, add 'const', replace one use of 'seq'/

Clean up more tests

ac233d4

Remove tools/generate-content.py. We will use a generic tool intended for llvm/utils: https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408

Clean up more tests

232c729

Remove tools/generate-content.py. We will use a generic tool intended for llvm/utils: https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408

Merge remote-tracking branch 'maskray/lld-debug-names' into new-lld-work

41580dd

MaskRay reviewed Apr 18, 2024

View reviewed changes

MaskRay approved these changes Apr 18, 2024

View reviewed changes

dwblaikie approved these changes Apr 18, 2024

View reviewed changes

MaskRay reviewed Apr 18, 2024

View reviewed changes

[lld][ELF] Clean up error messages.

22f1ef4

- Clean up the error messages in readEntry function - Restore 'const' qualifier to size_t vars in computeEntryPool - Add 'const' qualifier to getChunks function.

MaskRay approved these changes Apr 18, 2024

View reviewed changes

MaskRay reviewed Apr 18, 2024

View reviewed changes

MaskRay and others added 2 commits April 18, 2024 12:57

Update some diagnostics in readEntry and clang-format

69870a5

[lld][ELF] Tiny cleanups

b061f66

Remove one blank line & add one 'const' qualifier.

cmtice merged commit 16711b4 into llvm:main Apr 18, 2024
4 of 5 checks passed

MaskRay mentioned this pull request Apr 19, 2024

[lldb/test] Add basic ld.lld --debug-names tests #88335

Merged

MaskRay added a commit that referenced this pull request Apr 23, 2024

[lldb/test] Add basic ld.lld --debug-names tests (#88335)

a7e2726

Test that ld.lld --debug-names (#86508) built per-module index can be consumed by lldb. This has uncovered a bug during the development of the lld feature.

MaskRay mentioned this pull request May 8, 2024

[utils] Add script to generate elaborated IR and assembly tests #89026

Merged

cmtice deleted the new-lld-work branch December 11, 2024 21:48

		errorOrWarn(toString(namesSec.sec) +
		Twine(toString(ieOrErr.takeError())));


		// Canonicalize abbrev by placing the CU/TU index at the end.
		ie->attrValues.push_back(cuAttr);

[lld][ELF] Add --debug-names to create merged .debug_names. #86508

[lld][ELF] Add --debug-names to create merged .debug_names. #86508

Conversation

cmtice commented Mar 25, 2024 • edited Loading

llvmbot commented Mar 25, 2024

llvmbot commented Mar 25, 2024

github-actions bot commented Mar 25, 2024 • edited Loading

github-actions bot commented Mar 25, 2024

ayermolo commented Mar 25, 2024 • edited Loading

cmtice commented Mar 25, 2024

MaskRay commented Mar 26, 2024

MaskRay commented Mar 26, 2024

Choose a reason for hiding this comment

cmtice Mar 27, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

cmtice Apr 2, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

MaskRay Mar 26, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

cmtice commented Mar 25, 2024 •

edited

Loading

github-actions bot commented Mar 25, 2024 •

edited

Loading

ayermolo commented Mar 25, 2024 •

edited

Loading

cmtice Mar 27, 2024 •

edited

Loading

cmtice Apr 2, 2024 •

edited

Loading

MaskRay Mar 26, 2024 •

edited

Loading