From 1c0d928ff753b5a3119eec78268305ab5ae454fe Mon Sep 17 00:00:00 2001
From: Filip Navara
Date: Mon, 2 Oct 2023 17:20:31 +0200
Subject: [PATCH] ELF/LibObjectFile experiment: Add support for building optimized string table

---
 .../Elf/Sections/ElfStringTable.cs            | 168 +++++++++++++++---
 .../Elf/Sections/ElfSymbolTable.cs            |   2 +-
 2 files changed, 146 insertions(+), 24 deletions(-)

diff --git a/src/coreclr/tools/aot/external/LibObjectFile/src/LibObjectFile/Elf/Sections/ElfStringTable.cs b/src/coreclr/tools/aot/external/LibObjectFile/src/LibObjectFile/Elf/Sections/ElfStringTable.cs
index 447fd9db0820a..b7de66f4237b1 100644
--- a/src/coreclr/tools/aot/external/LibObjectFile/src/LibObjectFile/Elf/Sections/ElfStringTable.cs
+++ b/src/coreclr/tools/aot/external/LibObjectFile/src/LibObjectFile/Elf/Sections/ElfStringTable.cs
@@ -17,6 +17,7 @@ namespace LibObjectFile.Elf
     public class ElfStringTable : ElfSection
     {
         private readonly MemoryStream _table;
+        private readonly List<string> _reservedStrings;
         private readonly Dictionary<string, uint> _mapStringToIndex;
         private readonly Dictionary<uint, string> _mapIndexToString;
 
@@ -35,8 +36,9 @@ public ElfStringTable(int capacityInBytes) : base(ElfSectionType.StringTable)
             _table = new MemoryStream(capacityInBytes);
             _mapStringToIndex = new Dictionary<string, uint>();
             _mapIndexToString = new Dictionary<uint, string>();
+            _reservedStrings = new List<string>();
             // Always create an empty string
-            GetOrCreateIndex(string.Empty);
+            CreateIndex(string.Empty);
         }
 
         public override ElfSectionType Type
@@ -55,6 +57,7 @@ public override ElfSectionType Type
         public override void UpdateLayout(DiagnosticBag diagnostics)
         {
             if (diagnostics == null) throw new ArgumentNullException(nameof(diagnostics));
+            if (_reservedStrings.Count > 0) FlushReservedStrings();
             Size = (ulong)_table.Length;
         }
 
@@ -73,21 +76,136 @@ protected override void Write(ElfWriter writer)
             writer.Stream.Write(_table.GetBuffer(), 0, (int)_table.Length);
         }
 
-        public uint GetOrCreateIndex(string text)
+        internal void ReserveString(string text)
         {
-            // Same as empty string
-            if (text == null) return 0;
+            if (text is object && !_mapStringToIndex.ContainsKey(text))
+            {
+                _reservedStrings.Add(text);
+            }
+        }
 
-            if (_mapStringToIndex.TryGetValue(text, out uint index))
+        internal void FlushReservedStrings()
+        {
+            // TODO: Use CollectionsMarshal.AsSpan
+            string[] reservedStrings = _reservedStrings.ToArray();
+
+            // Pre-sort by characters read from the end (descending) so each string sorts directly after the longer strings that end with it
+            MultiKeySort(reservedStrings, 0);
+
+            // Add the strings to the string table, writing bytes only for strings that are not a suffix of the last written one
+            string lastText = null;
+            for (int i = 0; i < reservedStrings.Length; i++)
             {
-                return index;
+                var text = reservedStrings[i];
+                uint index;
+                if (lastText != null && lastText.EndsWith(text, StringComparison.Ordinal))
+                {
+                    // Suffix (or repeat) of the last written string: reuse its tail bytes; indexer assignment keeps a name reserved twice from throwing on a duplicate key
+                    index = (uint)(_table.Length - Encoding.UTF8.GetByteCount(text) - 1);
+                    _mapIndexToString[index] = text;
+                    _mapStringToIndex[text] = index;
+                }
+                else
+                {
+                    lastText = text;
+                    CreateIndex(text);
+                }
+            }
+
+            _reservedStrings.Clear();
+
+            static char TailCharacter(string str, int pos)
+            {
+                int index = str.Length - pos - 1;
+                if ((uint)index < str.Length)
+                    return str[index];
+                return '\0';
+            }
+
+            static void MultiKeySort(Span<string> input, int pos)
+            {
+                if (!MultiKeySortSmallInput(input, pos))
+                {
+                    MultiKeySortLargeInput(input, pos);
+                }
+            }
+
+            static void MultiKeySortLargeInput(Span<string> input, int pos)
+            {
+            tailcall:
+                char pivot = TailCharacter(input[0], pos);
+                int l = 0, h = input.Length;
+                for (int i = 1; i < h;)
+                {
+                    char c = TailCharacter(input[i], pos);
+                    if (c > pivot)
+                    {
+                        (input[l], input[i]) = (input[i], input[l]);
+                        l++; i++;
+                    }
+                    else if (c < pivot)
+                    {
+                        h--;
+                        (input[h], input[i]) = (input[i], input[h]);
+                    }
+                    else
+                    {
+                        i++;
+                    }
+                }
+
+                MultiKeySort(input.Slice(0, l), pos);
+                MultiKeySort(input.Slice(h), pos);
+                if (pivot != '\0')
+                {
+                    // Use a loop as a poor man's tailcall; equivalent to the recursive call:
+                    // MultiKeySort(input.Slice(l, h - l), pos + 1);
+                    pos++;
+                    input = input.Slice(l, h - l);
+                    if (!MultiKeySortSmallInput(input, pos))
+                    {
+                        goto tailcall;
+                    }
+                }
+            }
+
+            static bool MultiKeySortSmallInput(Span<string> input, int pos)
+            {
+                if (input.Length <= 1)
+                    return true;
+
+                // Optimize comparing two strings: walk back from the end until the tail characters differ or both strings are exhausted
+                if (input.Length == 2)
+                {
+                    while (true)
+                    {
+                        char c0 = TailCharacter(input[0], pos);
+                        char c1 = TailCharacter(input[1], pos);
+                        if (c0 < c1)
+                        {
+                            (input[0], input[1]) = (input[1], input[0]);
+                            break;
+                        }
+                        else if (c0 > c1 || c0 == (char)0)
+                        {
+                            break;
+                        }
+                        pos++;
+                    }
+                    return true;
+                }
+
+                return false;
             }
+        }
 
-            index = (uint) _table.Length;
+        private uint CreateIndex(string text)
+        {
+            uint index = (uint) _table.Length;
             _mapIndexToString.Add(index, text);
             _mapStringToIndex.Add(text, index);
 
-            if (text.Length == 0)
+            if (index == 0)
             {
                 Debug.Assert(index == 0);
                 _table.WriteByte(0);
@@ -105,25 +223,26 @@ public uint GetOrCreateIndex(string text)
                 }
                 _table.Write(span);
                 ArrayPool<byte>.Shared.Return(buffer);
-
-                // Register all subsequent strings
-                while (text.Length > 0)
-                {
-                    text = text.Substring(1);
-                    if (_mapStringToIndex.ContainsKey(text))
-                    {
-                        break;
-                    }
-                    var offset = reservedBytes - Encoding.UTF8.GetByteCount(text) - 1;
-                    var subIndex = index + (uint) offset;
-                    _mapStringToIndex.Add(text, subIndex);
-                    _mapIndexToString.Add(subIndex, text);
-                }
             }
 
             return index;
         }
 
+        public uint GetOrCreateIndex(string text)
+        {
+            // Same as empty string
+            if (text == null) return 0;
+
+            if (_reservedStrings.Count > 0) FlushReservedStrings();
+
+            if (_mapStringToIndex.TryGetValue(text, out uint index))
+            {
+                return index;
+            }
+
+            return CreateIndex(text);
+        }
+
         public bool TryResolve(ElfString inStr, out ElfString outStr)
         {
             outStr = inStr;
@@ -153,6 +272,8 @@ public bool TryFind(uint index, out string text)
                 return true;
             }
 
+            if (_reservedStrings.Count > 0) FlushReservedStrings();
+
             if (_mapIndexToString.TryGetValue(index, out text))
             {
                 return true;
@@ -191,9 +312,10 @@ public void Reset()
             _table.SetLength(0);
             _mapStringToIndex.Clear();
             _mapIndexToString.Clear();
+            _reservedStrings.Clear();
 
             // Always create an empty string
-            GetOrCreateIndex(string.Empty);
+            CreateIndex(string.Empty);
         }
     }
 }
\ No newline at end of file
diff --git a/src/coreclr/tools/aot/external/LibObjectFile/src/LibObjectFile/Elf/Sections/ElfSymbolTable.cs b/src/coreclr/tools/aot/external/LibObjectFile/src/LibObjectFile/Elf/Sections/ElfSymbolTable.cs
index a23f823931ecb..4e7ab842d4eb7 100644
--- a/src/coreclr/tools/aot/external/LibObjectFile/src/LibObjectFile/Elf/Sections/ElfSymbolTable.cs
+++ b/src/coreclr/tools/aot/external/LibObjectFile/src/LibObjectFile/Elf/Sections/ElfSymbolTable.cs
@@ -227,7 +227,7 @@ public override void Verify(DiagnosticBag diagnostics)
                     diagnostics.Error(DiagnosticId.ELF_ERR_InvalidSymbolEntrySectionParent, $"Invalid section for the symbol entry #{i} in the {nameof(ElfSymbolTable)} section [{Index}]. The section of the entry `{entry}` must the same than this symbol table section");
                 }
 
-                stringTable.GetOrCreateIndex(entry.Name);
+                stringTable.ReserveString(entry.Name);
 
                 // Update the last local index
                 if (entry.Bind == ElfSymbolBind.Local)