From 08c95d2d8c21f3cceaea69e56cef2dc7474e8af7 Mon Sep 17 00:00:00 2001 From: ghizard <50744617+ghizard@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:15:22 -0400 Subject: [PATCH] GP-4717 - Add DemangledNamespaceNode and refine MDMangGhidra namespace processing, including setting anonymous namespace names to their underlying name --- .../demangler/DemangledNamespaceNode.java | 120 +++++++++++ .../app/util/demangler/DemangledType.java | 4 +- .../main/java/mdemangler/MDMangGhidra.java | 70 ++++-- .../src/main/java/mdemangler/MDMangUtils.java | 199 +++++++++++++++++- .../modifier/MDQuestionModifierType.java | 3 + .../naming/MDNumberedNamespace.java | 15 +- .../java/mdemangler/naming/MDQualifier.java | 28 +++ .../test/java/mdemangler/MDMangUtilsTest.java | 40 +++- 8 files changed, 455 insertions(+), 24 deletions(-) create mode 100644 Ghidra/Features/Base/src/main/java/ghidra/app/util/demangler/DemangledNamespaceNode.java diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/demangler/DemangledNamespaceNode.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/demangler/DemangledNamespaceNode.java new file mode 100644 index 00000000000..31128b29b53 --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/demangler/DemangledNamespaceNode.java @@ -0,0 +1,120 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.util.demangler; + +import org.apache.commons.lang3.StringUtils; + +import ghidra.program.model.symbol.Namespace; + +/** + * Represents a plain namespace node that is not a type or method + */ +public class DemangledNamespaceNode implements Demangled { + + // The intention is for this to be as refined a part of a larger mangled string as possible, + // but it is up to the user to know if they can pass that more refined string or if they + // just have to pass a bigger piece. + protected String mangled; + private String originalDemangled; + private String demangledName; + private String name; // 'safe' name + + protected Demangled namespace; + + /** + * Constructor + * @param mangled as refined a piece of the (larger) original mangled stream as the user + * can provide, though many times the larger piece is all that the user can provide + * @param originalDemangled the original demangled string to match mangled string with the + * same caveats + * @param name the name of the namespace node + */ + public DemangledNamespaceNode(String mangled, String originalDemangled, String name) { + this.mangled = mangled; + this.originalDemangled = originalDemangled; + setName(name); + } + + @Override + public void setName(String name) { + if (StringUtils.isBlank(name)) { + throw new IllegalArgumentException("Name cannot be blank"); + } + demangledName = name; + this.name = DemanglerUtil.stripSuperfluousSignatureSpaces(name).replace(' ', '_'); + } + + @Override + public String getName() { + return name; + } + + @Override + public String getMangledString() { + return mangled; + } + + @Override + public String getOriginalDemangled() { + return originalDemangled; + } + + @Override + public String getDemangledName() { + return demangledName; + } + + @Override + public void setNamespace(Demangled ns) { + namespace = ns; + } + + @Override + public Demangled getNamespace() { + return namespace; + } + + @Override + public String getNamespaceString() { + return 
getName(true); + } + + @Override + public String getNamespaceName() { + return name; + } + + @Override + public String getSignature() { + return getNamespaceName(); + } + + private String getName(boolean includeNamespace) { + StringBuilder builder = new StringBuilder(); + if (includeNamespace && namespace != null) { + builder.append(namespace.getNamespaceString()); + builder.append(Namespace.DELIMITER); + } + builder.append(demangledName); + return builder.toString(); + } + + @Override + public String toString() { + return getNamespaceString(); + } + +} diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/demangler/DemangledType.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/demangler/DemangledType.java index 85f0845343c..d4828d557f2 100644 --- a/Ghidra/Features/Base/src/main/java/ghidra/app/util/demangler/DemangledType.java +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/demangler/DemangledType.java @@ -37,9 +37,9 @@ public class DemangledType implements Demangled { private boolean isConst; private boolean isVolatile; - public DemangledType(String mangled, String originaDemangled, String name) { + public DemangledType(String mangled, String originalDemangled, String name) { this.mangled = mangled; - this.originalDemangled = originaDemangled; + this.originalDemangled = originalDemangled; setName(name); } diff --git a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangGhidra.java b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangGhidra.java index 58f728644a5..cfcd4a4b0a6 100644 --- a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangGhidra.java +++ b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangGhidra.java @@ -94,33 +94,79 @@ public MDDataType demangleType(String mangledArg, boolean errorOnRemainingChars) return returnedType; } - public DemangledType processNamespace(MDQualifiedName qualifiedName) { + public Demangled processNamespace(MDQualifiedName qualifiedName) { return 
processNamespace(qualifiedName.getQualification()); } - private DemangledType processNamespace(MDQualification qualification) { + private Demangled processNamespace(MDQualification qualification) { Iterator it = qualification.iterator(); if (!it.hasNext()) { return null; } MDQualifier qual = it.next(); - DemangledType type = new DemangledType(mangledSource, demangledSource, qual.toString()); - DemangledType parentType = type; + Demangled type = getDemangled(qual); + Demangled current = type; + // Note that qualifiers come in reverse order, from most refined to root being the last while (it.hasNext()) { qual = it.next(); - DemangledType newType; - if (qual.isNested()) { - String subMangled = qual.getNested().getMangled(); - newType = new DemangledType(subMangled, demangledSource, qual.toString()); + Demangled parent = getDemangled(qual); + current.setNamespace(parent); + current = parent; + } + return type; + } + + private Demangled getDemangled(MDQualifier qual) { + Demangled demangled; + if (qual.isNested()) { + String subMangled = qual.getNested().getMangled(); + MDObjectCPP obj = qual.getNested().getNestedObject(); + MDTypeInfo typeInfo = obj.getTypeInfo(); + MDType type = typeInfo.getMDType(); + if (type instanceof MDDataType dt) { + demangled = new DemangledType(subMangled, qual.toString(), qual.toString()); + } + else if (type instanceof MDFunctionType ft) { + // We currently cannot handle functions as part of a namespace, so we will just + // treat the demangled function namespace string as a plain namespace. 
+ //demangled = new DemangledFunction(subMangled, qual.toString(), qual.toString()); + demangled = + new DemangledNamespaceNode(subMangled, qual.toString(), qual.toString()); } else { - newType = new DemangledType(mangledSource, demangledSource, qual.toString()); + demangled = + new DemangledNamespaceNode(subMangled, qual.toString(), qual.toString()); } - parentType.setNamespace(newType); - parentType = newType; } - return type; + else if (qual.isAnon()) { + // Instead of using the standard qual.toString() method, which returns + // "`anonymous namespace'" for anonymous qualifiers, we use qual.getAnonymousName() + // which will have the underlying anonymous name of the form "A0xfedcba98" to create + // a standardized anonymous name that is distinguishable from other anonymous names. + // The standardized name comes from createStandardAnonymousNamespaceNode(). This + // is especially important when there are sibling anonymous names. + String anon = MDMangUtils.createStandardAnonymousNamespaceNode(qual.getAnonymousName()); + demangled = new DemangledNamespaceNode(mangledSource, qual.toString(), anon); + } + else if (qual.isInterface()) { + // TODO: need to do better; setting namespace for now + demangled = new DemangledNamespaceNode(mangledSource, qual.toString(), qual.toString()); + } + else if (qual.isNameQ()) { + // TODO: need to do better; setting namespace for now, as it looks like interface + demangled = new DemangledNamespaceNode(mangledSource, qual.toString(), qual.toString()); + } + else if (qual.isNameC()) { + // TODO: need to do better; setting type for now, but not processed yet and not sure + // what it is + demangled = new DemangledType(mangledSource, qual.toString(), qual.toString()); + } + else { + // This takes care of plain and local namespaces + demangled = new DemangledNamespaceNode(mangledSource, qual.toString(), qual.toString()); + } + return demangled; } private DemangledObject processItem() { diff --git 
a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangUtils.java b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangUtils.java index c79368bf31f..4badb070a02 100644 --- a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangUtils.java +++ b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/MDMangUtils.java @@ -17,8 +17,13 @@ import java.util.ArrayList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.lang3.StringUtils; import ghidra.app.util.SymbolPath; +import ghidra.app.util.SymbolPathParser; import mdemangler.datatype.complex.MDComplexType; import mdemangler.datatype.modifier.MDModifierType; import mdemangler.naming.*; @@ -97,13 +102,20 @@ else if (item instanceof MDObjectCPP objCpp) { myParts.addAll(0, nestedParts); } else if (qual.isAnon()) { - myParts.add(0, qual.getAnonymousName()); + // Instead of using the standard qual.toString() method, which returns + // "`anonymous namespace'" for anonymous qualifiers, we use qual.getAnonymousName() + // which will have the underlying anonymous name of the form "A0xfedcba98" to create + // a standardized anonymous name that is distinguishable from other anonymous names. + // The standardized name comes from createStandardAnonymousNamespaceNode(). This + // is especially important when there are sibling anonymous names. 
+ String anon = createStandardAnonymousNamespaceNode(qual.getAnonymousName()); + myParts.add(0, anon); } else { - myParts.add(0, qual.toString()); + myParts.add(0, stripTags(qual.toString())); } } - myParts.add(name); + myParts.add(stripTags(name)); parts.addAll(myParts); } @@ -115,4 +127,185 @@ private static MDParsableItem getReferencedType(MDParsableItem item) { return item; } + /** + * Checks that the given String begins with standard "A0x" (under-the-hood MDMang name) + * pattern or with the "`" pattern that is found with MDQuestionModifier type + * @param anon the input string + * @return the standardized anonymous namespace component, or the input string if not standard + */ + public static String createStandardAnonymousNamespaceNode(String anon) { + /* + * Note that we are converting to upper case and doing zero padding to 8 hex digits. + * Rationale: In analyzing mangled symbols with anonymous namespaces, we found an LLVM + * PDB that had a mix of anonymous namespaces: + * that used only lower case a-f hex digits + * that used only upper case A-F hex digits + * that had zero-padding, leading zeros to 8 hex digits + * that did not have zero-padding, leading zeros to 8 hex digits + * There were matching namespaces between upper-case-only hex and lower-case-only that were + * found often enough to be beyond coincidence. + * There was only one anon NS node that had the zero-padding and this is one that also + * had the 8-hex-digit suffix that we (have initially) parsed in the MDQuestionmodifier + * type. Knowing that this matches has, for all practical purposes, confirmed that the + * suffix is to represent an anonymous namespace. 
+ * Since there was only one anon NS with zero padding, we could not convince ourselves + * completely that a "short" namespace and one with leading zeros that shared the + * meaningful numeric part were essentially the same, but it would make sense to assume + * this is true, especially in the context of the 8-hex digit suffix case (the need for + * this suffix is probably what causes the namespace to be created, and it uses formatting + * that provides the zero-padding; later, when the A0x namespace is needed, it uses the + * name already given) + * TODO: probably want to wind this into special MDMang processing as some sort of option + * and possibly an optional user-specified format. + */ + String str; + if (anon.startsWith("A0x")) { + str = anon.substring(3); + } + else if (anon.startsWith("`")) { + str = anon.substring(1); + } + else { + return anon; + } + long num = Long.parseLong(str, 16); // 8 hex digits can exceed Integer.MAX_VALUE + return String.format("anon_%08X", num); + } + + // @formatter:off + private static String[] searchList = { + " demangledParts = new ArrayList<>(); + // When simple is true, we need to recurse the nested hierarchy to pull the names + // up to the main namespace level, so we set recurse = true + recurseNamespace(demangledParts, parsableItem, simple); + List regularParts = SymbolPathParser.parse(regularPathName); + + int m = Integer.min(demangledParts.size(), regularParts.size()); + + List parts = new ArrayList<>(); + for (int i = 1; i <= m; i++) { + int ni = demangledParts.size() - i; + String n = demangledParts.get(ni); + // Prefer the mangled part, but could get more sophisticated and decide to use + // regular parts too + parts.add(0, n); + } + for (int i = m + 1; i <= regularParts.size(); i++) { + int ri = regularParts.size() - i; + String r = regularParts.get(ri); + if (r.equals("`anonymous-namespace'")) { + parts.add(0, "`anonymous namespace'"); + } + else { + parts.add(0, r); + } + } + for (int i = m + 1; i <= 
demangledParts.size(); i++) { + int ni = 
demangledParts.size() - i; + String n = demangledParts.get(ni); + parts.add(0, n); + } + + SymbolPath sp = null; + for (String part : parts) { + sp = new SymbolPath(sp, part); + } + return sp; + } + + private static final Pattern LOCAL_NS_PATTERN = Pattern.compile("^__l([0-9]+)$"); + private static final Pattern EMBEDDED_LOCAL_NS_PATTERN = Pattern.compile("::__l([0-9]+)::"); + private static final Pattern DEMANGLED_LOCAL_NS_PATTERN = Pattern.compile("^`([0-9]+)'$"); + private static final Pattern DEMANGLED_EMBEDDED_LOCAL_NS_PATTERN = + Pattern.compile("::`([0-9]+)'::"); + + /** + * Standardize a SymbolPath. For now replacing local namespace {@code __l#} pattern with + * {@code `#'} pattern. + *

Ultimately, this method should be moved to a different utility class, but putting it + * here for now (probably with the template work) + * @param symbolPath the symbol path to standardize + * @return the standardized symbol path + */ + public static SymbolPath standarizeSymbolPath(SymbolPath symbolPath) { + List parts = symbolPath.asList(); + for (int i = 0; i < parts.size(); i++) { + String part = parts.get(i); + // These anonymous namespaces are those that come in the clear (non-mangled) + part = StringUtils.replace(part, "`anonymous-namespace'", "`anonymous namespace'"); + StringBuilder sb = new StringBuilder(); + Matcher m = LOCAL_NS_PATTERN.matcher(part); + if (m.find()) { + m.appendReplacement(sb, "`" + m.group(1) + "'"); + } + else { + m = EMBEDDED_LOCAL_NS_PATTERN.matcher(part); + while (m.find()) { + m.appendReplacement(sb, "::`" + m.group(1) + "'::"); + } + m.appendTail(sb); + } + if (!sb.isEmpty()) { + parts.set(i, sb.toString()); + } + } + return new SymbolPath(parts); + } + + /** + * Standardize a SymbolPath. Alternative: replacing local namespace {@code `#'} pattern with + * {@code __l#} pattern. + *

Ultimately, this method should be moved to a different utility class, but putting it + * here for now (probably with the template work) + * @param symbolPath the symbol path to standardize + * @return the standardized symbol path + */ + public static SymbolPath standarizeSymbolPathAlt(SymbolPath symbolPath) { + List parts = symbolPath.asList(); + for (int i = 0; i < parts.size(); i++) { + String part = parts.get(i); + // These anonymous namespaces are those that come in the clear (non-mangled) + part = StringUtils.replace(part, "`anonymous-namespace'", "`anonymous namespace'"); + StringBuilder sb = new StringBuilder(); + Matcher m = DEMANGLED_LOCAL_NS_PATTERN.matcher(part); + if (m.find()) { + m.appendReplacement(sb, "__l" + m.group(1)); + } + else { + m = DEMANGLED_EMBEDDED_LOCAL_NS_PATTERN.matcher(part); + while (m.find()) { + m.appendReplacement(sb, "::__l" + m.group(1) + "::"); + } + m.appendTail(sb); + } + if (!sb.isEmpty()) { + parts.set(i, sb.toString()); + } + } + return new SymbolPath(parts); + } + } diff --git a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/datatype/modifier/MDQuestionModifierType.java b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/datatype/modifier/MDQuestionModifierType.java index 346a4055094..2f3aaf08ae7 100644 --- a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/datatype/modifier/MDQuestionModifierType.java +++ b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/datatype/modifier/MDQuestionModifierType.java @@ -26,6 +26,9 @@ */ public class MDQuestionModifierType extends MDModifierType { + // TODO: Decide on whether parsing this suffix belongs here... from PDB namespace investigation, + // it is reasoned that this suffix identifies an anonymous namespace. See comments in + // MDMangUtils.createStandardAnonymousNamespaceNode(String anon) method. 
private String suffix; public MDQuestionModifierType(MDMang dmang) { diff --git a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/naming/MDNumberedNamespace.java b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/naming/MDNumberedNamespace.java index d40aea7b79d..acb1624e29f 100644 --- a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/naming/MDNumberedNamespace.java +++ b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/naming/MDNumberedNamespace.java @@ -22,22 +22,29 @@ * Microsoft mangled symbol. */ public class MDNumberedNamespace extends MDParsableItem { - private String name; + private MDEncodedNumber num; public MDNumberedNamespace(MDMang dmang) { super(dmang); } + public MDEncodedNumber getNumber() { + return num; + } + + public String getName() { + return "`" + num + "'"; + } + @Override protected void parseInternal() throws MDException { - MDEncodedNumber num = new MDEncodedNumber(dmang); + num = new MDEncodedNumber(dmang); num.parse(); - name = "`" + num + "'"; } @Override public void insert(StringBuilder builder) { - dmang.insertString(builder, name); + dmang.insertString(builder, getName()); } } diff --git a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/naming/MDQualifier.java b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/naming/MDQualifier.java index 84c7db70753..bbd69dd1da3 100644 --- a/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/naming/MDQualifier.java +++ b/Ghidra/Features/MicrosoftDmang/src/main/java/mdemangler/naming/MDQualifier.java @@ -48,6 +48,18 @@ public boolean isAnon() { return (nameAnonymous != null); } + public boolean isLocalNamespace() { + return (nameNumbered != null); + } + + public boolean isNameC() { + return (nameC != null); + } + + public boolean isNameQ() { + return (nameQ != null); + } + public MDNestedName getNested() { return nameNested; } @@ -56,6 +68,22 @@ public String getAnonymousName() { return nameAnonymous.getName(); } + public String getLocalNamespace() { 
+ return nameNumbered.getName(); + } + + public String getLocalNamespaceNumber() { + return nameNumbered.getNumber().toString(); + } + + public String getNameC() { + return nameC; + } + + public String getNameQ() { + return nameQ; + } + @Override public void insert(StringBuilder builder) { // Only one of these will hit. diff --git a/Ghidra/Features/MicrosoftDmang/src/test/java/mdemangler/MDMangUtilsTest.java b/Ghidra/Features/MicrosoftDmang/src/test/java/mdemangler/MDMangUtilsTest.java index 2a11f9017fb..962003867de 100644 --- a/Ghidra/Features/MicrosoftDmang/src/test/java/mdemangler/MDMangUtilsTest.java +++ b/Ghidra/Features/MicrosoftDmang/src/test/java/mdemangler/MDMangUtilsTest.java @@ -17,6 +17,8 @@ import static org.junit.Assert.*; +import java.util.Arrays; + import org.junit.Test; import generic.test.AbstractGenericTest; @@ -35,7 +37,7 @@ public void testWithLambdaAndSimpleConversionApplies() throws Exception { // with nested types that were causing problems for PDB String mangled = ".?AV@?0??name0@name1@@YA?AUname2@2@Uname3@2@Uname4@2@@Z@"; String expected = - "`struct name1::name2 __cdecl name1::name0(struct name1::name3,struct name1::name4)'::`1'::"; + "`name1::name2 __cdecl name1::name0(name1::name3,name1::name4)'::`1'::"; String simpleExpected = "name1::name0::`1'::"; String expectedDemangled = "class `struct name1::name2 __cdecl name1::name0(struct name1::name3,struct name1::name4)'::`1'::"; @@ -59,7 +61,7 @@ public void testTypeNamespaceSimpleConversionDoesNotApply1() throws Exception { String mangled = ".?AU?$name0@$$QEAV@?0??name1@name2@?Aname3@name4@@UEAAXVname5@4@HAEBVname6@4@@Z@@name7@name8@@"; String expected = - "name8::name7::name0 && __ptr64>"; + "name8::name7::name0<`public: virtual void __cdecl name4::`anonymous namespace'::name2::name1(Aname3::name5,int,Aname3::name6 const &)'::`1':: &&>"; // See MDMangUtils.getSimpleSymbolPath(item) javadoc to understand why expected and // simpleExpected are the same String simpleExpected = expected; @@ 
-84,7 +86,7 @@ public void testTypeNamespaceSimpleConversionDoesNotApply1() throws Exception { public void testTypeNamespaceSimpleConversionDoesNotApply2() throws Exception { String mangled = ".?AU?$name0@$$QEAV@?0???1Aname1@name2@@UEAA@XZ@@name3@name4@@"; String expected = - "name4::name3::name0 && __ptr64>"; + "name4::name3::name0<`public: virtual __cdecl name2::Aname1::~Aname1(void)'::`1':: &&>"; // See MDMangUtils.getSimpleSymbolPath(item) javadoc to understand why expected and // simpleExpected are the same String simpleExpected = expected; @@ -105,4 +107,36 @@ public void testTypeNamespaceSimpleConversionDoesNotApply2() throws Exception { assertEquals(expectedDemangled, demangled); } + @Test + public void testStandarizeSymbolPath() throws Exception { + SymbolPath sp = new SymbolPath(Arrays.asList("name0", "__l1", "name2")); + SymbolPath result = MDMangUtils.standarizeSymbolPath(sp); + String expected = "name0::`1'::name2"; + assertEquals(expected, result.toString()); + } + + @Test + public void testStandarizeSymbolPathWithEmbedded() throws Exception { + SymbolPath sp = new SymbolPath(Arrays.asList("name0", "__l1", "name2(name3::__l4::name5)")); + SymbolPath result = MDMangUtils.standarizeSymbolPath(sp); + String expected = "name0::`1'::name2(name3::`4'::name5)"; + assertEquals(expected, result.toString()); + } + + @Test + public void testStandarizeSymbolPathAlt() throws Exception { + SymbolPath sp = new SymbolPath(Arrays.asList("name0", "`1'", "name2")); + SymbolPath result = MDMangUtils.standarizeSymbolPathAlt(sp); + String expected = "name0::__l1::name2"; + assertEquals(expected, result.toString()); + } + + @Test + public void testStandarizeSymbolPathWithEmbeddedAlt() throws Exception { + SymbolPath sp = new SymbolPath(Arrays.asList("name0", "`1'", "name2(name3::`4'::name5)")); + SymbolPath result = MDMangUtils.standarizeSymbolPathAlt(sp); + String expected = "name0::__l1::name2(name3::__l4::name5)"; + assertEquals(expected, result.toString()); + } + }