diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bde7ca37..a9cd15050 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - binja: add support for forwarded exports #1646 @xusheng6 - binja: add support for symtab names #1504 @xusheng6 - add com class/interface features #322 @Aayush-goel-04 +- dotnet: emit enclosing class information for nested classes #1780 #1913 @bkojusner @mike-hunhoff ### Breaking Changes @@ -1628,4 +1629,4 @@ Download a standalone binary below and checkout the readme [here on GitHub](http ### Raw diffs - [capa v1.0.0...v1.1.0](https://github.com/mandiant/capa/compare/v1.0.0...v1.1.0) - - [capa-rules v1.0.0...v1.1.0](https://github.com/mandiant/capa-rules/compare/v1.0.0...v1.1.0) \ No newline at end of file + - [capa-rules v1.0.0...v1.1.0](https://github.com/mandiant/capa-rules/compare/v1.0.0...v1.1.0) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index def6cd04a..e4bdfa011 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -131,10 +131,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ from MemberRef name member_ref_name = member_ref_name[4:] + typerefnamespace, typerefname = resolve_nested_typeref_name( + member_ref.Class.row_index, member_ref.Class.row, pe + ) + yield DnType( token, - member_ref.Class.row.TypeName, - namespace=member_ref.Class.row.TypeNamespace, + typerefname, + namespace=typerefnamespace, member=member_ref_name, access=access, ) @@ -188,6 +192,8 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) """ + nested_class_table = get_dotnet_nested_class_table_index(pe) + accessor_map: Dict[int, str] = {} for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): accessor_map[methoddef] = methoddef_access @@ -211,7 +217,9 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ method_name = method_name[4:] - yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method_name, access=access) + typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) + + yield DnType(token, typedefname, namespace=typedefnamespace, member=method_name, access=access) def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: @@ -225,6 +233,8 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type) """ + nested_class_table = get_dotnet_nested_class_table_index(pe) + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) @@ -235,8 +245,11 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: if field.row is None: logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx) continue + + typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) + token: int = calculate_dotnet_token_value(field.table.number, field.row_index) - yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name) + yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name) def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: @@ -300,19 +313,119 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod] yield DnUnmanagedMethod(token, module, method) +def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> Optional[dnfile.base.MDTableRow]: + assert pe.net is not None + assert pe.net.mdtables is not None + + if row_index - 1 <= 0: + return None + + try: + table = pe.net.mdtables.tables.get(table_index, []) + return table[row_index - 1] + except IndexError: + return None + + +def resolve_nested_typedef_name( + nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE +) -> Tuple[str, Tuple[str, ...]]: + """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" + + if index in nested_class_table: + typedef_name = [] + name = typedef.TypeName + + # Append the current typedef name + typedef_name.append(name) + + while nested_class_table[index] in nested_class_table: + # Iterate through the typedef table to resolve the nested name + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]) + if table_row is None: + return typedef.TypeNamespace, tuple(typedef_name[::-1]) + + name = table_row.TypeName + typedef_name.append(name) + index = nested_class_table[index] + + # Document the root enclosing details + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]) + if table_row is None: + return typedef.TypeNamespace, tuple(typedef_name[::-1]) + + enclosing_name = table_row.TypeName + typedef_name.append(enclosing_name) + + return table_row.TypeNamespace, tuple(typedef_name[::-1]) + + else: + return typedef.TypeNamespace, (typedef.TypeName,) + + +def resolve_nested_typeref_name( + index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE +) -> Tuple[str, Tuple[str, ...]]: + """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" + # If the ResolutionScope decodes to a typeRef type then it is nested + if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef): + typeref_name = [] + name = typeref.TypeName + # Not appending the current typeref name to avoid potential duplicate + + # Validate index + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index) + if table_row is None: + return typeref.TypeNamespace, (typeref.TypeName,) + + while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef): + # Iterate through the typeref table to resolve the nested name + typeref_name.append(name) + name = table_row.TypeName + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index) + if table_row is None: + return typeref.TypeNamespace, tuple(typeref_name[::-1]) + + # Document the root enclosing details + typeref_name.append(table_row.TypeName) + + return table_row.TypeNamespace, tuple(typeref_name[::-1]) + + else: + return typeref.TypeNamespace, (typeref.TypeName,) + + +def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]: + """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table""" + nested_class_table = {} + + # Used to find nested classes in typedef + for _, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): + assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) + nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index + + return nested_class_table + + def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" + nested_class_table = get_dotnet_nested_class_table_index(pe) + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) + typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) - yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) + yield DnType(typedef_token, typedefname, namespace=typedefnamespace) for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) + typerefnamespace, typerefname = resolve_nested_typeref_name(typeref.ResolutionScope.row_index, typeref, pe) + typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) - yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace) + yield DnType(typeref_token, typerefname, namespace=typerefnamespace) def calculate_dotnet_token_value(table: int, rid: int) -> int: diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 6c6d59927..4afcc81e1 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -6,15 +6,17 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Optional +from typing import Tuple, Optional class DnType: - def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None): + def __init__( + self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None + ): self.token: int = token self.access: Optional[str] = access self.namespace: str = namespace - self.class_: str = class_ + self.class_: Tuple[str, ...] = class_ if member == ".ctor": member = "ctor" @@ -42,9 +44,13 @@ def __repr__(self): return str(self) @staticmethod - def format_name(class_: str, namespace: str = "", member: str = ""): + def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""): + if len(class_) > 1: + class_str = "/".join(class_) # Concat items in tuple, separated by a "/" + else: + class_str = "".join(class_) # Convert tuple to str # like File::OpenRead - name: str = f"{class_}::{member}" if member else class_ + name: str = f"{class_str}::{member}" if member else class_str if namespace: # like System.IO.File::OpenRead name = f"{namespace}.{name}" diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index a9d36d299..4c9b41507 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -38,8 +38,11 @@ is_dotnet_mixed_mode, get_dotnet_managed_imports, get_dotnet_managed_methods, + resolve_nested_typedef_name, + resolve_nested_typeref_name, calculate_dotnet_token_value, get_dotnet_unmanaged_imports, + get_dotnet_nested_class_table_index, ) logger = logging.getLogger(__name__) @@ -92,19 +95,25 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" + nested_class_table = get_dotnet_nested_class_table_index(pe) + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): # emit internal .NET classes assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) + token = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) - yield Class(DnType.format_name(typedef.TypeName, namespace=typedef.TypeNamespace)), DNTokenAddress(token) + yield Class(DnType.format_name(typedefname, namespace=typedefnamespace)), DNTokenAddress(token) for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): # emit external .NET classes assert isinstance(typeref, dnfile.mdtable.TypeRefRow) + typerefnamespace, typerefname = resolve_nested_typeref_name(typeref.ResolutionScope.row_index, typeref, pe) + token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) - yield Class(DnType.format_name(typeref.TypeName, namespace=typeref.TypeNamespace)), DNTokenAddress(token) + yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token) def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]: diff --git a/tests/fixtures.py b/tests/fixtures.py index 950c439a8..a06308a1c 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -393,6 +393,10 @@ def get_data_path_by_name(name) -> Path: return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_" elif name.startswith("1038a2"): return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_" + elif name.startswith("nested_typedef"): + return CD / "data" / "dotnet" / "dd9098ff91717f4906afe9dafdfa2f52.exe_" + elif name.startswith("nested_typeref"): + return CD / "data" / "dotnet" / "2c7d60f77812607dec5085973ff76cea.dll_" else: raise ValueError(f"unexpected sample fixture: {name}") @@ -1274,6 +1278,114 @@ def parametrize(params, values, **kwargs): ), # MemberRef method False, ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer0"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer1"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer0/myclass_inner0_0"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer0/myclass_inner0_1"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer1/myclass_inner1_0"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer1/myclass_inner1_1"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer1/myclass_inner1_0/myclass_inner_inner"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("myclass_inner_inner"), + False, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("myclass_inner1_0"), + False, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("myclass_inner1_1"), + False, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("myclass_inner0_0"), + False, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("myclass_inner0_1"), + False, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Android.OS.Build/VERSION::SdkInt"), + True, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Android.Media.Image/Plane::Buffer"), + True, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Android.Provider.Telephony/Sent/Sent::ContentUri"), + True, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Android.OS.Build::SdkInt"), + False, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Plane::Buffer"), + False, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Sent::ContentUri"), + False, + ), ], # order tests by (file, item) # so that our LRU cache is most effective.