From ed7fcda792804d2834465fa505a00d4f9b686357 Mon Sep 17 00:00:00 2001 From: bswck Date: Wed, 22 Nov 2023 20:36:17 +0100 Subject: [PATCH 01/24] Use only new-style class declarations --- malduck/compression/components/aplib.py | 2 +- malduck/disasm.py | 2 +- malduck/ints.py | 2 +- malduck/pe.py | 2 +- malduck/procmem/procmem.pyi | 2 +- malduck/structure.py | 2 +- malduck/verify.py | 2 +- tests/test_disasm.py | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/malduck/compression/components/aplib.py b/malduck/compression/components/aplib.py index 67bff08..a5b2cda 100644 --- a/malduck/compression/components/aplib.py +++ b/malduck/compression/components/aplib.py @@ -12,7 +12,7 @@ __author__ = "Sandor Nemes" -class APLib(object): +class APLib: __slots__ = "source", "destination", "tag", "bitcount", "strict" diff --git a/malduck/disasm.py b/malduck/disasm.py index 2ca1191..fd0a3dc 100644 --- a/malduck/disasm.py +++ b/malduck/disasm.py @@ -141,7 +141,7 @@ def __str__(self) -> str: raise Exception("Invalid Operand type") -class Instruction(object): +class Instruction: """ Represents single instruction in :class:`Disassemble` diff --git a/malduck/ints.py b/malduck/ints.py index d49642c..d31e024 100644 --- a/malduck/ints.py +++ b/malduck/ints.py @@ -23,7 +23,7 @@ T = TypeVar("T", bound="IntType") -class IntTypeBase(object): +class IntTypeBase: """ Base class representing all IntType instances """ diff --git a/malduck/pe.py b/malduck/pe.py index a7a85a0..8e89da8 100644 --- a/malduck/pe.py +++ b/malduck/pe.py @@ -59,7 +59,7 @@ def find(self, str: bytes, beg: int = 0, end: Optional[int] = None) -> int: return -1 -class PE(object): +class PE: """ Wrapper around :class:`pefile.PE`, accepts either bytes (raw file contents) or :class:`ProcessMemory` instance. 
diff --git a/malduck/procmem/procmem.pyi b/malduck/procmem/procmem.pyi index 793232e..7346397 100644 --- a/malduck/procmem/procmem.pyi +++ b/malduck/procmem/procmem.pyi @@ -21,7 +21,7 @@ from ..ints import IntType from ..yara import Yara, YaraRulesetMatch, YaraRulesetOffsets from .region import Region -class MemoryBuffer(object): +class MemoryBuffer: def __setitem__(self, item: Union[int, slice], value: Union[int, slice]): ... def __getitem__(self, item: Union[int, slice]): ... def __len__(self) -> int: ... diff --git a/malduck/structure.py b/malduck/structure.py index 71f2235..4956d94 100644 --- a/malduck/structure.py +++ b/malduck/structure.py @@ -32,7 +32,7 @@ } -class Structure(object): +class Structure: _pack_ = 0 _fields_: List[Tuple[str, Type]] = [] diff --git a/malduck/verify.py b/malduck/verify.py index 7f7f2c8..b220576 100644 --- a/malduck/verify.py +++ b/malduck/verify.py @@ -30,7 +30,7 @@ ) -class Verify(object): +class Verify: @staticmethod def ascii(s: bytes) -> bool: return bool(re.match(b"^[\\x20-\\x7f]*$", s, re.DOTALL)) diff --git a/tests/test_disasm.py b/tests/test_disasm.py index cf06b2f..7fb6082 100644 --- a/tests/test_disasm.py +++ b/tests/test_disasm.py @@ -5,7 +5,7 @@ from malduck import disasm -class TestDisasm(object): +class TestDisasm: streams = b"".join(( # mov esi, [edi+4] b"\x8b\x77\x04", @@ -66,7 +66,7 @@ def test_equal(self): assert list(disasm(b"hAAAA", 0)) == list(disasm(b"hAAAA", 0)) -class TestDisasm64bit(object): +class TestDisasm64bit: streams = b"".join(( # inc rax b"\x48\xff\xc0", From 197dca8e093d23d34e7aa585708e17ad033d2cc3 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 07:58:08 +0100 Subject: [PATCH 02/24] Modernize type annotations Changes overview: - Use types from the built-in scope, not the deprecated ones like `typing.List`, `typing.Dict` etc. 
See [PEP 585](https://peps.python.org/pep-0585/) and [the typing docs on aliases to built-in types](https://docs.python.org/3/library/typing.html#aliases-to-built-in-types). - Postpone evaluation of type annotations at runtime. See [PEP 563](https://peps.python.org/pep-0563/). - Use [PEP 604](https://peps.python.org/pep-0604/) `X | Y` union type syntax. - Use [PEP 613](https://peps.python.org/pep-0613/) explicit type aliases. - Use `typing.TYPE_CHECKING` to separate typing-related imports from the actual runtime requirements. --- malduck/bits.py | 1 + malduck/compression/aplib.py | 5 +- malduck/compression/components/aplib.py | 4 +- malduck/compression/components/lznt1.py | 2 + malduck/compression/gzip.py | 6 +- malduck/compression/lznt1.py | 2 + malduck/crypto/aes.py | 8 +- malduck/crypto/camellia.py | 2 + malduck/crypto/chacha20.py | 6 +- malduck/crypto/components/pyserpent.py | 11 +- malduck/crypto/des3.py | 2 + malduck/crypto/rabbit.py | 4 +- malduck/crypto/rc.py | 1 + malduck/crypto/rsa.py | 31 ++-- malduck/crypto/salsa20.py | 6 +- malduck/crypto/serpent.py | 6 +- malduck/crypto/winhdr.py | 9 +- malduck/crypto/xor.py | 5 +- malduck/disasm.py | 44 +++-- malduck/dnpe.py | 27 +-- malduck/extractor/config_utils.py | 16 +- malduck/extractor/extract_manager.py | 22 ++- malduck/extractor/extractor.py | 6 +- malduck/extractor/extractor.pyi | 72 +++----- malduck/extractor/modules.py | 22 ++- malduck/hash/crc.py | 1 + malduck/hash/sha.py | 1 + malduck/ints.py | 54 +++--- malduck/pe.py | 20 +- malduck/procmem/binmem.py | 17 +- malduck/procmem/cuckoomem.py | 7 +- malduck/procmem/idamem.py | 2 + malduck/procmem/procmem.py | 21 ++- malduck/procmem/procmem.pyi | 233 +++++++++++------------- malduck/procmem/procmemdnpe.py | 7 +- malduck/procmem/procmemelf.py | 5 +- malduck/procmem/procmempe.py | 6 +- malduck/procmem/region.py | 6 +- malduck/string/bin.py | 11 +- malduck/string/inet.py | 4 +- malduck/string/ops.py | 21 ++- malduck/structure.py | 5 +- malduck/yara.py | 17 +- 
malduck/yara.pyi | 96 ++++------ 44 files changed, 448 insertions(+), 406 deletions(-) diff --git a/malduck/bits.py b/malduck/bits.py index 16d6742..75c3f00 100644 --- a/malduck/bits.py +++ b/malduck/bits.py @@ -1,6 +1,7 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. +from __future__ import annotations __all__ = ["rol", "ror", "align", "align_down"] diff --git a/malduck/compression/aplib.py b/malduck/compression/aplib.py index 271ae85..957cb04 100644 --- a/malduck/compression/aplib.py +++ b/malduck/compression/aplib.py @@ -1,7 +1,8 @@ +from __future__ import annotations + import logging import struct from binascii import crc32 -from typing import Optional from .components.aplib import APLib @@ -30,7 +31,7 @@ class aPLib: :rtype: bytes """ - def decompress(self, buf: bytes, headerless: bool = True) -> Optional[bytes]: + def decompress(self, buf: bytes, headerless: bool = True) -> bytes | None: packed_size = None packed_crc = None orig_size = None diff --git a/malduck/compression/components/aplib.py b/malduck/compression/components/aplib.py index a5b2cda..06187fd 100644 --- a/malduck/compression/components/aplib.py +++ b/malduck/compression/components/aplib.py @@ -5,6 +5,8 @@ Approximately 20 times faster than other Python implementations. Compatible with both Python 2 and 3. 
""" +from __future__ import annotations + from io import BytesIO __all__ = ["APLib"] @@ -13,7 +15,6 @@ class APLib: - __slots__ = "source", "destination", "tag", "bitcount", "strict" def __init__(self, source: bytes, strict: bool = True) -> None: @@ -54,7 +55,6 @@ def depack(self) -> bytes: done = False try: - # first byte verbatim self.destination += self.source.read(1) diff --git a/malduck/compression/components/lznt1.py b/malduck/compression/components/lznt1.py index 86084d3..ac8850f 100644 --- a/malduck/compression/components/lznt1.py +++ b/malduck/compression/components/lznt1.py @@ -27,6 +27,8 @@ https://github.com/libyal/reviveit/ https://github.com/sleuthkit/sleuthkit/blob/develop/tsk/fs/ntfs.c """ +from __future__ import annotations + import array import struct from io import BytesIO diff --git a/malduck/compression/gzip.py b/malduck/compression/gzip.py index 658dfc7..5e9a9f2 100644 --- a/malduck/compression/gzip.py +++ b/malduck/compression/gzip.py @@ -1,14 +1,14 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. -from __future__ import absolute_import - -__all__ = ["gzip", "Gzip"] +from __future__ import annotations import io import zlib from gzip import GzipFile +__all__ = ["gzip", "Gzip"] + class Gzip: r""" diff --git a/malduck/compression/lznt1.py b/malduck/compression/lznt1.py index 7355d70..89f38a8 100644 --- a/malduck/compression/lznt1.py +++ b/malduck/compression/lznt1.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from .components.lznt1 import decompress_data __all__ = ["Lznt1", "lznt1"] diff --git a/malduck/crypto/aes.py b/malduck/crypto/aes.py index ebd82b2..f2d04c9 100644 --- a/malduck/crypto/aes.py +++ b/malduck/crypto/aes.py @@ -1,9 +1,9 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. 
+from __future__ import annotations import io -from typing import Optional, Tuple from Cryptodome.Cipher import AES as AESCipher @@ -27,7 +27,7 @@ class PlaintextKeyBlob(BaseBlob): def __init__(self) -> None: BaseBlob.__init__(self) - self.key: Optional[bytes] = None + self.key: bytes | None = None def parse(self, buf: io.BytesIO) -> None: """ @@ -42,7 +42,7 @@ def parse(self, buf: io.BytesIO) -> None: return self.key = value - def export_key(self) -> Optional[Tuple[str, bytes]]: + def export_key(self) -> tuple[str, bytes] | None: """ Exports key from structure or returns None if no key was imported @@ -164,7 +164,7 @@ class Aes: ctr = AesCtr() @staticmethod - def import_key(data: bytes) -> Optional[Tuple[str, bytes]]: + def import_key(data: bytes) -> tuple[str, bytes] | None: """ Extracts key from buffer containing :class:`PlaintextKeyBlob` data diff --git a/malduck/crypto/camellia.py b/malduck/crypto/camellia.py index 7c763b7..39ad35f 100644 --- a/malduck/crypto/camellia.py +++ b/malduck/crypto/camellia.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes __all__ = ["camellia"] diff --git a/malduck/crypto/chacha20.py b/malduck/crypto/chacha20.py index f63e35c..da7c8dc 100644 --- a/malduck/crypto/chacha20.py +++ b/malduck/crypto/chacha20.py @@ -1,4 +1,4 @@ -from typing import Optional +from __future__ import annotations from Cryptodome.Cipher import ChaCha20 as ChaCha20Cipher @@ -6,7 +6,7 @@ class ChaCha20: - def encrypt(self, key: bytes, data: bytes, nonce: Optional[bytes] = None) -> bytes: + def encrypt(self, key: bytes, data: bytes, nonce: bytes | None = None) -> bytes: """ Encrypts buffer using ChaCha20 algorithm. 
@@ -23,7 +23,7 @@ def encrypt(self, key: bytes, data: bytes, nonce: Optional[bytes] = None) -> byt nonce = b"\x00" * 8 return ChaCha20Cipher.new(key=key, nonce=nonce).encrypt(data) - def decrypt(self, key: bytes, data: bytes, nonce: Optional[bytes] = None) -> bytes: + def decrypt(self, key: bytes, data: bytes, nonce: bytes | None = None) -> bytes: """ Decrypts buffer using ChaCha20 algorithm. diff --git a/malduck/crypto/components/pyserpent.py b/malduck/crypto/components/pyserpent.py index 789821b..3153c74 100644 --- a/malduck/crypto/components/pyserpent.py +++ b/malduck/crypto/components/pyserpent.py @@ -34,16 +34,17 @@ # Anyone thinking of using this code should reconsider. It's slow. # Try python-mcrypt instead. In case a faster library is not installed # on the target system, this code can be used as a portable fallback. +from __future__ import annotations + import struct import sys -from typing import List, Optional block_size = 16 key_size = 32 class Serpent: - def __init__(self, key: Optional[bytes] = None) -> None: + def __init__(self, key: bytes | None = None) -> None: """Serpent.""" if key: @@ -133,7 +134,7 @@ def byteswap32(x: int) -> int: ) -def set_key(l_key: List[int], key: List[int], key_len: int) -> None: +def set_key(l_key: list[int], key: list[int], key_len: int) -> None: key_len *= 8 if key_len > 256: return None @@ -962,7 +963,7 @@ def set_key(l_key: List[int], key: List[int], key_len: int) -> None: key[4 * 32 + 11] = h -def encrypt(key: List[int], in_blk: List[int]) -> None: +def encrypt(key: list[int], in_blk: list[int]) -> None: # serpent_generate.py a = in_blk[0] b = in_blk[1] @@ -1946,7 +1947,7 @@ def encrypt(key: List[int], in_blk: List[int]) -> None: in_blk[3] = d -def decrypt(key: List[int], in_blk: List[int]) -> None: +def decrypt(key: list[int], in_blk: list[int]) -> None: # serpent_generate.py a = in_blk[0] b = in_blk[1] diff --git a/malduck/crypto/des3.py b/malduck/crypto/des3.py index 8f161a5..119c02d 100644 --- 
a/malduck/crypto/des3.py +++ b/malduck/crypto/des3.py @@ -1,6 +1,8 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. +from __future__ import annotations + from typing import cast from Cryptodome.Cipher import DES diff --git a/malduck/crypto/rabbit.py b/malduck/crypto/rabbit.py index 907a14c..e20181b 100644 --- a/malduck/crypto/rabbit.py +++ b/malduck/crypto/rabbit.py @@ -1,9 +1,9 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. +from __future__ import annotations import struct -from typing import Optional from ..bits import rol from .xor import xor @@ -25,7 +25,7 @@ def __init__(self) -> None: class Rabbit: - def __init__(self, key: bytes, iv: Optional[bytes]) -> None: + def __init__(self, key: bytes, iv: bytes | None) -> None: self.ctx = Context() self.set_key(key) if iv: diff --git a/malduck/crypto/rc.py b/malduck/crypto/rc.py index 0dd4c02..7671be9 100644 --- a/malduck/crypto/rc.py +++ b/malduck/crypto/rc.py @@ -1,6 +1,7 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. +from __future__ import annotations from Cryptodome.Cipher import ARC4 diff --git a/malduck/crypto/rsa.py b/malduck/crypto/rsa.py index aa309cd..a8d68f3 100644 --- a/malduck/crypto/rsa.py +++ b/malduck/crypto/rsa.py @@ -1,11 +1,12 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. 
+from __future__ import annotations import io from io import BytesIO from itertools import takewhile -from typing import Optional, cast +from typing import cast from Cryptodome.PublicKey import RSA as RSA_ @@ -20,10 +21,10 @@ class PublicKeyBlob(BaseBlob): def __init__(self) -> None: BaseBlob.__init__(self) - self.e: Optional[int] = None - self.n: Optional[int] = None + self.e: int | None = None + self.n: int | None = None - def parse(self, buf: BytesIO) -> Optional[int]: + def parse(self, buf: BytesIO) -> int | None: header = buf.read(12) if len(header) != 12 or header[:4] != self.magic: return None @@ -49,12 +50,12 @@ class PrivateKeyBlob(PublicKeyBlob): def __init__(self) -> None: PublicKeyBlob.__init__(self) - self.p1: Optional[int] = None - self.p2: Optional[int] = None - self.exp1: Optional[int] = None - self.exp2: Optional[int] = None - self.coeff: Optional[int] = None - self.d: Optional[int] = None + self.p1: int | None = None + self.p2: int | None = None + self.exp1: int | None = None + self.exp2: int | None = None + self.coeff: int | None = None + self.d: int | None = None def parse(self, buf: BytesIO) -> None: off = PublicKeyBlob.parse(self, buf) @@ -101,7 +102,7 @@ class RSA: algorithms = (0x0000A400,) # RSA @staticmethod - def import_key(data: bytes) -> Optional[bytes]: + def import_key(data: bytes) -> bytes | None: r""" Extracts key from buffer containing :class:`PublicKeyBlob` or :class:`PrivateKeyBlob` data @@ -134,10 +135,10 @@ def import_key(data: bytes) -> Optional[bytes]: def export_key( n: int, e: int, - d: Optional[int] = None, - p: Optional[int] = None, - q: Optional[int] = None, - crt: Optional[int] = None, + d: int | None = None, + p: int | None = None, + q: int | None = None, + crt: int | None = None, ) -> bytes: r""" Constructs key from tuple of RSA components diff --git a/malduck/crypto/salsa20.py b/malduck/crypto/salsa20.py index 22b8a4f..067b107 100644 --- a/malduck/crypto/salsa20.py +++ b/malduck/crypto/salsa20.py @@ -1,4 +1,4 @@ 
-from typing import Optional +from __future__ import annotations from Cryptodome.Cipher import Salsa20 as Salsa20Cipher @@ -6,7 +6,7 @@ class Salsa20: - def encrypt(self, key: bytes, data: bytes, nonce: Optional[bytes] = None) -> bytes: + def encrypt(self, key: bytes, data: bytes, nonce: bytes | None = None) -> bytes: """ Encrypts buffer using Salsa20 algorithm. @@ -23,7 +23,7 @@ def encrypt(self, key: bytes, data: bytes, nonce: Optional[bytes] = None) -> byt nonce = b"\x00" * 8 return Salsa20Cipher.new(key=key, nonce=nonce).encrypt(data) - def decrypt(self, key: bytes, data: bytes, nonce: Optional[bytes] = None) -> bytes: + def decrypt(self, key: bytes, data: bytes, nonce: bytes | None = None) -> bytes: """ Decrypts buffer using Salsa20 algorithm. diff --git a/malduck/crypto/serpent.py b/malduck/crypto/serpent.py index bb9151d..c1ae32e 100644 --- a/malduck/crypto/serpent.py +++ b/malduck/crypto/serpent.py @@ -1,4 +1,4 @@ -from typing import Optional +from __future__ import annotations from .components.pyserpent import serpent_cbc_decrypt, serpent_cbc_encrypt @@ -6,7 +6,7 @@ class SerpentCbc: - def encrypt(self, key: bytes, data: bytes, iv: Optional[bytes] = None) -> bytes: + def encrypt(self, key: bytes, data: bytes, iv: bytes | None = None) -> bytes: """ Encrypts buffer using Serpent algorithm in CBC mode. @@ -21,7 +21,7 @@ def encrypt(self, key: bytes, data: bytes, iv: Optional[bytes] = None) -> bytes: """ return serpent_cbc_encrypt(key, data, iv=iv or b"\x00" * 16) - def decrypt(self, key: bytes, data: bytes, iv: Optional[bytes] = None) -> bytes: + def decrypt(self, key: bytes, data: bytes, iv: bytes | None = None) -> bytes: """ Decrypts buffer using Serpent algorithm in CBC mode. diff --git a/malduck/crypto/winhdr.py b/malduck/crypto/winhdr.py index 4f1f9de..81078b4 100644 --- a/malduck/crypto/winhdr.py +++ b/malduck/crypto/winhdr.py @@ -1,13 +1,17 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. 
# See the file 'docs/LICENSE.txt' for copying permission. +from __future__ import annotations import io -from typing import Any, Optional +from typing import TYPE_CHECKING from ..ints import UInt8, UInt16, UInt32 from ..structure import Structure +if TYPE_CHECKING: + from typing import Any + class BLOBHEADER(Structure): r""" @@ -18,6 +22,7 @@ class BLOBHEADER(Structure): BLOBHEADER structure description (Microsoft Docs): https://docs.microsoft.com/en-us/windows/win32/api/wincrypt/ns-wincrypt-publickeystruc """ + _pack_ = 1 _fields_ = [ ("bType", UInt8), @@ -31,7 +36,7 @@ class BaseBlob: def __init__(self) -> None: self.bitsize = 0 - def parse(self, buf: io.BytesIO) -> Optional[int]: + def parse(self, buf: io.BytesIO) -> int | None: raise NotImplementedError def export_key(self) -> Any: diff --git a/malduck/crypto/xor.py b/malduck/crypto/xor.py index 80e5eb5..07221de 100644 --- a/malduck/crypto/xor.py +++ b/malduck/crypto/xor.py @@ -1,10 +1,11 @@ +from __future__ import annotations + from itertools import cycle -from typing import Union __all__ = ["xor"] -def xor(key: Union[int, bytes], data: bytes) -> bytes: +def xor(key: int | bytes, data: bytes) -> bytes: """ XOR encryption/decryption diff --git a/malduck/disasm.py b/malduck/disasm.py index fd0a3dc..b40d029 100644 --- a/malduck/disasm.py +++ b/malduck/disasm.py @@ -2,12 +2,18 @@ # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. 
+from __future__ import annotations + import collections -from typing import Any, Dict, Iterator, List, Optional, Union +from typing import TYPE_CHECKING from capstone import CsInsn from capstone.x86 import X86Op +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any + __all__ = ["disasm", "insn", "Disassemble", "Instruction", "Operand", "Memory"] Memory = collections.namedtuple("Memory", ("size", "base", "scale", "index", "disp")) @@ -22,7 +28,7 @@ class Operand: _x86_op_imm = None _x86_op_reg = None _x86_op_mem = None - regs: Dict[str, Union[str, int]] = {} + regs: dict[str, str | int] = {} sizes = { 1: "byte", @@ -51,7 +57,7 @@ def is_mem(self) -> bool: return self.op.type == Operand._x86_op_mem @property - def value(self) -> Union[str, int]: + def value(self) -> str | int: """ Returns operand value or displacement value for memory operands @@ -67,7 +73,7 @@ def value(self) -> Union[str, int]: raise Exception("Invalid Operand type") @property - def reg(self) -> Optional[Union[str, int]]: + def reg(self) -> str | int | None: """ Returns register used by operand. 
@@ -85,7 +91,7 @@ def reg(self) -> Optional[Union[str, int]]: return None @property - def mem(self) -> Optional[Memory]: + def mem(self) -> Memory | None: """ Returns :class:`Memory` object for memory operands """ @@ -93,9 +99,9 @@ def mem(self) -> Optional[Memory]: return None mem = self.op.value.mem - base: Optional[Union[str, int]] = None - index: Optional[Union[str, int]] = None - scale: Optional[int] = None + base: str | int | None = None + index: str | int | None = None + scale: int | None = None if mem.base: base = self.regs[mem.base] @@ -171,11 +177,11 @@ def get_move_value(self, p, hit, *args): def __init__( self, - mnem: Optional[str] = None, - op1: Optional[Operand] = None, - op2: Optional[Operand] = None, - op3: Optional[Operand] = None, - addr: Optional[int] = None, + mnem: str | None = None, + op1: Operand | None = None, + op2: Operand | None = None, + op3: Operand | None = None, + addr: int | None = None, x64: bool = False, ) -> None: self.insn = None @@ -188,35 +194,35 @@ def parse(self, insn: CsInsn) -> None: self.insn = insn self.mnem = insn.mnemonic - operands: List[Optional[Operand]] = [] + operands: list[Operand | None] = [] for op in insn.operands + [None, None, None]: operands.append(Operand(op, self.x64) if op else None) self.operands = operands[0], operands[1], operands[2] @staticmethod - def from_capstone(insn: CsInsn, x64: bool = False) -> "Instruction": + def from_capstone(insn: CsInsn, x64: bool = False) -> Instruction: ret = Instruction() ret.x64 = x64 ret.parse(insn) return ret @property - def op1(self) -> Optional[Operand]: + def op1(self) -> Operand | None: """First operand""" return self.operands[0] @property - def op2(self) -> Optional[Operand]: + def op2(self) -> Operand | None: """Second operand""" return self.operands[1] @property - def op3(self) -> Optional[Operand]: + def op3(self) -> Operand | None: """Third operand""" return self.operands[2] @property - def addr(self) -> Optional[int]: + def addr(self) -> int | None: 
"""Instruction address""" if self._addr: return self._addr diff --git a/malduck/dnpe.py b/malduck/dnpe.py index 82c7a04..b109a19 100644 --- a/malduck/dnpe.py +++ b/malduck/dnpe.py @@ -1,10 +1,16 @@ -from typing import Any, Iterator, List, Optional, Union +from __future__ import annotations + +from typing import TYPE_CHECKING import dnfile from .pe import PE, MemoryPEData from .procmem import ProcessMemory +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any + __all__ = ["dnpe", "DnPE", "MemoryDnPEData"] @@ -18,37 +24,34 @@ def __init__(self, memory: ProcessMemory, fast_load: bool) -> None: class DnPE(PE): - def __init__( - self, data: Union[ProcessMemory, bytes], fast_load: bool = False - ) -> None: - + def __init__(self, data: ProcessMemory | bytes, fast_load: bool = False) -> None: if isinstance(data, ProcessMemory): self.pe = MemoryDnPEData(data, fast_load).pe else: self.pe = dnfile.dnPE(data=data, fast_load=fast_load) @property - def dn_metadata(self) -> Optional[dnfile.stream.MetaDataTables]: + def dn_metadata(self) -> dnfile.stream.MetaDataTables | None: return self.pe.net.metadata @property - def dn_strings(self) -> Optional[dnfile.stream.StringsHeap]: + def dn_strings(self) -> dnfile.stream.StringsHeap | None: return self.pe.net.strings @property - def dn_user_strings(self) -> Optional[dnfile.stream.UserStringHeap]: + def dn_user_strings(self) -> dnfile.stream.UserStringHeap | None: return self.pe.net.user_strings @property - def dn_guid(self) -> Optional[dnfile.stream.GuidHeap]: + def dn_guid(self) -> dnfile.stream.GuidHeap | None: return self.pe.net.guids @property - def dn_mdtables(self) -> Optional[dnfile.stream.MetaDataTables]: + def dn_mdtables(self) -> dnfile.stream.MetaDataTables | None: return self.pe.net.mdtables @property - def dn_resources(self) -> List: + def dn_resources(self) -> list: return self.pe.net.resources @property @@ -57,7 +60,7 @@ def dn_flags(self) -> Any: def dn_user_string( self, index: int, 
encoding="utf-16" - ) -> Optional[dnfile.stream.UserString]: + ) -> dnfile.stream.UserString | None: if not self.dn_user_strings or self.dn_user_strings.sizeof() == 0: return None diff --git a/malduck/extractor/config_utils.py b/malduck/extractor/config_utils.py index 6d4b32b..ae45ecc 100644 --- a/malduck/extractor/config_utils.py +++ b/malduck/extractor/config_utils.py @@ -1,10 +1,18 @@ +from __future__ import annotations + import logging -from typing import Any, Dict +from typing import TYPE_CHECKING -log = logging.getLogger(__name__) +if TYPE_CHECKING: + from typing import Any + + from typing_extensions import TypeAlias -Config = Dict[str, Any] -ConfigSet = Dict[str, Config] + Config: TypeAlias = dict[str, Any] + ConfigSet: TypeAlias = dict[str, Config] + + +log = logging.getLogger(__name__) def is_config_better(base_config: Config, new_config: Config) -> bool: diff --git a/malduck/extractor/extract_manager.py b/malduck/extractor/extract_manager.py index 3d0fcd1..740c456 100644 --- a/malduck/extractor/extract_manager.py +++ b/malduck/extractor/extract_manager.py @@ -1,13 +1,14 @@ +from __future__ import annotations + import json import logging import warnings -from typing import Any, Dict, List, Optional, Type +from typing import TYPE_CHECKING from ..procmem import ProcessMemory, ProcessMemoryELF, ProcessMemoryPE from ..procmem.binmem import ProcessMemoryBinary from ..yara import Yara, YaraRuleOffsets, YaraRulesetMatch from .config_utils import ( - Config, apply_config_part, encode_for_json, is_config_better, @@ -16,6 +17,11 @@ from .extractor import Extractor from .modules import ExtractorModules +if TYPE_CHECKING: + from typing import Any, Optional + + from .config_utils import Config + log = logging.getLogger(__name__) __all__ = ["ExtractManager"] @@ -31,11 +37,11 @@ class ExtractManager: def __init__(self, modules: ExtractorModules) -> None: self.modules = modules - self.binary_classes: List[Type[ProcessMemoryBinary]] = [ + self.binary_classes: 
list[type[ProcessMemoryBinary]] = [ ProcessMemoryPE, ProcessMemoryELF, ] - self.configs: Dict[str, Config] = {} + self.configs: dict[str, Config] = {} @property def rules(self) -> Yara: @@ -46,7 +52,7 @@ def rules(self) -> Yara: return self.modules.rules @property - def extractors(self) -> List[Type[Extractor]]: + def extractors(self) -> list[type[Extractor]]: """ Bound extractor modules :rtype: List[Type[:class:`malduck.extractor.Extractor`]] @@ -114,7 +120,7 @@ def match_procmem(self, p: ProcessMemory) -> YaraRulesetMatch: log.debug("Matched rules: %s", ",".join(list(matches.keys()))) return matches - def carve_procmem(self, p: ProcessMemory) -> List[ProcessMemoryBinary]: + def carve_procmem(self, p: ProcessMemory) -> list[ProcessMemoryBinary]: """ Carves binaries from ProcessMemory to try configuration extraction using every possible address mapping. @@ -217,7 +223,7 @@ def push_procmem( return family @property - def config(self) -> List[Config]: + def config(self) -> list[Config]: """ Extracted configuration (list of configs for each extracted family) """ @@ -232,7 +238,7 @@ class ExtractionContext: def __init__(self, parent: ExtractManager) -> None: #: Collected configuration so far (especially useful for "final" extractors) self.collected_config: Config = {} - self.globals: Dict[str, Any] = {} + self.globals: dict[str, Any] = {} self.parent = parent #: Bound ExtractManager instance @property diff --git a/malduck/extractor/extractor.py b/malduck/extractor/extractor.py index c15be80..7ad1ad1 100644 --- a/malduck/extractor/extractor.py +++ b/malduck/extractor/extractor.py @@ -1,7 +1,9 @@ +from __future__ import annotations + import functools import inspect import logging -from typing import List, cast +from typing import cast from ..procmem import ProcessMemory, ProcessMemoryELF, ProcessMemoryPE @@ -511,7 +513,7 @@ def string(*strings_or_method): raise TypeError("@extractor decorator must be first") return StringExtractorMethod(method) elif 
all(isinstance(string, str) for string in strings_or_method): - strings = cast(List[str], strings_or_method) + strings = cast("list[str]", strings_or_method) def extractor_wrapper(method): if isinstance(method, ExtractorMethod): diff --git a/malduck/extractor/extractor.pyi b/malduck/extractor/extractor.pyi index 3aff06e..81c5da8 100644 --- a/malduck/extractor/extractor.pyi +++ b/malduck/extractor/extractor.pyi @@ -1,18 +1,6 @@ import logging -from typing import ( - Any, - Callable, - Dict, - Generic, - Iterator, - List, - Optional, - Tuple, - Type, - TypeVar, - Union, - overload, -) +from collections.abc import Callable, Iterator +from typing import Any, Generic, TypeVar, overload from typing_extensions import Protocol @@ -20,70 +8,68 @@ from ..procmem import ProcessMemory, ProcessMemoryELF, ProcessMemoryPE from ..yara import YaraRuleMatch, YaraStringMatch from .extract_manager import ExtractionContext -Config = Dict[str, Any] +Config = dict[str, Any] T = TypeVar("T", bound="Extractor", contravariant=True) U = TypeVar("U", bound=ProcessMemory, contravariant=True) V = TypeVar("V", bound="ExtractorMethod") class _StringOffsetCallback(Protocol[T, U]): - def __call__(cls, self: T, p: U, addr: int) -> Union[Config, bool, None]: ... + def __call__(cls, self: T, p: U, addr: int) -> Config | bool | None: ... class _StringCallback(Protocol[T, U]): def __call__( cls, self: T, p: U, addr: int, match: YaraStringMatch - ) -> Union[Config, bool, None]: ... + ) -> Config | bool | None: ... class _RuleCallback(Protocol[T, U]): - def __call__( - cls, self: T, p: U, match: YaraRuleMatch - ) -> Union[Config, bool, None]: ... + def __call__(cls, self: T, p: U, match: YaraRuleMatch) -> Config | bool | None: ... class _FinalCallback(Protocol[T, U]): - def __call__(cls, self: T, p: U) -> Union[Config, bool, None]: ... + def __call__(cls, self: T, p: U) -> Config | bool | None: ... 
class ExtractorMethod(Generic[T, U]): """ Represents registered extractor method """ - method: Union[ - _StringOffsetCallback[T, U], - _StringCallback[T, U], - _RuleCallback[T, U], - _FinalCallback[T, U], - ] - procmem_type: Type["ProcessMemory"] + method: ( + _StringOffsetCallback[T, U] + | _StringCallback[T, U] + | _RuleCallback[T, U] + | _FinalCallback[T, U] + ) + procmem_type: type[ProcessMemory] weak: bool def __init__( self, - method: Union[ - _StringOffsetCallback[T, U], - _StringCallback[T, U], - _RuleCallback[T, U], - _FinalCallback[T, U], - ], + method: ( + _StringOffsetCallback[T, U] + | _StringCallback[T, U] + | _RuleCallback[T, U] + | _FinalCallback[T, U] + ), ) -> None: ... def __call__(self, extractor: T, procmem: U, *args, **kwargs) -> None: ... class StringOffsetExtractorMethod(ExtractorMethod[T, U]): string_name: str def __init__( - self, method: _StringOffsetCallback[T, U], string_name: Optional[str] = None + self, method: _StringOffsetCallback[T, U], string_name: str | None = None ) -> None: super().__init__(method) class StringExtractorMethod(ExtractorMethod[T, U]): - string_names: List[str] + string_names: list[str] def __init__( - self, method: _StringCallback[T, U], string_names: Optional[List[str]] = None + self, method: _StringCallback[T, U], string_names: list[str] | None = None ) -> None: super().__init__(method) class RuleExtractorMethod(ExtractorMethod[T, U]): rule_name: str def __init__( - self, method: _RuleCallback[T, U], rule_name: Optional[str] = None + self, method: _RuleCallback[T, U], rule_name: str | None = None ) -> None: super().__init__(method) @@ -92,9 +78,9 @@ class FinalExtractorMethod(ExtractorMethod[T, U]): super().__init__(method) class Extractor: - yara_rules: Tuple[str, ...] - family: Optional[str] - overrides: List[str] + yara_rules: tuple[str, ...] + family: str | None + overrides: list[str] parent: ExtractionContext def __init__(self, parent: ExtractionContext) -> None: ... 
def push_procmem(self, procmem: ProcessMemory, **info): ... @@ -104,10 +90,10 @@ class Extractor: @property def collected_config(self) -> Config: ... @property - def globals(self) -> Dict[str, Any]: ... + def globals(self) -> dict[str, Any]: ... @property def log(self) -> logging.Logger: ... - def _get_methods(self, method_type: Type[V]) -> Iterator[Tuple[str, V]]: ... + def _get_methods(self, method_type: type[V]) -> Iterator[tuple[str, V]]: ... def on_error(self, exc: Exception, method_name: str) -> None: ... def handle_match(self, p: ProcessMemory, match: YaraRuleMatch) -> None: ... # Extractor method decorators diff --git a/malduck/extractor/modules.py b/malduck/extractor/modules.py index a602c22..9fa9c10 100644 --- a/malduck/extractor/modules.py +++ b/malduck/extractor/modules.py @@ -1,3 +1,4 @@ +from __future__ import annotations import importlib.util import logging import os @@ -6,11 +7,16 @@ import warnings from collections import defaultdict from importlib.abc import FileLoader, PathEntryFinder -from typing import Any, Callable, DefaultDict, Dict, List, Optional, Type, cast +from typing import TYPE_CHECKING, cast from ..yara import Yara from .extractor import Extractor +if TYPE_CHECKING: + from typing import Any + + from collections.abc import Callable + log = logging.getLogger(__name__) @@ -22,7 +28,7 @@ class ExtractorModules: :type modules_path: str """ - def __init__(self, modules_path: Optional[str] = None) -> None: + def __init__(self, modules_path: str | None = None) -> None: if modules_path is None: modules_path = os.path.join(os.path.expanduser("~"), ".malduck") if not os.path.exists(modules_path): @@ -31,7 +37,7 @@ def __init__(self, modules_path: Optional[str] = None) -> None: self.rules: Yara = Yara.from_dir(modules_path) # Preload modules loaded_modules = load_modules(modules_path, onerror=self.on_error) - self.extractors: List[Type[Extractor]] = Extractor.__subclasses__() + self.extractors: list[type[Extractor]] = Extractor.__subclasses__()
loaded_extractors = [x.__module__ for x in self.extractors] @@ -76,9 +82,9 @@ def compare_family_overrides(self, first: str, second: str) -> int: return 0 -def make_override_paths(extractors: List[Type[Extractor]]) -> Dict[str, List[str]]: +def make_override_paths(extractors: list[type[Extractor]]) -> dict[str, list[str]]: # Make override trees and get roots - overrides: DefaultDict[str, List[str]] = defaultdict(list) + overrides: defaultdict[str, list[str]] = defaultdict(list) parents = set() children = set() for extractor in extractors: @@ -143,8 +149,8 @@ def import_module_by_finder(finder: PathEntryFinder, module_name: str) -> Any: def load_modules( - search_path: str, onerror: Optional[Callable[[Exception, str], None]] = None -) -> Dict[str, Any]: + search_path: str, onerror: Callable[[Exception, str], None] | None = None +) -> dict[str, Any]: """ Loads plugin modules under specified paths @@ -157,7 +163,7 @@ def load_modules( :param onerror: Exception handler (default: ignore exceptions) :return: dict {name: module} """ - modules: Dict[str, Any] = {} + modules: dict[str, Any] = {} for finder, module_name, is_pkg in pkgutil.iter_modules( [search_path], "malduck.extractor.modules." ): diff --git a/malduck/hash/crc.py b/malduck/hash/crc.py index b487e93..b8c500a 100644 --- a/malduck/hash/crc.py +++ b/malduck/hash/crc.py @@ -1,6 +1,7 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. +from __future__ import annotations import zlib diff --git a/malduck/hash/sha.py b/malduck/hash/sha.py index cb22953..2148bdd 100644 --- a/malduck/hash/sha.py +++ b/malduck/hash/sha.py @@ -1,6 +1,7 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. 
+from __future__ import annotations import hashlib diff --git a/malduck/ints.py b/malduck/ints.py index d31e024..3593b82 100644 --- a/malduck/ints.py +++ b/malduck/ints.py @@ -1,9 +1,15 @@ +from __future__ import annotations + from abc import ABCMeta, abstractmethod from struct import error, pack, unpack_from -from typing import Any, Generic, Iterator, Tuple, Type, TypeVar, Union +from typing import TYPE_CHECKING, Generic, TypeVar from .bits import rol +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any + __all__ = [ "QWORD", "DWORD", @@ -40,7 +46,7 @@ class MultipliedIntTypeBase(IntTypeBase, Generic[T], metaclass=ABCMeta): @abstractmethod def unpack( other: bytes, offset: int = 0, foxed: bool = False - ) -> Union[Tuple[T, ...], int, None]: + ) -> tuple[T, ...] | int | None: raise NotImplementedError() @@ -68,19 +74,19 @@ def invert_mask(cls) -> int: """ return (2**cls.bits) >> 1 - def __mul__(cls: Type[T], multiplier: int) -> Type[MultipliedIntTypeBase[T]]: # type: ignore + def __mul__(cls: type[T], multiplier: int) -> type[MultipliedIntTypeBase[T]]: # type: ignore # mypy doesn't know how to deal with metaclasses # that are used for specific base class instantiation # We're doing our best, but 'type: ignore' is still needed here class MultipliedIntTypeClass(MultipliedIntTypeBase): - int_type: Type[T] = cls + int_type: type[T] = cls mul = multiplier @staticmethod def unpack( other: bytes, offset: int = 0, fixed: bool = True - ) -> Union[Tuple[T, ...], int, None]: + ) -> tuple[T, ...] 
| int | None: """ Unpacks multiple values from provided buffer :param other: Buffer object containing value to unpack @@ -162,73 +168,73 @@ def sdbm_hash(name: bytes): signed = False fmt = "Q" - def __new__(cls: MetaIntType, value: Any) -> "IntType": + def __new__(cls: MetaIntType, value: Any) -> IntType: value = int(value) & cls.mask if cls.signed: value |= -(value & cls.invert_mask) return int.__new__(cls, value) # type: ignore - def __add__(self, other: Any) -> "IntType": + def __add__(self, other: Any) -> IntType: res = super().__add__(other) return self.__class__(res) - def __sub__(self, other: Any) -> "IntType": + def __sub__(self, other: Any) -> IntType: res = super().__sub__(other) return self.__class__(res) - def __mul__(self, other: Any) -> "IntType": + def __mul__(self, other: Any) -> IntType: res = super().__mul__(other) return self.__class__(res) - def __truediv__(self, other: Any) -> "IntType": + def __truediv__(self, other: Any) -> IntType: res = super().__truediv__(other) return self.__class__(res) - def __floordiv__(self, other: Any) -> "IntType": + def __floordiv__(self, other: Any) -> IntType: res = super().__floordiv__(other) return self.__class__(res) - def __and__(self, other: Any) -> "IntType": + def __and__(self, other: Any) -> IntType: res = super().__and__(other) return self.__class__(res) - def __xor__(self, other: Any) -> "IntType": + def __xor__(self, other: Any) -> IntType: res = super().__xor__(other) return self.__class__(res) - def __or__(self, other: Any) -> "IntType": + def __or__(self, other: Any) -> IntType: res = super().__or__(other) return self.__class__(res) - def __lshift__(self, other: Any) -> "IntType": + def __lshift__(self, other: Any) -> IntType: res = super().__lshift__(other) return self.__class__(res) - def __pos__(self) -> "IntType": + def __pos__(self) -> IntType: res = super().__pos__() return self.__class__(res) - def __abs__(self) -> "IntType": + def __abs__(self) -> IntType: res = super().__abs__() return 
self.__class__(res) - def __rshift__(self, other: Any) -> "IntType": + def __rshift__(self, other: Any) -> IntType: res = int.__rshift__(int(self) & self.__class__.mask, other) return self.__class__(res) - def __neg__(self) -> "IntType": + def __neg__(self) -> IntType: res = (int(self) ^ self.__class__.mask) + 1 return self.__class__(res) - def __invert__(self) -> "IntType": + def __invert__(self) -> IntType: res = int(self) ^ self.__class__.mask return self.__class__(res) - def rol(self, other) -> "IntType": + def rol(self, other) -> IntType: """Bitwise rotate left""" return self.__class__(rol(int(self), other, bits=self.bits)) - def ror(self, other) -> "IntType": + def ror(self, other) -> IntType: """Bitwise rotate right""" return self.rol(self.bits - other) @@ -243,7 +249,7 @@ def pack_be(self) -> bytes: @classmethod def unpack( cls, other: bytes, offset: int = 0, fixed: bool = True - ) -> Union["IntType", int, None]: + ) -> IntType | int | None: """ Unpacks single value from provided buffer with little-endian order @@ -267,7 +273,7 @@ def unpack( @classmethod def unpack_be( cls, other: bytes, offset: int = 0, fixed: bool = True - ) -> Union["IntType", int, None]: + ) -> IntType | int | None: """ Unpacks single value from provided buffer with big-endian order diff --git a/malduck/pe.py b/malduck/pe.py index 8e89da8..f4374cd 100644 --- a/malduck/pe.py +++ b/malduck/pe.py @@ -1,12 +1,16 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. 
+from __future__ import annotations -from typing import TYPE_CHECKING, Any, Iterator, Optional, Tuple, Union +from typing import TYPE_CHECKING import pefile if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any + from .procmem import ProcessMemory __all__ = ["pe", "PE", "MemoryPEData"] @@ -48,7 +52,7 @@ def __getitem__(self, item: Any) -> object: stop = start return self.memory.readv(start, stop - start + 1) - def find(self, str: bytes, beg: int = 0, end: Optional[int] = None) -> int: + def find(self, str: bytes, beg: int = 0, end: int | None = None) -> int: if end and beg >= end: return -1 try: @@ -65,9 +69,7 @@ class PE: :class:`ProcessMemory` instance. """ - def __init__( - self, data: Union["ProcessMemory", bytes], fast_load: bool = False - ) -> None: + def __init__(self, data: ProcessMemory | bytes, fast_load: bool = False) -> None: from .procmem import ProcessMemory if isinstance(data, ProcessMemory): @@ -130,7 +132,7 @@ def headers_size(self) -> int: else min(len(self.pe.__data__), 0x1000) ) - def section(self, name: Union[str, bytes]) -> Any: + def section(self, name: str | bytes) -> Any: """ Get section by name @@ -264,7 +266,7 @@ def validate_padding(self) -> bool: def iterate_resources( self, ) -> Iterator[ - Tuple[ + tuple[ pefile.ResourceDirEntryData, pefile.ResourceDirEntryData, pefile.ResourceDirEntryData, @@ -275,7 +277,7 @@ def iterate_resources( for e3 in e2.directory.entries: yield (e1, e2, e3) - def resources(self, name: Union[int, str, bytes]) -> Iterator[bytes]: + def resources(self, name: int | str | bytes) -> Iterator[bytes]: """ Finds resource objects by specified name or type @@ -316,7 +318,7 @@ def type_int(e1, e2, e3): if compare(e1, e2, e3): yield self.pe.get_data(e3.data.struct.OffsetToData, e3.data.struct.Size) - def resource(self, name: Union[int, str, bytes]) -> Optional[bytes]: + def resource(self, name: int | str | bytes) -> bytes | None: """ Retrieves single resource by specified name or type diff 
--git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py index 0be7daa..d407678 100644 --- a/malduck/procmem/binmem.py +++ b/malduck/procmem/binmem.py @@ -1,10 +1,15 @@ +from __future__ import annotations + import logging from abc import ABCMeta, abstractmethod -from typing import Iterator, List, Optional, Type, TypeVar +from typing import TYPE_CHECKING, TypeVar from .procmem import ProcessMemory, ProcessMemoryBuffer from .region import Region +if TYPE_CHECKING: + from collections.abc import Iterator + log = logging.getLogger(__name__) T = TypeVar("T", bound="ProcessMemoryBinary") @@ -15,13 +20,13 @@ class ProcessMemoryBinary(ProcessMemory, metaclass=ABCMeta): Abstract class for memory-mapped executable binary """ - __magic__: Optional[bytes] = None + __magic__: bytes | None = None def __init__( self: T, buf: ProcessMemoryBuffer, base: int = 0, - regions: Optional[List[Region]] = None, + regions: list[Region] | None = None, image: bool = False, detect_image: bool = False, ) -> None: @@ -29,7 +34,7 @@ def __init__( if detect_image: image = self.is_image_loaded_as_memdump() self.is_image = image - self._image: Optional[T] = None + self._image: T | None = None if image: self._reload_as_image() @@ -41,7 +46,7 @@ def _reload_as_image(self) -> None: raise NotImplementedError() @property - def image(self: T) -> Optional[T]: + def image(self: T) -> T | None: """ Returns ProcessMemory object loaded with image=True or None if can't be loaded or is loaded as image yet """ @@ -67,7 +72,7 @@ def is_valid(self) -> bool: raise NotImplementedError() @classmethod - def load_binaries_from_memory(cls: Type[T], procmem: ProcessMemory) -> Iterator[T]: + def load_binaries_from_memory(cls: type[T], procmem: ProcessMemory) -> Iterator[T]: """ Looks for binaries in ProcessMemory object and yields specialized ProcessMemoryBinary objects :param procmem: ProcessMemory object to search diff --git a/malduck/procmem/cuckoomem.py b/malduck/procmem/cuckoomem.py index 53fe719..8388aae 100644 
--- a/malduck/procmem/cuckoomem.py +++ b/malduck/procmem/cuckoomem.py @@ -1,5 +1,6 @@ +from __future__ import annotations + import struct -from typing import Optional from .procmem import ProcessMemory, ProcessMemoryBuffer from .region import Region @@ -10,9 +11,7 @@ class CuckooProcessMemory(ProcessMemory): """Wrapper object to operate on process memory dumps in Cuckoo 2.x format.""" - def __init__( - self, buf: ProcessMemoryBuffer, base: Optional[int] = None, **_ - ) -> None: + def __init__(self, buf: ProcessMemoryBuffer, base: int | None = None, **_) -> None: super(CuckooProcessMemory, self).__init__(buf) ptr = 0 self.regions = [] diff --git a/malduck/procmem/idamem.py b/malduck/procmem/idamem.py index 7862b8c..1ce0413 100644 --- a/malduck/procmem/idamem.py +++ b/malduck/procmem/idamem.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from .procmem import MemoryBuffer, ProcessMemory from .region import Region diff --git a/malduck/procmem/procmem.py b/malduck/procmem/procmem.py index 36791cf..ef8a04b 100644 --- a/malduck/procmem/procmem.py +++ b/malduck/procmem/procmem.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import mmap import re -from typing import BinaryIO, List, Optional, Union, cast +from typing import TYPE_CHECKING, cast from ..disasm import disasm from ..string.bin import int8, int16, int32, int64, uint8, uint16, uint32, uint64 @@ -8,6 +10,11 @@ from ..yara import Yara, YaraString from .region import PAGE_EXECUTE_READWRITE, Region +if TYPE_CHECKING: + from typing import BinaryIO + + from typing_extensions import TypeAlias + __all__ = ["ProcessMemory", "procmem"] @@ -22,7 +29,7 @@ def __len__(self): raise NotImplementedError("__len__ not implemented") -ProcessMemoryBuffer = Union[bytes, bytearray, mmap.mmap, MemoryBuffer] +ProcessMemoryBuffer: TypeAlias = "bytes | bytearray | mmap.mmap | MemoryBuffer" class ProcessMemory: @@ -88,9 +95,9 @@ class ProcessMemory: """ def __init__(self, buf, base=0, regions=None, **_): - self.opened_file: 
Optional[BinaryIO] = None - self.mapped_memory: Optional[mmap.mmap] = None - self.memory: Optional[bytearray] = None + self.opened_file: BinaryIO | None = None + self.mapped_memory: mmap.mmap | None = None + self.memory: bytearray | None = None if isinstance(buf, mmap.mmap): self.mapped_memory = buf @@ -146,7 +153,7 @@ def close(self, copy=False): # Get object contents from mapped_memory self.mapped_memory.seek(0) contents = self.mapped_memory.read() - buf: Optional[bytearray] = bytearray(contents) + buf: bytearray | None = bytearray(contents) else: # Invalidate object buf = None @@ -427,7 +434,7 @@ def readv_regions(self, addr=None, length=None, contiguous=True): :rtype: Iterator[Tuple[int, bytes]] """ current_addr = 0 - current_strings: List[bytes] = [] + current_strings: list[bytes] = [] prev_region = None for region in self.iter_regions( addr=addr, length=length, contiguous=contiguous, trim=True diff --git a/malduck/procmem/procmem.pyi b/malduck/procmem/procmem.pyi index 7346397..e603056 100644 --- a/malduck/procmem/procmem.pyi +++ b/malduck/procmem/procmem.pyi @@ -1,19 +1,8 @@ import mmap -from typing import ( - Any, - BinaryIO, - Dict, - Iterator, - List, - Optional, - Tuple, - Type, - TypeVar, - Union, - overload, -) +from collections.abc import Iterator +from typing import Any, BinaryIO, TypeVar, overload -from typing_extensions import Literal, Protocol +from typing_extensions import Literal, Protocol, TypeAlias from ..disasm import Instruction from ..extractor import ExtractManager, ExtractorModules @@ -22,44 +11,44 @@ from ..yara import Yara, YaraRulesetMatch, YaraRulesetOffsets from .region import Region class MemoryBuffer: - def __setitem__(self, item: Union[int, slice], value: Union[int, slice]): ... - def __getitem__(self, item: Union[int, slice]): ... + def __setitem__(self, item: int | slice, value: int | slice): ... + def __getitem__(self, item: int | slice): ... def __len__(self) -> int: ... 
-ProcessMemoryBuffer = Union[bytes, bytearray, mmap.mmap, MemoryBuffer] +ProcessMemoryBuffer: TypeAlias = bytes | bytearray | mmap.mmap | MemoryBuffer T = TypeVar("T", bound="ProcessMemory") -procmem: Type["ProcessMemory"] +procmem: type[ProcessMemory] class ProcessMemoryYaraCallback(Protocol): @overload def __call__( self, ruleset: Yara, - addr: Optional[int], - length: Optional[int], + addr: int | None, + length: int | None, extended: Literal[True], ) -> YaraRulesetMatch: ... @overload def __call__( self, ruleset: Yara, - offset: Optional[int], - length: Optional[int], + offset: int | None, + length: int | None, extended: Literal[True], ) -> YaraRulesetMatch: ... class ProcessMemory: - f: Optional[BinaryIO] - memory: Optional[bytearray] - mapped_memory: Optional[mmap.mmap] + f: BinaryIO | None + memory: bytearray | None + mapped_memory: mmap.mmap | None imgbase: int - regions: List[Region] + regions: list[Region] def __init__( self, buf: ProcessMemoryBuffer, base: int = 0, - regions: Optional[List[Region]] = None, + regions: list[Region] | None = None, **_, ) -> None: ... def __enter__(self): ... @@ -68,169 +57,165 @@ class ProcessMemory: def m(self) -> bytearray: ... def close(self, copy: bool = False) -> None: ... @classmethod - def from_file(cls: Type[T], filename: str, **kwargs) -> T: ... + def from_file(cls: type[T], filename: str, **kwargs) -> T: ... @classmethod def from_memory( - cls: Type[T], memory: "ProcessMemory", base: int = None, **kwargs + cls: type[T], memory: ProcessMemory, base: int = None, **kwargs ) -> T: ... @property def length(self) -> int: ... def iter_regions( self, - addr: Optional[int] = None, - offset: Optional[int] = None, - length: Optional[int] = None, + addr: int | None = None, + offset: int | None = None, + length: int | None = None, contiguous: bool = False, trim: bool = False, ) -> Iterator[Region]: ... - def v2p( - self, addr: Optional[int], length: Optional[int] = None - ) -> Optional[int]: ... 
- def p2v( - self, off: Optional[int], length: Optional[int] = None - ) -> Optional[int]: ... - def is_addr(self, addr: Optional[int]) -> bool: ... - def addr_region(self, addr: Optional[int]) -> Optional[Region]: ... - def readp(self, offset: int, length: Optional[int] = None) -> bytes: ... + def v2p(self, addr: int | None, length: int | None = None) -> int | None: ... + def p2v(self, off: int | None, length: int | None = None) -> int | None: ... + def is_addr(self, addr: int | None) -> bool: ... + def addr_region(self, addr: int | None) -> Region | None: ... + def readp(self, offset: int, length: int | None = None) -> bytes: ... def readv_regions( self, - addr: Optional[int] = None, - length: Optional[int] = None, + addr: int | None = None, + length: int | None = None, contiguous: bool = True, - ) -> Iterator[Tuple[int, bytes]]: ... - def readv(self, addr: int, length: Optional[int] = None) -> bytes: ... + ) -> Iterator[tuple[int, bytes]]: ... + def readv(self, addr: int, length: int | None = None) -> bytes: ... def readv_until(self, addr: int, s: bytes) -> bytes: ... def patchp(self, offset: int, buf: bytes) -> None: ... def patchv(self, addr: int, buf: bytes) -> None: ... @overload - def uint8p(self, offset: int, fixed: Literal[False]) -> Optional[int]: ... + def uint8p(self, offset: int, fixed: Literal[False]) -> int | None: ... @overload - def uint8p(self, offset: int, fixed: Literal[True]) -> Optional[IntType]: ... + def uint8p(self, offset: int, fixed: Literal[True]) -> IntType | None: ... @overload - def uint8p(self, offset: int) -> Optional[int]: ... + def uint8p(self, offset: int) -> int | None: ... @overload - def uint16p(self, offset: int, fixed: Literal[False]) -> Optional[int]: ... + def uint16p(self, offset: int, fixed: Literal[False]) -> int | None: ... @overload - def uint16p(self, offset: int, fixed: Literal[True]) -> Optional[IntType]: ... + def uint16p(self, offset: int, fixed: Literal[True]) -> IntType | None: ... 
@overload - def uint16p(self, offset: int) -> Optional[int]: ... + def uint16p(self, offset: int) -> int | None: ... @overload - def uint32p(self, offset: int, fixed: Literal[False]) -> Optional[int]: ... + def uint32p(self, offset: int, fixed: Literal[False]) -> int | None: ... @overload - def uint32p(self, offset: int, fixed: Literal[True]) -> Optional[IntType]: ... + def uint32p(self, offset: int, fixed: Literal[True]) -> IntType | None: ... @overload - def uint32p(self, offset: int) -> Optional[int]: ... + def uint32p(self, offset: int) -> int | None: ... @overload - def uint64p(self, offset: int, fixed: Literal[False]) -> Optional[int]: ... + def uint64p(self, offset: int, fixed: Literal[False]) -> int | None: ... @overload - def uint64p(self, offset: int, fixed: Literal[True]) -> Optional[IntType]: ... + def uint64p(self, offset: int, fixed: Literal[True]) -> IntType | None: ... @overload - def uint64p(self, offset: int) -> Optional[int]: ... + def uint64p(self, offset: int) -> int | None: ... @overload - def uint8v(self, addr: int, fixed: Literal[False]) -> Optional[int]: ... + def uint8v(self, addr: int, fixed: Literal[False]) -> int | None: ... @overload - def uint8v(self, addr: int, fixed: Literal[True]) -> Optional[IntType]: ... + def uint8v(self, addr: int, fixed: Literal[True]) -> IntType | None: ... @overload - def uint8v(self, addr: int) -> Optional[int]: ... + def uint8v(self, addr: int) -> int | None: ... @overload - def uint16v(self, addr: int, fixed: Literal[False]) -> Optional[int]: ... + def uint16v(self, addr: int, fixed: Literal[False]) -> int | None: ... @overload - def uint16v(self, addr: int, fixed: Literal[True]) -> Optional[IntType]: ... + def uint16v(self, addr: int, fixed: Literal[True]) -> IntType | None: ... @overload - def uint16v(self, addr: int) -> Optional[int]: ... + def uint16v(self, addr: int) -> int | None: ... @overload - def uint32v(self, addr: int, fixed: Literal[False]) -> Optional[int]: ... 
+ def uint32v(self, addr: int, fixed: Literal[False]) -> int | None: ... @overload - def uint32v(self, addr: int, fixed: Literal[True]) -> Optional[IntType]: ... + def uint32v(self, addr: int, fixed: Literal[True]) -> IntType | None: ... @overload - def uint32v(self, addr: int) -> Optional[int]: ... + def uint32v(self, addr: int) -> int | None: ... @overload - def uint64v(self, addr: int, fixed: Literal[False]) -> Optional[int]: ... + def uint64v(self, addr: int, fixed: Literal[False]) -> int | None: ... @overload - def uint64v(self, addr: int, fixed: Literal[True]) -> Optional[IntType]: ... + def uint64v(self, addr: int, fixed: Literal[True]) -> IntType | None: ... @overload - def uint64v(self, addr: int) -> Optional[int]: ... + def uint64v(self, addr: int) -> int | None: ... @overload - def int8p(self, offset: int, fixed: Literal[False]) -> Optional[int]: ... + def int8p(self, offset: int, fixed: Literal[False]) -> int | None: ... @overload - def int8p(self, offset: int, fixed: Literal[True]) -> Optional[IntType]: ... + def int8p(self, offset: int, fixed: Literal[True]) -> IntType | None: ... @overload - def int8p(self, offset: int) -> Optional[int]: ... + def int8p(self, offset: int) -> int | None: ... @overload - def int16p(self, offset: int, fixed: Literal[False]) -> Optional[int]: ... + def int16p(self, offset: int, fixed: Literal[False]) -> int | None: ... @overload - def int16p(self, offset: int, fixed: Literal[True]) -> Optional[IntType]: ... + def int16p(self, offset: int, fixed: Literal[True]) -> IntType | None: ... @overload - def int16p(self, offset: int) -> Optional[int]: ... + def int16p(self, offset: int) -> int | None: ... @overload - def int32p(self, offset: int, fixed: Literal[False]) -> Optional[int]: ... + def int32p(self, offset: int, fixed: Literal[False]) -> int | None: ... @overload - def int32p(self, offset: int, fixed: Literal[True]) -> Optional[IntType]: ... + def int32p(self, offset: int, fixed: Literal[True]) -> IntType | None: ... 
@overload - def int32p(self, offset: int) -> Optional[int]: ... + def int32p(self, offset: int) -> int | None: ... @overload - def int64p(self, offset: int, fixed: Literal[False]) -> Optional[int]: ... + def int64p(self, offset: int, fixed: Literal[False]) -> int | None: ... @overload - def int64p(self, offset: int, fixed: Literal[True]) -> Optional[IntType]: ... + def int64p(self, offset: int, fixed: Literal[True]) -> IntType | None: ... @overload - def int64p(self, offset: int) -> Optional[int]: ... + def int64p(self, offset: int) -> int | None: ... @overload - def int8v(self, addr: int, fixed: Literal[False]) -> Optional[int]: ... + def int8v(self, addr: int, fixed: Literal[False]) -> int | None: ... @overload - def int8v(self, addr: int, fixed: Literal[True]) -> Optional[IntType]: ... + def int8v(self, addr: int, fixed: Literal[True]) -> IntType | None: ... @overload - def int8v(self, addr: int) -> Optional[int]: ... + def int8v(self, addr: int) -> int | None: ... @overload - def int16v(self, addr: int, fixed: Literal[False]) -> Optional[int]: ... + def int16v(self, addr: int, fixed: Literal[False]) -> int | None: ... @overload - def int16v(self, addr: int, fixed: Literal[True]) -> Optional[IntType]: ... + def int16v(self, addr: int, fixed: Literal[True]) -> IntType | None: ... @overload - def int16v(self, addr: int) -> Optional[int]: ... + def int16v(self, addr: int) -> int | None: ... @overload - def int32v(self, addr: int, fixed: Literal[False]) -> Optional[int]: ... + def int32v(self, addr: int, fixed: Literal[False]) -> int | None: ... @overload - def int32v(self, addr: int, fixed: Literal[True]) -> Optional[IntType]: ... + def int32v(self, addr: int, fixed: Literal[True]) -> IntType | None: ... @overload - def int32v(self, addr: int) -> Optional[int]: ... + def int32v(self, addr: int) -> int | None: ... @overload - def int64v(self, addr: int, fixed: Literal[False]) -> Optional[int]: ... 
+ def int64v(self, addr: int, fixed: Literal[False]) -> int | None: ... @overload - def int64v(self, addr: int, fixed: Literal[True]) -> Optional[IntType]: ... + def int64v(self, addr: int, fixed: Literal[True]) -> IntType | None: ... @overload - def int64v(self, addr: int) -> Optional[int]: ... + def int64v(self, addr: int) -> int | None: ... def asciiz(self, addr: int) -> bytes: ... def utf16z(self, addr: int) -> bytes: ... def _find( self, buf: bytes, query: bytes, - offset: Optional[int] = None, - length: Optional[int] = None, + offset: int | None = None, + length: int | None = None, ) -> Iterator[int]: ... def findp( - self, query: bytes, offset: Optional[int] = None, length: Optional[int] = None + self, query: bytes, offset: int | None = None, length: int | None = None ) -> Iterator[int]: ... def findv( - self, query: bytes, addr: Optional[int] = None, length: Optional[int] = None + self, query: bytes, addr: int | None = None, length: int | None = None ) -> Iterator[int]: ... def regexp( - self, query: bytes, offset: Optional[int] = None, length: Optional[int] = None + self, query: bytes, offset: int | None = None, length: int | None = None ) -> Iterator[int]: ... def regexv( - self, query: bytes, addr: Optional[int] = None, length: Optional[int] = None + self, query: bytes, addr: int | None = None, length: int | None = None ) -> Iterator[int]: ... def disasmv( self, addr: int, - size: Optional[int] = None, + size: int | None = None, x64: bool = False, - count: Optional[int] = None, + count: int | None = None, ) -> Iterator[Instruction]: ... def extract( self, modules: ExtractorModules = None, extract_manager: ExtractManager = None, - ) -> Optional[List[Dict[str, Any]]]: ... + ) -> list[dict[str, Any]] | None: ... 
# yarap(ruleset) # yarap(ruleset, offset) # yarap(ruleset, offset, length) @@ -239,8 +224,8 @@ class ProcessMemory: def yarap( self, ruleset: Yara, - offset: Optional[int] = None, - length: Optional[int] = None, + offset: int | None = None, + length: int | None = None, extended: Literal[False] = False, ) -> YaraRulesetOffsets: ... # yarap(ruleset, offset, length, extended=True) @@ -248,8 +233,8 @@ class ProcessMemory: def yarap( self, ruleset: Yara, - offset: Optional[int], - length: Optional[int], + offset: int | None, + length: int | None, extended: Literal[True], ) -> YaraRulesetMatch: ... # yarap(ruleset, extended=True) @@ -259,12 +244,12 @@ class ProcessMemory: # yarap(ruleset, 0, extended=True) @overload def yarap( - self, ruleset: Yara, offset: Optional[int], *, extended: Literal[True] + self, ruleset: Yara, offset: int | None, *, extended: Literal[True] ) -> YaraRulesetMatch: ... # yarap(ruleset, length=0, extended=True) @overload def yarap( - self, ruleset: Yara, *, length: Optional[int], extended: Literal[True] + self, ruleset: Yara, *, length: int | None, extended: Literal[True] ) -> YaraRulesetMatch: ... # yarav(ruleset) # yarav(ruleset, addr) @@ -274,8 +259,8 @@ class ProcessMemory: def yarav( self, ruleset: Yara, - addr: Optional[int] = None, - length: Optional[int] = None, + addr: int | None = None, + length: int | None = None, extended: Literal[False] = False, ) -> YaraRulesetOffsets: ... # yarav(ruleset, addr, length, extended=True) @@ -283,8 +268,8 @@ class ProcessMemory: def yarav( self, ruleset: Yara, - addr: Optional[int], - length: Optional[int], + addr: int | None, + length: int | None, extended: Literal[True], ) -> YaraRulesetMatch: ... # yarav(ruleset, extended=True) @@ -294,30 +279,30 @@ class ProcessMemory: # yarav(ruleset, 0, extended=True) @overload def yarav( - self, ruleset: Yara, addr: Optional[int], *, extended: Literal[True] + self, ruleset: Yara, addr: int | None, *, extended: Literal[True] ) -> YaraRulesetMatch: ... 
# yarav(ruleset, length=0, extended=True) @overload def yarav( - self, ruleset: Yara, *, length: Optional[int], extended: Literal[True] + self, ruleset: Yara, *, length: int | None, extended: Literal[True] ) -> YaraRulesetMatch: ... def _findbytes( self, yara_fn: ProcessMemoryYaraCallback, - query: Union[str, bytes], - addr: Optional[int], - length: Optional[int], + query: str | bytes, + addr: int | None, + length: int | None, ) -> Iterator[int]: ... def findbytesp( self, - query: Union[str, bytes], - offset: Optional[int] = None, - length: Optional[int] = None, + query: str | bytes, + offset: int | None = None, + length: int | None = None, ) -> Iterator[int]: ... def findbytesv( self, - query: Union[str, bytes], - addr: Optional[int] = None, - length: Optional[int] = None, + query: str | bytes, + addr: int | None = None, + length: int | None = None, ) -> Iterator[int]: ... - def findmz(self, addr: int) -> Optional[int]: ... + def findmz(self, addr: int) -> int | None: ... diff --git a/malduck/procmem/procmemdnpe.py b/malduck/procmem/procmemdnpe.py index 3c38333..7269c74 100644 --- a/malduck/procmem/procmemdnpe.py +++ b/malduck/procmem/procmemdnpe.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from __future__ import annotations from ..dnpe import DnPE from .binmem import ProcessMemoryBuffer @@ -9,18 +9,17 @@ class ProcessMemoryDnPE(ProcessMemoryPE): - __magic__ = b"MZ" def __init__( self, buf: ProcessMemoryBuffer, base: int = 0, - regions: Optional[List[Region]] = None, + regions: list[Region] | None = None, image: bool = False, detect_image: bool = False, ) -> None: - self._pe: Optional[DnPE] = None + self._pe: DnPE | None = None super(ProcessMemoryPE, self).__init__( buf, base=base, regions=regions, image=image, detect_image=detect_image ) diff --git a/malduck/procmem/procmemelf.py b/malduck/procmem/procmemelf.py index ad74814..8dc376a 100644 --- a/malduck/procmem/procmemelf.py +++ b/malduck/procmem/procmemelf.py @@ -1,5 +1,6 @@ +from __future__ import 
annotations + import io -from typing import List, Optional import elftools import elftools.elf.elffile @@ -27,7 +28,7 @@ def __init__( self, buf: ProcessMemoryBuffer, base: int = 0, - regions: Optional[List[Region]] = None, + regions: list[Region] | None = None, image: bool = False, detect_image: bool = False, ) -> None: diff --git a/malduck/procmem/procmempe.py b/malduck/procmem/procmempe.py index 12f0d5f..6897d25 100644 --- a/malduck/procmem/procmempe.py +++ b/malduck/procmem/procmempe.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from __future__ import annotations from ..bits import align from ..pe import PE @@ -56,11 +56,11 @@ def __init__( self, buf: ProcessMemoryBuffer, base: int = 0, - regions: Optional[List[Region]] = None, + regions: list[Region] | None = None, image: bool = False, detect_image: bool = False, ) -> None: - self._pe: Optional[PE] = None + self._pe: PE | None = None super(ProcessMemoryPE, self).__init__( buf, base=base, regions=regions, image=image, detect_image=detect_image ) diff --git a/malduck/procmem/region.py b/malduck/procmem/region.py index 413ea73..563d762 100644 --- a/malduck/procmem/region.py +++ b/malduck/procmem/region.py @@ -1,7 +1,7 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. 
-from typing import Dict, Optional, Union +from __future__ import annotations __all__ = [ "Region", @@ -46,7 +46,7 @@ def __init__( self.protect = protect self.offset = offset - def to_json(self) -> Dict[str, Union[int, Optional[str]]]: + def to_json(self) -> dict[str, int | str | None]: """ Returns JSON-like dict representation """ @@ -122,7 +122,7 @@ def intersects_range(self, addr: int, length: int) -> bool: """ return self.addr < addr + length and addr < self.end - def trim_range(self, addr: int, length: Optional[int] = None) -> Optional["Region"]: + def trim_range(self, addr: int, length: int | None = None) -> Region | None: """ Returns region intersection with provided range :param addr: Virtual address of starting point diff --git a/malduck/string/bin.py b/malduck/string/bin.py index c22a699..d0b4c82 100644 --- a/malduck/string/bin.py +++ b/malduck/string/bin.py @@ -1,9 +1,10 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. 
+from __future__ import annotations + import struct import warnings -from typing import Optional from ..ints import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 from ..string.ops import Padding, enhex, unhex @@ -56,7 +57,7 @@ class Bigint: - def unpack(self, other: bytes, size: Optional[int] = None) -> int: + def unpack(self, other: bytes, size: int | None = None) -> int: """ Unpacks bigint value from provided buffer with little-endian order @@ -75,7 +76,7 @@ def unpack(self, other: bytes, size: Optional[int] = None) -> int: other = other[:size] return int(enhex(other[::-1]), 16) - def pack(self, other: int, size: Optional[int] = None) -> bytes: + def pack(self, other: int, size: int | None = None) -> bytes: """ Packs bigint value into bytes with little-endian order @@ -92,7 +93,7 @@ def pack(self, other: int, size: Optional[int] = None) -> bytes: size = (other.bit_length() + 7) // 8 return other.to_bytes(size, byteorder="little") - def unpack_be(self, other: bytes, size: Optional[int] = None) -> int: + def unpack_be(self, other: bytes, size: int | None = None) -> int: """ Unpacks bigint value from provided buffer with big-endian order @@ -108,7 +109,7 @@ def unpack_be(self, other: bytes, size: Optional[int] = None) -> int: other = other[:size] return int(enhex(other), 16) - def pack_be(self, other: int, size: Optional[int] = None) -> bytes: + def pack_be(self, other: int, size: int | None = None) -> bytes: """ Packs bigint value into bytes with big-endian order diff --git a/malduck/string/inet.py b/malduck/string/inet.py index 7ee6dfb..1d8b4e8 100644 --- a/malduck/string/inet.py +++ b/malduck/string/inet.py @@ -1,10 +1,10 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. 
+from __future__ import annotations import re import socket -from typing import Optional, Union from ..string.bin import p32 @@ -16,7 +16,7 @@ ) -def ipv4(s: Union[bytes, int]) -> Optional[str]: +def ipv4(s: bytes | int) -> str | None: """ Decodes IPv4 address and returns dot-decimal notation diff --git a/malduck/string/ops.py b/malduck/string/ops.py index f490ef6..69fa004 100644 --- a/malduck/string/ops.py +++ b/malduck/string/ops.py @@ -1,10 +1,17 @@ # Copyright (C) 2018 Jurriaan Bremer. # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. +from __future__ import annotations import binascii from base64 import b64decode, b64encode -from typing import Iterator, List, Optional, Sequence, Tuple, TypeVar, Union, cast +from typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: + from collections.abc import Iterator, Sequence + from typing import TypeVar + + T = TypeVar("T", bound=Sequence) __all__ = [ "asciiz", @@ -24,8 +31,6 @@ "unpkcs7", ] -T = TypeVar("T", bound=Sequence) - def asciiz(s: bytes) -> bytes: """ @@ -39,10 +44,10 @@ def asciiz(s: bytes) -> bytes: def chunks_iter(s: T, n: int) -> Iterator[T]: """Yield successive n-sized chunks from s.""" - return (cast(T, s[i : i + n]) for i in range(0, len(s), n)) + return (cast("T", s[i : i + n]) for i in range(0, len(s), n)) -def chunks(s: T, n: int) -> List[T]: +def chunks(s: T, n: int) -> list[T]: """Return list of successive n-sized chunks from s.""" return list(chunks_iter(s, n)) @@ -76,11 +81,11 @@ def enhex(s: bytes) -> bytes: return binascii.hexlify(s) -def unhex(s: Union[str, bytes]) -> bytes: +def unhex(s: str | bytes) -> bytes: return binascii.unhexlify(s) -def uleb128(s: bytes) -> Optional[Tuple[int, int]]: +def uleb128(s: bytes) -> tuple[int, int] | None: """Unsigned Little-Endian Base 128""" ret = 0 for idx in range(len(s)): @@ -98,7 +103,7 @@ class Base64: def encode(self, s: bytes) -> bytes: return b64encode(s) - def decode(self, s: 
Union[str, bytes]) -> bytes: + def decode(self, s: str | bytes) -> bytes: return b64decode(s) __call__ = decode diff --git a/malduck/structure.py b/malduck/structure.py index 4956d94..8682e7e 100644 --- a/malduck/structure.py +++ b/malduck/structure.py @@ -2,8 +2,9 @@ # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. +from __future__ import annotations + import ctypes -from typing import List, Tuple, Type from .ints import ( Int8, @@ -34,7 +35,7 @@ class Structure: _pack_ = 0 - _fields_: List[Tuple[str, Type]] = [] + _fields_: list[tuple[str, type]] = [] def __init__(self): self.subfields, fields = {}, [] diff --git a/malduck/yara.py b/malduck/yara.py index 06986f3..55881d8 100644 --- a/malduck/yara.py +++ b/malduck/yara.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import enum import json import logging @@ -5,10 +7,18 @@ import re import textwrap from collections import defaultdict, namedtuple -from typing import Callable, Dict, Optional, Tuple, TypeVar +from typing import TYPE_CHECKING, TypeVar import yara +if TYPE_CHECKING: + from collections.abc import Callable + + from typing_extensions import TypeAlias + + YaraRulesString: TypeAlias = tuple[int, str, bytes] + OffsetMapper: TypeAlias = Callable[[int | None, int | None], int | None] + __all__ = [ "Yara", "YaraString", @@ -24,9 +34,6 @@ log = logging.getLogger(__name__) T = TypeVar("T") -OffsetMapper = Callable[[Optional[int], Optional[int]], Optional[int]] - -YaraRulesString = Tuple[int, str, bytes] class _Mapper: @@ -155,7 +162,7 @@ def from_dir(path, recursive=True, followlinks=True): :type followlinks: bool :rtype: :class:`Yara` """ - rule_paths: Dict[str, str] = {} + rule_paths: dict[str, str] = {} for root, _, files in os.walk(path, followlinks=followlinks): for fname in files: if not fname.endswith(".yar") and not fname.endswith(".yara"): diff --git a/malduck/yara.pyi b/malduck/yara.pyi index c3ea6d6..444379b 100644 --- 
a/malduck/yara.pyi +++ b/malduck/yara.pyi @@ -1,40 +1,28 @@ import enum from collections import namedtuple -from typing import ( - Any, - Callable, - Dict, - Generic, - Iterable, - KeysView, - List, - Optional, - Tuple, - TypeVar, - Union, - overload, -) +from collections.abc import Callable, Iterable, KeysView +from typing import Any, Generic, TypeVar, overload -from typing_extensions import Literal, Protocol +from typing_extensions import Literal, Protocol, TypeAlias T = TypeVar("T") -OffsetMapper = Callable[[Optional[int], Optional[int]], Optional[int]] -YaraRulesString = Union[Tuple[int, str, bytes], Any] +YaraRulesString: TypeAlias = tuple[int, str, bytes] +OffsetMapper: TypeAlias = Callable[[int | None, int | None], int | None] class YaraRulesMatch(Protocol): - meta: Dict[str, str] + meta: dict[str, str] namespace: str rule: str - strings: List[YaraRulesString] - tags: List[str] + strings: list[YaraRulesString] + tags: list[str] class _Mapper(Generic[T]): - elements: Dict[str, T] - default: Optional[T] - def __init__(self, elements: Dict[str, T], default: Optional[T] = None) -> None: ... + elements: dict[str, T] + default: T | None + def __init__(self, elements: dict[str, T], default: T | None = None) -> None: ... def keys(self) -> KeysView[str]: ... - def get(self, item) -> Optional[T]: ... + def get(self, item) -> T | None: ... def __bool__(self) -> bool: ... def __nonzero__(self) -> bool: ... def __contains__(self, item: str) -> bool: ... @@ -45,35 +33,33 @@ class Yara: rules: Any def __init__( self, - rule_paths: Optional[Dict[str, str]] = None, + rule_paths: dict[str, str] | None = None, name: str = "r", - strings: Union[ - str, "YaraString", Dict[str, Union[str, "YaraString"]], None - ] = None, + strings: (str | YaraString | dict[str, str | YaraString] | None) = None, condition: str = "any of them", ) -> None: ... @staticmethod def from_dir( path: str, recursive: bool = True, followlinks: bool = True - ) -> "Yara": ... + ) -> Yara: ... # match(...) 
# match(offset_mapper, ...) # match(offset_mapper, extended=False, ...) @overload def match( self, - offset_mapper: Optional[OffsetMapper] = None, + offset_mapper: OffsetMapper | None = None, extended: Literal[False] = False, **kwargs, - ) -> "YaraRulesetOffsets": ... + ) -> YaraRulesetOffsets: ... # match(offset_mapper, extended=True, ...) @overload def match( - self, offset_mapper: Optional[OffsetMapper], extended: Literal[True], **kwargs - ) -> "YaraRulesetMatch": ... + self, offset_mapper: OffsetMapper | None, extended: Literal[True], **kwargs + ) -> YaraRulesetMatch: ... # match(extended=True, ...) @overload - def match(self, *, extended: Literal[True], **kwargs) -> "YaraRulesetMatch": ... + def match(self, *, extended: Literal[True], **kwargs) -> YaraRulesetMatch: ... class YaraStringType(enum.IntEnum): TEXT = 0 @@ -87,59 +73,57 @@ class YaraString: value: str type: YaraStringType - modifiers: List[str] + modifiers: list[str] def __init__( self, value: str, type: YaraStringType = YaraStringType.TEXT, **modifiers: bool ) -> None: ... def __str__(self) -> str: ... class YaraRulesetMatch(_Mapper["YaraRuleMatch"]): - _matches: List[YaraRulesMatch] + _matches: list[YaraRulesMatch] def __init__( self, - matches: List[YaraRulesMatch], - offset_mapper: Optional[OffsetMapper] = None, + matches: list[YaraRulesMatch], + offset_mapper: OffsetMapper | None = None, ) -> None: super().__init__(elements={}) def _map_matches( - self, matches: List[YaraRulesMatch], offset_mapper: Optional[OffsetMapper] - ) -> Dict[str, "YaraRuleMatch"]: ... + self, matches: list[YaraRulesMatch], offset_mapper: OffsetMapper | None + ) -> dict[str, YaraRuleMatch]: ... def _map_strings( - self, strings: Iterable[YaraRulesString], offset_mapper: Optional[OffsetMapper] - ) -> Dict[str, List["YaraStringMatch"]]: ... - def _parse_string_identifier(self, identifier: str) -> Tuple[str, str]: ... - def remap( - self, offset_mapper: Optional[OffsetMapper] = None - ) -> "YaraRulesetMatch": ... 
+ self, strings: Iterable[YaraRulesString], offset_mapper: OffsetMapper | None + ) -> dict[str, list[YaraStringMatch]]: ... + def _parse_string_identifier(self, identifier: str) -> tuple[str, str]: ... + def remap(self, offset_mapper: OffsetMapper | None = None) -> YaraRulesetMatch: ... class YaraRulesetOffsets(_Mapper["YaraRuleOffsets"]): _matches: YaraRulesetMatch def __init__(self, matches: YaraRulesetMatch) -> None: super().__init__(elements={}) def remap( - self, offset_mapper: Optional[OffsetMapper] = None - ) -> "YaraRulesetOffsets": ... + self, offset_mapper: OffsetMapper | None = None + ) -> YaraRulesetOffsets: ... YaraStringMatch = namedtuple("YaraStringMatch", ["identifier", "offset", "content"]) -class YaraRuleMatch(_Mapper[List[YaraStringMatch]]): +class YaraRuleMatch(_Mapper[list[YaraStringMatch]]): rule: str name: str - meta: Dict[str, str] + meta: dict[str, str] namespace: str - tags: List[str] + tags: list[str] def __init__( self, rule: str, - strings: Dict[str, List[YaraStringMatch]], - meta: Dict[str, str], + strings: dict[str, list[YaraStringMatch]], + meta: dict[str, str], namespace: str, - tags: List[str], + tags: list[str], ) -> None: super().__init__({}) - def get_offsets(self, string) -> List[int]: ... + def get_offsets(self, string) -> list[int]: ... 
-class YaraRuleOffsets(_Mapper[List[int]]): +class YaraRuleOffsets(_Mapper[list[int]]): rule: str name: str def __init__(self, rule_match: YaraRuleMatch) -> None: From 70c943843f5b754972052ee74d96697e09a8e8d0 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 07:58:49 +0100 Subject: [PATCH 03/24] Consistently use double-quote strings --- docs/conf.py | 31 ++++++++++++++++--------------- setup.py | 2 +- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 60f538d..5ea083c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,17 +12,18 @@ # import os import sys -sys.path.insert(0, os.path.abspath('..')) + +sys.path.insert(0, os.path.abspath("..")) # -- Project information ----------------------------------------------------- -project = 'malduck' -copyright = '2022, CERT Polska' -author = 'CERT Polska' +project = "malduck" +copyright = "2022, CERT Polska" +author = "CERT Polska" # The full version, including alpha/beta/rc tags -version = '4.4.0' +version = "4.4.0" # -- General configuration --------------------------------------------------- @@ -30,35 +31,35 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.viewcode', - 'sphinx_rtd_theme', + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx_rtd_theme", ] html_theme_options = { - 'display_version': True, + "display_version": True, } -project = 'Malduck 🦆' +project = "Malduck 🦆" # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] -master_doc = 'index' +master_doc = "index" # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] diff --git a/setup.py b/setup.py index d4c4f68..9a727da 100644 --- a/setup.py +++ b/setup.py @@ -26,5 +26,5 @@ "Programming Language :: Python :: 3", "Operating System :: POSIX :: Linux", ], - python_requires='>=3.8' + python_requires=">=3.8", ) From a4619788af0abaeb8ca58e1af168ee0192c9611a Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 08:00:56 +0100 Subject: [PATCH 04/24] Consistently use no-arg `super()` inside methods --- malduck/procmem/cuckoomem.py | 2 +- malduck/procmem/procmemdnpe.py | 2 +- malduck/procmem/procmempe.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/malduck/procmem/cuckoomem.py b/malduck/procmem/cuckoomem.py index 8388aae..1075ea6 100644 --- a/malduck/procmem/cuckoomem.py +++ b/malduck/procmem/cuckoomem.py @@ -12,7 +12,7 @@ class CuckooProcessMemory(ProcessMemory): """Wrapper object to operate on process memory dumps in Cuckoo 2.x format.""" def __init__(self, buf: ProcessMemoryBuffer, base: int | None = None, **_) -> None: - super(CuckooProcessMemory, self).__init__(buf) + super().__init__(buf) ptr = 0 self.regions = [] diff --git a/malduck/procmem/procmemdnpe.py b/malduck/procmem/procmemdnpe.py index 7269c74..d6700e8 100644 --- a/malduck/procmem/procmemdnpe.py +++ b/malduck/procmem/procmemdnpe.py @@ -20,7 +20,7 @@ 
def __init__( detect_image: bool = False, ) -> None: self._pe: DnPE | None = None - super(ProcessMemoryPE, self).__init__( + super().__init__( buf, base=base, regions=regions, image=image, detect_image=detect_image ) diff --git a/malduck/procmem/procmempe.py b/malduck/procmem/procmempe.py index 6897d25..70f6cf7 100644 --- a/malduck/procmem/procmempe.py +++ b/malduck/procmem/procmempe.py @@ -61,7 +61,7 @@ def __init__( detect_image: bool = False, ) -> None: self._pe: PE | None = None - super(ProcessMemoryPE, self).__init__( + super().__init__( buf, base=base, regions=regions, image=image, detect_image=detect_image ) From ed8b5769c211fb3b400d03a55d388ef35a896222 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 08:06:38 +0100 Subject: [PATCH 05/24] Use pathlib --- malduck/main.py | 6 ++---- malduck/procmem/procmem.py | 3 ++- setup.py | 6 ++++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/malduck/main.py b/malduck/main.py index c60b2da..f756b2c 100644 --- a/malduck/main.py +++ b/malduck/main.py @@ -62,8 +62,7 @@ def fixpe(mempath, outpath, force, base): outpath = outpath or mempath + ".exe" if not force and os.path.isfile(outpath): click.confirm(f"{outpath} exists. 
Overwrite?", abort=True) - with open(outpath, "wb") as f: - f.write(p.store()) + Path(outpath).write_bytes(p.store()) click.echo(f"Fixed {mempath} => {outpath}") @@ -145,8 +144,7 @@ def echo_config(extract_manager, file_path=None): @click.argument("outpath", type=click.Path()) def extract_resources(filepath, outpath): """Extract PE resources from an EXE into a directory""" - with open(filepath, "rb") as f: - pe = PE(data=f.read()) + pe = PE(data=Path(filepath).read_bytes()) out_dir = Path(outpath) out_dir.mkdir(exist_ok=True) diff --git a/malduck/procmem/procmem.py b/malduck/procmem/procmem.py index ef8a04b..27eb03a 100644 --- a/malduck/procmem/procmem.py +++ b/malduck/procmem/procmem.py @@ -2,6 +2,7 @@ import mmap import re +from pathlib import Path from typing import TYPE_CHECKING, cast from ..disasm import disasm @@ -186,7 +187,7 @@ def from_file(cls, filename, **kwargs): mem = p.readv(...) ... """ - file = open(filename, "rb") + file = Path(filename).open("rb") try: # Allow copy-on-write if hasattr(mmap, "ACCESS_COPY"): diff --git a/setup.py b/setup.py index 9a727da..a8076d3 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +from pathlib import Path + try: from setuptools import setup except ImportError: @@ -7,7 +9,7 @@ name="malduck", version="4.4.0", description="Malduck is your ducky companion in malware analysis journeys", - long_description=open("README.md").read(), + long_description=Path("README.md").read_text(), long_description_content_type="text/markdown", author="CERT Polska", author_email="info@cert.pl", @@ -20,7 +22,7 @@ }, license="GPLv3", include_package_data=True, - install_requires=open("requirements.txt").read().splitlines(), + install_requires=Path("requirements.txt").read_text().splitlines(), url="https://github.com/CERT-Polska/malduck", classifiers=[ "Programming Language :: Python :: 3", From 49632400b189cd868d00a6c7b3f7ad2cc0e49bc2 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 08:08:21 +0100 Subject: [PATCH 06/24] Use 
f-string mini-language for no-prefix hex --- malduck/procmem/binmem.py | 2 +- malduck/procmem/procmem.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py index d407678..6891073 100644 --- a/malduck/procmem/binmem.py +++ b/malduck/procmem/binmem.py @@ -102,4 +102,4 @@ def is_image_loaded_as_memdump(self) -> bool: raise NotImplementedError() def __repr__(self): - return f"{self.__class__.__name__}:{'IMG' if self.is_image else 'DMP'}:{hex(self.imgbase)[2:]}" + return f"{self.__class__.__name__}:{'IMG' if self.is_image else 'DMP'}:{self.imgbase:x}" diff --git a/malduck/procmem/procmem.py b/malduck/procmem/procmem.py index 27eb03a..3808698 100644 --- a/malduck/procmem/procmem.py +++ b/malduck/procmem/procmem.py @@ -904,7 +904,7 @@ def findmz(self, addr): addr -= 0x1000 def __repr__(self): - return f"{self.__class__.__name__}:DMP:{hex(self.imgbase)[2:]}" + return f"{self.__class__.__name__}:DMP:{self.imgbase:x}" procmem = ProcessMemory From f3582cc400256adf8da8942020407d2b579ff80f Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 09:05:04 +0100 Subject: [PATCH 07/24] Make some exprlists multiline with trailing commas --- malduck/crypto/components/pyserpent.py | 4 +-- malduck/disasm.py | 9 +++-- malduck/dnpe.py | 4 ++- malduck/extractor/config_utils.py | 2 +- malduck/extractor/extract_manager.py | 38 ++++++++++++++------ malduck/extractor/extractor.py | 11 +++--- malduck/extractor/extractor.pyi | 26 +++++++++----- malduck/extractor/modules.py | 13 ++++--- malduck/ints.py | 18 +++++++--- malduck/main.py | 5 +-- malduck/pe.py | 13 ++++--- malduck/procmem/binmem.py | 3 +- malduck/procmem/idamem.py | 2 +- malduck/procmem/procmem.py | 43 +++++++++++++++------- malduck/procmem/procmem.pyi | 49 +++++++++++++++++++++----- malduck/procmem/procmemdnpe.py | 6 +++- malduck/procmem/procmemelf.py | 8 +++-- malduck/procmem/procmempe.py | 11 ++++-- malduck/procmem/region.py | 8 ++++- 
malduck/string/inet.py | 2 +- malduck/yara.py | 29 +++++++++------ malduck/yara.pyi | 25 +++++++++---- 22 files changed, 236 insertions(+), 93 deletions(-) diff --git a/malduck/crypto/components/pyserpent.py b/malduck/crypto/components/pyserpent.py index 3153c74..4e453cf 100644 --- a/malduck/crypto/components/pyserpent.py +++ b/malduck/crypto/components/pyserpent.py @@ -2958,10 +2958,10 @@ def decrypt(key: list[int], in_blk: list[int]) -> None: ) __testdat = b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" assert b"\xde&\x9f\xf83\xe42\xb8[.\x88\xd2p\x1c\xe7\\" == Serpent(__testkey).encrypt( - __testdat + __testdat, ) assert __testdat == Serpent(__testkey).decrypt( - b"\xde&\x9f\xf83\xe42\xb8[.\x88\xd2p\x1c\xe7\\" + b"\xde&\x9f\xf83\xe42\xb8[.\x88\xd2p\x1c\xe7\\", ) diff --git a/malduck/disasm.py b/malduck/disasm.py index b40d029..24d51c2 100644 --- a/malduck/disasm.py +++ b/malduck/disasm.py @@ -267,7 +267,11 @@ def __init__(self) -> None: Operand.regs[getattr(capstone.x86, reg)] = reg.split("_")[2].lower() def disassemble( - self, data: bytes, addr: int, x64: bool = False, count: int = 0 + self, + data: bytes, + addr: int, + x64: bool = False, + count: int = 0, ) -> Iterator[Instruction]: """ Disassembles data from specific address @@ -293,7 +297,8 @@ def disassemble( import capstone cs = capstone.Cs( - capstone.CS_ARCH_X86, capstone.CS_MODE_64 if x64 else capstone.CS_MODE_32 + capstone.CS_ARCH_X86, + capstone.CS_MODE_64 if x64 else capstone.CS_MODE_32, ) cs.detail = True for insn in cs.disasm(data, addr, count): diff --git a/malduck/dnpe.py b/malduck/dnpe.py index b109a19..b9209c1 100644 --- a/malduck/dnpe.py +++ b/malduck/dnpe.py @@ -59,7 +59,9 @@ def dn_flags(self) -> Any: return self.pe.net.flags def dn_user_string( - self, index: int, encoding="utf-16" + self, + index: int, + encoding="utf-16", ) -> dnfile.stream.UserString | None: if not self.dn_user_strings or self.dn_user_strings.sizeof() == 0: return None diff --git 
a/malduck/extractor/config_utils.py b/malduck/extractor/config_utils.py index ae45ecc..c8c3719 100644 --- a/malduck/extractor/config_utils.py +++ b/malduck/extractor/config_utils.py @@ -67,6 +67,6 @@ def apply_config_part(base_config: Config, new_config_part: Config) -> Config: else: raise RuntimeError( f"Extractor tries to override '{config[k]}' " - f"value of '{k}' with '{v}'" + f"value of '{k}' with '{v}'", ) return config diff --git a/malduck/extractor/extract_manager.py b/malduck/extractor/extract_manager.py index 740c456..376128e 100644 --- a/malduck/extractor/extract_manager.py +++ b/malduck/extractor/extract_manager.py @@ -74,7 +74,10 @@ def on_error(self, exc: Exception, extractor: Extractor) -> None: self.on_extractor_error(exc, extractor, "handle_yara") def on_extractor_error( - self, exc: Exception, extractor: Extractor, method_name: str + self, + exc: Exception, + extractor: Extractor, + method_name: str, ) -> None: """ Handler for all exceptions raised by extractor methods (including :py:meth:`Extractor.handle_yara`). 
@@ -131,7 +134,7 @@ def carve_procmem(self, p: ProcessMemory) -> list[ProcessMemoryBinary]: for carved_bin in carved_bins: log.debug( f"carve: Found {carved_bin.__class__.__name__} " - f"at offset {carved_bin.regions[0].offset}" + f"at offset {carved_bin.regions[0].offset}", ) binaries += carved_bins return binaries @@ -162,7 +165,9 @@ def push_config(self, config: Config) -> bool: del self.configs[stored_family] self.configs[family] = config log.debug( - "%s config looks better (overrides %s)", family, stored_family + "%s config looks better (overrides %s)", + family, + stored_family, ) return True elif score == 1: @@ -198,7 +203,9 @@ def _extract_procmem(self, p: ProcessMemory, matches) -> Optional[str]: return None def push_procmem( - self, p: ProcessMemory, rip_binaries: bool = False + self, + p: ProcessMemory, + rip_binaries: bool = False, ) -> Optional[str]: """ Pushes ProcessMemory object for extraction @@ -247,7 +254,10 @@ def family(self) -> Optional[str]: return self.collected_config.get("family") def on_extractor_error( - self, exc: Exception, extractor: Extractor, method_name: str + self, + exc: Exception, + extractor: Extractor, + method_name: str, ) -> None: """ Handler for all exceptions raised by extractor methods. @@ -262,7 +272,9 @@ def on_extractor_error( self.parent.on_extractor_error(exc, extractor, method_name) def push_procmem( - self, p: ProcessMemory, _matches: Optional[YaraRulesetMatch] = None + self, + p: ProcessMemory, + _matches: Optional[YaraRulesetMatch] = None, ) -> None: """ Pushes ProcessMemory object for extraction @@ -279,7 +291,7 @@ def push_procmem( if type(extractor.yara_rules) is str: raise TypeError( - f'"{extractor.__class__.__name__}.yara_rules" cannot be a string, convert it into a list of strings' + f'"{extractor.__class__.__name__}.yara_rules" cannot be a string, convert it into a list of strings', ) # For each rule identifier in extractor.yara_rules... 
@@ -292,7 +304,8 @@ def push_procmem( DeprecationWarning, ) getattr(extractor, "handle_yara")( - p, YaraRuleOffsets(matches[rule]) + p, + YaraRuleOffsets(matches[rule]), ) else: extractor.handle_match(p, matches[rule]) @@ -332,19 +345,22 @@ def push_config(self, config: Config, extractor: Extractor) -> None: if "family" in config: log.debug( - "%s tells it's %s", extractor.__class__.__name__, config["family"] + "%s tells it's %s", + extractor.__class__.__name__, + config["family"], ) if ( "family" in self.collected_config and self.collected_config["family"] != config["family"] ): overrides = self.parent.modules.compare_family_overrides( - config["family"], self.collected_config["family"] + config["family"], + self.collected_config["family"], ) if not overrides: raise RuntimeError( f"Ripped both {self.collected_config['family']} and {config['family']} " - f"from the same ProcessMemory which is not expected" + f"from the same ProcessMemory which is not expected", ) if overrides == -1: self.collected_config["family"] = config["family"] diff --git a/malduck/extractor/extractor.py b/malduck/extractor/extractor.py index 7ad1ad1..82bcffc 100644 --- a/malduck/extractor/extractor.py +++ b/malduck/extractor/extractor.py @@ -389,14 +389,15 @@ def log(self): :return: :class:`logging.Logger` """ return logging.getLogger( - f"{self.__class__.__module__}.{self.__class__.__name__}" + f"{self.__class__.__module__}.{self.__class__.__name__}", ) def _get_methods(self, method_type): return ( (name, method) for name, method in inspect.getmembers( - self.__class__, predicate=lambda member: isinstance(member, method_type) + self.__class__, + predicate=lambda member: isinstance(member, method_type), ) if isinstance(method, method_type) ) @@ -552,7 +553,7 @@ def final(method): def needs_pe(method): if not isinstance(method, ExtractorMethod): raise TypeError( - "@needs_pe decorator must be placed before @final/@rule/@extractor decorator" + "@needs_pe decorator must be placed before 
@final/@rule/@extractor decorator", ) method.procmem_type = ProcessMemoryPE return method @@ -561,7 +562,7 @@ def needs_pe(method): def needs_elf(method): if not isinstance(method, ExtractorMethod): raise TypeError( - "@needs_elf decorator must be placed before @final/@rule/@extractor decorator" + "@needs_elf decorator must be placed before @final/@rule/@extractor decorator", ) method.procmem_type = ProcessMemoryELF return method @@ -570,7 +571,7 @@ def needs_elf(method): def weak(method): if not isinstance(method, ExtractorMethod): raise TypeError( - "@weak decorator must be placed before @final/@rule/@extractor decorator" + "@weak decorator must be placed before @final/@rule/@extractor decorator", ) method.weak = True return method diff --git a/malduck/extractor/extractor.pyi b/malduck/extractor/extractor.pyi index 81c5da8..33b5e48 100644 --- a/malduck/extractor/extractor.pyi +++ b/malduck/extractor/extractor.pyi @@ -19,7 +19,11 @@ class _StringOffsetCallback(Protocol[T, U]): class _StringCallback(Protocol[T, U]): def __call__( - cls, self: T, p: U, addr: int, match: YaraStringMatch + cls, + self: T, + p: U, + addr: int, + match: YaraStringMatch, ) -> Config | bool | None: ... 
class _RuleCallback(Protocol[T, U]): @@ -55,21 +59,27 @@ class ExtractorMethod(Generic[T, U]): class StringOffsetExtractorMethod(ExtractorMethod[T, U]): string_name: str def __init__( - self, method: _StringOffsetCallback[T, U], string_name: str | None = None + self, + method: _StringOffsetCallback[T, U], + string_name: str | None = None, ) -> None: super().__init__(method) class StringExtractorMethod(ExtractorMethod[T, U]): string_names: list[str] def __init__( - self, method: _StringCallback[T, U], string_names: list[str] | None = None + self, + method: _StringCallback[T, U], + string_names: list[str] | None = None, ) -> None: super().__init__(method) class RuleExtractorMethod(ExtractorMethod[T, U]): rule_name: str def __init__( - self, method: _RuleCallback[T, U], rule_name: str | None = None + self, + method: _RuleCallback[T, U], + rule_name: str | None = None, ) -> None: super().__init__(method) @@ -100,7 +110,7 @@ class Extractor: @overload @staticmethod def extractor( - string_or_method: _StringOffsetCallback[T, U] + string_or_method: _StringOffsetCallback[T, U], ) -> StringOffsetExtractorMethod[T, U]: ... @overload @staticmethod @@ -110,7 +120,7 @@ class Extractor: @overload @staticmethod def string( - *strings_or_method: _StringCallback[T, U] + *strings_or_method: _StringCallback[T, U], ) -> StringExtractorMethod[T, U]: ... @overload @staticmethod @@ -129,11 +139,11 @@ class Extractor: def final(method: _FinalCallback[T, U]) -> FinalExtractorMethod[T, U]: ... @staticmethod def needs_pe( - method: ExtractorMethod[T, ProcessMemoryPE] + method: ExtractorMethod[T, ProcessMemoryPE], ) -> ExtractorMethod[T, ProcessMemoryPE]: ... @staticmethod def needs_elf( - method: ExtractorMethod[T, ProcessMemoryELF] + method: ExtractorMethod[T, ProcessMemoryELF], ) -> ExtractorMethod[T, ProcessMemoryELF]: ... @staticmethod def weak(method: ExtractorMethod[T, U]) -> ExtractorMethod[T, U]: ... 
diff --git a/malduck/extractor/modules.py b/malduck/extractor/modules.py index 9fa9c10..e0b45cc 100644 --- a/malduck/extractor/modules.py +++ b/malduck/extractor/modules.py @@ -102,7 +102,7 @@ def make_override_paths(extractors: list[type[Extractor]]) -> dict[str, list[str def make_override_path(node, visited, current_path=None): if node in visited: raise RuntimeError( - f"Override cycle detected: {node} already visited during tree traversal" + f"Override cycle detected: {node} already visited during tree traversal", ) visited.add(node) unvisited.remove(node) @@ -121,7 +121,7 @@ def make_override_path(node, visited, current_path=None): # Root undetected if unvisited: raise RuntimeError( - f"Override cycle detected: {list(unvisited)} not visited during tree traversal" + f"Override cycle detected: {list(unvisited)} not visited during tree traversal", ) return dict(override_paths) @@ -149,7 +149,8 @@ def import_module_by_finder(finder: PathEntryFinder, module_name: str) -> Any: def load_modules( - search_path: str, onerror: Callable[[Exception, str], None] | None = None + search_path: str, + onerror: Callable[[Exception, str], None] | None = None, ) -> dict[str, Any]: """ Loads plugin modules under specified paths @@ -165,7 +166,8 @@ def load_modules( """ modules: dict[str, Any] = {} for finder, module_name, is_pkg in pkgutil.iter_modules( - [search_path], "malduck.extractor.modules." 
+ [search_path], + "malduck.extractor.modules.", ): if not is_pkg: continue @@ -173,7 +175,8 @@ def load_modules( log.warning("Module collision - %s overridden", module_name) try: modules[module_name] = import_module_by_finder( - cast(PathEntryFinder, finder), module_name + cast(PathEntryFinder, finder), + module_name, ) except Exception as exc: if onerror: diff --git a/malduck/ints.py b/malduck/ints.py index 3593b82..b252e53 100644 --- a/malduck/ints.py +++ b/malduck/ints.py @@ -45,7 +45,9 @@ class MultipliedIntTypeBase(IntTypeBase, Generic[T], metaclass=ABCMeta): @staticmethod @abstractmethod def unpack( - other: bytes, offset: int = 0, foxed: bool = False + other: bytes, + offset: int = 0, + foxed: bool = False, ) -> tuple[T, ...] | int | None: raise NotImplementedError() @@ -85,7 +87,9 @@ class MultipliedIntTypeClass(MultipliedIntTypeBase): @staticmethod def unpack( - other: bytes, offset: int = 0, fixed: bool = True + other: bytes, + offset: int = 0, + fixed: bool = True, ) -> tuple[T, ...] | int | None: """ Unpacks multiple values from provided buffer @@ -248,7 +252,10 @@ def pack_be(self) -> bytes: @classmethod def unpack( - cls, other: bytes, offset: int = 0, fixed: bool = True + cls, + other: bytes, + offset: int = 0, + fixed: bool = True, ) -> IntType | int | None: """ Unpacks single value from provided buffer with little-endian order @@ -272,7 +279,10 @@ def unpack( @classmethod def unpack_be( - cls, other: bytes, offset: int = 0, fixed: bool = True + cls, + other: bytes, + offset: int = 0, + fixed: bool = True, ) -> IntType | int | None: """ Unpacks single value from provided buffer with big-endian order diff --git a/malduck/main.py b/malduck/main.py index f756b2c..e67f7ef 100644 --- a/malduck/main.py +++ b/malduck/main.py @@ -56,7 +56,7 @@ def fixpe(mempath, outpath, force, base): with ProcessMemoryPE.from_file(mempath, base=base) as p: if not force and p.is_image_loaded_as_memdump(): click.echo( - "Input file looks like correct PE file. 
Use -f if you want to fix it anyway." + "Input file looks like correct PE file. Use -f if you want to fix it anyway.", ) return 1 outpath = outpath or mempath + ".exe" @@ -119,7 +119,8 @@ def echo_config(extract_manager, file_path=None): for path in paths: if os.path.isdir(path): files = filter( - os.path.isfile, map(lambda f: os.path.join(path, f), os.listdir(path)) + os.path.isfile, + map(lambda f: os.path.join(path, f), os.listdir(path)), ) elif os.path.isfile(path): files = [path] diff --git a/malduck/pe.py b/malduck/pe.py index f4374cd..7f9fe8f 100644 --- a/malduck/pe.py +++ b/malduck/pe.py @@ -57,7 +57,7 @@ def find(self, str: bytes, beg: int = 0, end: int | None = None) -> int: return -1 try: return next( - self.memory.regexv(str, self.memory.imgbase + beg, end and end - beg) + self.memory.regexv(str, self.memory.imgbase + beg, end and end - beg), ) except StopIteration: return -1 @@ -183,13 +183,15 @@ def validate_import_names(self) -> bool: # Don't go further than 8 entries for _ in range(8): import_desc = self.structure( - import_rva, pefile.PE.__IMAGE_IMPORT_DESCRIPTOR_format__ + import_rva, + pefile.PE.__IMAGE_IMPORT_DESCRIPTOR_format__, ) if import_desc.all_zeroes(): # End of import-table break import_dllname = self.pe.get_string_at_rva( - import_desc.Name, pefile.MAX_DLL_LENGTH + import_desc.Name, + pefile.MAX_DLL_LENGTH, ) if not pefile.is_valid_dos_filename(import_dllname): # Invalid import filename found @@ -210,7 +212,8 @@ def validate_resources(self) -> bool: try: resource_rva = resource_dir.VirtualAddress resource_desc = self.structure( - resource_rva, pefile.PE.__IMAGE_RESOURCE_DIRECTORY_format__ + resource_rva, + pefile.PE.__IMAGE_RESOURCE_DIRECTORY_format__, ) resource_no = ( resource_desc.NumberOfNamedEntries + resource_desc.NumberOfIdEntries @@ -225,7 +228,7 @@ def validate_resources(self) -> bool: ) if ( self.pe.get_word_at_rva( - resource_rva + resource_entry_desc.OffsetToData & 0x7FFFFFFF + resource_rva + resource_entry_desc.OffsetToData 
& 0x7FFFFFFF, ) is None ): diff --git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py index 6891073..000f9b8 100644 --- a/malduck/procmem/binmem.py +++ b/malduck/procmem/binmem.py @@ -60,7 +60,8 @@ def image(self: T) -> T | None: import traceback log.debug( - "image construction raised an exception: %s", traceback.format_exc() + "image construction raised an exception: %s", + traceback.format_exc(), ) return None diff --git a/malduck/procmem/idamem.py b/malduck/procmem/idamem.py index 1ce0413..f7d1d94 100644 --- a/malduck/procmem/idamem.py +++ b/malduck/procmem/idamem.py @@ -72,7 +72,7 @@ class IDAProcessMemory(ProcessMemory): def __init__(self): if not IDAPYTHON: raise RuntimeError( - "This class is intended to work only in IDAPython context" + "This class is intended to work only in IDAPython context", ) regions = [] for seg in idautils.Segments(): diff --git a/malduck/procmem/procmem.py b/malduck/procmem/procmem.py index 3808698..bbd48d1 100644 --- a/malduck/procmem/procmem.py +++ b/malduck/procmem/procmem.py @@ -110,7 +110,7 @@ def __init__(self, buf, base=0, regions=None, **_): self.memory = buf else: raise TypeError( - "Wrong buffer type - must be bytes, bytearray, mmap object or MemoryBuffer" + "Wrong buffer type - must be bytes, bytearray, mmap object or MemoryBuffer", ) self.imgbase = base @@ -218,7 +218,10 @@ def from_memory(cls, memory, base=None, **kwargs): :rtype: :class:`ProcessMemory` """ copied = cls( - memory.m, base=base or memory.imgbase, regions=memory.regions, **kwargs + memory.m, + base=base or memory.imgbase, + regions=memory.regions, + **kwargs, ) return copied @@ -236,7 +239,12 @@ def length(self): return 0 def iter_regions( - self, addr=None, offset=None, length=None, contiguous=False, trim=False + self, + addr=None, + offset=None, + length=None, + contiguous=False, + trim=False, ): """ Iterates over Region objects starting at provided virtual address or offset @@ -264,11 +272,11 @@ def iter_regions( """ if addr is not None and 
offset is not None: raise ValueError( - "'addr' and 'offset' arguments should be provided exclusively" + "'addr' and 'offset' arguments should be provided exclusively", ) if addr is None and offset is None and contiguous: raise ValueError( - "Starting point (addr or offset) must be provided for contiguous regions" + "Starting point (addr or offset) must be provided for contiguous regions", ) if length and length < 0: raise ValueError("Length can't be less than 0") @@ -302,7 +310,7 @@ def iter_regions( else: if length is not None: raise ValueError( - "Don't know how to retrieve length-limited regions with offset from unmapped area" + "Don't know how to retrieve length-limited regions with offset from unmapped area", ) offset = region.offset # If we're out of length after adjustment: time to stop @@ -313,7 +321,7 @@ def iter_regions( if addr is None: if offset is None: raise RuntimeError( - "Something went wrong, starting region offset is set to None?" + "Something went wrong, starting region offset is set to None?", ) addr = region.p2v(offset) # Continue enumeration @@ -345,7 +353,10 @@ def v2p(self, addr, length=None): return None mapping_length = 0 for region in self.iter_regions( - addr=addr, length=length, contiguous=True, trim=True + addr=addr, + length=length, + contiguous=True, + trim=True, ): if length is None: return region.v2p(addr) @@ -371,7 +382,10 @@ def p2v(self, off, length=None): return None mapping_length = 0 for region in self.iter_regions( - offset=off, length=length, contiguous=True, trim=True + offset=off, + length=length, + contiguous=True, + trim=True, ): if length is None: return region.p2v(off) @@ -438,7 +452,10 @@ def readv_regions(self, addr=None, length=None, contiguous=True): current_strings: list[bytes] = [] prev_region = None for region in self.iter_regions( - addr=addr, length=length, contiguous=contiguous, trim=True + addr=addr, + length=length, + contiguous=contiguous, + trim=True, ): if not prev_region or prev_region.end != 
region.addr: if current_strings: @@ -532,7 +549,7 @@ def patchv(self, addr, buf): # Boundary check if region is None or region.end < (addr + len(buf)): raise ValueError( - "Patched bytes range must be contained within single, existing region" + "Patched bytes range must be contained within single, existing region", ) return self.patchp(region.v2p(addr), buf) @@ -821,7 +838,9 @@ def map_offset(off, len): return ptr return ruleset.match( - offset_mapper=map_offset, extended=extended, data=self.readp(0) + offset_mapper=map_offset, + extended=extended, + data=self.readp(0), ) def _findbytes(self, yara_fn, query, addr, length): diff --git a/malduck/procmem/procmem.pyi b/malduck/procmem/procmem.pyi index e603056..99ef374 100644 --- a/malduck/procmem/procmem.pyi +++ b/malduck/procmem/procmem.pyi @@ -60,7 +60,10 @@ class ProcessMemory: def from_file(cls: type[T], filename: str, **kwargs) -> T: ... @classmethod def from_memory( - cls: type[T], memory: ProcessMemory, base: int = None, **kwargs + cls: type[T], + memory: ProcessMemory, + base: int = None, + **kwargs, ) -> T: ... @property def length(self) -> int: ... @@ -193,16 +196,28 @@ class ProcessMemory: length: int | None = None, ) -> Iterator[int]: ... def findp( - self, query: bytes, offset: int | None = None, length: int | None = None + self, + query: bytes, + offset: int | None = None, + length: int | None = None, ) -> Iterator[int]: ... def findv( - self, query: bytes, addr: int | None = None, length: int | None = None + self, + query: bytes, + addr: int | None = None, + length: int | None = None, ) -> Iterator[int]: ... def regexp( - self, query: bytes, offset: int | None = None, length: int | None = None + self, + query: bytes, + offset: int | None = None, + length: int | None = None, ) -> Iterator[int]: ... def regexv( - self, query: bytes, addr: int | None = None, length: int | None = None + self, + query: bytes, + addr: int | None = None, + length: int | None = None, ) -> Iterator[int]: ... 
def disasmv( self, @@ -244,12 +259,20 @@ class ProcessMemory: # yarap(ruleset, 0, extended=True) @overload def yarap( - self, ruleset: Yara, offset: int | None, *, extended: Literal[True] + self, + ruleset: Yara, + offset: int | None, + *, + extended: Literal[True], ) -> YaraRulesetMatch: ... # yarap(ruleset, length=0, extended=True) @overload def yarap( - self, ruleset: Yara, *, length: int | None, extended: Literal[True] + self, + ruleset: Yara, + *, + length: int | None, + extended: Literal[True], ) -> YaraRulesetMatch: ... # yarav(ruleset) # yarav(ruleset, addr) @@ -279,12 +302,20 @@ class ProcessMemory: # yarav(ruleset, 0, extended=True) @overload def yarav( - self, ruleset: Yara, addr: int | None, *, extended: Literal[True] + self, + ruleset: Yara, + addr: int | None, + *, + extended: Literal[True], ) -> YaraRulesetMatch: ... # yarav(ruleset, length=0, extended=True) @overload def yarav( - self, ruleset: Yara, *, length: int | None, extended: Literal[True] + self, + ruleset: Yara, + *, + length: int | None, + extended: Literal[True], ) -> YaraRulesetMatch: ... 
def _findbytes( self, diff --git a/malduck/procmem/procmemdnpe.py b/malduck/procmem/procmemdnpe.py index d6700e8..5c02a9e 100644 --- a/malduck/procmem/procmemdnpe.py +++ b/malduck/procmem/procmemdnpe.py @@ -21,7 +21,11 @@ def __init__( ) -> None: self._pe: DnPE | None = None super().__init__( - buf, base=base, regions=regions, image=image, detect_image=detect_image + buf, + base=base, + regions=regions, + image=image, + detect_image=detect_image, ) def _pe_direct_load(self, fast_load: bool = True) -> DnPE: diff --git a/malduck/procmem/procmemelf.py b/malduck/procmem/procmemelf.py index 8dc376a..286bb8f 100644 --- a/malduck/procmem/procmemelf.py +++ b/malduck/procmem/procmemelf.py @@ -34,7 +34,11 @@ def __init__( ) -> None: self._elf = None super().__init__( - buf, base=base, regions=regions, image=image, detect_image=detect_image + buf, + base=base, + regions=regions, + image=image, + detect_image=detect_image, ) def _elf_direct_load(self) -> elftools.elf.elffile.ELFFile: @@ -86,7 +90,7 @@ def _reload_as_image(self) -> None: segment.header["p_type"], 0, # TODO: protect flags segment.header["p_offset"] - presegment_len, - ) + ), ) if len(regions) == 0: raise elftools.elf.elffile.ELFError("No regions in ELF file!") diff --git a/malduck/procmem/procmempe.py b/malduck/procmem/procmempe.py index 70f6cf7..3ccb7a7 100644 --- a/malduck/procmem/procmempe.py +++ b/malduck/procmem/procmempe.py @@ -62,7 +62,11 @@ def __init__( ) -> None: self._pe: PE | None = None super().__init__( - buf, base=base, regions=regions, image=image, detect_image=detect_image + buf, + base=base, + regions=regions, + image=image, + detect_image=detect_image, ) def _pe_direct_load(self, fast_load: bool = True) -> PE: @@ -96,7 +100,7 @@ def _reload_as_image(self) -> None: 0, 0, section.PointerToRawData, - ) + ), ) def is_valid(self) -> bool: @@ -179,7 +183,8 @@ def store(self) -> bytes: section_size = align(section_size, file_alignment) # Read section data including appropriate padding section_data = 
self.readv( - self.imgbase + section.VirtualAddress, section_size + self.imgbase + section.VirtualAddress, + section_size, ) section_data += (section_size - len(section_data)) * b"\x00" data.append(section_data) diff --git a/malduck/procmem/region.py b/malduck/procmem/region.py index 563d762..4ab6e5e 100644 --- a/malduck/procmem/region.py +++ b/malduck/procmem/region.py @@ -37,7 +37,13 @@ class Region: """Represents single mapped region in :class:`ProcessMemory`""" def __init__( - self, addr: int, size: int, state: int, type_: int, protect: int, offset: int + self, + addr: int, + size: int, + state: int, + type_: int, + protect: int, + offset: int, ) -> None: self.addr = addr self.size = size diff --git a/malduck/string/inet.py b/malduck/string/inet.py index 1d8b4e8..ae4a646 100644 --- a/malduck/string/inet.py +++ b/malduck/string/inet.py @@ -12,7 +12,7 @@ ipv4_regex = re.compile( b"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}" - b"([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$" + b"([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$", ) diff --git a/malduck/yara.py b/malduck/yara.py index 55881d8..ea69d59 100644 --- a/malduck/yara.py +++ b/malduck/yara.py @@ -118,7 +118,11 @@ class Yara: """ def __init__( - self, rule_paths=None, name="r", strings=None, condition="any of them" + self, + rule_paths=None, + name="r", + strings=None, + condition="any of them", ): if rule_paths: self.rules = yara.compile(filepaths=rule_paths) @@ -134,7 +138,7 @@ def __init__( [ f"${key} = {str(YaraString(value) if isinstance(value, str) else value)}" for key, value in strings.items() - ] + ], ) yara_source = textwrap.dedent( f""" @@ -144,7 +148,7 @@ def __init__( condition: {condition} }} - """ + """, ) self.rules = yara.compile(source=yara_source) @@ -172,7 +176,7 @@ def from_dir(path, recursive=True, followlinks=True): if ruleset_name in rule_paths: log.warning( f"Yara file name collision - {rule_paths[ruleset_name]} " - f"overridden by {ruleset_path}" + f"overridden 
by {ruleset_path}", ) rule_paths[ruleset_name] = ruleset_path if not recursive: @@ -196,7 +200,8 @@ def match(self, offset_mapper=None, extended=False, **kwargs): if extended is set to True """ matches = YaraRulesetMatch( - self.rules.match(**kwargs), offset_mapper=offset_mapper + self.rules.match(**kwargs), + offset_mapper=offset_mapper, ) return YaraRulesetOffsets(matches) if not extended else matches @@ -258,7 +263,11 @@ def _map_matches(self, matches, offset_mapper): ] return { match.rule: YaraRuleMatch( - match.rule, strings, match.meta, match.namespace, match.tags + match.rule, + strings, + match.meta, + match.namespace, + match.tags, ) for match, strings in mapped_matches if strings @@ -290,12 +299,12 @@ def _map_strings(self, strings, offset_mapper): offset = _offset # Register offset for full identifier mapped_strings[real_ident].append( - YaraStringMatch(real_ident, offset, content) + YaraStringMatch(real_ident, offset, content), ) # Register offset for grouped identifier if real_ident != group_ident: mapped_strings[group_ident].append( - YaraStringMatch(real_ident, offset, content) + YaraStringMatch(real_ident, offset, content), ) return mapped_strings @@ -314,7 +323,7 @@ class YaraRulesetOffsets(_Mapper): def __init__(self, matches): self._matches = matches super().__init__( - elements={k: YaraRuleOffsets(v) for k, v in matches.elements.items()} + elements={k: YaraRuleOffsets(v) for k, v in matches.elements.items()}, ) def remap(self, offset_mapper=None): @@ -337,7 +346,7 @@ def __init__(self, rule, strings, meta, namespace, tags): self.namespace = namespace self.tags = tags super().__init__( - elements={k: sorted(v, key=lambda s: s.offset) for k, v in strings.items()} + elements={k: sorted(v, key=lambda s: s.offset) for k, v in strings.items()}, ) def get_offsets(self, string): diff --git a/malduck/yara.pyi b/malduck/yara.pyi index 444379b..6e61f26 100644 --- a/malduck/yara.pyi +++ b/malduck/yara.pyi @@ -40,7 +40,9 @@ class Yara: ) -> None: ... 
@staticmethod def from_dir( - path: str, recursive: bool = True, followlinks: bool = True + path: str, + recursive: bool = True, + followlinks: bool = True, ) -> Yara: ... # match(...) # match(offset_mapper, ...) @@ -55,7 +57,10 @@ class Yara: # match(offset_mapper, extended=True, ...) @overload def match( - self, offset_mapper: OffsetMapper | None, extended: Literal[True], **kwargs + self, + offset_mapper: OffsetMapper | None, + extended: Literal[True], + **kwargs, ) -> YaraRulesetMatch: ... # match(extended=True, ...) @overload @@ -75,7 +80,10 @@ class YaraString: type: YaraStringType modifiers: list[str] def __init__( - self, value: str, type: YaraStringType = YaraStringType.TEXT, **modifiers: bool + self, + value: str, + type: YaraStringType = YaraStringType.TEXT, + **modifiers: bool, ) -> None: ... def __str__(self) -> str: ... @@ -88,10 +96,14 @@ class YaraRulesetMatch(_Mapper["YaraRuleMatch"]): ) -> None: super().__init__(elements={}) def _map_matches( - self, matches: list[YaraRulesMatch], offset_mapper: OffsetMapper | None + self, + matches: list[YaraRulesMatch], + offset_mapper: OffsetMapper | None, ) -> dict[str, YaraRuleMatch]: ... def _map_strings( - self, strings: Iterable[YaraRulesString], offset_mapper: OffsetMapper | None + self, + strings: Iterable[YaraRulesString], + offset_mapper: OffsetMapper | None, ) -> dict[str, list[YaraStringMatch]]: ... def _parse_string_identifier(self, identifier: str) -> tuple[str, str]: ... def remap(self, offset_mapper: OffsetMapper | None = None) -> YaraRulesetMatch: ... @@ -101,7 +113,8 @@ class YaraRulesetOffsets(_Mapper["YaraRuleOffsets"]): def __init__(self, matches: YaraRulesetMatch) -> None: super().__init__(elements={}) def remap( - self, offset_mapper: OffsetMapper | None = None + self, + offset_mapper: OffsetMapper | None = None, ) -> YaraRulesetOffsets: ... 
YaraStringMatch = namedtuple("YaraStringMatch", ["identifier", "offset", "content"]) From 719610fc4de3e6d3d247f458be147ef2c645a7fe Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 09:42:12 +0100 Subject: [PATCH 08/24] Make use of next() default value argument --- malduck/pe.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/malduck/pe.py b/malduck/pe.py index 7f9fe8f..e857215 100644 --- a/malduck/pe.py +++ b/malduck/pe.py @@ -55,12 +55,10 @@ def __getitem__(self, item: Any) -> object: def find(self, str: bytes, beg: int = 0, end: int | None = None) -> int: if end and beg >= end: return -1 - try: - return next( - self.memory.regexv(str, self.memory.imgbase + beg, end and end - beg), - ) - except StopIteration: - return -1 + return next( + self.memory.regexv(str, self.memory.imgbase + beg, end and end - beg), + -1, + ) class PE: @@ -329,10 +327,7 @@ def resource(self, name: int | str | bytes) -> bytes | None: :type name: int or str or bytes :rtype: bytes or None """ - try: - return next(self.resources(name)) - except StopIteration: - return None + return next(self.resources(name), None) pe = PE From bd57af3a93f118fe4f7fbac7586b0c6f1dfd7ffe Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 16:49:44 +0100 Subject: [PATCH 09/24] Fix line length excess --- malduck/compression/aplib.py | 13 ++- malduck/compression/components/aplib.py | 5 +- malduck/compression/lznt1.py | 3 +- malduck/crypto/aes.py | 6 +- malduck/crypto/rsa.py | 3 +- malduck/disasm.py | 5 +- malduck/extractor/extract_manager.py | 29 ++++--- malduck/extractor/extractor.py | 101 ++++++++++++++++-------- malduck/extractor/modules.py | 17 ++-- malduck/ints.py | 27 +++++-- malduck/main.py | 18 ++++- malduck/pe.py | 26 ++++-- malduck/procmem/binmem.py | 12 ++- malduck/procmem/procmem.py | 100 +++++++++++++++-------- malduck/procmem/procmemelf.py | 4 +- malduck/procmem/procmempe.py | 20 +++-- malduck/procmem/region.py | 3 +- malduck/yara.py | 33 +++++--- 18 
files changed, 288 insertions(+), 137 deletions(-) diff --git a/malduck/compression/aplib.py b/malduck/compression/aplib.py index 957cb04..c464adb 100644 --- a/malduck/compression/aplib.py +++ b/malduck/compression/aplib.py @@ -20,13 +20,20 @@ class aPLib: from malduck import aplib # Headerless compressed buffer - aplib(b'T\x00he quick\xecb\x0erown\xcef\xaex\x80jumps\xed\xe4veur`t?lazy\xead\xfeg\xc0\x00') + aplib( + b'T\x00he quick\xecb\x0erown\xcef\xaex\x80' + b'jumps\xed\xe4veur`t?lazy\xead\xfeg\xc0\x00' + ) # Header included - aplib(b'AP32\x18\x00\x00\x00\r\x00\x00\x00\xbc\x9ab\x9b\x0b\x00\x00\x00\x85\x11J\rh8el\x8eo wnr\xecd\x00') + aplib( + b'AP32\x18\x00\x00\x00\r\x00\x00\x00\xbc\x9ab' + b'\x9b\x0b\x00\x00\x00\x85\x11J\rh8el\x8eo wnr\xecd\x00' + ) :param buf: Buffer to decompress :type buf: bytes - :param headerless: Force headerless decompression (don't perform 'AP32' magic detection) + :param headerless: + Force headerless decompression (don't perform 'AP32' magic detection) :type headerless: bool (default: `True`) :rtype: bytes """ diff --git a/malduck/compression/components/aplib.py b/malduck/compression/components/aplib.py index 06187fd..0bba05c 100644 --- a/malduck/compression/components/aplib.py +++ b/malduck/compression/components/aplib.py @@ -134,7 +134,10 @@ def pack(self): def main(): # self-test - data = b"T\x00he quick\xecb\x0erown\xcef\xaex\x80jumps\xed\xe4veur`t?lazy\xead\xfeg\xc0\x00" + data = ( + b"T\x00he quick\xecb\x0erown\xcef\xaex\x80" + b"jumps\xed\xe4veur`t?lazy\xead\xfeg\xc0\x00" + ) assert APLib(data).depack() == b"The quick brown fox jumps over the lazy dog" diff --git a/malduck/compression/lznt1.py b/malduck/compression/lznt1.py index 89f38a8..4fcf194 100644 --- a/malduck/compression/lznt1.py +++ b/malduck/compression/lznt1.py @@ -7,7 +7,8 @@ class Lznt1: """ - Implementation of LZNT1 decompression. Allows to decompress data compressed by RtlCompressBuffer + Implementation of LZNT1 decompression. 
+ Allows to decompress data compressed by RtlCompressBuffer .. code-block:: python diff --git a/malduck/crypto/aes.py b/malduck/crypto/aes.py index f2d04c9..51a8822 100644 --- a/malduck/crypto/aes.py +++ b/malduck/crypto/aes.py @@ -46,7 +46,8 @@ def export_key(self) -> tuple[str, bytes] | None: """ Exports key from structure or returns None if no key was imported - :return: Tuple (`algorithm`, `key`). `Algorithm` is one of: "AES-128", "AES-192", "AES-256" + :return: Tuple (`algorithm`, `key`). + `Algorithm` is one of: "AES-128", "AES-192", "AES-256" :rtype: Tuple[str, bytes] """ if self.key is not None: @@ -170,7 +171,8 @@ def import_key(data: bytes) -> tuple[str, bytes] | None: :param data: Buffer with `BLOB` structure data :type data: bytes - :return: Tuple (`algorithm`, `key`). `Algorithm` is one of: "AES-128", "AES-192", "AES-256" + :return: Tuple (`algorithm`, `key`). + `Algorithm` is one of: "AES-128", "AES-192", "AES-256" """ if len(data) < BLOBHEADER.sizeof(): return None diff --git a/malduck/crypto/rsa.py b/malduck/crypto/rsa.py index a8d68f3..cb19b3a 100644 --- a/malduck/crypto/rsa.py +++ b/malduck/crypto/rsa.py @@ -104,7 +104,8 @@ class RSA: @staticmethod def import_key(data: bytes) -> bytes | None: r""" - Extracts key from buffer containing :class:`PublicKeyBlob` or :class:`PrivateKeyBlob` data + Extracts key from buffer containing :class:`PublicKeyBlob` + or :class:`PrivateKeyBlob` data :param data: Buffer with `BLOB` structure data :type data: bytes diff --git a/malduck/disasm.py b/malduck/disasm.py index 24d51c2..1360afb 100644 --- a/malduck/disasm.py +++ b/malduck/disasm.py @@ -77,8 +77,9 @@ def reg(self) -> str | int | None: """ Returns register used by operand. - For memory operands, returns base register or index register if base is not used. - For immediate operands or displacement-only memory operands returns None. + For memory operands, returns base register or index register + if base is not used. 
For immediate operands or displacement-only + memory operands returns None. :rtype: str """ diff --git a/malduck/extractor/extract_manager.py b/malduck/extractor/extract_manager.py index 376128e..33fa631 100644 --- a/malduck/extractor/extract_manager.py +++ b/malduck/extractor/extract_manager.py @@ -29,7 +29,8 @@ class ExtractManager: """ - Multi-dump extraction context. Handles merging configs from different dumps, additional dropped families etc. + Multi-dump extraction context. + Handles merging configs from different dumps, additional dropped families etc. :param modules: Object with loaded extractor modules :type modules: :class:`ExtractorModules` @@ -80,7 +81,8 @@ def on_extractor_error( method_name: str, ) -> None: """ - Handler for all exceptions raised by extractor methods (including :py:meth:`Extractor.handle_yara`). + Handler for all exceptions raised by extractor methods + (including :py:meth:`Extractor.handle_yara`). Override this method if you want to set your own error handler. 
@@ -172,7 +174,8 @@ def push_config(self, config: Config) -> bool: return True elif score == 1: log.debug( - "%s config doesn't look better than previous one (overridden by %s)", + "%s config doesn't look better than " + "previous one (overridden by %s)", family, stored_family, ) @@ -212,8 +215,10 @@ def push_procmem( :param p: ProcessMemory object :type p: :class:`malduck.procmem.ProcessMemory` - :param rip_binaries: Look for binaries (PE, ELF) in provided ProcessMemory and try to perform extraction using - specialized variants (ProcessMemoryPE, ProcessMemoryELF) + :param rip_binaries: + Look for binaries (PE, ELF) in provided ProcessMemory + and try to perform extraction using specialized variants + (ProcessMemoryPE, ProcessMemoryELF) :type rip_binaries: bool (default: False) :return: Detected family if configuration looks better than already stored one """ @@ -291,7 +296,8 @@ def push_procmem( if type(extractor.yara_rules) is str: raise TypeError( - f'"{extractor.__class__.__name__}.yara_rules" cannot be a string, convert it into a list of strings', + f'"{extractor.__class__.__name__}.yara_rules" cannot be a string, ' + 'convert it into a list of strings', ) # For each rule identifier in extractor.yara_rules... 
@@ -300,7 +306,8 @@ def push_procmem( try: if hasattr(extractor, "handle_yara"): warnings.warn( - "Extractor.handle_yara is deprecated, use Extractor.handle_match", + "Extractor.handle_yara is deprecated, " + "use Extractor.handle_match", DeprecationWarning, ) getattr(extractor, "handle_yara")( @@ -359,8 +366,9 @@ def push_config(self, config: Config, extractor: Extractor) -> None: ) if not overrides: raise RuntimeError( - f"Ripped both {self.collected_config['family']} and {config['family']} " - f"from the same ProcessMemory which is not expected", + f"Ripped both {self.collected_config['family']} " + f"and {config['family']} from the same ProcessMemory " + "which is not expected", ) if overrides == -1: self.collected_config["family"] = config["family"] @@ -372,7 +380,8 @@ def push_config(self, config: Config, extractor: Extractor) -> None: def config(self) -> Config: """ Returns collected config, but if family is not matched - returns empty dict. - Family is not included in config itself, look at :py:attr:`ProcmemExtractManager.family`. + Family is not included in config itself, + look at :py:attr:`ProcmemExtractManager.family`. """ if self.family is None: return {} diff --git a/malduck/extractor/extractor.py b/malduck/extractor/extractor.py index 82bcffc..653fa77 100644 --- a/malduck/extractor/extractor.py +++ b/malduck/extractor/extractor.py @@ -120,11 +120,14 @@ def cit_login(self, p, addr, match): .. py:decoratormethod:: Extractor.string Decorator for string-based extractor methods. - Method is called each time when string with the same identifier as method name has matched + Method is called each time when string with the same identifier + as method name has matched - Extractor can be called for many number-suffixed strings e.g. `$keyex1` and `$keyex2` will call `keyex` method. + Extractor can be called for many number-suffixed strings + e.g. `$keyex1` and `$keyex2` will call `keyex` method. 
- You can optionally provide the actual string identifier as an argument if you don't want to name your method + You can optionally provide the actual string identifier + as an argument if you don't want to name your method after the string identifier. Signature of decorated method: @@ -132,35 +135,48 @@ def cit_login(self, p, addr, match): .. code-block:: Python @Extractor.string - def string_identifier(self, p: ProcessMemory, addr: int, match: YaraStringMatch) -> Config: + def string_identifier( + self, + p: ProcessMemory, + addr: int, + match: YaraStringMatch + ) -> Config: # p: ProcessMemory object that contains matched file/dump representation # addr: Virtual address of matched string # Called for each "$string_identifier" hit ... - If you want to use same method for multiple different named strings, you can provide multiple identifiers - as `@Extractor.string` decorator argument + If you want to use same method for multiple different named strings, + you can provide multiple identifiers as `@Extractor.string` decorator argument .. code-block::Python @Extractor.string("xor_call", "mov_call") - def xxx_call(self, p: ProcessMemory, addr: int, match: YaraStringMatch) -> Config: + def xxx_call( + self, + p: ProcessMemory, + addr: int, + match: YaraStringMatch + ) -> Config: # This will be called for all $xor_call and $mov_call string hits # You can determine which string triggered the hit via match.identifier if match.identifier == "xor_call": ... - Extractor methods should return `dict` object with extracted part of configuration, `True` indicating - a match or `False`/`None` when family has not been matched. + Extractor methods should return `dict` object with extracted part + of configuration, `True` indicating a match or `False`/`None` + when family has not been matched. - For strong methods: truthy values are transformed to `dict` with `{"family": self.family}` key. 
+ For strong methods: truthy values are transformed to `dict` + with `{"family": self.family}` key. .. versionadded:: 4.0.0 Added `@Extractor.string` as extended version of `@Extractor.extractor` :param strings_or_method: - If method name doesn't match the string identifier, pass yara string identifier as decorator argument. + If method name doesn't match the string identifier, + pass yara string identifier as decorator argument. Multiple strings are accepted :type strings_or_method: str, optional @@ -168,7 +184,8 @@ def xxx_call(self, p: ProcessMemory, addr: int, match: YaraStringMatch) -> Confi Simplified variant of `@Extractor.string`. - Doesn't accept multiple strings and passes only string offset to the extractor method. + Doesn't accept multiple strings and passes only string offset + to the extractor method. .. code-block:: Python @@ -191,12 +208,14 @@ def cit_login(self, p, addr): .. py:decoratormethod:: Extractor.rule - Decorator for rule-based extractor methods, called once for rule match after string-based extraction methods. + Decorator for rule-based extractor methods, + called once for rule match after string-based extraction methods. - Method is called each time when rule with the same identifier as method name has matched. + Method is called each time when rule with the same identifier + as method name has matched. - You can optionally provide the actual rule identifier as an argument if you don't want to name your method - after the rule identifier. + You can optionally provide the actual rule identifier as an argument + if you don't want to name your method after the rule identifier. Rule identifier must appear in `yara_rules` tuple. @@ -205,9 +224,14 @@ def cit_login(self, p, addr): .. 
code-block:: Python @Extractor.rule - def rule_identifier(self, p: ProcessMemory, matches: YaraMatch) -> Config: + def rule_identifier( + self, + p: ProcessMemory, + matches: YaraMatch + ) -> Config: # p: ProcessMemory object that contains matched file/dump representation - # matches: YaraMatch object with offsets of all matched strings related with the rule + # matches: YaraMatch object with offsets of all matched strings related + # with the rule # Called for matched rule named "rule_identifier". ... @@ -239,10 +263,11 @@ def evil(self, p, matches): .. py:decoratormethod:: Extractor.final - Decorator for final extractor methods, called once for each single rule match after other extraction methods. + Decorator for final extractor methods, called once for each single rule + match after other extraction methods. - Behaves similarly to the @rule-decorated methods but is called for each rule match regardless of - the rule identifier. + Behaves similarly to the @rule-decorated methods but is called + for each rule match regardless of the rule identifier. Signature of decorated method: @@ -275,13 +300,15 @@ def get_config(self, p): .. py:decoratormethod:: Extractor.weak - Use this decorator for extractors when successful extraction is not sufficient to mark family as matched. + Use this decorator for extractors when successful extraction + is not sufficient to mark family as matched. All "weak configs" will be flushed when "strong config" appears. .. versionchanged:: 4.0.0 - Method must be decorated first with `@extractor`, `@rule` or `@final` decorator + Method must be decorated first + with `@extractor`, `@rule` or `@final` decorator .. 
code-block:: Python @@ -296,8 +323,9 @@ class Evil(Extractor): @Extractor.weak @Extractor.extractor def dga_seed(self, p, hit): - # Even if we're able to get the DGA seed, extractor won't produce config - # until is_it_really_evil match as well + # Even if we're able to get the DGA seed, + # extractor won't produce config until is_it_really_evil + # match as well dga_config = p.readv(hit, 128) seed = self._get_dga_seed(dga_config) if seed is not None: @@ -315,7 +343,8 @@ def is_it_really_evil(self, p): .. versionchanged:: 4.0.0 - Method must be decorated first with `@extractor`, `@rule` or `@final` decorator + Method must be decorated first + with `@extractor`, `@rule` or `@final` decorator .. py:decoratormethod:: Extractor.needs_elf @@ -324,12 +353,14 @@ def is_it_really_evil(self, p): .. versionchanged:: 4.0.0 - Method must be decorated first with `@extractor`, `@rule` or `@final` decorator. + Method must be decorated first + with `@extractor`, `@rule` or `@final` decorator. """ yara_rules = () #: Names of Yara rules for which handle_match is called - family = None #: Extracted malware family, automatically added to "family" key for strong extraction methods + family = None #: Extracted malware family, automatically added to "family" key + # for strong extraction methods overrides = [] #: Family match overrides another match e.g. citadel overrides zeus def __init__(self, parent): @@ -420,8 +451,9 @@ def handle_match(self, p, match): Called for each rule hit listed in Extractor.yara_rules. - Overriding this method means that all Yara hits must be processed within this method. - Ripped configurations must be reported using :py:meth:`push_config` method. + Overriding this method means that all Yara hits must be processed + within this method. Ripped configurations must be reported using + :py:meth:`push_config` method. .. 
versionadded: 4.0.0:: @@ -553,7 +585,8 @@ def final(method): def needs_pe(method): if not isinstance(method, ExtractorMethod): raise TypeError( - "@needs_pe decorator must be placed before @final/@rule/@extractor decorator", + "@needs_pe decorator must be placed " + "before @final/@rule/@extractor decorator", ) method.procmem_type = ProcessMemoryPE return method @@ -562,7 +595,8 @@ def needs_pe(method): def needs_elf(method): if not isinstance(method, ExtractorMethod): raise TypeError( - "@needs_elf decorator must be placed before @final/@rule/@extractor decorator", + "@needs_elf decorator must be placed " + "before @final/@rule/@extractor decorator", ) method.procmem_type = ProcessMemoryELF return method @@ -571,7 +605,8 @@ def needs_elf(method): def weak(method): if not isinstance(method, ExtractorMethod): raise TypeError( - "@weak decorator must be placed before @final/@rule/@extractor decorator", + "@weak decorator must be placed " + "before @final/@rule/@extractor decorator", ) method.weak = True return method diff --git a/malduck/extractor/modules.py b/malduck/extractor/modules.py index e0b45cc..71d2a37 100644 --- a/malduck/extractor/modules.py +++ b/malduck/extractor/modules.py @@ -24,7 +24,8 @@ class ExtractorModules: """ Configuration object with loaded Extractor modules for ExtractManager - :param modules_path: Path with module files (Extractor classes and Yara files, default '~/.malduck') + :param modules_path: + Path with module files (Extractor classes and Yara files, default '~/.malduck') :type modules_path: str """ @@ -45,8 +46,9 @@ def __init__(self, modules_path: str | None = None) -> None: module_name = module.__name__ if not any(x.startswith(module_name) for x in loaded_extractors): warnings.warn( - f"The extractor engine couldn't import any Extractors from module {module_name}. " - f"Make sure the Extractor class is imported into __init__.py", + "The extractor engine couldn't import any Extractors " + f"from module {module_name}. 
" + "Make sure the Extractor class is imported into __init__.py", ) self.override_paths = make_override_paths(self.extractors) @@ -102,7 +104,8 @@ def make_override_paths(extractors: list[type[Extractor]]) -> dict[str, list[str def make_override_path(node, visited, current_path=None): if node in visited: raise RuntimeError( - f"Override cycle detected: {node} already visited during tree traversal", + "Override cycle detected: " + f"{node} already visited during tree traversal", ) visited.add(node) unvisited.remove(node) @@ -121,7 +124,8 @@ def make_override_path(node, visited, current_path=None): # Root undetected if unvisited: raise RuntimeError( - f"Override cycle detected: {list(unvisited)} not visited during tree traversal", + "Override cycle detected: " + f"{list(unvisited)} not visited during tree traversal", ) return dict(override_paths) @@ -157,7 +161,8 @@ def load_modules( .. note:: - This method is considered to be used internally (see also :class:`extractor.ExtractorModules`) + This method is considered to be used internally + (see also :class:`extractor.ExtractorModules`) :param search_path: Path searched for modules :type search_path: str diff --git a/malduck/ints.py b/malduck/ints.py index b252e53..4c97ac9 100644 --- a/malduck/ints.py +++ b/malduck/ints.py @@ -95,7 +95,9 @@ def unpack( Unpacks multiple values from provided buffer :param other: Buffer object containing value to unpack :param offset: Buffer offset - :return: tuple of IntType instances or None if there are not enough data to unpack + :return: + tuple of IntType instances + or None if there are not enough data to unpack """ fmt = cls.fmt + cls.fmt[-1] * (multiplier - 1) try: @@ -120,12 +122,18 @@ class IntType(int, IntTypeBase, metaclass=MetaIntType): Supports ctypes-like multiplication for unpacking tuple of values * Unsigned types: - :class:`UInt64` (:class:`QWORD`), :class:`UInt32` (:class:`DWORD`), - :class:`UInt16` (:class:`WORD`), :class:`UInt8` (:class:`BYTE` or :class:`CHAR`) + 
:class:`UInt64` (:class:`QWORD`), + :class:`UInt32` (:class:`DWORD`), + :class:`UInt16` (:class:`WORD`), + :class:`UInt8` (:class:`BYTE` or :class:`CHAR`) * Signed types: - :class:`Int64`, :class:`Int32`, :class:`Int16`, :class:`Int8` + :class:`Int64`, + :class:`Int32`, + :class:`Int16`, + :class:`Int8` - IntTypes are derived from :class:`int` type, so they are fully compatible with other numeric types + IntTypes are derived from :class:`int` type, so they are fully compatible + with other numeric types .. code-block:: python @@ -134,7 +142,8 @@ class IntType(int, IntTypeBase, metaclass=MetaIntType): res = Int32(res) > -1 - Using IntTypes you don't need to mask everything with 0xFFFFFFFF, only if you remember about appropriate casting. + Using IntTypes you don't need to mask everything with 0xFFFFFFFF, + only if you remember about appropriate casting. .. code-block:: python @@ -269,7 +278,8 @@ def unpack( :rtype: IntType instance or None if there are not enough data to unpack .. warning:: - Fixed-size integer operations are 4-5 times slower than equivalent on built-in integer types + Fixed-size integer operations are 4-5 times slower + than equivalent on built-in integer types """ try: ret = unpack_from("<" + cls.fmt, other, offset=offset) @@ -296,7 +306,8 @@ def unpack_be( :rtype: IntType instance or None if there are not enough data to unpack .. 
warning:: - Fixed-size integer operations are 4-5 times slower than equivalent on built-in integer types + Fixed-size integer operations are 4-5 times slower + than equivalent on built-in integer types """ try: ret = unpack_from(">" + cls.fmt, other, offset=offset) diff --git a/malduck/main.py b/malduck/main.py index e67f7ef..427994f 100644 --- a/malduck/main.py +++ b/malduck/main.py @@ -15,13 +15,19 @@ "-l", type=str, default=None, - help="Set logging level for commands: critical, error, warning (default), info, debug", + help=( + "Set logging level for commands: " + "critical, error, warning (default), info, debug" + ), ) @click.option( "--verbose/--quiet", "-v/-q", default=None, - help="Verbose mode (shortcut for '--log-level debug') / quiet mode ('--log-level error')", + help=( + "Verbose mode (shortcut for '--log-level debug') " + "/ quiet mode ('--log-level error')" + ), ) @click.version_option() def main(log_level, verbose): @@ -56,7 +62,8 @@ def fixpe(mempath, outpath, force, base): with ProcessMemoryPE.from_file(mempath, base=base) as p: if not force and p.is_image_loaded_as_memdump(): click.echo( - "Input file looks like correct PE file. Use -f if you want to fix it anyway.", + "Input file looks like correct PE file. 
" + "Use -f if you want to fix it anyway.", ) return 1 outpath = outpath or mempath + ".exe" @@ -86,7 +93,10 @@ def fixpe(mempath, outpath, force, base): default=None, type=click.Path(exists=True), required=False, - help="Specify directory where Yara files and modules are located (default path is ~/.malduck)", + help=( + "Specify directory where Yara files and modules are located " + "(default path is ~/.malduck)" + ), ) def extract(ctx, paths, base, analysis, modules): """Extract static configuration from dumps""" diff --git a/malduck/pe.py b/malduck/pe.py index e857215..1208f3e 100644 --- a/malduck/pe.py +++ b/malduck/pe.py @@ -122,7 +122,8 @@ def is64bit(self) -> Any: def headers_size(self) -> int: """ Estimated size of PE headers (first section offset). - If there are no sections: returns 0x1000 or size of input if provided data are shorter than single page + If there are no sections: returns 0x1000 or size of input + if provided data are shorter than single page """ return ( self.sections[0].PointerToRawData @@ -148,7 +149,9 @@ def directory(self, name: str) -> Any: """ Get pefile directory entry by identifier - :param name: shortened pefile directory entry identifier (e.g. 'IMPORT' for 'IMAGE_DIRECTORY_ENTRY_IMPORT') + :param name: + shortened pefile directory entry identifier + (e.g. 'IMPORT' for 'IMAGE_DIRECTORY_ENTRY_IMPORT') :rtype: :class:`pefile.Structure` """ return self.optional_header.DATA_DIRECTORY[ @@ -160,8 +163,9 @@ def structure(self, rva: int, format: Any) -> Any: Get internal pefile Structure from specified rva :param rva: Relative virtual address of structure - :param format: :class:`pefile.Structure` format - (e.g. :py:attr:`pefile.PE.__IMAGE_LOAD_CONFIG_DIRECTORY64_format__`) + :param format: + :class:`pefile.Structure` format + (e.g. 
:py:attr:`pefile.PE.__IMAGE_LOAD_CONFIG_DIRECTORY64_format__`) :rtype: :class:`pefile.Structure` """ structure = pefile.Structure(format) @@ -237,7 +241,8 @@ def validate_resources(self) -> bool: def validate_padding(self) -> bool: """ - Returns True if area between first non-bss section and first 4kB doesn't have only null-bytes + Returns True if area between first non-bss section + and first 4kB doesn't have only null-bytes """ section_start_offs = None for section in self.sections: @@ -282,7 +287,9 @@ def resources(self, name: int | str | bytes) -> Iterator[bytes]: """ Finds resource objects by specified name or type - :param name: String name (e2) or type (e1), numeric identifier name (e2) or RT_* type (e1) + :param name: + String name (e2) or type (e1), + numeric identifier name (e2) or RT_* type (e1) :type name: int or str or bytes :rtype: Iterator[bytes] """ @@ -298,7 +305,8 @@ def name_int(e1, e2, e3): def type_int(e1, e2, e3): return e1.id == type_id - # Broken PE files will not have this directory and it's better to return no value + # Broken PE files will not have this directory + # and it's better to return no value # than to throw a meaningless exception if not hasattr(self.pe, "DIRECTORY_ENTRY_RESOURCE"): return @@ -323,7 +331,9 @@ def resource(self, name: int | str | bytes) -> bytes | None: """ Retrieves single resource by specified name or type - :param name: String name (e2) or type (e1), numeric identifier name (e2) or RT_* type (e1) + :param name: + String name (e2) or type (e1), + numeric identifier name (e2) or RT_* type (e1) :type name: int or str or bytes :rtype: bytes or None """ diff --git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py index 000f9b8..b4a2f71 100644 --- a/malduck/procmem/binmem.py +++ b/malduck/procmem/binmem.py @@ -48,7 +48,8 @@ def _reload_as_image(self) -> None: @property def image(self: T) -> T | None: """ - Returns ProcessMemory object loaded with image=True or None if can't be loaded or is loaded as image yet 
+ Returns ProcessMemory object loaded with image=True or None + if can't be loaded or is loaded as image yet """ if self.is_image: return None @@ -75,7 +76,8 @@ def is_valid(self) -> bool: @classmethod def load_binaries_from_memory(cls: type[T], procmem: ProcessMemory) -> Iterator[T]: """ - Looks for binaries in ProcessMemory object and yields specialized ProcessMemoryBinary objects + Looks for binaries in ProcessMemory object and yields specialized + ProcessMemoryBinary objects :param procmem: ProcessMemory object to search .. versionchanged:: 4.4.0 @@ -103,4 +105,8 @@ def is_image_loaded_as_memdump(self) -> bool: raise NotImplementedError() def __repr__(self): - return f"{self.__class__.__name__}:{'IMG' if self.is_image else 'DMP'}:{self.imgbase:x}" + return ":".join(( + self.__class__.__name__, + "IMG" if self.is_image else "DMP", + f"{self.imgbase:x}" + )) diff --git a/malduck/procmem/procmem.py b/malduck/procmem/procmem.py index bbd48d1..f99b1a1 100644 --- a/malduck/procmem/procmem.py +++ b/malduck/procmem/procmem.py @@ -41,14 +41,18 @@ class ProcessMemory: :param buf: Object with memory contents :type buf: bytes, mmap, memoryview, bytearray or MemoryBuffer object - :param base: Virtual address of the region of interest (or beginning of buf when no regions provided) + :param base: + Virtual address of the region of interest + (or beginning of buf when no regions provided) :type base: int, optional (default: 0) - :param regions: Regions mapping. If set to None (default), buf is mapped into single-region with VA specified in - `base` argument + :param regions: + Regions mapping. If set to None (default), buf is mapped + into single-region with VA specified in `base` argument :type regions: List[:class:`Region`] - Let's assume that `notepad.exe_400000.bin` contains raw memory dump starting at 0x400000 base address. 
We can - easily load that file to :class:`ProcessMemory` object, using :py:meth:`from_file` method: + Let's assume that `notepad.exe_400000.bin` contains raw memory dump + starting at 0x400000 base address. We can easily load that file + to :class:`ProcessMemory` object, using :py:meth:`from_file` method: .. code-block:: python @@ -69,8 +73,8 @@ class ProcessMemory: p = procmem(payload, base=0x400000) - Then you can work with PE image contained in dump by creating :class:`ProcessMemoryPE` object, using its - :py:meth:`from_memory` constructor method + Then you can work with PE image contained in dump by creating + :class:`ProcessMemoryPE` object, using its :py:meth:`from_memory` constructor method .. code-block:: python @@ -83,9 +87,11 @@ class ProcessMemory: ppe = procmempe.from_memory(p) ppe.pe.resource("NPENCODINGDIALOG") - If you want to load PE file directly and work with it in a similar way as with memory-mapped files, just use - `image` parameter. It also works with :py:meth:`ProcessMemoryPE.from_memory` for embedded binaries. Your file - will be loaded and relocated in similar way as it's done by Windows loader. + If you want to load PE file directly and work with it in a similar way + as with memory-mapped files, just use `image` parameter. + It also works with :py:meth:`ProcessMemoryPE.from_memory` for embedded binaries. + Your file will be loaded and relocated in similar way as it's done by Windows + loader. .. code-block:: python @@ -110,7 +116,8 @@ def __init__(self, buf, base=0, regions=None, **_): self.memory = buf else: raise TypeError( - "Wrong buffer type - must be bytes, bytearray, mmap object or MemoryBuffer", + "Wrong buffer type - must be " + "bytes, bytearray, mmap object or MemoryBuffer", ) self.imgbase = base @@ -143,7 +150,8 @@ def close(self, copy=False): If copy is False (default): invalidates the object. 
- :param copy: Copy data into string before closing the mmap object (default: False) + :param copy: + Copy data into string before closing the mmap object (default: False) :type copy: bool """ if self.mapped_memory is None: @@ -159,7 +167,8 @@ def close(self, copy=False): # Invalidate object buf = None - # If self.opened_file is not None: mapped_memory is owned by this ProcessMemory object + # If self.opened_file is not None: + # mapped_memory is owned by this ProcessMemory object # We should close all descriptors if self.opened_file is not None: self.mapped_memory.close() @@ -252,19 +261,23 @@ def iter_regions( This method is used internally to enumerate regions using provided strategy. .. warning:: - If starting point is not provided, iteration will start from the first mapped region. This could - be counter-intuitive when length is set. It literally means "get of mapped bytes". - If you want to look for regions from address 0, you need to explicitly provide this address as an argument. + If starting point is not provided, iteration will start from the first + mapped region. This could be counter-intuitive when length is set. + It literally means "get of mapped bytes". + If you want to look for regions from address 0, + you need to explicitly provide this address as an argument. .. versionadded:: 3.0.0 :param addr: Virtual address of starting point :type addr: int (default: None) - :param offset: Offset of starting point, which will be translated to virtual address + :param offset: + Offset of starting point, which will be translated to virtual address :type offset: int (default: None) :param length: Length of queried range in VM mapping context :type length: int (default: None, unlimited) - :param contiguous: If True, break after first gap. Starting point must be inside mapped region. + :param contiguous: + If True, break after first gap. Starting point must be inside mapped region. 
:type contiguous: bool (default: False) :param trim: Trim Region objects to range boundaries (addr, addr+length) :type trim: bool (default: False) @@ -276,7 +289,8 @@ def iter_regions( ) if addr is None and offset is None and contiguous: raise ValueError( - "Starting point (addr or offset) must be provided for contiguous regions", + "Starting point (addr or offset) must be provided " + "for contiguous regions", ) if length and length < 0: raise ValueError("Length can't be less than 0") @@ -310,7 +324,8 @@ def iter_regions( else: if length is not None: raise ValueError( - "Don't know how to retrieve length-limited regions with offset from unmapped area", + "Don't know how to retrieve length-limited regions " + "with offset from unmapped area", ) offset = region.offset # If we're out of length after adjustment: time to stop @@ -420,7 +435,8 @@ def readp(self, offset, length=None): Family of \\*p methods doesn't care about contiguity of regions. - Use :py:meth:`p2v` and :py:meth:`readv` if you want to operate on contiguous regions only + Use :py:meth:`p2v` and :py:meth:`readv` if you want to operate on contiguous + regions only :param offset: Buffer offset :param length: Length of chunk (optional) @@ -434,7 +450,8 @@ def readp(self, offset, length=None): def readv_regions(self, addr=None, length=None, contiguous=True): """ - Generate chunks of memory from next contiguous regions, starting from the specified virtual address, + Generate chunks of memory from next contiguous regions, + starting from the specified virtual address, until specified length of read data is reached. Used internally. @@ -506,7 +523,8 @@ def patchp(self, offset, buf): Family of \\*p methods doesn't care about contiguity of regions.
- Use :py:meth:`p2v` and :py:meth:`patchv` if you want to operate on contiguous regions only + Use :py:meth:`p2v` and :py:meth:`patchv` if you want to operate on contiguous + regions only :param offset: Buffer offset :type offset: int @@ -538,7 +556,8 @@ def patchv(self, addr, buf): """ Patch bytes under specified virtual address - Patched address range must be within single region, ValueError is raised otherwise. + Patched address range must be within single region, ValueError is raised + otherwise. :param addr: Virtual address :type addr: int @@ -766,11 +785,15 @@ def extract(self, modules=None, extract_manager=None): """ Tries to extract config from ProcessMemory object - :param modules: Extractor modules object (optional, loads '~/.malduck' by default) + :param modules: + Extractor modules object (optional, loads '~/.malduck' by default) :type modules: :class:`malduck.extractor.ExtractorModules` - :param extract_manager: ExtractManager object (optional, creates ExtractManager by default) + :param extract_manager: + ExtractManager object (optional, creates ExtractManager by default) :type extract_manager: :class:`malduck.extractor.ExtractManager` - :return: Static configuration(s) (:py:attr:`malduck.extractor.ExtractManager.config`) or None if not extracted + :return: + Static configuration(s) (:py:attr:`malduck.extractor.ExtractManager.config`) + or None if not extracted :rtype: List[dict] or None """ from ..extractor import ExtractManager, ExtractorModules @@ -790,7 +813,8 @@ def yarap(self, ruleset, offset=None, length=None, extended=False): .. versionchanged:: 4.0.0 - Added `extended` option which allows to get extended information about matched strings and rules. + Added `extended` option which allows to get extended information + about matched strings and rules. Default is False for backwards compatibility. :param ruleset: Yara object with loaded yara rules @@ -813,7 +837,8 @@ def yarav(self, ruleset, addr=None, length=None, extended=False): .. 
versionchanged:: 4.0.0 - Added `extended` option which allows to get extended information about matched strings and rules. + Added `extended` option which allows to get extended information + about matched strings and rules. Default is False for backwards compatibility. :param ruleset: Yara object with loaded yara rules @@ -824,8 +849,9 @@ def yarav(self, ruleset, addr=None, length=None, extended=False): :type length: int (optional) :param extended: Returns extended information about matched strings and rules :type extended: bool (optional, default False) - :rtype: :class:`malduck.yara.YaraRulesetOffsets` or :class:`malduck.yara.YaraRulesetMatches` - if extended is set to True + :rtype: + :class:`malduck.yara.YaraRulesetOffsets` + or :class:`malduck.yara.YaraRulesetMatches` if extended is set to True """ if addr is None: addr = self.regions[0].addr @@ -855,12 +881,14 @@ def _findbytes(self, yara_fn, query, addr, length): def findbytesp(self, query, offset=None, length=None): """ - Search for byte sequences (e.g., `4? AA BB ?? DD`). Uses :py:meth:`yarap` internally + Search for byte sequences (e.g., `4? AA BB ?? DD`). + Uses :py:meth:`yarap` internally If offset is None, looks for match from the beginning of memory .. versionadded:: 1.4.0 - Query is passed to yarap as single hexadecimal string rule. Use Yara-compatible strings only + Query is passed to yarap as single hexadecimal string rule. + Use Yara-compatible strings only :param query: Sequence of wildcarded hexadecimal bytes, separated by spaces :type query: str or bytes @@ -875,12 +903,14 @@ def findbytesp(self, query, offset=None, length=None): def findbytesv(self, query, addr=None, length=None): """ - Search for byte sequences (e.g., `4? AA BB ?? DD`). Uses :py:meth:`yarav` internally + Search for byte sequences (e.g., `4? AA BB ?? DD`). + Uses :py:meth:`yarav` internally If addr is None, looks for match from the beginning of memory .. 
versionadded:: 1.4.0 - Query is passed to yarav as single hexadecimal string rule. Use Yara-compatible strings only + Query is passed to yarav as single hexadecimal string rule. + Use Yara-compatible strings only :param query: Sequence of wildcarded hexadecimal bytes, separated by spaces :type query: str or bytes diff --git a/malduck/procmem/procmemelf.py b/malduck/procmem/procmemelf.py index 286bb8f..c4939fd 100644 --- a/malduck/procmem/procmemelf.py +++ b/malduck/procmem/procmemelf.py @@ -18,8 +18,8 @@ class ProcessMemoryELF(ProcessMemoryBinary): Short name: `procmemelf` - ELF files can be read directly using inherited :py:meth:`ProcessMemory.from_file` with `image` argument set - (look at :py:meth:`from_memory` method). + ELF files can be read directly using inherited :py:meth:`ProcessMemory.from_file` + with `image` argument set (look at :py:meth:`from_memory` method). """ __magic__ = b"\x7fELF" diff --git a/malduck/procmem/procmempe.py b/malduck/procmem/procmempe.py index 3ccb7a7..1a0a533 100644 --- a/malduck/procmem/procmempe.py +++ b/malduck/procmem/procmempe.py @@ -18,17 +18,23 @@ class ProcessMemoryPE(ProcessMemoryBinary): :param buf: A memory object containing the PE to be loaded :type buf: bytes, mmap, memoryview, bytearray or :py:meth:`MemoryBuffer` object - :param base: Virtual address of the region of interest (or beginning of buf when no regions provided) + :param base: + Virtual address of the region of interest (or beginning of buf + when no regions provided) :type base: int, optional (default: 0) :param image: The memory object is a dump of memory-mapped PE :type image: bool, optional (default: False) - :param detect_image: Try to automatically detect if the input buffer is memory-mapped PE using some heuristics + :param detect_image: + Try to automatically detect if the input buffer is memory-mapped PE + using some heuristics :type detect_image: bool, optional (default: False) - File `memory_dump` contains a 64bit memory-aligned PE dumped from 
address `0x140000000`, in order to load it - into procmempe and access the `pe` field all we have to do is initialize a new object with the file data: + File `memory_dump` contains a 64bit memory-aligned PE dumped + from address `0x140000000`, in order to load it into procmempe + and access the `pe` field all we have to do is initialize a new object + with the file data: .. code-block:: python @@ -41,7 +47,8 @@ class ProcessMemoryPE(ProcessMemoryBinary): print(pe_dump.pe.is64bit) - PE files can also be read directly using inherited :py:meth:`ProcessMemory.from_file` with `image` argument set + PE files can also be read directly using inherited + :py:meth:`ProcessMemory.from_file` with `image` argument set (look at :py:meth:`from_memory` method). .. code-block:: python @@ -119,7 +126,8 @@ def is_valid(self) -> bool: def is_image_loaded_as_memdump(self) -> bool: """ - Checks whether memory region contains image incorrectly loaded as memory-mapped PE dump (image=False). + Checks whether memory region contains image incorrectly loaded as memory-mapped + PE dump (image=False). .. code-block:: python diff --git a/malduck/procmem/region.py b/malduck/procmem/region.py index 4ab6e5e..cf8e9fd 100644 --- a/malduck/procmem/region.py +++ b/malduck/procmem/region.py @@ -96,7 +96,8 @@ def last_offset(self) -> int: def v2p(self, addr: int) -> int: """ - Virtual address to physical offset translation. Assumes that address is valid within Region. + Virtual address to physical offset translation. + Assumes that address is valid within Region. :param addr: Virtual address :return: Physical offset """ diff --git a/malduck/yara.py b/malduck/yara.py index ea69d59..57afcd9 100644 --- a/malduck/yara.py +++ b/malduck/yara.py @@ -70,7 +70,8 @@ def __getattr__(self, item): class Yara: """ - Represents Yara ruleset. Rules can be compiled from set of files or defined in code (single rule only). + Represents Yara ruleset. 
+ Rules can be compiled from set of files or defined in code (single rule only). Most simple rule (with default identifiers left): @@ -107,11 +108,14 @@ class Yara: # Note: Order of offsets for grouped strings is undetermined print("mal*", match.MalwareRule["mal"]) - :param rule_paths: Dictionary of {"namespace": "rule_path"}. See also :py:meth:`Yara.from_dir`. + :param rule_paths: + Dictionary of {"namespace": "rule_path"}. See also :py:meth:`Yara.from_dir`. :type rule_paths: dict :param name: Name of generated rule (default: "r") :type name: str - :param strings: Dictionary representing set of string patterns ({"string_identifier": YaraString or plain str}) + :param strings: + Dictionary representing set of string patterns + ({"string_identifier": YaraString or plain str}) :type strings: dict or str or :class:`YaraString` :param condition: Yara rule condition (default: "any of them") :type condition: str @@ -136,7 +140,7 @@ def __init__( yara_strings = "\n ".join( [ - f"${key} = {str(YaraString(value) if isinstance(value, str) else value)}" + f"${key} = {YaraString(value) if isinstance(value, str) else value!s}" for key, value in strings.items() ], ) @@ -156,7 +160,8 @@ def __init__( @staticmethod def from_dir(path, recursive=True, followlinks=True): """ - Find rules (recursively) in specified path. Supported extensions: \\*.yar, \\*.yara + Find rules (recursively) in specified path. + Supported extensions: \\*.yar, \\*.yara :param path: Root path for searching :type path: str @@ -191,13 +196,15 @@ def match(self, offset_mapper=None, extended=False, **kwargs): :type filepath: str :param data: Data to be scanned :type data: str - :param offset_mapper: Offset mapping function. For unmapped region, should returned None. - Used by :py:meth:`malduck.procmem.ProcessMemory.yarav` + :param offset_mapper: + Offset mapping function. For unmapped region, should returned None. 
+ Used by :py:meth:`malduck.procmem.ProcessMemory.yarav` :type offset_mapper: function :param extended: Returns extended information about matched strings and rules :type extended: bool (optional, default False) - :rtype: :class:`malduck.yara.YaraRulesetOffsets` or :class:`malduck.yara.YaraRulesetMatches` - if extended is set to True + :rtype: + :class:`malduck.yara.YaraRulesetOffsets` + or :class:`malduck.yara.YaraRulesetMatches` if extended is set to True """ matches = YaraRulesetMatch( self.rules.match(**kwargs), @@ -219,7 +226,10 @@ class YaraString: :param value: Pattern value :type value: str :param type: Pattern type (default is :py:attr:`YaraString.TEXT`) - :type type: :py:attr:`YaraString.TEXT` / :py:attr:`YaraString.HEX` / :py:attr:`YaraString.REGEX` + :type type: + :py:attr:`YaraString.TEXT` + / :py:attr:`YaraString.HEX` + / :py:attr:`YaraString.REGEX` :param modifiers: Yara string modifier flags """ @@ -276,7 +286,8 @@ def _map_matches(self, matches, offset_mapper): def _map_strings(self, strings, offset_mapper): mapped_strings = defaultdict(list) for yara_string in strings: - # yara-python 4.3.0 broke compatibilty and started returning a StringMatch object + # yara-python 4.3.0 broke compatibilty + # and started returning a StringMatch object if type(yara_string) is tuple: offsets = [yara_string[0]] identifier = yara_string[1] From d16a2b7159cebfb6fe8abeda0827addc7cde4943 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 17:10:51 +0100 Subject: [PATCH 10/24] Consistently use type(X) instead of X.__class__ --- malduck/extractor/extract_manager.py | 10 ++++---- malduck/extractor/extractor.py | 16 ++++++------- malduck/ints.py | 36 ++++++++++++++-------------- malduck/procmem/binmem.py | 4 ++-- malduck/procmem/procmem.py | 2 +- 5 files changed, 34 insertions(+), 34 deletions(-) diff --git a/malduck/extractor/extract_manager.py b/malduck/extractor/extract_manager.py index 33fa631..edddb84 100644 --- a/malduck/extractor/extract_manager.py +++ 
b/malduck/extractor/extract_manager.py @@ -97,7 +97,7 @@ def on_extractor_error( log.warning( "%s.%s raised an exception: %s", - extractor.__class__.__name__, + type(extractor).__name__, method_name, traceback.format_exc(), ) @@ -135,7 +135,7 @@ def carve_procmem(self, p: ProcessMemory) -> list[ProcessMemoryBinary]: carved_bins = list(binclass.load_binaries_from_memory(p)) for carved_bin in carved_bins: log.debug( - f"carve: Found {carved_bin.__class__.__name__} " + f"carve: Found {type(carved_bin).__name__} " f"at offset {carved_bin.regions[0].offset}", ) binaries += carved_bins @@ -296,7 +296,7 @@ def push_procmem( if type(extractor.yara_rules) is str: raise TypeError( - f'"{extractor.__class__.__name__}.yara_rules" cannot be a string, ' + f'"{type(extractor).__name__}.yara_rules" cannot be a string, ' 'convert it into a list of strings', ) @@ -346,14 +346,14 @@ def push_config(self, config: Config, extractor: Extractor) -> None: log.debug( "%s found the following config parts: %s", - extractor.__class__.__name__, + type(extractor).__name__, sorted(config.keys()), ) if "family" in config: log.debug( "%s tells it's %s", - extractor.__class__.__name__, + type(extractor).__name__, config["family"], ) if ( diff --git a/malduck/extractor/extractor.py b/malduck/extractor/extractor.py index 653fa77..2a1a3f1 100644 --- a/malduck/extractor/extractor.py +++ b/malduck/extractor/extractor.py @@ -27,9 +27,9 @@ def __call__(self, extractor, procmem, *args, **kwargs): if not isinstance(procmem, self.procmem_type): log.debug( "Omitting %s.%s - %s is not %s", - self.__class__.__name__, + type(self).__name__, self.method.__name__, - procmem.__class__.__name__, + type(procmem).__name__, self.procmem_type.__name__, ) return @@ -420,14 +420,14 @@ def log(self): :return: :class:`logging.Logger` """ return logging.getLogger( - f"{self.__class__.__module__}.{self.__class__.__name__}", + f"{type(self).__module__}.{type(self).__name__}", ) def _get_methods(self, method_type): return ( 
(name, method) for name, method in inspect.getmembers( - self.__class__, + type(self), predicate=lambda member: isinstance(member, method_type), ) if isinstance(method, method_type) @@ -473,7 +473,7 @@ def handle_match(self, p, match): try: log.debug( "Trying %s.%s for %s@%x", - self.__class__.__name__, + type(self).__name__, method_name, identifier, string_match.offset, @@ -492,7 +492,7 @@ def handle_match(self, p, match): try: log.debug( "Trying %s.%s for %s@%x", - self.__class__.__name__, + type(self).__name__, method_name, string_match.identifier, string_match.offset, @@ -505,7 +505,7 @@ def handle_match(self, p, match): for method_name, method in self._get_methods(RuleExtractorMethod): if match.name != method.rule_name: continue - log.debug("Trying %s.%s (rule)", self.__class__.__name__, method_name) + log.debug("Trying %s.%s (rule)", type(self).__name__, method_name) try: method(self, p, match) except Exception as exc: @@ -513,7 +513,7 @@ def handle_match(self, p, match): # Call final extractors for method_name, method in self._get_methods(FinalExtractorMethod): - log.debug("Trying %s.%s (final)", self.__class__.__name__, method_name) + log.debug("Trying %s.%s (final)", type(self).__name__, method_name) try: method(self, p) except Exception as exc: diff --git a/malduck/ints.py b/malduck/ints.py index 4c97ac9..2e1f365 100644 --- a/malduck/ints.py +++ b/malduck/ints.py @@ -189,63 +189,63 @@ def __new__(cls: MetaIntType, value: Any) -> IntType: def __add__(self, other: Any) -> IntType: res = super().__add__(other) - return self.__class__(res) + return type(self)(res) def __sub__(self, other: Any) -> IntType: res = super().__sub__(other) - return self.__class__(res) + return type(self)(res) def __mul__(self, other: Any) -> IntType: res = super().__mul__(other) - return self.__class__(res) + return type(self)(res) def __truediv__(self, other: Any) -> IntType: res = super().__truediv__(other) - return self.__class__(res) + return type(self)(res) def 
__floordiv__(self, other: Any) -> IntType: res = super().__floordiv__(other) - return self.__class__(res) + return type(self)(res) def __and__(self, other: Any) -> IntType: res = super().__and__(other) - return self.__class__(res) + return type(self)(res) def __xor__(self, other: Any) -> IntType: res = super().__xor__(other) - return self.__class__(res) + return type(self)(res) def __or__(self, other: Any) -> IntType: res = super().__or__(other) - return self.__class__(res) + return type(self)(res) def __lshift__(self, other: Any) -> IntType: res = super().__lshift__(other) - return self.__class__(res) + return type(self)(res) def __pos__(self) -> IntType: res = super().__pos__() - return self.__class__(res) + return type(self)(res) def __abs__(self) -> IntType: res = super().__abs__() - return self.__class__(res) + return type(self)(res) def __rshift__(self, other: Any) -> IntType: - res = int.__rshift__(int(self) & self.__class__.mask, other) - return self.__class__(res) + res = int.__rshift__(int(self) & type(self).mask, other) + return type(self)(res) def __neg__(self) -> IntType: - res = (int(self) ^ self.__class__.mask) + 1 - return self.__class__(res) + res = (int(self) ^ type(self).mask) + 1 + return type(self)(res) def __invert__(self) -> IntType: - res = int(self) ^ self.__class__.mask - return self.__class__(res) + res = int(self) ^ type(self).mask + return type(self)(res) def rol(self, other) -> IntType: """Bitwise rotate left""" - return self.__class__(rol(int(self), other, bits=self.bits)) + return type(self)(rol(int(self), other, bits=self.bits)) def ror(self, other) -> IntType: """Bitwise rotate right""" diff --git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py index b4a2f71..aa0ac5b 100644 --- a/malduck/procmem/binmem.py +++ b/malduck/procmem/binmem.py @@ -55,7 +55,7 @@ def image(self: T) -> T | None: return None try: if not self._image: - self._image = self.__class__.from_memory(self, image=True) + self._image = 
type(self).from_memory(self, image=True) return self._image except Exception: import traceback @@ -106,7 +106,7 @@ def is_image_loaded_as_memdump(self) -> bool: def __repr__(self): return ":".join(( - self.__class__.__name__, + type(self).__name__, "IMG" if self.is_image else "DMP", f"{self.imgbase:x}" )) diff --git a/malduck/procmem/procmem.py b/malduck/procmem/procmem.py index f99b1a1..29f5796 100644 --- a/malduck/procmem/procmem.py +++ b/malduck/procmem/procmem.py @@ -953,7 +953,7 @@ def findmz(self, addr): addr -= 0x1000 def __repr__(self): - return f"{self.__class__.__name__}:DMP:{self.imgbase:x}" + return f"{type(self).__name__}:DMP:{self.imgbase:x}" procmem = ProcessMemory From 06b02ca1583503d3fb75bd71bd13d38b700ff8bc Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 18:13:34 +0100 Subject: [PATCH 11/24] Optimize byte-string operations --- malduck/crypto/xor.py | 8 +++++--- malduck/string/ops.py | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/malduck/crypto/xor.py b/malduck/crypto/xor.py index 07221de..3aa89dd 100644 --- a/malduck/crypto/xor.py +++ b/malduck/crypto/xor.py @@ -1,6 +1,8 @@ from __future__ import annotations -from itertools import cycle +import operator +from itertools import cycle, starmap +from sys import byteorder __all__ = ["xor"] @@ -17,5 +19,5 @@ def xor(key: int | bytes, data: bytes) -> bytes: :rtype: bytes """ if isinstance(key, int): - key = bytes([key]) - return bytes([a ^ b for a, b in zip(data, cycle(key))]) + key = key.to_bytes(1, byteorder) # generally faster than bytes([key]) + return bytes(starmap(operator.xor, zip(data, cycle(key)))) diff --git a/malduck/string/ops.py b/malduck/string/ops.py index 69fa004..5deabd4 100644 --- a/malduck/string/ops.py +++ b/malduck/string/ops.py @@ -5,6 +5,7 @@ import binascii from base64 import b64decode, b64encode +from sys import byteorder from typing import TYPE_CHECKING, cast if TYPE_CHECKING: @@ -126,7 +127,7 @@ def pad(self, s: bytes, block_size: int) -> 
bytes: if length == block_size: padding = b"" elif self.style == "pkcs7": - padding = bytes([length]) * length + padding = length.to_bytes(1, byteorder) * length elif self.style == "null": padding = b"\x00" * length else: From 98551409f2512e007f768c772aaabe017b26b794 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 18:19:30 +0100 Subject: [PATCH 12/24] Reorder imports Via `isort .` --- malduck/extractor/modules.py | 1 + tests/files/modules/apliebe/apliebe.py | 4 ++-- tests/files/modules/multirules/__init__.py | 2 +- tests/files/modules/unbase64/unbase64.py | 2 +- tests/test_bits.py | 2 +- tests/test_compression.py | 2 +- tests/test_crypto.py | 16 +++++++------- tests/test_extractor.py | 2 +- tests/test_hash.py | 2 +- tests/test_ints.py | 19 +++++++++++++--- tests/test_pe.py | 2 +- tests/test_pmem_regions.py | 1 + tests/test_procmem.py | 13 ++++++++++- tests/test_string.py | 25 ++++++++++++++++++---- tests/test_structure.py | 4 +--- tests/test_yara.py | 2 +- 16 files changed, 70 insertions(+), 29 deletions(-) diff --git a/malduck/extractor/modules.py b/malduck/extractor/modules.py index 71d2a37..d0a2668 100644 --- a/malduck/extractor/modules.py +++ b/malduck/extractor/modules.py @@ -1,4 +1,5 @@ from __future__ import annotations + import importlib.util import logging import os diff --git a/tests/files/modules/apliebe/apliebe.py b/tests/files/modules/apliebe/apliebe.py index b63fce1..f119ddb 100644 --- a/tests/files/modules/apliebe/apliebe.py +++ b/tests/files/modules/apliebe/apliebe.py @@ -1,6 +1,6 @@ -from malduck.extractor import Extractor +from malduck import align_down, procmem, procmempe from malduck.compression.aplib import aPLib -from malduck import procmem, procmempe, align_down +from malduck.extractor import Extractor class aPLiebe(Extractor): diff --git a/tests/files/modules/multirules/__init__.py b/tests/files/modules/multirules/__init__.py index 067062c..4c80333 100644 --- a/tests/files/modules/multirules/__init__.py +++ 
b/tests/files/modules/multirules/__init__.py @@ -1,2 +1,2 @@ -from .multistring import MultiString from .multirule import MultiRule +from .multistring import MultiString diff --git a/tests/files/modules/unbase64/unbase64.py b/tests/files/modules/unbase64/unbase64.py index aa26e8f..ae9f653 100644 --- a/tests/files/modules/unbase64/unbase64.py +++ b/tests/files/modules/unbase64/unbase64.py @@ -1,5 +1,5 @@ -from malduck.extractor import Extractor from malduck import base64, procmempe +from malduck.extractor import Extractor class Unbase64(Extractor): diff --git a/tests/test_bits.py b/tests/test_bits.py index 9207160..8a6f98f 100644 --- a/tests/test_bits.py +++ b/tests/test_bits.py @@ -2,7 +2,7 @@ # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. -from malduck import rol, ror, align, align_down +from malduck import align, align_down, rol, ror def test_rotate(): diff --git a/tests/test_compression.py b/tests/test_compression.py index 1d690b2..f96a730 100644 --- a/tests/test_compression.py +++ b/tests/test_compression.py @@ -4,7 +4,7 @@ import pytest -from malduck import aplib, gzip, base64, lznt1 +from malduck import aplib, base64, gzip, lznt1 def test_aplib(): diff --git a/tests/test_crypto.py b/tests/test_crypto.py index dcc028e..74de2ec 100644 --- a/tests/test_crypto.py +++ b/tests/test_crypto.py @@ -4,19 +4,19 @@ from malduck import ( aes, - camellia, + base64, blowfish, + camellia, + chacha20, des3, + p8, + rabbit, rc4, rsa, - xor, - base64, - unhex, - rabbit, - p8, - serpent, - chacha20, salsa20, + serpent, + unhex, + xor, ) diff --git a/tests/test_extractor.py b/tests/test_extractor.py index ba2f536..7430ada 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -1,7 +1,7 @@ import os from malduck import procmem, procmempe -from malduck.extractor import ExtractorModules, ExtractManager +from malduck.extractor import ExtractManager, ExtractorModules def test_scan_base64(): diff 
--git a/tests/test_hash.py b/tests/test_hash.py index bf9b2eb..f98d131 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -2,7 +2,7 @@ # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. -from malduck import enhex, crc32, md5, sha1, sha224, sha256, sha384, sha512 +from malduck import crc32, enhex, md5, sha1, sha224, sha256, sha384, sha512 def test_hash(): diff --git a/tests/test_ints.py b/tests/test_ints.py index b72a836..abe5840 100644 --- a/tests/test_ints.py +++ b/tests/test_ints.py @@ -1,7 +1,20 @@ from malduck import ( - Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, - u8, u16, u32, u64, - p8, p16, p32, p64 + Int8, + Int16, + Int32, + Int64, + UInt8, + UInt16, + UInt32, + UInt64, + p8, + p16, + p32, + p64, + u8, + u16, + u32, + u64, ) diff --git a/tests/test_pe.py b/tests/test_pe.py index 26b81bc..c6f993b 100644 --- a/tests/test_pe.py +++ b/tests/test_pe.py @@ -3,7 +3,7 @@ # See the file 'docs/LICENSE.txt' for copying permission. 
-from malduck import pe, base64 +from malduck import base64, pe def test_pe_header(): diff --git a/tests/test_pmem_regions.py b/tests/test_pmem_regions.py index 5141792..58df289 100644 --- a/tests/test_pmem_regions.py +++ b/tests/test_pmem_regions.py @@ -1,4 +1,5 @@ import pytest + from malduck import procmem from malduck.procmem import Region diff --git a/tests/test_procmem.py b/tests/test_procmem.py index 699a4e0..d4cfefc 100644 --- a/tests/test_procmem.py +++ b/tests/test_procmem.py @@ -5,9 +5,20 @@ import os import struct import tempfile + import pytest -from malduck import procmem, procmempe, procmemdnpe, cuckoomem, pad, pe, insn, PAGE_READWRITE, enhex +from malduck import ( + PAGE_READWRITE, + cuckoomem, + enhex, + insn, + pad, + pe, + procmem, + procmemdnpe, + procmempe, +) from malduck.procmem import Region diff --git a/tests/test_string.py b/tests/test_string.py index 5ddfac9..601beaa 100644 --- a/tests/test_string.py +++ b/tests/test_string.py @@ -5,10 +5,27 @@ import pytest from malduck import ( - uint8, uint16, uint32, uint64, bigint, - p8, p16, p32, p64, - asciiz, pad, unpad, ipv4, pack, unpack, enhex, unhex, base64, uleb128, - chunks, utf16z + asciiz, + base64, + bigint, + chunks, + enhex, + ipv4, + p8, + p16, + p32, + p64, + pack, + pad, + uint8, + uint16, + uint32, + uint64, + uleb128, + unhex, + unpack, + unpad, + utf16z, ) diff --git a/tests/test_structure.py b/tests/test_structure.py index 82f5936..dc2d73b 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -2,9 +2,7 @@ # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. 
-from malduck import ( - Structure, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64 -) +from malduck import Int8, Int16, Int32, Int64, Structure, UInt8, UInt16, UInt32, UInt64 def test_structure(): diff --git a/tests/test_yara.py b/tests/test_yara.py index aafb524..102e692 100644 --- a/tests/test_yara.py +++ b/tests/test_yara.py @@ -1,7 +1,7 @@ import os from malduck import Yara, YaraString -from malduck.procmem import Region, ProcessMemory +from malduck.procmem import ProcessMemory, Region def test_yara_match(): From 30de80243973a5a984e29a2e660a7a9115028091 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 18:23:12 +0100 Subject: [PATCH 13/24] Remove constructor calls in no-message exceptions --- malduck/ints.py | 2 +- malduck/procmem/binmem.py | 8 ++++---- malduck/procmem/procmemelf.py | 2 +- malduck/yara.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/malduck/ints.py b/malduck/ints.py index 2e1f365..81d7457 100644 --- a/malduck/ints.py +++ b/malduck/ints.py @@ -49,7 +49,7 @@ def unpack( offset: int = 0, foxed: bool = False, ) -> tuple[T, ...] 
| int | None: - raise NotImplementedError() + raise NotImplementedError class MetaIntType(type): diff --git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py index aa0ac5b..907aaaa 100644 --- a/malduck/procmem/binmem.py +++ b/malduck/procmem/binmem.py @@ -43,7 +43,7 @@ def _reload_as_image(self) -> None: """ Load executable file embedded in ProcessMemory like native loader does """ - raise NotImplementedError() + raise NotImplementedError @property def image(self: T) -> T | None: @@ -71,7 +71,7 @@ def is_valid(self) -> bool: """ Checks whether imgbase is pointing at valid binary header """ - raise NotImplementedError() + raise NotImplementedError @classmethod def load_binaries_from_memory(cls: type[T], procmem: ProcessMemory) -> Iterator[T]: @@ -87,7 +87,7 @@ def load_binaries_from_memory(cls: type[T], procmem: ProcessMemory) -> Iterator[ if memory-aligned version was also "valid". """ if cls.__magic__ is None: - raise NotImplementedError() + raise NotImplementedError for binary_va in procmem.findv(cls.__magic__): binary_procmem_dmp = cls.from_memory(procmem, base=binary_va) if binary_procmem_dmp.is_valid(): @@ -102,7 +102,7 @@ def is_image_loaded_as_memdump(self) -> bool: Uses some heuristics to deduce whether contents can be loaded with `image=True`. 
Used by `detect_image` """ - raise NotImplementedError() + raise NotImplementedError def __repr__(self): return ":".join(( diff --git a/malduck/procmem/procmemelf.py b/malduck/procmem/procmemelf.py index c4939fd..399b3e5 100644 --- a/malduck/procmem/procmemelf.py +++ b/malduck/procmem/procmemelf.py @@ -107,7 +107,7 @@ def elf(self) -> elftools.elf.elffile.ELFFile: return self._elf def is_image_loaded_as_memdump(self): - raise NotImplementedError() + raise NotImplementedError @property def imgend(self) -> int: diff --git a/malduck/yara.py b/malduck/yara.py index 57afcd9..7f536f7 100644 --- a/malduck/yara.py +++ b/malduck/yara.py @@ -65,7 +65,7 @@ def __getattr__(self, item): try: return self[item] except IndexError: - raise AttributeError() + raise AttributeError class Yara: From 64187cd4ca16635b4408a72824467c43e534559c Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 18:40:08 +0100 Subject: [PATCH 14/24] Optimize `is_config_better()` --- malduck/extractor/config_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/malduck/extractor/config_utils.py b/malduck/extractor/config_utils.py index c8c3719..ba3a32c 100644 --- a/malduck/extractor/config_utils.py +++ b/malduck/extractor/config_utils.py @@ -20,9 +20,10 @@ def is_config_better(base_config: Config, new_config: Config) -> bool: Checks whether new config looks more reliable than base. Currently just checking the amount of non-empty keys. 
""" - base = [(k, v) for k, v in base_config.items() if v] - new = [(k, v) for k, v in new_config.items() if v] - return len(new) > len(base) + return ( + len(tuple(filter(None, new_config.values()))) + > len(tuple(filter(None, base_config.values()))) + ) def encode_for_json(data: Any) -> Any: From 9316ccc1bf9ed0d7f99c547e6a85a30fe6d43afa Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 18:46:18 +0100 Subject: [PATCH 15/24] Prefer on-demand tuples to lists --- malduck/extractor/config_utils.py | 2 +- malduck/pe.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/malduck/extractor/config_utils.py b/malduck/extractor/config_utils.py index ba3a32c..0de7f32 100644 --- a/malduck/extractor/config_utils.py +++ b/malduck/extractor/config_utils.py @@ -44,7 +44,7 @@ def sanitize_config(config: Config) -> Config: :param config: Configuration to sanitize :return: Sanitized configuration """ - return {k: v for k, v in config.items() if v in [0, False] or v} + return {k: v for k, v in config.items() if v == 0 or v} def apply_config_part(base_config: Config, new_config_part: Config) -> Config: diff --git a/malduck/pe.py b/malduck/pe.py index 1208f3e..ad5632e 100644 --- a/malduck/pe.py +++ b/malduck/pe.py @@ -261,7 +261,7 @@ def validate_padding(self) -> bool: # Probably fixpe'd - seems to be ok return True return not all( - b in [0, "\0"] + b in (0, "\0") for b in self.pe.__data__[ section_start_offs : section_start_offs + data_len ] From 3e1b0a77c3314662a60cb1a4ac3f4f54595ca987 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 18:48:28 +0100 Subject: [PATCH 16/24] Use inplace addition in `apply_config_part()` --- malduck/extractor/config_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/malduck/extractor/config_utils.py b/malduck/extractor/config_utils.py index 0de7f32..82c85dd 100644 --- a/malduck/extractor/config_utils.py +++ b/malduck/extractor/config_utils.py @@ -64,7 +64,7 @@ def 
apply_config_part(base_config: Config, new_config_part: Config) -> Config: elif isinstance(config[k], list): for el in v: if el not in config[k]: - config[k] = config[k] + [el] + config[k] += [el] else: raise RuntimeError( f"Extractor tries to override '{config[k]}' " From ac2d30a8d2fa0f74da9b7b770e00a5f79cc19a14 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 18:51:41 +0100 Subject: [PATCH 17/24] Use `attrgetter` in `ExtractorModules.__init__()` --- malduck/extractor/modules.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/malduck/extractor/modules.py b/malduck/extractor/modules.py index d0a2668..b66cdbd 100644 --- a/malduck/extractor/modules.py +++ b/malduck/extractor/modules.py @@ -8,6 +8,7 @@ import warnings from collections import defaultdict from importlib.abc import FileLoader, PathEntryFinder +from operator import attrgetter from typing import TYPE_CHECKING, cast from ..yara import Yara @@ -41,7 +42,7 @@ def __init__(self, modules_path: str | None = None) -> None: loaded_modules = load_modules(modules_path, onerror=self.on_error) self.extractors: list[type[Extractor]] = Extractor.__subclasses__() - loaded_extractors = [x.__module__ for x in self.extractors] + [*loaded_extractors] = map(attrgetter("__module__"), self.extractors) for module in loaded_modules.values(): module_name = module.__name__ From 1c5fc55fe290ec9f5de62756654d5aafab8ce1b7 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 18:56:30 +0100 Subject: [PATCH 18/24] Use `zip()` in `IDAVM.__setitem__()` --- malduck/procmem/idamem.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/malduck/procmem/idamem.py b/malduck/procmem/idamem.py index f7d1d94..ef721bc 100644 --- a/malduck/procmem/idamem.py +++ b/malduck/procmem/idamem.py @@ -33,13 +33,9 @@ def _get_ea_range(self, item): yield (ea_start, ea_end) def __setitem__(self, item, value): - value_bytes = iter(value) for ea_start, ea_end in self._get_ea_range(item): - for ea in 
range(ea_start, ea_end): - try: - ida_bytes.patch_byte(ea, next(value_bytes)) - except StopIteration: - return + for ea, byte in zip(range(ea_start, ea_end), value): + ida_bytes.patch_byte(ea, byte) def __getitem__(self, item): data = [] From 63c3cef06c099729a8e8000480826f5f3909cea7 Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 19:00:33 +0100 Subject: [PATCH 19/24] Use unpack operator in `ProcessMemoryPE.store()` --- malduck/procmem/procmempe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/malduck/procmem/procmempe.py b/malduck/procmem/procmempe.py index 1a0a533..22582c1 100644 --- a/malduck/procmem/procmempe.py +++ b/malduck/procmem/procmempe.py @@ -203,7 +203,7 @@ def store(self) -> bytes: pe.optional_header.ImageBase = self.imgbase # Generate header data - pe_data = b"".join([bytes(pe.pe.write())] + data) + pe_data = b"".join((bytes(pe.pe.write()), *data)) # Return PE file data return pe_data From fb62db06707031880821ca2105814b1bbf1e1c8c Mon Sep 17 00:00:00 2001 From: bswck Date: Thu, 23 Nov 2023 19:06:11 +0100 Subject: [PATCH 20/24] Reformat with black --- malduck/extractor/config_utils.py | 5 ++--- malduck/extractor/extract_manager.py | 2 +- malduck/procmem/binmem.py | 12 +++++++----- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/malduck/extractor/config_utils.py b/malduck/extractor/config_utils.py index 82c85dd..9bc1165 100644 --- a/malduck/extractor/config_utils.py +++ b/malduck/extractor/config_utils.py @@ -20,9 +20,8 @@ def is_config_better(base_config: Config, new_config: Config) -> bool: Checks whether new config looks more reliable than base. Currently just checking the amount of non-empty keys. 
""" - return ( - len(tuple(filter(None, new_config.values()))) - > len(tuple(filter(None, base_config.values()))) + return len(tuple(filter(None, new_config.values()))) > len( + tuple(filter(None, base_config.values())) ) diff --git a/malduck/extractor/extract_manager.py b/malduck/extractor/extract_manager.py index edddb84..aa15e34 100644 --- a/malduck/extractor/extract_manager.py +++ b/malduck/extractor/extract_manager.py @@ -297,7 +297,7 @@ def push_procmem( if type(extractor.yara_rules) is str: raise TypeError( f'"{type(extractor).__name__}.yara_rules" cannot be a string, ' - 'convert it into a list of strings', + "convert it into a list of strings", ) # For each rule identifier in extractor.yara_rules... diff --git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py index 907aaaa..17a5860 100644 --- a/malduck/procmem/binmem.py +++ b/malduck/procmem/binmem.py @@ -105,8 +105,10 @@ def is_image_loaded_as_memdump(self) -> bool: raise NotImplementedError def __repr__(self): - return ":".join(( - type(self).__name__, - "IMG" if self.is_image else "DMP", - f"{self.imgbase:x}" - )) + return ":".join( + ( + type(self).__name__, + "IMG" if self.is_image else "DMP", + f"{self.imgbase:x}", + ) + ) From 7275edd03777a138be9cee5e403b63dac1c929ad Mon Sep 17 00:00:00 2001 From: bswck Date: Fri, 24 Nov 2023 00:00:24 +0100 Subject: [PATCH 21/24] Make some imports typing-only --- malduck/crypto/winhdr.py | 2 +- malduck/disasm.py | 4 ++-- malduck/extractor/extract_manager.py | 6 +++--- malduck/procmem/binmem.py | 2 +- malduck/procmem/procmemdnpe.py | 7 +++++-- malduck/procmem/procmemelf.py | 5 ++++- malduck/procmem/procmempe.py | 5 ++++- 7 files changed, 20 insertions(+), 11 deletions(-) diff --git a/malduck/crypto/winhdr.py b/malduck/crypto/winhdr.py index 81078b4..5c8fae5 100644 --- a/malduck/crypto/winhdr.py +++ b/malduck/crypto/winhdr.py @@ -3,13 +3,13 @@ # See the file 'docs/LICENSE.txt' for copying permission. 
from __future__ import annotations -import io from typing import TYPE_CHECKING from ..ints import UInt8, UInt16, UInt32 from ..structure import Structure if TYPE_CHECKING: + import io from typing import Any diff --git a/malduck/disasm.py b/malduck/disasm.py index 1360afb..e8b26a1 100644 --- a/malduck/disasm.py +++ b/malduck/disasm.py @@ -7,10 +7,10 @@ import collections from typing import TYPE_CHECKING -from capstone import CsInsn -from capstone.x86 import X86Op if TYPE_CHECKING: + from capstone import CsInsn + from capstone.x86 import X86Op from collections.abc import Iterator from typing import Any diff --git a/malduck/extractor/extract_manager.py b/malduck/extractor/extract_manager.py index aa15e34..0508a7b 100644 --- a/malduck/extractor/extract_manager.py +++ b/malduck/extractor/extract_manager.py @@ -6,7 +6,6 @@ from typing import TYPE_CHECKING from ..procmem import ProcessMemory, ProcessMemoryELF, ProcessMemoryPE -from ..procmem.binmem import ProcessMemoryBinary from ..yara import Yara, YaraRuleOffsets, YaraRulesetMatch from .config_utils import ( apply_config_part, @@ -14,10 +13,11 @@ is_config_better, sanitize_config, ) -from .extractor import Extractor -from .modules import ExtractorModules if TYPE_CHECKING: + from .modules import ExtractorModules + from .extractor import Extractor + from ..procmem.binmem import ProcessMemoryBinary from typing import Any, Optional from .config_utils import Config diff --git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py index 17a5860..b1634c8 100644 --- a/malduck/procmem/binmem.py +++ b/malduck/procmem/binmem.py @@ -5,9 +5,9 @@ from typing import TYPE_CHECKING, TypeVar from .procmem import ProcessMemory, ProcessMemoryBuffer -from .region import Region if TYPE_CHECKING: + from .region import Region from collections.abc import Iterator log = logging.getLogger(__name__) diff --git a/malduck/procmem/procmemdnpe.py b/malduck/procmem/procmemdnpe.py index 5c02a9e..1d0c96a 100644 --- a/malduck/procmem/procmemdnpe.py +++ 
b/malduck/procmem/procmemdnpe.py @@ -1,9 +1,12 @@ from __future__ import annotations from ..dnpe import DnPE -from .binmem import ProcessMemoryBuffer from .procmempe import ProcessMemoryPE -from .region import Region +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .region import Region + from .binmem import ProcessMemoryBuffer __all__ = ["ProcessMemoryDnPE", "procmemdnpe"] diff --git a/malduck/procmem/procmemelf.py b/malduck/procmem/procmemelf.py index 399b3e5..a45a622 100644 --- a/malduck/procmem/procmemelf.py +++ b/malduck/procmem/procmemelf.py @@ -6,8 +6,11 @@ import elftools.elf.elffile from .binmem import ProcessMemoryBinary -from .procmem import ProcessMemoryBuffer from .region import Region +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .procmem import ProcessMemoryBuffer __all__ = ["ProcessMemoryELF", "procmemelf"] diff --git a/malduck/procmem/procmempe.py b/malduck/procmem/procmempe.py index 22582c1..0e52410 100644 --- a/malduck/procmem/procmempe.py +++ b/malduck/procmem/procmempe.py @@ -3,8 +3,11 @@ from ..bits import align from ..pe import PE from .binmem import ProcessMemoryBinary -from .procmem import ProcessMemoryBuffer from .region import Region +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .procmem import ProcessMemoryBuffer __all__ = ["ProcessMemoryPE", "procmempe"] From aa3b6ae7bf5bddad17f8c92ba5daeb591bf4138b Mon Sep 17 00:00:00 2001 From: bswck Date: Fri, 24 Nov 2023 00:02:16 +0100 Subject: [PATCH 22/24] Remove unused pytest imports --- tests/test_compression.py | 2 -- tests/test_string.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/tests/test_compression.py b/tests/test_compression.py index f96a730..e85e85a 100644 --- a/tests/test_compression.py +++ b/tests/test_compression.py @@ -2,8 +2,6 @@ # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. 
-import pytest - from malduck import aplib, base64, gzip, lznt1 diff --git a/tests/test_string.py b/tests/test_string.py index 601beaa..41946ff 100644 --- a/tests/test_string.py +++ b/tests/test_string.py @@ -2,8 +2,6 @@ # This file is part of Roach - https://github.com/jbremer/roach. # See the file 'docs/LICENSE.txt' for copying permission. -import pytest - from malduck import ( asciiz, base64, From e683b5666dee9b07961d35678d626b1eb0c6a490 Mon Sep 17 00:00:00 2001 From: bswck Date: Fri, 24 Nov 2023 00:12:47 +0100 Subject: [PATCH 23/24] Use r-value list unpacking in `ExtractorModules()` Credit to @trag1c for finding this pure gold --- malduck/extractor/modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/malduck/extractor/modules.py b/malduck/extractor/modules.py index b66cdbd..72f3a2f 100644 --- a/malduck/extractor/modules.py +++ b/malduck/extractor/modules.py @@ -42,7 +42,7 @@ def __init__(self, modules_path: str | None = None) -> None: loaded_modules = load_modules(modules_path, onerror=self.on_error) self.extractors: list[type[Extractor]] = Extractor.__subclasses__() - [*loaded_extractors] = map(attrgetter("__module__"), self.extractors) + loaded_extractors = [*map(attrgetter("__module__"), self.extractors)] for module in loaded_modules.values(): module_name = module.__name__ From 1b93874ed5a918ba714c44d4e95bd3f2d29aba1b Mon Sep 17 00:00:00 2001 From: bswck Date: Fri, 24 Nov 2023 11:02:46 +0100 Subject: [PATCH 24/24] Sort imports --- malduck/disasm.py | 6 +++--- malduck/extractor/extract_manager.py | 6 +++--- malduck/procmem/binmem.py | 3 ++- malduck/procmem/procmemdnpe.py | 5 +++-- malduck/procmem/procmemelf.py | 2 +- malduck/procmem/procmempe.py | 3 ++- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/malduck/disasm.py b/malduck/disasm.py index e8b26a1..3af2703 100644 --- a/malduck/disasm.py +++ b/malduck/disasm.py @@ -7,13 +7,13 @@ import collections from typing import TYPE_CHECKING - if TYPE_CHECKING: - from capstone 
import CsInsn - from capstone.x86 import X86Op from collections.abc import Iterator from typing import Any + from capstone import CsInsn + from capstone.x86 import X86Op + __all__ = ["disasm", "insn", "Disassemble", "Instruction", "Operand", "Memory"] Memory = collections.namedtuple("Memory", ("size", "base", "scale", "index", "disp")) diff --git a/malduck/extractor/extract_manager.py b/malduck/extractor/extract_manager.py index 0508a7b..31b680c 100644 --- a/malduck/extractor/extract_manager.py +++ b/malduck/extractor/extract_manager.py @@ -15,12 +15,12 @@ ) if TYPE_CHECKING: - from .modules import ExtractorModules - from .extractor import Extractor - from ..procmem.binmem import ProcessMemoryBinary from typing import Any, Optional + from ..procmem.binmem import ProcessMemoryBinary from .config_utils import Config + from .extractor import Extractor + from .modules import ExtractorModules log = logging.getLogger(__name__) diff --git a/malduck/procmem/binmem.py b/malduck/procmem/binmem.py index b1634c8..093f735 100644 --- a/malduck/procmem/binmem.py +++ b/malduck/procmem/binmem.py @@ -7,9 +7,10 @@ from .procmem import ProcessMemory, ProcessMemoryBuffer if TYPE_CHECKING: - from .region import Region from collections.abc import Iterator + from .region import Region + log = logging.getLogger(__name__) T = TypeVar("T", bound="ProcessMemoryBinary") diff --git a/malduck/procmem/procmemdnpe.py b/malduck/procmem/procmemdnpe.py index 1d0c96a..2ced756 100644 --- a/malduck/procmem/procmemdnpe.py +++ b/malduck/procmem/procmemdnpe.py @@ -1,12 +1,13 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from ..dnpe import DnPE from .procmempe import ProcessMemoryPE -from typing import TYPE_CHECKING if TYPE_CHECKING: - from .region import Region from .binmem import ProcessMemoryBuffer + from .region import Region __all__ = ["ProcessMemoryDnPE", "procmemdnpe"] diff --git a/malduck/procmem/procmemelf.py b/malduck/procmem/procmemelf.py index a45a622..6fe182e 100644 
--- a/malduck/procmem/procmemelf.py +++ b/malduck/procmem/procmemelf.py @@ -1,13 +1,13 @@ from __future__ import annotations import io +from typing import TYPE_CHECKING import elftools import elftools.elf.elffile from .binmem import ProcessMemoryBinary from .region import Region -from typing import TYPE_CHECKING if TYPE_CHECKING: from .procmem import ProcessMemoryBuffer diff --git a/malduck/procmem/procmempe.py b/malduck/procmem/procmempe.py index 0e52410..a808fd2 100644 --- a/malduck/procmem/procmempe.py +++ b/malduck/procmem/procmempe.py @@ -1,10 +1,11 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from ..bits import align from ..pe import PE from .binmem import ProcessMemoryBinary from .region import Region -from typing import TYPE_CHECKING if TYPE_CHECKING: from .procmem import ProcessMemoryBuffer