Skip to content

Commit

Permalink
Merge branch 'main' into issue-426-_TypeError_expressions_py_189_Type…
Browse files Browse the repository at this point in the history
…Error_unhashable_type_dict
  • Loading branch information
ebehner authored Nov 13, 2024
2 parents b0bf390 + 43f49d1 commit 71e2e95
Show file tree
Hide file tree
Showing 17 changed files with 843 additions and 39 deletions.
5 changes: 4 additions & 1 deletion decompiler/backend/codegenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from decompiler.backend.variabledeclarations import GlobalDeclarationGenerator, LocalDeclarationGenerator
from decompiler.task import DecompilerTask

FAIL_MESSAGE = "Decompilation Failed!\n"


class CodeGenerator:
"""Class in charge of emitting C-code from pseudo code."""
Expand Down Expand Up @@ -53,7 +55,8 @@ def generate_function(self, task: DecompilerTask) -> str:
@staticmethod
def generate_failure_message(task: DecompilerTask):
"""Returns the message to be shown for a failed task."""
msg = f"Failed to decompile {task.name}"
msg = FAIL_MESSAGE
msg += f"Failed to decompile {task.name}"
if origin := task.failure_origin: # checks if the string is empty (should never be None when this method is called)
msg += f" due to error during {origin}."
return msg
25 changes: 21 additions & 4 deletions decompiler/frontend/binaryninja/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from __future__ import annotations

import logging
from typing import List

import binaryninja
from binaryninja import BinaryView
from binaryninja.types import SymbolType
from decompiler.frontend.binaryninja.rust_string_detection import RustStringDetection
from decompiler.task import DecompilerTask
from decompiler.util.options import Options

Expand Down Expand Up @@ -39,6 +41,7 @@ class BinaryninjaFrontend(Frontend):
def __init__(self, bv: BinaryView):
"""Create a new binaryninja view with the given path."""
self._bv = bv if type(bv) == BinaryView else bv.getCurrentFunction().view
self._tagging = CompilerIdiomsTagging(self._bv)

@classmethod
def from_path(cls, path: str, options: Options):
Expand Down Expand Up @@ -67,21 +70,35 @@ def lift(self, task: DecompilerTask):
function = self._get_binninja_function(task.function_identifier)
lifter, parser = self._create_lifter_parser(task.options)

rust_string_detection = RustStringDetection(self._bv, task.options)
rust_string_detection.run()

task.function_return_type = lifter.lift(function.return_type)
task.function_parameters = [lifter.lift(param_type) for param_type in function.type.parameters]

tagging = CompilerIdiomsTagging(self._bv, function.start, task.options)
tagging.run()
self._tagging.run(function, task.options)

task.cfg = parser.parse(function)
task.function_parameter_locations = self._parameter_locations(function)
task.complex_types = parser.complex_types
except Exception as e:
task.fail("Function lifting")
logging.exception(f"Failed to decompile {task.name}, error during function lifting")
task.fail("Function lifting", e)

if task.options.getboolean("pipeline.debug", fallback=False):
raise e

def _parameter_locations(self, function: binaryninja.function.Function) -> List[str | None]:
"""
For a given Binary Ninja Function, this method returns a list of its parameters' locations in the correct order.
E.g. if the first parameter is stored in r14, the first entry in the returned list will be 'r14'.
"""
raw_parameters = function.type.parameters
parameter_locations = []
for parameter in raw_parameters:
name = parameter.location.name if parameter.location is not None else None
parameter_locations.append(name)
return parameter_locations

def get_all_function_names(self):
"""Returns the entire list of all function names in the binary. Ignores blacklisted functions and imported functions."""
functions = list()
Expand Down
15 changes: 11 additions & 4 deletions decompiler/frontend/binaryninja/handlers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
from decompiler.frontend.lifter import Handler
from decompiler.structures.pseudo import (
Constant,
CustomType,
FunctionSymbol,
GlobalVariable,
Integer,
NotUseableConstant,
OperationType,
Pointer,
Symbol,
UnaryOperation,
)
Expand Down Expand Up @@ -61,10 +60,18 @@ def lift_constant_pointer(self, pointer: mediumlevelil.MediumLevelILConstPtr, **
res = self._lifter.lift(variable, view=view, parent=pointer)

elif (symbol := view.get_symbol_at(pointer.constant)) and symbol.type != SymbolType.DataSymbol:
return self._lifter.lift(symbol)
if isinstance(result := self._lifter.lift(symbol), FunctionSymbol):
try:
result.can_return = view.get_function_at(pointer.constant).can_return.value
return result
except Exception:
pass
return result

elif function := view.get_function_at(pointer.constant):
return self._lifter.lift(function.symbol)
if isinstance(result := self._lifter.lift(function.symbol), FunctionSymbol):
result.can_return = function.can_return.value
return result

else:
res = self._lifter.lift(DataVariable(view, pointer.constant, Type.void(), False), view=view, parent=pointer)
Expand Down
63 changes: 63 additions & 0 deletions decompiler/frontend/binaryninja/rust_string_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import logging
import sys

from binaryninja import BinaryView
from decompiler.util.options import Options


class RustStringDetection:
    """
    This 'stage' detects certain Rust strings (string slices), which are struct based strings.
    It requires the RustStringSlicer. A path to the tool needs to be configured via the options.
    The stage is executed before lifting, as it uses the Binary Ninja API to identify string slices
    and 'mark' them, by assigning the appropriate type.
    It can be configured to run always, never, or for Rust binaries only.
    """

    # Byte sequences whose presence anywhere in the binary is taken as a hint for a Rust binary.
    _RUST_MARKERS = (b"rustc", b"cargo")

    def __init__(self, binary_view: BinaryView, options: Options):
        """
        Store the binary view and read the stage configuration from the options.

        :param binary_view: the Binary Ninja view that is searched and annotated.
        :param options: decompiler options; every key read here falls back to False / "" if unset.
        """
        self._bv = binary_view
        self._enabled = options.getboolean("rust-string-detection.enabled", fallback=False)
        self._rust_binaries_only = options.getboolean("rust-string-detection.rust_binaries_only", fallback=False)
        self._string_slicer_path = options.getstring("rust-string-detection.string_slicer_path", fallback="")
        self._debug_submodules = options.getboolean("logging.debug-submodules", fallback=False)

    def is_rust_binary(self) -> bool:
        """
        Simple heuristic to determine whether the binary is a Rust binary.

        Returns True as soon as one of the marker byte sequences is found between the start and
        the end of the binary view, False if none of them occurs.
        """
        for marker in self._RUST_MARKERS:
            # Only the existence of a first hit matters, so stop consuming the search results immediately.
            for _ in self._bv.find_all_data(self._bv.start, self._bv.end, marker):
                return True
        return False

    def run(self):
        """
        Runs the Rust String Slicer, if the required conditions are met.
        String Slicer's path will be added to Python's path before importing the module.

        Nothing is done if the stage is disabled, or if it is restricted to Rust binaries and the
        heuristic does not recognize the binary as one. Failures of the tool are only logged,
        unless submodule debugging is enabled, in which case a RuntimeError is raised.
        """
        if not self._enabled:
            logging.info("Rust String Slicer not executed")
            return

        if self._rust_binaries_only and not self.is_rust_binary():
            logging.info("Rust String Slicer not executed: Not a Rust Binary")
            return

        logging.info("Starting Rust String Slicer")
        try:
            # The stage may run many times per process, so register the path only once.
            # An unset (empty) path is skipped: an empty sys.path entry would mean the current working directory.
            if self._string_slicer_path and self._string_slicer_path not in sys.path:
                sys.path.append(self._string_slicer_path)
            from rust_string_slicer.binja_plugin.actions import RecoverStringFromReadOnlyDataTask, RustStringSlice

            if not RustStringSlice.check_binary_ninja_type_exists(self._bv):
                RustStringSlice.create_binary_ninja_type(self._bv)
            RecoverStringFromReadOnlyDataTask(bv=self._bv).run()

        except Exception as e:
            if self._debug_submodules:
                # Chain explicitly so the original traceback stays attached as the cause.
                raise RuntimeError(e) from e
            logging.warning("Rust String Slicer failed. Please check if the tool is installed and the path is set correctly!")
            # Keep the actual error available for troubleshooting without changing the warning itself.
            logging.debug("Rust String Slicer error details", exc_info=True)
            return
21 changes: 13 additions & 8 deletions decompiler/frontend/binaryninja/tagging.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging

import binaryninja.function
from binaryninja import BinaryView
from compiler_idioms.disassembly.smda_disassembly import SMDADisassembly
from compiler_idioms.matcher import Matcher
from decompiler.util.options import Options

Expand All @@ -11,23 +13,26 @@ class CompilerIdiomsTagging:
TAG_SYMBOL = "⚙"
TAG_PREFIX = "compiler_idiom: "

def __init__(self, binary_view: BinaryView, start: int, options: Options):
def __init__(self, binary_view: BinaryView):
self._bv = binary_view
self._function_start = start
self._enabled = options.getboolean("compiler-idioms-tagging.enabled", fallback=True)
self._debug_submodules = options.getboolean("logging.debug-submodules")
self._disassembly = SMDADisassembly(self._bv.file.filename)
self._matcher = Matcher()

def run(self):
def run(self, function: binaryninja.function.Function, options: Options):
"""
Matches idioms in the function (disassembly) currently being decompiled.
For each found match creates a tag that contains info for original computation reconstruction.
"""
if not self._enabled:
enabled = options.getboolean("compiler-idioms-tagging.enabled", fallback=True)
debug_submodules = options.getboolean("logging.debug-submodules")

if not enabled:
return
try:
matches = Matcher().find_idioms_in_function(self._bv.file.filename, self._function_start)
instructions = self._disassembly.get_smda_function_at(function.start)
matches = self._matcher._match_single_function(instructions)
except Exception as e:
if self._debug_submodules:
if debug_submodules:
raise RuntimeError(e)
logging.warning("Compiler idioms matching failed, continue without compiler idioms.")
return
Expand Down
24 changes: 13 additions & 11 deletions decompiler/pipeline/expressions/bitfieldcomparisonunrolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ def _get_folded_case(self, block: BasicBlock) -> Optional[FoldedCase]:
if not isinstance(branch_instruction := block[-1], Branch):
return None
match branch_instruction.condition:
case Condition(OperationType.equal, subexpr, Constant(value=0x0)):
case Condition(operation=OperationType.equal, left=subexpr, right=Constant(value=0x0)):
edge_type_to_case_node = FalseCase
case Condition(OperationType.not_equal, subexpr, Constant(value=0x0)):
case Condition(operation=OperationType.not_equal, left=subexpr, right=Constant(value=0x0)):
edge_type_to_case_node = TrueCase
case Condition(OperationType.equal, Constant(value=0x0), subexpr):
case Condition(operation=OperationType.equal, left=Constant(value=0x0), right=subexpr):
edge_type_to_case_node = FalseCase
case Condition(OperationType.not_equal, Constant(value=0x0), subexpr):
case Condition(operation=OperationType.not_equal, left=Constant(value=0x0), right=subexpr):
edge_type_to_case_node = TrueCase
case _:
return None
Expand All @@ -132,17 +132,19 @@ def _get_switch_var_and_bitfield(self, subexpr: Expression) -> Optional[Tuple[Ex
"""
match subexpr:
case BinaryOperation(
OperationType.bitwise_and,
BinaryOperation(
OperationType.bitwise_and, BinaryOperation(OperationType.left_shift, Constant(value=1), switch_var), Constant()
operation=OperationType.bitwise_and,
left=BinaryOperation(
operation=OperationType.bitwise_and,
left=BinaryOperation(operation=OperationType.left_shift, left=Constant(value=1), right=switch_var),
right=Constant(),
),
Constant() as bit_field,
right=Constant() as bit_field,
) if bit_field.value != 0xFFFFFFFF:
return switch_var, bit_field
case BinaryOperation(
OperationType.bitwise_and,
BinaryOperation(OperationType.left_shift, Constant(value=1), switch_var),
Constant() as bit_field,
operation=OperationType.bitwise_and,
left=BinaryOperation(operation=OperationType.left_shift, left=Constant(value=1), right=switch_var),
right=Constant() as bit_field,
) if bit_field.value != 0xFFFFFFFF:
return switch_var, bit_field
case _:
Expand Down
9 changes: 6 additions & 3 deletions decompiler/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

from logging import debug, error, warning
from logging import debug, warning
from typing import List

from decompiler.pipeline.controlflowanalysis.restructuring import PatternIndependentRestructuring
Expand All @@ -13,6 +13,8 @@
MemPhiConverter,
PhiFunctionFixer,
RegisterPairHandling,
RemoveGoPrologue,
RemoveNoreturnBoilerplate,
RemoveStackCanary,
SwitchVariableDetection,
)
Expand All @@ -28,7 +30,9 @@

PREPROCESSING_STAGES = [
CompilerIdiomHandling,
RemoveGoPrologue,
RemoveStackCanary,
RemoveNoreturnBoilerplate,
RegisterPairHandling,
Coherence,
SwitchVariableDetection,
Expand Down Expand Up @@ -108,8 +112,7 @@ def run(self, task: DecompilerTask):
if show_all or stage.name in showed_stages:
self._show_stage(task, f"After {stage.name}", print_ascii, show_in_tabs)
except Exception as e:
task.fail(origin=stage.name)
error(f"Failed to decompile {task.name}, error during stage {stage.name}: {e}")
task.fail(origin=stage.name, exception=e)
if debug_mode:
raise e
break
Expand Down
2 changes: 2 additions & 0 deletions decompiler/pipeline/preprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@
from .missing_definitions import InsertMissingDefinitions
from .phi_predecessors import PhiFunctionFixer
from .register_pair_handling import RegisterPairHandling
from .remove_go_prologue import RemoveGoPrologue
from .remove_noreturn_boilerplate import RemoveNoreturnBoilerplate
from .remove_stack_canary import RemoveStackCanary
from .switch_variable_detection import BackwardSliceSwitchVariableDetection as SwitchVariableDetection
Loading

0 comments on commit 71e2e95

Please sign in to comment.