Skip to content

Commit

Permalink
add AppleDouble Kaitai plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
WJ-NFI committed Aug 5, 2024
1 parent 016781e commit c379d77
Show file tree
Hide file tree
Showing 14 changed files with 508 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test-plugins.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ jobs:
python -m pip install --upgrade pip
pip install tox pip-tools
- name: Test example plugin Python AppleDouble Kaitai
run: cd python/appledoublekaitai && tox

- name: Test example plugin Python chat
run: cd python/chat && tox
Expand Down
1 change: 1 addition & 0 deletions python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The following paragraph describes the workings in the context of an extraction.
# Getting started

Before showing how to get started with a Python plugin, note that a couple of example plugins are provided to show you how the API operates:
* *AppleDoubleKaitaiPlugin*: This plugin is an example that parses the content of an AppleDouble file with Kaitai and stores the result as a property represented as JSON. This plugin can be found in the `python/appledoublekaitai` directory.
* *ChatPlugin*: This plugin parses simple made-up chat logs into a message tree. This plugin can be found in the `python/chat` directory.
* *DataDigestPlugin*: This plugin reads data in chunks and calculates an SHA-256 hash over the entire data. This plugin can be found in the `python/datadigest` directory.
* *DataTransformationPlugin*: This plugin creates transformations from simple made-up chat logs. This plugin can be found in the `python/datatransformation` directory.
Expand Down
1 change: 1 addition & 0 deletions python/appledoublekaitai/.Dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.tox
36 changes: 36 additions & 0 deletions python/appledoublekaitai/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Multi-stage Dockerfile that builds and packages an extraction plugin
# The recommended way to build the plugin is by calling tox:
# tox -e package
# if you need to pass a proxy:
# tox -e package -- --build-arg https_proxy=https://your-proxy
# if you want to pass a private Python package index:
# tox -e package -- --build-arg PIP_INDEX_URL=https://your-pypi-mirror

###############################################################################
# Stage 1: build the plugin
# use a 'fat' image to set up the dependencies we'll need

FROM python:3.12 AS builder
# package index to use in this stage; override it with --build-arg PIP_INDEX_URL=... (see above)
ARG PIP_INDEX_URL=https://pypi.org/simple/
# build wheels for all dependencies in /app/dist (compiling binary distributions for sdists containing non-python code)
RUN mkdir --parents /app/dist
COPY requirements.txt /app/requirements.txt
RUN pip wheel --requirement /app/requirements.txt --wheel-dir /app/dist


###############################################################################
# Stage 2: create the distributable plugin image
# use a 'slim' image for running the actual plugin

FROM python:3.12-slim
# copy the dependencies in wheel form from the builder stage and install them
# (--no-index: install only from the copied wheels, without consulting a package index)
RUN mkdir --parents /app/dist
COPY --from=builder /app/dist/*.whl /app/dist/
RUN pip install --no-index /app/dist/*.whl

# copy the plugin's Python files and the contents of the structs directory; the plugin is run on port 8999
COPY *.py /app/
COPY structs/* /app/structs/
EXPOSE 8999
# CMD supplies the default arguments to ENTRYPOINT: the plugin file and the port
ENTRYPOINT ["/usr/local/bin/serve_plugin", "-vv"]
CMD ["/app/plugin.py", "8999"]
125 changes: 125 additions & 0 deletions python/appledoublekaitai/kaitai_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import enum
import inspect
import json
from io import BufferedWriter
from typing import Any, BinaryIO, Dict, Generator, List, Type

import kaitaistruct
from kaitaistruct import KaitaiStruct
from json_stream import streamable_dict, streamable_list


def write_to_json(data_binary: BinaryIO, writer: BufferedWriter, class_type: Type[KaitaiStruct]):
    """
    Parses binary data with a Kaitai struct class and writes the result to a writer as UTF-8 encoded JSON
    @param data_binary: binary data containing the file content
    @param writer: BufferedWriter that receives the UTF-8 encoded bytes of the JSON string
    @param class_type: class that contains the parsing to a Kaitai struct
    @return: nothing; the JSON is written to the writer instead of being returned
    """
    json_text = to_json_string(data_binary, class_type)
    writer.write(json_text.encode("utf-8"))


def to_json_string(data_binary: BinaryIO, class_type: Type[KaitaiStruct]) -> str:
    """
    Parses binary data with a Kaitai struct class and renders the parsed struct as a JSON string
    @param data_binary: binary data containing the file content
    @param class_type: class whose from_io() parses the binary data into a Kaitai struct
    @return: JSON string (indented with two spaces) representing the contents of the parsed struct
    """
    struct_tree = _object_to_dict(class_type.from_io(data_binary))
    return json.dumps(struct_tree, indent=2)


@streamable_dict
def _object_to_dict(instance: Any) -> Generator[Dict[str, Any], None, None]:
    """
    Recursive helper that turns an object into a stream of (key, value) pairs.
    Key: the field or property method name, converted to lowerCamelCase
    Value: nested Kaitai structs and lists are converted recursively, any other value goes through _process_value
    Names starting with an underscore and values that are None are left out.
    @param instance: object that needs parsing to dictionary
    @yield: (key, converted value) pair for every field that is kept
    """
    for name, raw_value in _parameters_dict(instance).items():
        if name.startswith("_") or raw_value is None:
            continue
        json_key = _to_lower_camel_case(name)
        if _is_kaitai_struct(raw_value):
            converted = _object_to_dict(raw_value)
        elif _is_list(raw_value):
            converted = _list_to_dict(raw_value)
        else:
            converted = _process_value(raw_value)
        yield json_key, converted


def _parameters_dict(instance: Any) -> Dict[str, Any]:
    """
    Helper method that collects the fields of an object in a new dictionary.
    Key: the attribute names and, for Kaitai structs, the property method names
    Value: the original attribute values and the values returned by the property methods
    @param instance: object that needs parsing to dictionary; it must have a __dict__
    @return: new dictionary containing the original field names and their respective values
    """
    # vars() hands back the instance's live __dict__; copy it so that adding the
    # property values below does not write extra entries into the object itself.
    parameters_dict = dict(vars(instance))
    if _is_kaitai_struct(instance):
        for method in _get_property_methods(type(instance)):
            parameters_dict[str(method)] = getattr(instance, method)
    return parameters_dict


def _process_value(value_object: Any) -> Any:
"""
Helper method to process the different types of values to enable their printing in a json
@param value_object: value of whatever type that might require preprocessing; if not, the value itself is returned
@return: type that can be dumped in a json
"""
if type(value_object) is bytes:
return list(value_object)
if isinstance(value_object, enum.Enum):
return {
"name": value_object.name.upper(),
"value": value_object.value
}
return value_object


def _get_property_methods(class_type: Any) -> List[str]:
"""
Helper method to obtain method names in a class that are annotated with @property
@param class_type: class_type that the method obtains @property method names from
@return: list of method names containing the annotation property
"""
property_method_names = []
for name, member in inspect.getmembers(class_type):
if isinstance(member, property):
property_method_names.append(name)
return property_method_names


def _is_kaitai_struct(value_object: Any) -> bool:
    """
    Helper method that tells whether a value is an instance of (a subclass of) KaitaiStruct
    @param value_object: value to check
    @return: True if the value is a KaitaiStruct, False otherwise
    """
    return isinstance(value_object, kaitaistruct.KaitaiStruct)


def _is_list(value_object: Any) -> bool:
return issubclass(type(value_object), List)


@streamable_list
def _list_to_dict(object_list: List[Any]) -> Generator[Any, None, None]:
    """
    Recursive helper that converts the elements of a list into values that can be dumped in a json.
    Elements are dispatched the same way _object_to_dict dispatches field values: Kaitai structs become
    dictionaries, nested lists are converted recursively and every other element goes through _process_value.
    @param object_list: list of values that need converting
    @yield: the converted form of each element, in list order
    """
    for obj in object_list:
        if _is_kaitai_struct(obj):
            yield _object_to_dict(obj)
        elif _is_list(obj):
            yield _list_to_dict(obj)
        else:
            # plain values (ints, strings, bytes, enums, ...) have no fields to walk;
            # sending them to _object_to_dict would fail in vars() or produce an empty object
            yield _process_value(obj)


def _to_camel_case(snake_str: str) -> str:
return "".join(x.capitalize() for x in snake_str.lower().split("_"))


def _to_lower_camel_case(snake_str: str) -> str:
camel_string = _to_camel_case(snake_str)
return snake_str[0].lower() + camel_string[1:]
37 changes: 37 additions & 0 deletions python/appledoublekaitai/plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from hansken_extraction_plugin.api.extraction_plugin import ExtractionPlugin
from hansken_extraction_plugin.api.plugin_info import Author, MaturityLevel, PluginId, PluginInfo
from hansken_extraction_plugin.runtime.extraction_plugin_runner import run_with_hanskenpy
from logbook import Logger

import kaitai_utils
from structs.apple_single_double import AppleSingleDouble


# module-level logbook logger, named after this module
log = Logger(__name__)


class Plugin(ExtractionPlugin):
    """Extraction plugin that parses trace data with the AppleSingleDouble Kaitai struct and writes it out as JSON."""

    def plugin_info(self):
        """
        Describes this plugin: its id, version, author, maturity, license and the matcher query
        @return: PluginInfo describing this plugin
        """
        plugin_info = PluginInfo(
            id=PluginId(domain='nfi.nl', category='extract', name='apple_double_kaitai_plugin'),
            version='0.0.1',
            description='Parses Apple double files using Kaitai',
            author=Author('Team Formats', 'formats@nfi.nl', 'Netherlands Forensic Institute'),
            maturity=MaturityLevel.PROOF_OF_CONCEPT,
            webpage_url='',  # e.g. url to the code repository of your plugin
            matcher='$data.fileType=AppleDouble',  # add the query for the types of traces your plugin should match
            license='Apache License 2.0'
        )
        return plugin_info

    def process(self, trace, data_context):
        """
        Parses the data of the trace as AppleSingleDouble and writes the JSON result to a 'text' data stream
        @param trace: trace that is being processed; its data is read and a 'text' data stream is written to it
        @param data_context: not used by this plugin
        """
        # open the reader in the same with-statement as the writer, so that both are closed
        # when processing ends, also when parsing raises
        with trace.open(data_type='text', mode='wb') as writer, trace.open() as reader:
            kaitai_utils.write_to_json(reader, writer, AppleSingleDouble)


if __name__ == '__main__':
    # Optional entry point: when this file is executed as a script, the plugin is run with Hansken.py.
    # For details, see:
    # https://netherlandsforensicinstitute.github.io/hansken-extraction-plugin-sdk-documentation/latest/dev/python/hanskenpy.html
    run_with_hanskenpy(Plugin)
3 changes: 3 additions & 0 deletions python/appledoublekaitai/requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
hansken-extraction-plugin==0.8.1
kaitaistruct
json_stream
61 changes: 61 additions & 0 deletions python/appledoublekaitai/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile requirements.in
#
certifi==2024.7.4
# via requests
charset-normalizer==3.3.2
# via requests
decorator==5.1.1
# via hansken
docker==7.1.0
# via hansken-extraction-plugin
grpcio==1.65.4
# via hansken-extraction-plugin
hansken==2024.7.15
# via hansken-extraction-plugin
hansken-extraction-plugin==0.8.1
# via -r requirements.in
idna==3.7
# via requests
ijson==3.3.0
# via hansken
iso8601==2.1.0
# via hansken
json-stream==2.3.2
# via -r requirements.in
json-stream-rs-tokenizer==0.4.26
# via json-stream
kaitaistruct==0.10
# via -r requirements.in
logbook==1.7.0.post0
# via
# hansken
# hansken-extraction-plugin
more-itertools==10.3.0
# via hansken
protobuf==5.27.3
# via hansken-extraction-plugin
python-dateutil==2.9.0.post0
# via hansken
pytz==2024.1
# via hansken-extraction-plugin
requests==2.32.3
# via
# docker
# hansken
# requests-toolbelt
requests-toolbelt==1.0.0
# via hansken
six==1.16.0
# via python-dateutil
tabulate==0.9.0
# via hansken
typing-extensions==4.12.2
# via hansken-extraction-plugin
urllib3==2.2.2
# via
# docker
# requests
Loading

0 comments on commit c379d77

Please sign in to comment.