Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Drop dependency on fact_helper_file #1140

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docsrc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@
'docker',
'docker-compose',
'email-validator',
'fact_helper_file',
'flaky',
'flask',
'flask_login',
Expand Down
43 changes: 43 additions & 0 deletions src/helperFunctions/magic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""This is a wrapper around pymagic.
It aims to provide the same API but with the ability to load multiple magic
files in the default api.
"""
import os

import magic as pymagic

from helperFunctions.fileSystem import get_src_dir

# System-wide compiled magic database. The ``MAGIC`` environment variable takes
# precedence when it is set; on Ubuntu the fallback file is provided by the
# libmagic-mgc package.
_default_magic = os.environ.get('MAGIC', '/usr/lib/file/magic.mgc')
# Additional compiled databases located below ``<get_src_dir()>/bin``
_fact_magic = f'{get_src_dir()}/bin/firmware.mgc'
_internal_symlink_magic = f'{get_src_dir()}/bin/internal_symlink.mgc'
# Colon-separated list of all databases; default ``magic_file`` of this module
_magic_file = ':'.join((_internal_symlink_magic, _fact_magic, _default_magic))

# Cache of the ``pymagic.Magic`` objects that were already constructed
_instances = {}


def _get_magic_instance(**kwargs):
    """Return a cached ``pymagic.Magic`` instance for the given keyword arguments.

    The first call with a particular combination of keyword arguments constructs
    ``pymagic.Magic(**kwargs)`` and stores it in ``_instances``; later calls with
    the same arguments return the stored instance.

    :param kwargs: Keyword arguments forwarded unchanged to ``pymagic.Magic``.
        All values must be hashable because they become part of the cache key.
    :return: The ``pymagic.Magic`` instance belonging to ``kwargs``.
    """
    # Dicts are not hashable, but a sorted tuple of their items is. The tuple
    # itself is used as key (not its ``hash()``) so that two different argument
    # sets whose hashes happen to collide can never share an instance.
    key = tuple(sorted(kwargs.items()))
    instance = _instances.get(key)
    if instance is None:
        instance = _instances[key] = pymagic.Magic(**kwargs)
    return instance


def from_file(filename, magic_file=_magic_file, **kwargs) -> str:
    """Identify the type of the file stored at ``filename``.

    Counterpart of pymagic's ``magic.from_file`` that additionally accepts every
    keyword argument of ``magic.Magic`` (for example ``mime=True``).

    :param filename: Path of the file to identify.
    :param magic_file: Magic database(s) to use; defaults to the module-wide
        colon-separated list ``_magic_file``.
    :param kwargs: Further keyword arguments passed on to ``magic.Magic``.
    :return: The result of ``Magic.from_file`` for ``filename``.
    """
    detector = _get_magic_instance(**kwargs, magic_file=magic_file)
    return detector.from_file(filename)


def from_buffer(filename, magic_file=_magic_file, **kwargs) -> str:
    """Identify the type of in-memory data.

    Counterpart of pymagic's ``magic.from_buffer`` that additionally accepts
    every keyword argument of ``magic.Magic`` (for example ``mime=True``).

    :param filename: Despite its name, this is the buffer content that is handed
        to ``Magic.from_buffer``; the name is kept for compatibility.
    :param magic_file: Magic database(s) to use; defaults to the module-wide
        colon-separated list ``_magic_file``.
    :param kwargs: Further keyword arguments passed on to ``magic.Magic``.
    :return: The result of ``Magic.from_buffer`` for the given buffer.
    """
    detector = _get_magic_instance(**kwargs, magic_file=magic_file)
    return detector.from_buffer(filename)
7 changes: 7 additions & 0 deletions src/install/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ def main(distribution):

BIN_DIR.mkdir(exist_ok=True)

with OperateInDirectory('../../src/bin'):
run_cmd_with_logging(
'wget https://github.com/maringuu/firmware-magic-database/releases/download/v0.1.0/firmware.mgc'
)
run_cmd_with_logging(f'file -C -m {INSTALL_DIR / "internal_symlink_magic"}')
run_cmd_with_logging(f'mv {INSTALL_DIR / "internal_symlink_magic.mgc"} {BIN_DIR}')

apt_packages_path = INSTALL_DIR / 'apt-pkgs-common.txt'
dnf_packages_path = INSTALL_DIR / 'dnf-pkgs-common.txt'

Expand Down
6 changes: 6 additions & 0 deletions src/install/internal_symlink_magic
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# ====================== faf internal ======================

# ---- faf internal link representation ----
0 string symbolic\ link\ -> symbolic link
>17 string x to '%s'
!:mime inode/symlink
2 changes: 0 additions & 2 deletions src/install/requirements_common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ pydantic==2.4.0
# Config parsing
toml==0.10.2

git+https://github.com/fkie-cad/fact_helper_file.git

# Common code modules
git+https://github.com/fkie-cad/common_helper_files.git
git+https://github.com/fkie-cad/common_helper_filter.git
Expand Down
8 changes: 3 additions & 5 deletions src/plugins/analysis/file_type/code/file_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from typing import List

import pydantic
from fact_helper_file import get_file_type_from_path
from pydantic import Field

from analysis.plugin import AnalysisPluginV0
from analysis.plugin.compat import AnalysisBasePluginAdapterMixin
from helperFunctions import magic

if typing.TYPE_CHECKING:
import io
Expand Down Expand Up @@ -39,9 +39,7 @@ def summarize(self, result: Schema) -> List[str]:
def analyze(self, file_handle: io.FileIO, virtual_file_path: str, analyses: dict) -> Schema:
del virtual_file_path, analyses

file_dict = get_file_type_from_path(file_handle.name)

return AnalysisPlugin.Schema(
mime=file_dict['mime'],
full=file_dict['full'],
mime=magic.from_file(file_handle.name, mime=True),
full=magic.from_file(file_handle.name, mime=False),
)
7 changes: 5 additions & 2 deletions src/plugins/analysis/qemu_exec/code/qemu_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
from common_helper_files import get_binary_from_file, safe_rglob
from docker.errors import DockerException
from docker.types import Mount
from fact_helper_file import get_file_type_from_path
from requests.exceptions import ReadTimeout

import config
from analysis.PluginBase import AnalysisBasePlugin
from helperFunctions import magic
from helperFunctions.docker import run_docker_container
from helperFunctions.tag import TagColor
from helperFunctions.uid import create_uid
Expand Down Expand Up @@ -125,7 +125,10 @@ def _find_relevant_files(self, extracted_files_dir: Path):
result = []
for path in safe_rglob(extracted_files_dir):
if path.is_file() and not path.is_symlink():
file_type = get_file_type_from_path(path.absolute())
file_type = {
'full': magic.from_file(path.absolute(), mime=False),
'mime': magic.from_file(path.absolute(), mime=True),
}
if self._has_relevant_type(file_type):
result.append((f'/{path.relative_to(Path(self.root_path))}', file_type['full']))
return result
Expand Down
4 changes: 2 additions & 2 deletions src/test/acceptance/test_io_routes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
from fact_helper_file import get_file_type_from_binary

from helperFunctions import magic
from storage.db_interface_comparison import ComparisonDbInterface
from test.common_helper import create_test_firmware

Expand Down Expand Up @@ -68,4 +68,4 @@ def test_pdf_download(self, test_client, backend_db):
assert response.status_code == 200, 'pdf download failed' # noqa: PLR2004
device = self.test_fw.device_name.replace(' ', '_')
assert response.headers['Content-Disposition'] == f'attachment; filename={device}_analysis_report.pdf'
assert get_file_type_from_binary(response.data)['mime'] == 'application/pdf'
assert magic.from_buffer(response.data, mime=True) == 'application/pdf'
5 changes: 2 additions & 3 deletions src/test/integration/helperFunctions/test_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import os
from pathlib import Path

from fact_helper_file import get_file_type_from_binary

from helperFunctions import magic
from helperFunctions.pdf import build_pdf_report
from test.common_helper import TEST_FW

Expand All @@ -21,5 +20,5 @@ def test_build_pdf_report():

pdf_path = build_pdf_report(TEST_FW, docker_mount_base_dir)

assert get_file_type_from_binary(pdf_path.read_bytes())['mime'] == 'application/pdf'
assert magic.from_buffer(pdf_path.read_bytes(), mime=True) == 'application/pdf'
assert pdf_path.name == f"{TEST_FW.device_name.replace(' ', '_')}_analysis_report.pdf"
6 changes: 3 additions & 3 deletions src/unpacker/unpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
from time import time
from typing import TYPE_CHECKING, Optional

from fact_helper_file import get_file_type_from_path

import config
from analysis.PluginBase import sanitize_processed_analysis
from helperFunctions import magic
from helperFunctions.fileSystem import file_is_empty, get_relative_object_path
from helperFunctions.tag import TagColor
from objects.file import FileObject
Expand Down Expand Up @@ -94,7 +93,8 @@ def generate_objects_and_store_files(
continue
current_file = FileObject(file_path=str(path))
current_virtual_path = get_relative_object_path(path, extraction_dir)
current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
current_file.temporary_data['parent_fo_type'] = magic.from_file(parent.file_path, mime=True)

if current_file.uid not in extracted_files:
# the same file can be contained multiple times in one archive -> only the VFP needs an update
self.unpacking_locks.set_unpacking_lock(current_file.uid)
Expand Down
4 changes: 2 additions & 2 deletions src/web_interface/components/io_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from time import sleep

import requests
from fact_helper_file import get_file_type_from_binary
from flask import Response, make_response, redirect, render_template, request

import config
from helperFunctions import magic
from helperFunctions.database import get_shared_session
from helperFunctions.pdf import build_pdf_report
from helperFunctions.task_conversion import check_for_errors, convert_analysis_task_to_fw_obj, create_analysis_task
Expand Down Expand Up @@ -82,7 +82,7 @@ def _prepare_file_download(self, uid: str, packed: bool = False) -> str | Respon
def _get_file_download_mime(self, binary: bytes, uid: str) -> str:
type_analysis = self.db.frontend.get_analysis(uid, 'file_type')
mime = type_analysis.get('mime') if type_analysis is not None else None
return mime or get_file_type_from_binary(binary)['mime']
return mime or magic.from_buffer(binary, mime=True)

@roles_accepted(*PRIVILEGES['download'])
@AppRoute('/ida-download/<compare_id>', GET)
Expand Down