Skip to content

Commit

Permalink
chore: Remove dependency on fact_helper_file
Browse files Browse the repository at this point in the history
  • Loading branch information
jstucke authored and maringuu committed Nov 14, 2024
1 parent 92988a3 commit dfc3b18
Show file tree
Hide file tree
Showing 10 changed files with 98 additions and 13 deletions.
46 changes: 46 additions & 0 deletions fact_extractor/helperFunctions/magic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""This is a wrapper around pymagic.
It aims to provide the same API, but with the ability to load multiple magic
files by default.
"""

from __future__ import annotations

import os
from os import PathLike

import magic as pymagic

from helperFunctions.file_system import get_src_dir

# Stock libmagic database (on ubuntu this is provided by the libmagic-mgc package);
# the MAGIC environment variable takes precedence when it is set
_default_magic = os.environ.get('MAGIC', '/usr/lib/file/magic.mgc')
# Firmware-specific database expected at <src dir>/bin/firmware
_fw_magic = f'{get_src_dir()}/bin/firmware'
# Both databases joined with a colon; this is the default ``magic_file`` of the wrapper functions
_magic_file = ':'.join((_fw_magic, _default_magic))

# Cache of pymagic.Magic instances, filled lazily by _get_magic_instance
_instances = dict()


def _get_magic_instance(**kwargs):
    """Return a cached instance of ``pymagic.Magic`` for the given keyword arguments.

    The first call with a particular combination of keyword arguments creates
    the instance and stores it in ``_instances``; later calls with the same
    arguments get that instance back.
    All keyword arguments are passed to ``pymagic.Magic`` unchanged.
    """
    # Dicts are not hashable, but a tuple of the sorted items is. The tuple
    # itself is the key (not its hash value), so two different argument sets
    # whose hashes happen to collide can never share an instance.
    key = tuple(sorted(kwargs.items()))
    instance = _instances.get(key)
    if instance is None:
        instance = _instances[key] = pymagic.Magic(**kwargs)
    return instance


def from_file(filename: bytes | str | PathLike, magic_file: str | None = _magic_file, **kwargs) -> str:
    """Counterpart of pymagic's ``magic.from_file`` that additionally accepts
    every keyword argument of ``magic.Magic``.

    ``magic_file`` and the remaining keyword arguments select the (cached)
    ``magic.Magic`` instance whose ``from_file`` result for ``filename`` is returned.
    """
    return _get_magic_instance(magic_file=magic_file, **kwargs).from_file(filename)


def from_buffer(buf: bytes | str, magic_file: str | None = _magic_file, **kwargs) -> str:
    """Counterpart of pymagic's ``magic.from_buffer`` that additionally accepts
    every keyword argument of ``magic.Magic``.

    ``magic_file`` and the remaining keyword arguments select the (cached)
    ``magic.Magic`` instance whose ``from_buffer`` result for ``buf`` is returned.
    """
    return _get_magic_instance(magic_file=magic_file, **kwargs).from_buffer(buf)
4 changes: 2 additions & 2 deletions fact_extractor/helperFunctions/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from pathlib import Path
from typing import Dict, List

from helperFunctions import magic
from common_helper_files import safe_rglob
from common_helper_unpacking_classifier import (
avg_entropy, get_binary_size_without_padding, is_compressed
)
from fact_helper_file import get_file_type_from_path
from helperFunctions.config import read_list_from_config


Expand All @@ -28,7 +28,7 @@ def get_unpack_status(file_path: str, binary: bytes, extracted_files: List[Path]
meta_data['entropy'] = avg_entropy(binary)

if not extracted_files and meta_data.get('number_of_excluded_files', 0) == 0:
if get_file_type_from_path(file_path)['mime'] in read_list_from_config(config, 'ExpertSettings', 'compressed_file_types')\
if magic.from_file(file_path, mime=True) in read_list_from_config(config, 'ExpertSettings', 'compressed_file_types')\
or not is_compressed(binary, compress_entropy_threshold=config.getfloat('ExpertSettings', 'unpack_threshold'), classifier=avg_entropy):
meta_data['summary'] = ['unpacked']
else:
Expand Down
26 changes: 25 additions & 1 deletion fact_extractor/install/common.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import logging
import subprocess as sp
import os
from contextlib import suppress
from pathlib import Path

from helperFunctions.config import load_config
from helperFunctions.install import (
apt_install_packages, apt_update_sources, pip_install_packages, load_requirements_file
apt_install_packages, apt_update_sources, pip_install_packages, load_requirements_file, OperateInDirectory
)

APT_DEPENDENCIES = {
Expand Down Expand Up @@ -37,6 +38,27 @@ def install_apt_dependencies(distribution: str):
apt_install_packages(*APT_DEPENDENCIES[distribution])


def _install_magic():
    """Download the firmware magic database and decompress it.

    Both commands run inside the ``OperateInDirectory(bin_dir)`` context, where
    ``bin_dir`` is the ``bin`` directory next to this file's parent directory.
    The archive is fetched with ``wget`` as ``firmware.xz`` and then
    decompressed with ``unxz``.

    Raises ``subprocess.CalledProcessError`` if either command exits with a
    non-zero status.
    """
    bin_dir = Path(__file__).parent.parent / 'bin'
    with OperateInDirectory(bin_dir):
        sp.run(
            [
                "wget",
                "--output-document",
                "firmware.xz",
                "https://github.com/fkie-cad/firmware-magic-database/releases/download/v0.2.1/firmware.xz",
            ],
            check=True,
        )
        # check=True here as well: a failed decompression must abort the
        # installation instead of silently leaving no usable database behind.
        sp.run(
            [
                "unxz",
                "--force",
                "firmware.xz",
            ],
            check=True,
        )


def main(distribution):
logging.info('Updating package lists')
apt_update_sources()
Expand All @@ -49,6 +71,8 @@ def main(distribution):
with suppress(FileExistsError):
os.mkdir('../bin')

_install_magic()

config = load_config('main.cfg')
data_folder = config.get('unpack', 'data_folder')
os.makedirs(str(Path(data_folder, 'files')), exist_ok=True)
Expand Down
2 changes: 1 addition & 1 deletion fact_extractor/install/pre_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ echo "Install Pre-Install Requirements"
(apt-get update && apt-get install sudo) || true

sudo apt-get update
sudo apt-get -y install git apt-transport-https ca-certificates curl software-properties-common wget libmagic-dev
sudo apt-get -y install git apt-transport-https ca-certificates curl software-properties-common wget libmagic-dev xz-utils

IS_VENV=$(python3 -c 'import sys; print(sys.exec_prefix!=sys.base_prefix)')
if [[ $IS_VENV == "False" ]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
'''
from __future__ import annotations

from helperFunctions import magic
import logging
import re
import shutil
from pathlib import Path

from common_helper_process import execute_shell_command
from fact_helper_file import get_file_type_from_path

NAME = 'generic_carver'
MIME_PATTERNS = ['generic/carver']
Expand Down Expand Up @@ -45,7 +45,7 @@ def remove_false_positive_archives(self) -> str:
for file_path in self.unpack_directory.glob('**/*'):
if not file_path.is_file():
continue
file_type = get_file_type_from_path(file_path)['mime']
file_type = magic.from_file(file_path, mime=True)

if file_type == 'application/x-tar' or self._is_possible_tar(file_type, file_path):
self._remove_invalid_archives(file_path, 'tar -tvf {}', 'does not look like a tar archive')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@
This plugin mounts filesystem images and extracts their content
'''
import re
from helperFunctions import magic
from shlex import split
from subprocess import run, PIPE, STDOUT
from tempfile import TemporaryDirectory
from time import sleep

from fact_helper_file import get_file_type_from_path

NAME = 'genericFS'
MIME_PATTERNS = [
'filesystem/btrfs', 'filesystem/dosmbr', 'filesystem/f2fs', 'filesystem/jfs', 'filesystem/minix',
Expand All @@ -28,7 +27,7 @@


def unpack_function(file_path, tmp_dir):
mime_type = get_file_type_from_path(file_path)['mime']
mime_type = magic.from_file(file_path, mime=True)
if mime_type == 'filesystem/dosmbr':
output = _mount_from_boot_record(file_path, tmp_dir)
else:
Expand Down
Binary file added fact_extractor/test/data/ros_header
Binary file not shown.
15 changes: 15 additions & 0 deletions fact_extractor/test/unit/test_mime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pathlib import Path

from helperFunctions import magic

from helperFunctions.file_system import get_fact_bin_dir, get_test_data_dir


def test_magic():
    """Check that the firmware magic database exists and is used by the wrapper."""
    database = Path(get_fact_bin_dir(), 'firmware')
    assert database.is_file()

    # A mime type that is expected to come from the firmware database
    ros_sample = f'{get_test_data_dir()}/ros_header'
    ros_mime = magic.from_file(ros_sample, mime=True)
    assert ros_mime == 'firmware/ros', 'firmware-magic-database is not loaded'

    # A regular mime type must still be detected as well
    zip_sample = f'{get_test_data_dir()}/container/test.zip'
    zip_mime = magic.from_file(zip_sample, mime=True)
    assert zip_mime == 'application/zip'
7 changes: 4 additions & 3 deletions fact_extractor/unpacker/unpackBase.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import fnmatch
import logging
from os import getgid, getuid
from subprocess import PIPE, Popen
from time import time
import fnmatch
from typing import Callable, Dict, List, Tuple

from helperFunctions import magic
from common_helper_files import get_files_in_dir
from fact_helper_file import get_file_type_from_path

from helperFunctions.config import read_list_from_config
from helperFunctions.plugin import import_plugins

Expand Down Expand Up @@ -50,7 +51,7 @@ def get_unpacker(self, mime_type: str):
return self.unpacker_plugins['generic/carver']

def extract_files_from_file(self, file_path: str, tmp_dir) -> Tuple[List, Dict]:
current_unpacker = self.get_unpacker(get_file_type_from_path(file_path)['mime'])
current_unpacker = self.get_unpacker(magic.from_file(file_path, mime=True))
return self._extract_files_from_file_using_specific_unpacker(file_path, tmp_dir, current_unpacker)

def unpacking_fallback(self, file_path, tmp_dir, old_meta, fallback_plugin_mime) -> Tuple[List, Dict]:
Expand Down
2 changes: 1 addition & 1 deletion requirements-unpackers.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# FixMe: deprecated
pluginbase~=1.0.1
git+https://github.com/fkie-cad/common_helper_unpacking_classifier.git
git+https://github.com/fkie-cad/fact_helper_file.git
python-magic
patool~=2.2.0
# jffs2: jefferson + deps
git+https://github.com/sviehb/[email protected]
Expand Down

0 comments on commit dfc3b18

Please sign in to comment.