From 76ca13fb226a515fefa25deb2bab09c3bd8f993f Mon Sep 17 00:00:00 2001 From: dorp Date: Sat, 21 Jan 2023 17:25:44 +0100 Subject: [PATCH 01/11] Replaced binwalk with unblob in code and setup --- .gitignore | 1 + fact_extractor/install/unpacker.py | 34 ++++++++++++++--- .../generic_carver/code/generic_carver.py | 30 ++++++--------- .../unpacking/generic_carver/install.sh | 38 +++++++++++++++++++ .../plugins/unpacking/linuxkernel/install.sh | 6 +-- .../plugins/unpacking/sevenz/install.sh | 17 +-------- 6 files changed, 84 insertions(+), 42 deletions(-) create mode 100644 fact_extractor/plugins/unpacking/generic_carver/install.sh diff --git a/.gitignore b/.gitignore index 81976e22..a9bef5cf 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ __pycache__ bin/ install.log +unblob diff --git a/fact_extractor/install/unpacker.py b/fact_extractor/install/unpacker.py index 4720411f..62870b9a 100644 --- a/fact_extractor/install/unpacker.py +++ b/fact_extractor/install/unpacker.py @@ -12,7 +12,6 @@ ) from contextlib import suppress - BIN_DIR = Path(__file__).parent.parent / 'bin' DEPENDENCIES = { @@ -121,6 +120,9 @@ 'unace', 'sharutils', 'unar', + 'zstd', + 'liblz4-tool', + 'p7zip-full', # Freetz 'bison', 'flex', @@ -153,10 +155,10 @@ 'python-lzo', 'numpy', 'scipy', + 'lz4', 'git+https://github.com/jrspruitt/ubi_reader@v0.6.3-master' # pinned as broken currently ], 'github': [ - ('kartone/sasquatch', ['./build.sh']), ('svidovich/jefferson-3', ['sudo python3 setup.py install']), ('rampageX/firmware-mod-kit', ['(cd src && make)', 'cp src/yaffs2utils/unyaffs2 src/untrx src/tpl-tool/src/tpl-tool ../../bin/']) ] @@ -164,6 +166,13 @@ } +def check_mod_kit_installed() -> bool: + return all( + (Path(__file__).parent.parent / 'bin' / tool).exists() + for tool in ['tpl-tool', 'untrx', 'unyaffs2'] + ) + + def install_dependencies(dependencies): apt = dependencies.get('apt', []) pip3 = dependencies.get('pip3', []) @@ -171,7 +180,10 @@ def install_dependencies(dependencies): 
apt_install_packages(*apt) pip3_install_packages(*pip3) for repo in github: - install_github_project(*repo) + if repo[0].endswith('firmware-mod-kit') and check_mod_kit_installed(): + logging.info('Skipping firmware-mod-kit since it is already installed') + else: + install_github_project(*repo) def main(distribution): @@ -206,14 +218,24 @@ def _edit_sudoers(): sudoers_content = '\n'.join((f'{username}\tALL=NOPASSWD: {command}' for command in ( '/sbin/kpartx', '/sbin/losetup', '/bin/mount', '/bin/umount', '/bin/mknod', '/usr/local/bin/sasquatch', '/bin/rm', '/bin/cp', '/bin/dd', '/bin/chown' ))) - Path('/tmp/fact_overrides').write_text(f'{sudoers_content}\n') - chown_output, chown_code = execute_shell_command_get_return_code('sudo chown root:root /tmp/fact_overrides') - mv_output, mv_code = execute_shell_command_get_return_code('sudo mv /tmp/fact_overrides /etc/sudoers.d/fact_overrides') + Path('/tmp/fact_overrides').write_text(f'{sudoers_content}\n', encoding='utf-8') + _, chown_code = execute_shell_command_get_return_code('sudo chown root:root /tmp/fact_overrides') + _, mv_code = execute_shell_command_get_return_code('sudo mv /tmp/fact_overrides /etc/sudoers.d/fact_overrides') if not chown_code == mv_code == 0: raise InstallationError('Editing sudoers file did not succeed\n{chown_output}\n{mv_output}') def _install_freetz(): + if all( + (Path(__file__).parent.parent / 'bin' / tool).exists() + for tool in [ + 'find-squashfs', 'unpack-kernel', 'freetz_bin_functions', 'unlzma', 'sfk', 'unsquashfs4-avm-be', + 'unsquashfs4-avm-le', 'unsquashfs3-multi' + ] + ): + logging.info('Skipping FREETZ as it is already installed') + return + logging.info('Installing FREETZ') current_user = getuser() with TemporaryDirectory(prefix='fact_freetz') as build_directory: diff --git a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py index 65457c9c..3b2a1d69 100644 --- 
a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py +++ b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py @@ -5,7 +5,6 @@ import logging import re -import shutil from pathlib import Path from common_helper_process import execute_shell_command @@ -13,7 +12,7 @@ NAME = 'generic_carver' MIME_PATTERNS = ['generic/carver'] -VERSION = '0.8' +VERSION = '0.9' TAR_MAGIC = b'ustar' BZ2_EOF_MAGIC = [ # the magic string is only aligned to half bytes -> two possible strings @@ -28,12 +27,18 @@ def unpack_function(file_path, tmp_dir): file_path specifies the input file. tmp_dir should be used to store the extracted files. ''' + logging.debug(f'File type unknown: Execute unblob on {file_path}') - logging.debug(f'File Type unknown: execute binwalk on {file_path}') - output = execute_shell_command(f'binwalk --extract --carve --signature --directory {tmp_dir} {file_path}') - - drop_underscore_directory(tmp_dir) - return {'output': output, 'filter_log': ArchivesFilter(tmp_dir).remove_false_positive_archives()} + temp_file = Path('/tmp/unblob_report.json') + temp_file.unlink(missing_ok=True) + output = execute_shell_command( + f'unblob -sk --report {temp_file.absolute()} --entropy-depth 0 --depth 1 --extract-dir {tmp_dir} {file_path}' + ) + return { + 'output': output, + 'unblob_meta': temp_file.read_text(encoding='utf-8'), + 'filter_log': ArchivesFilter(tmp_dir).remove_false_positive_archives() + } class ArchivesFilter: @@ -135,17 +140,6 @@ def _find_trailing_data_index_bz2(file_path: Path) -> int | None: return None -def drop_underscore_directory(tmp_dir): - extracted_contents = list(Path(tmp_dir).iterdir()) - if not extracted_contents: - return - if not len(extracted_contents) == 1 or not extracted_contents[0].name.endswith('.extracted'): - return - for result in extracted_contents[0].iterdir(): - shutil.move(str(result), str(result.parent.parent)) - shutil.rmtree(str(extracted_contents[0])) - - # ----> Do not edit below this line <---- 
def setup(unpack_tool): for item in MIME_PATTERNS: diff --git a/fact_extractor/plugins/unpacking/generic_carver/install.sh b/fact_extractor/plugins/unpacking/generic_carver/install.sh new file mode 100644 index 00000000..c2f37fe6 --- /dev/null +++ b/fact_extractor/plugins/unpacking/generic_carver/install.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# This setup is largely ripped of from emba @ https://github.com/e-m-b-a/emba/blob/master/installer/IP61_unblob.sh +# Thanks to m-1-k-3 and the emba team! + +cd "$( dirname "${BASH_SOURCE[0]}" )" || exit 1 + +echo "------------------------------------" +echo " install unblob via poetry " +echo "------------------------------------" + +sudo -EH apt-get install -y e2fsprogs img2simg lziprecover xz-utils libmagic1 libhyperscan5 + +curl -L -o sasquatch_1.0_amd64.deb https://github.com/onekey-sec/sasquatch/releases/download/sasquatch-v1.0/sasquatch_1.0_amd64.deb +sudo dpkg -i sasquatch_1.0_amd64.deb +rm -f sasquatch_1.0_amd64.deb + +git clone https://github.com/dorpvom/unblob.git -b allow-carving-only + +# install poetry +sudo -EH pip3 install --upgrade poetry +cd unblob || exit 1 + +# install unblob with poetry: +poetry install --only main +UNBLOB_PATH=$(poetry env info --path) + +if [[ -f "$UNBLOB_PATH""/bin/unblob" ]]; then + if [[ ! 
-f "/usr/local/bin/unblob" ]]; then + sudo ln -s "$UNBLOB_PATH""/bin/unblob" /usr/local/bin/unblob + fi +else + echo "Could not install unblob" && exit 1 +fi + +rm -r tests + +exit 0 \ No newline at end of file diff --git a/fact_extractor/plugins/unpacking/linuxkernel/install.sh b/fact_extractor/plugins/unpacking/linuxkernel/install.sh index 6ab01488..75e57dff 100755 --- a/fact_extractor/plugins/unpacking/linuxkernel/install.sh +++ b/fact_extractor/plugins/unpacking/linuxkernel/install.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash echo "------------------------------------" -echo " install liblz4-tools, zstd " +echo " install vmlinux-to-elf " echo "------------------------------------" -sudo apt-get install -y liblz4-tool zstd sudo pip3 install --upgrade lz4 git+https://github.com/marin-m/vmlinux-to-elf -exit 0 + +exit 0 \ No newline at end of file diff --git a/fact_extractor/plugins/unpacking/sevenz/install.sh b/fact_extractor/plugins/unpacking/sevenz/install.sh index f55d12dc..7089c588 100755 --- a/fact_extractor/plugins/unpacking/sevenz/install.sh +++ b/fact_extractor/plugins/unpacking/sevenz/install.sh @@ -1,22 +1,9 @@ #!/usr/bin/env bash -cd "$( dirname "${BASH_SOURCE[0]}" )" - echo "------------------------------------" -echo " install p7z from source " +echo " install password lists " echo "------------------------------------" sudo -EH pip3 install --upgrade git+https://github.com/fkie-cad/common_helper_passwords.git -# install newest version of p7zip -sudo apt-get remove -y p7zip-full - -mkdir /tmp/fact_build -cd /tmp/fact_build - -wget -O 7zip.tar.bz2 https://sourceforge.net/projects/p7zip/files/latest/download -tar xvjf 7zip.tar.bz2 -(cd p7zip* && cp makefile.linux_amd64_asm makefile.linux && make -j$(nproc) all3 && sudo ./install.sh) -rm -fr p7zip* 7zip.tar.bz2 - -exit 0 +exit 0 \ No newline at end of file From 27dc074142fb699b2ad4fb99459305559802acd3 Mon Sep 17 00:00:00 2001 From: dorp Date: Sat, 21 Jan 2023 18:04:37 +0100 Subject: [PATCH 02/11] fixed file 
permission --- fact_extractor/plugins/unpacking/generic_carver/install.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 fact_extractor/plugins/unpacking/generic_carver/install.sh diff --git a/fact_extractor/plugins/unpacking/generic_carver/install.sh b/fact_extractor/plugins/unpacking/generic_carver/install.sh old mode 100644 new mode 100755 From f25a63fd35de46e2667178effab02fccceb3ed4b Mon Sep 17 00:00:00 2001 From: dorpvom Date: Mon, 23 Jan 2023 11:12:48 +0100 Subject: [PATCH 03/11] Fixed or skipped tests (where necessary / non applicable). Return dropping of intermediate folder --- .../generic_carver/code/generic_carver.py | 22 +++++++++++++++++-- .../test_plugin_generic_carver_binwalk.py | 5 +++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py index 3b2a1d69..1e56b9c5 100644 --- a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py +++ b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py @@ -5,6 +5,7 @@ import logging import re +import shutil from pathlib import Path from common_helper_process import execute_shell_command @@ -34,13 +35,30 @@ def unpack_function(file_path, tmp_dir): output = execute_shell_command( f'unblob -sk --report {temp_file.absolute()} --entropy-depth 0 --depth 1 --extract-dir {tmp_dir} {file_path}' ) + meta = temp_file.read_text(encoding='utf-8') + temp_file.unlink(missing_ok=True) + + drop_underscore_directory(tmp_dir) + filter_log = ArchivesFilter(tmp_dir).remove_false_positive_archives() + return { 'output': output, - 'unblob_meta': temp_file.read_text(encoding='utf-8'), - 'filter_log': ArchivesFilter(tmp_dir).remove_false_positive_archives() + 'unblob_meta': meta, + 'filter_log': filter_log } +def drop_underscore_directory(tmp_dir): + extracted_contents = list(Path(tmp_dir).iterdir()) + if not 
extracted_contents: + return + if not len(extracted_contents) == 1 or not extracted_contents[0].name.endswith('_extract'): + return + for result in extracted_contents[0].iterdir(): + shutil.move(str(result), str(result.parent.parent)) + shutil.rmtree(str(extracted_contents[0])) + + class ArchivesFilter: def __init__(self, unpack_directory): self.unpack_directory = Path(unpack_directory) diff --git a/fact_extractor/plugins/unpacking/generic_carver/test/test_plugin_generic_carver_binwalk.py b/fact_extractor/plugins/unpacking/generic_carver/test/test_plugin_generic_carver_binwalk.py index d21bc4b6..1c6c3b4e 100644 --- a/fact_extractor/plugins/unpacking/generic_carver/test/test_plugin_generic_carver_binwalk.py +++ b/fact_extractor/plugins/unpacking/generic_carver/test/test_plugin_generic_carver_binwalk.py @@ -24,11 +24,12 @@ def test_extraction(self): in_file, self.tmp_dir.name, self.unpacker.unpacker_plugins['generic/carver'] ) files = set(files) - assert len(files) == 1, 'file number incorrect' - assert files == {f'{self.tmp_dir.name}/64.zip'}, 'not all files found' + assert len(files) == 3, 'file number incorrect' + assert f'{self.tmp_dir.name}/100-887.zip' in files, 'hidden zip not identified correctly' assert 'output' in meta_data assert 'filter_log' in meta_data + @pytest.mark.skip(reason='unblob does not seem to produce the same kind of bad archives') def test_extraction_of_filtered_files(self): in_file = str(TEST_DATA_DIR / 'fake_xz.bin') files, meta_data = self.unpacker._extract_files_from_file_using_specific_unpacker( From 8b54daa67898a82046ab3290036efb3d9350c797 Mon Sep 17 00:00:00 2001 From: Johannes vom Dorp Date: Fri, 7 Apr 2023 23:00:06 +0200 Subject: [PATCH 04/11] Added poetry to common pip installation for venv compatibility --- fact_extractor/install/unpacker.py | 2 ++ fact_extractor/plugins/unpacking/generic_carver/install.sh | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fact_extractor/install/unpacker.py 
b/fact_extractor/install/unpacker.py index 7187d540..f7768a96 100644 --- a/fact_extractor/install/unpacker.py +++ b/fact_extractor/install/unpacker.py @@ -184,6 +184,8 @@ 'extract-dtb', # uefi 'git+https://github.com/theopolis/uefi-firmware-parser@v1.10', + # unblob + 'poetry', ], 'github': [ ( diff --git a/fact_extractor/plugins/unpacking/generic_carver/install.sh b/fact_extractor/plugins/unpacking/generic_carver/install.sh index c2f37fe6..cc41b4e6 100755 --- a/fact_extractor/plugins/unpacking/generic_carver/install.sh +++ b/fact_extractor/plugins/unpacking/generic_carver/install.sh @@ -15,10 +15,7 @@ curl -L -o sasquatch_1.0_amd64.deb https://github.com/onekey-sec/sasquatch/relea sudo dpkg -i sasquatch_1.0_amd64.deb rm -f sasquatch_1.0_amd64.deb -git clone https://github.com/dorpvom/unblob.git -b allow-carving-only - -# install poetry -sudo -EH pip3 install --upgrade poetry +git clone https://github.com/onekey-sec/unblob.git cd unblob || exit 1 # install unblob with poetry: From 2f9b2c10aaac5b5e278c8e95afc010c90039ead7 Mon Sep 17 00:00:00 2001 From: dorp Date: Tue, 18 Apr 2023 12:48:26 +0200 Subject: [PATCH 05/11] replaced unblob installation with pip --- fact_extractor/install/unpacker.py | 2 +- .../unpacking/generic_carver/install.sh | 19 +------------------ 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/fact_extractor/install/unpacker.py b/fact_extractor/install/unpacker.py index f7768a96..6b2006b6 100644 --- a/fact_extractor/install/unpacker.py +++ b/fact_extractor/install/unpacker.py @@ -185,7 +185,7 @@ # uefi 'git+https://github.com/theopolis/uefi-firmware-parser@v1.10', # unblob - 'poetry', + 'unblob', ], 'github': [ ( diff --git a/fact_extractor/plugins/unpacking/generic_carver/install.sh b/fact_extractor/plugins/unpacking/generic_carver/install.sh index cc41b4e6..ddafda5e 100755 --- a/fact_extractor/plugins/unpacking/generic_carver/install.sh +++ b/fact_extractor/plugins/unpacking/generic_carver/install.sh @@ -15,21 +15,4 @@ curl -L -o 
sasquatch_1.0_amd64.deb https://github.com/onekey-sec/sasquatch/relea sudo dpkg -i sasquatch_1.0_amd64.deb rm -f sasquatch_1.0_amd64.deb -git clone https://github.com/onekey-sec/unblob.git -cd unblob || exit 1 - -# install unblob with poetry: -poetry install --only main -UNBLOB_PATH=$(poetry env info --path) - -if [[ -f "$UNBLOB_PATH""/bin/unblob" ]]; then - if [[ ! -f "/usr/local/bin/unblob" ]]; then - sudo ln -s "$UNBLOB_PATH""/bin/unblob" /usr/local/bin/unblob - fi -else - echo "Could not install unblob" && exit 1 -fi - -rm -r tests - -exit 0 \ No newline at end of file +exit 0 From 9383ab0b6dbbe65e6238977e21eeb3f3f294ddca Mon Sep 17 00:00:00 2001 From: Johannes vom Dorp Date: Wed, 19 Apr 2023 10:31:32 +0200 Subject: [PATCH 06/11] Build and tag container with unblob as generic carver (#119) * added action to build extractor image based on unblob-2 branch --- .github/workflows/build-unblob-image.yml | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/build-unblob-image.yml diff --git a/.github/workflows/build-unblob-image.yml b/.github/workflows/build-unblob-image.yml new file mode 100644 index 00000000..b5de81af --- /dev/null +++ b/.github/workflows/build-unblob-image.yml @@ -0,0 +1,27 @@ +name: Publish Docker image based on unblob-2 branch + +on: + push: + branches: ['unblob-2'] + workflow_dispatch: + +jobs: + build-and-publish-image: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Login to DockerHub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_HUB_USER }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + + - name: Build and push Docker image + uses: docker/build-push-action@v3 + with: + context: . 
+ push: true + tags: fkiecad/fact_extractor:unblob From 9ffb3e74d1457a5648770d5d28eaeeccda8dd15c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Stucke?= Date: Thu, 25 Jul 2024 13:04:13 +0200 Subject: [PATCH 07/11] generic_carver: replaced external call to unblob with use of internal methods --- .../generic_carver/code/generic_carver.py | 216 ++++++------------ .../unpacking/generic_carver/install.sh | 5 +- .../generic_carver/test/data/fake_7z.7z | Bin 50 -> 0 bytes .../generic_carver/test/data/fake_gz.gz | Bin 400 -> 0 bytes .../generic_carver/test/data/fake_tar.tar | Bin 500 -> 0 bytes .../generic_carver/test/data/fake_xz.bin | Bin 130 -> 0 bytes .../generic_carver/test/data/fake_xz.xz | Bin 30 -> 0 bytes .../generic_carver/test/data/fake_zip.zip | Bin 100 -> 0 bytes .../test/data/trailing_data.bz2 | Bin 152 -> 0 bytes .../test/data/trailing_data.zip | Bin 270 -> 0 bytes .../test/test_plugin_generic_carver.py | 35 +++ .../test_plugin_generic_carver_binwalk.py | 71 ------ requirements-unpackers.txt | 3 +- 13 files changed, 106 insertions(+), 224 deletions(-) delete mode 100644 fact_extractor/plugins/unpacking/generic_carver/test/data/fake_7z.7z delete mode 100644 fact_extractor/plugins/unpacking/generic_carver/test/data/fake_gz.gz delete mode 100644 fact_extractor/plugins/unpacking/generic_carver/test/data/fake_tar.tar delete mode 100644 fact_extractor/plugins/unpacking/generic_carver/test/data/fake_xz.bin delete mode 100644 fact_extractor/plugins/unpacking/generic_carver/test/data/fake_xz.xz delete mode 100644 fact_extractor/plugins/unpacking/generic_carver/test/data/fake_zip.zip delete mode 100644 fact_extractor/plugins/unpacking/generic_carver/test/data/trailing_data.bz2 delete mode 100644 fact_extractor/plugins/unpacking/generic_carver/test/data/trailing_data.zip create mode 100644 fact_extractor/plugins/unpacking/generic_carver/test/test_plugin_generic_carver.py delete mode 100644 
fact_extractor/plugins/unpacking/generic_carver/test/test_plugin_generic_carver_binwalk.py diff --git a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py index acbbb3f4..b6185860 100644 --- a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py +++ b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py @@ -4,159 +4,79 @@ from __future__ import annotations import logging -import re -import shutil +import traceback +from itertools import chain from pathlib import Path +from typing import Iterable -from common_helper_process import execute_shell_command -from fact_helper_file import get_file_type_from_path +import structlog +from common_helper_unpacking_classifier import avg_entropy +from unblob.extractor import carve_unknown_chunk, carve_valid_chunk +from unblob.file_utils import File +from unblob.finder import search_chunks +from unblob.handlers import BUILTIN_HANDLERS +from unblob.models import TaskResult, PaddingChunk, UnknownChunk, Chunk +from unblob.processing import Task, remove_inner_chunks, calculate_unknown_chunks NAME = 'generic_carver' MIME_PATTERNS = ['generic/carver'] -VERSION = '0.9' - -TAR_MAGIC = b'ustar' -BZ2_EOF_MAGIC = [ # the magic string is only aligned to half bytes -> two possible strings - b'\x17\x72\x45\x38\x50\x90', - b'\x77\x24\x53\x85\x09', -] -REAL_SIZE_REGEX = re.compile(r'Physical Size = (\d+)') - - -def unpack_function(file_path, tmp_dir): - ''' - file_path specifies the input file. - tmp_dir should be used to store the extracted files. 
- ''' - logging.debug(f'File type unknown: Execute unblob on {file_path}') - - temp_file = Path('/tmp/unblob_report.json') - temp_file.unlink(missing_ok=True) - output = execute_shell_command( - f'unblob -sk --report {temp_file.absolute()} --entropy-depth 0 --depth 1 --extract-dir {tmp_dir} {file_path}' - ) - meta = temp_file.read_text(encoding='utf-8') - temp_file.unlink(missing_ok=True) - - drop_underscore_directory(tmp_dir) - filter_log = ArchivesFilter(tmp_dir).remove_false_positive_archives() - - return { - 'output': output, - 'unblob_meta': meta, - 'filter_log': filter_log - } - - -class ArchivesFilter: - def __init__(self, unpack_directory): - self.unpack_directory = Path(unpack_directory) - self.screening_logs = [] - - def remove_false_positive_archives(self) -> str: - for file_path in self.unpack_directory.glob('**/*'): - if not file_path.is_file(): - continue - file_type = get_file_type_from_path(file_path)['mime'] - - if file_type == 'application/x-tar' or self._is_possible_tar(file_type, file_path): - self._remove_invalid_archives(file_path, 'tar -tvf {}', 'does not look like a tar archive') - - elif file_type == 'application/x-xz': - self._remove_invalid_archives(file_path, 'xz -c -d {} | wc -c') - - elif file_type == 'application/gzip': - self._remove_invalid_archives(file_path, 'gzip -c -d {} | wc -c') - - elif file_path.suffix == '7z' or file_type in [ - 'application/x-7z-compressed', - 'application/x-lzma', - 'application/zip', - 'application/zlib', - ]: - self._remove_invalid_archives(file_path, '7z l {}', 'ERROR') - - if file_path.is_file(): - self._remove_trailing_data(file_type, file_path) - - return '\n'.join(self.screening_logs) - - @staticmethod - def _is_possible_tar(file_type: str, file_path: Path) -> bool: - # broken tar archives may be identified as octet-stream by newer versions of libmagic - if file_type == 'application/octet-stream': - with file_path.open(mode='rb') as fp: - fp.seek(0x101) - return fp.read(5) == TAR_MAGIC - return 
False - - def _remove_invalid_archives(self, file_path: Path, command, search_key=None): - output = execute_shell_command(command.format(file_path)) - - if search_key and search_key in output.replace('\n ', ''): - self._remove_file(file_path) - - elif not search_key and _output_is_empty(output): - self._remove_file(file_path) - - def _remove_file(self, file_path): - file_path.unlink() - self.screening_logs.append(f'{file_path.name} was removed (invalid archive)') - - def _remove_trailing_data(self, file_type: str, file_path: Path): - trailing_data_index = None - - if file_type in ['application/zip', 'application/zlib']: - trailing_data_index = _find_trailing_data_index_zip(file_path) - - elif file_type == 'application/x-bzip2': - trailing_data_index = _find_trailing_data_index_bz2(file_path) - - if trailing_data_index: - self._resize_file(trailing_data_index, file_path) - - def _resize_file(self, actual_size: int, file_path: Path): - with file_path.open('rb') as fp: - actual_content = fp.read(actual_size) - file_path.write_bytes(actual_content) - self.screening_logs.append(f'Removed trailing data at the end of {file_path.name}') - - -def _output_is_empty(output): - return int((output.split())[-1]) == 0 - - -def _find_trailing_data_index_zip(file_path: Path) -> int | None: - '''Archives carved by binwalk often have trailing data at the end. 
7z can determine the actual file size.''' - output = execute_shell_command(f'7z l {file_path}') - if 'There are data after the end of archive' in output: - match = REAL_SIZE_REGEX.search(output) - if match: - return int(match.groups()[0]) - return None - - -def _find_trailing_data_index_bz2(file_path: Path) -> int | None: - output = execute_shell_command(f'bzip2 -t {file_path}') - if 'trailing garbage' in output: - file_content = file_path.read_bytes() - matches = sorted(index for magic in BZ2_EOF_MAGIC if (index := file_content.find(magic)) != -1) - # there may be two matches, but we want the first one (but also not -1 == no match) - if matches: - # 10 is magic string + CRC 32 checksum + padding (see https://en.wikipedia.org/wiki/Bzip2#File_format) - return matches[0] + 10 - return None - - -def drop_underscore_directory(tmp_dir): - extracted_contents = list(Path(tmp_dir).iterdir()) - if not extracted_contents: - return - if not len(extracted_contents) == 1 or not extracted_contents[0].name.endswith('_extract'): - return - for result in extracted_contents[0].iterdir(): - shutil.move(str(result), str(result.parent.parent)) - shutil.rmtree(str(extracted_contents[0])) +VERSION = '1.0.0' + +# deactivate internal logger of unblob because it can slow down searching chunks +structlog.configure(wrapper_class=structlog.make_filtering_bound_logger(logging.CRITICAL)) + + +def unpack_function(file_path: str, tmp_dir: str) -> dict: + extraction_dir = Path(tmp_dir) + chunks = [] + filter_report = '' + path = Path(file_path) + + try: + with File.from_path(path) as file: + for chunk in _find_chunks(path, file): + if isinstance(chunk, PaddingChunk): + continue + if isinstance(chunk, UnknownChunk): + if _has_low_entropy(file, chunk): + filter_report += ( + f'removed chunk {chunk.start_offset}-{chunk.end_offset} (reason: low entropy)\n' + ) + continue + carve_unknown_chunk(extraction_dir, file, chunk) + else: + carve_valid_chunk(extraction_dir, file, chunk) + 
chunks.append(chunk.as_report(None).asdict()) + + report = _create_report(chunks) if chunks else 'No valid chunks found.' + if filter_report: + report += f'\nFiltered chunks:\n{filter_report}' + except Exception as error: + report = f"Error {error} during unblob extraction:\n{traceback.format_exc()}" + return {'output': report} + + +def _find_chunks(file_path: Path, file: File) -> Iterable[Chunk]: + task = Task(path=file_path, depth=0, blob_id='') + known_chunks = remove_inner_chunks(search_chunks(file, file.size(), BUILTIN_HANDLERS, TaskResult(task))) + unknown_chunks = calculate_unknown_chunks(known_chunks, file.size()) + yield from chain(known_chunks, unknown_chunks) + + +def _create_report(chunk_list: list[dict]) -> str: + report = ['Extracted chunks:'] + for chunk in sorted(chunk_list, key=lambda c: c['start_offset']): + chunk_type = chunk.get('handler_name', 'unknown') + report.append( + f'start: {chunk["start_offset"]}, end: {chunk["end_offset"]}, size: {chunk["size"]}, type: {chunk_type}' + ) + return '\n'.join(report) + + +def _has_low_entropy(file: File, chunk: UnknownChunk) -> bool: + file.seek(chunk.start_offset) + content = file.read(chunk.size) + return avg_entropy(content) < 0.01 # ----> Do not edit below this line <---- diff --git a/fact_extractor/plugins/unpacking/generic_carver/install.sh b/fact_extractor/plugins/unpacking/generic_carver/install.sh index ddafda5e..571a4859 100755 --- a/fact_extractor/plugins/unpacking/generic_carver/install.sh +++ b/fact_extractor/plugins/unpacking/generic_carver/install.sh @@ -1,12 +1,9 @@ #!/usr/bin/env bash -# This setup is largely ripped of from emba @ https://github.com/e-m-b-a/emba/blob/master/installer/IP61_unblob.sh -# Thanks to m-1-k-3 and the emba team! 
- cd "$( dirname "${BASH_SOURCE[0]}" )" || exit 1 echo "------------------------------------" -echo " install unblob via poetry " +echo " install unblob dependencies " echo "------------------------------------" sudo -EH apt-get install -y e2fsprogs img2simg lziprecover xz-utils libmagic1 libhyperscan5 diff --git a/fact_extractor/plugins/unpacking/generic_carver/test/data/fake_7z.7z b/fact_extractor/plugins/unpacking/generic_carver/test/data/fake_7z.7z deleted file mode 100644 index 9ebfa30219b6396951e2809526f8745f270c7543..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 50 xcmXr7+Ou9=hJktI|Nqx(86cnrN{ebwP-ZZg!?pAC+Dn5Q~#);kdJPmkWt8o5}3tpZWKj%|B{HkVbNq)Q{HWgw5}uFZQhvvtt?x z&P1zuxL{CpC>FpI1|I|+UYB{)OaDAoNc2)AvA)p^UfOg?6_-K2l8ExvoYL)#s5R)U z?9i!c@5y^+R-T$`gV%d$@HQQ3hboVWY3sp&YogZFX5GmpcpoOdVq3c9d`vpP(Zb0aPa^n$QTj; diff --git a/fact_extractor/plugins/unpacking/generic_carver/test/data/fake_xz.bin b/fact_extractor/plugins/unpacking/generic_carver/test/data/fake_xz.bin deleted file mode 100644 index 93cea4cbd468e3483a6efc40bd3c4d4f2ea1dd73..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 130 zcmV-|0Db>frMWt%uVxpffT=)Qirs|qb(Ekst~P`y-U+wF@tDPu;Aw{>vl})&?e|=_ zl+4@_Rck#((|&<@Om6n;eAWb~YYn%v3d`zdax`sNV9RR6O&96QR{iY&XQWJ*tR)^4)sqh{A6ce#cO>NEk`B45)u$<*eJu^Lf zD7Rpp7$&Py)nFea)14C-E6Fi(-4PWIV|c45(;ROgCh2D4Bt?WKKr_Q8zLsC@LS`>t GHln%k5+F^&5p7_THWP>m>gA7Ajetu{OCj;}I`TLSVxU_hl2}5m;yRFy0Ca= FilterTest: - with TemporaryDirectory() as temp_dir: - test_file = Path(temp_dir) / filename - source_file = TEST_DATA_DIR / filename - shutil.copyfile(source_file, test_file) - arch_filter = ArchivesFilter(temp_dir) - yield FilterTest(test_file, source_file, arch_filter) - - -@pytest.mark.parametrize('filename', ['fake_zip.zip', 'fake_tar.tar', 'fake_7z.7z', 'fake_xz.xz', 'fake_gz.gz']) -def test_remove_false_positives(filename): - with filter_test_setup(filename) as setup: - 
setup.filter.remove_false_positive_archives() - assert setup.test_file.is_file() is False - - -@pytest.mark.parametrize('filename', ['trailing_data.zip', 'trailing_data.bz2']) -def test_remove_trailing_data(filename): - with filter_test_setup(filename) as setup: - setup.filter.remove_false_positive_archives() - assert setup.filter.screening_logs == [f'Removed trailing data at the end of {filename}'] - assert setup.test_file.stat().st_size < setup.source_file.stat().st_size diff --git a/requirements-unpackers.txt b/requirements-unpackers.txt index 666752dc..10bfadd7 100644 --- a/requirements-unpackers.txt +++ b/requirements-unpackers.txt @@ -30,4 +30,5 @@ extract-dtb~=1.2.3 # uefi uefi-firmware~=1.11 # unblob -unblob +# FixMe: pin to next stable version; the latest release is missing a bug fix related to zip64 +git+https://github.com/onekey-sec/unblob.git@e0d9805 From b32f26faee6ad0066c3ac13d5367bb140153848c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Stucke?= Date: Thu, 25 Jul 2024 13:22:58 +0200 Subject: [PATCH 08/11] generic_carver: add missing test file --- .../generic_carver/test/data/carving_test_file | Bin 0 -> 626 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 fact_extractor/plugins/unpacking/generic_carver/test/data/carving_test_file diff --git a/fact_extractor/plugins/unpacking/generic_carver/test/data/carving_test_file b/fact_extractor/plugins/unpacking/generic_carver/test/data/carving_test_file new file mode 100644 index 0000000000000000000000000000000000000000..ede93aefc97e619807a7e471875e26973785df81 GIT binary patch literal 626 zcmcc9`rtxFLfG;PyLp43X0PVTa$D%9z4P&b)$7f8Sp3~h8ihD|`j}Q+xoh}hsnAIs zmC2@K=Jp8o!{?x3V_%^ z%*-Iekd~hx8p6rI{C~mxG{%MV(@HD285mh!Ff%ZK37|ArQeqKTNosM4p^-6HfHxzP z95XKCBtW(>Faq(GMi2{ZI4i_(G=l@YS=m5J7=h3eNc(~~0}jlq_N-TzVPLt~(0)e* z>Y5;!tBU`*Ff#C?hX^pV*ccc&MHtw*fqqd`WMtqGVBp|l1O+q$11qB>4@^IZ7UX0| zW5@?W1_ntc1_lu>MuvvWGcLdH++q}EV`NZh00o@2`I1}dcW(132)Mh(3n=~U(?8^N 
zu(!-e>si2|Qk_~>o|cl|m!9zKsE{gM{;l%7N$R%q7nT{F`SNtG*xYAq_m%(v)%TSR literal 0 HcmV?d00001 From 263f8ba7d1361985c9162d89d30f28a23d5aff55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Stucke?= Date: Tue, 12 Nov 2024 15:35:01 +0100 Subject: [PATCH 09/11] chore: ruff fixes --- .../generic_carver/code/generic_carver.py | 15 ++++--- .../test/test_plugin_generic_carver.py | 4 +- .../squashFS/test/test_plugin_squashfs.py | 41 ++++++++++--------- 3 files changed, 33 insertions(+), 27 deletions(-) mode change 100755 => 100644 fact_extractor/plugins/unpacking/squashFS/test/test_plugin_squashfs.py diff --git a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py index b6185860..fedd172c 100644 --- a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py +++ b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py @@ -1,6 +1,7 @@ -''' +""" This plugin unpacks all files via carving -''' +""" + from __future__ import annotations import logging @@ -15,13 +16,15 @@ from unblob.file_utils import File from unblob.finder import search_chunks from unblob.handlers import BUILTIN_HANDLERS -from unblob.models import TaskResult, PaddingChunk, UnknownChunk, Chunk -from unblob.processing import Task, remove_inner_chunks, calculate_unknown_chunks +from unblob.models import Chunk, PaddingChunk, TaskResult, UnknownChunk +from unblob.processing import Task, calculate_unknown_chunks, remove_inner_chunks NAME = 'generic_carver' MIME_PATTERNS = ['generic/carver'] VERSION = '1.0.0' +MIN_FILE_ENTROPY = 0.01 + # deactivate internal logger of unblob because it can slow down searching chunks structlog.configure(wrapper_class=structlog.make_filtering_bound_logger(logging.CRITICAL)) @@ -52,7 +55,7 @@ def unpack_function(file_path: str, tmp_dir: str) -> dict: if filter_report: report += f'\nFiltered chunks:\n{filter_report}' except Exception as error: - report = f"Error {error} 
def _has_low_entropy(file: File, chunk: UnknownChunk) -> bool:
    """Return ``True`` if the average entropy of the chunk's content is below ``MIN_FILE_ENTROPY``.

    The content is read from ``file``, starting at ``chunk.start_offset`` and spanning ``chunk.size`` bytes.
    """
    file.seek(chunk.start_offset)
    return avg_entropy(file.read(chunk.size)) < MIN_FILE_ENTROPY
b/fact_extractor/plugins/unpacking/squashFS/test/test_plugin_squashfs.py @@ -1,17 +1,17 @@ from pathlib import Path +from tempfile import TemporaryDirectory import pytest -from tempfile import TemporaryDirectory from test.unit.unpacker.test_unpacker import TestUnpackerBase -from ..code.squash_fs import _unpack_success, unpack_function, SQUASH_UNPACKER +from ..code.squash_fs import SQUASH_UNPACKER, _unpack_success, unpack_function TEST_DATA_DIR = Path(__file__).parent / 'data' @pytest.mark.parametrize( - 'unpack_path, expected', + ('unpack_path', 'expected'), [ ('/foo/bar/unpacker', False), (TEST_DATA_DIR, True), @@ -38,22 +38,25 @@ class TestSquashUnpacker(TestUnpackerBase): def test_unpacker_selection_generic(self): self.check_unpacker_selection('filesystem/squashfs', 'SquashFS') - @pytest.mark.parametrize(('file', 'expected'), [ - ('avm_be.sqfs4', 'sasquatch-v4be'), - ('avm_le.sqfs4', 'sasquatch'), - ('gzip.sqfs', 'sasquatch'), - ('lz4.sqfs', 'sasquatch'), - ('lzma.sqfs', 'sasquatch'), - ('lzma1_be.sqfs3', 'sasquatch'), - ('lzma1_le.sqfs3', 'sasquatch'), - ('lzma_be.sqfs2', 'unsquashfs4-avm-be'), - ('lzma_le.sqfs2', 'unsquashfs4-avm-be'), - ('lzo.sqfs', 'sasquatch'), - ('xz.sqfs', 'sasquatch'), - ('zlib_be.sqfs3', 'sasquatch'), - ('zlib_le.sqfs3', 'sasquatch'), - ('zstd.sqfs', 'sasquatch'), - ]) + @pytest.mark.parametrize( + ('file', 'expected'), + [ + ('avm_be.sqfs4', 'sasquatch-v4be'), + ('avm_le.sqfs4', 'sasquatch'), + ('gzip.sqfs', 'sasquatch'), + ('lz4.sqfs', 'sasquatch'), + ('lzma.sqfs', 'sasquatch'), + ('lzma1_be.sqfs3', 'sasquatch'), + ('lzma1_le.sqfs3', 'sasquatch'), + ('lzma_be.sqfs2', 'unsquashfs4-avm-be'), + ('lzma_le.sqfs2', 'unsquashfs4-avm-be'), + ('lzo.sqfs', 'sasquatch'), + ('xz.sqfs', 'sasquatch'), + ('zlib_be.sqfs3', 'sasquatch'), + ('zlib_le.sqfs3', 'sasquatch'), + ('zstd.sqfs', 'sasquatch'), + ], + ) def test_extraction_sqfs(self, file, expected): meta_data = self.check_unpacking_of_standard_unpack_set(TEST_DATA_DIR / file) assert 
# register custom zlib handler to allow carving zlib chunks from inside files
@hookimpl
def unblob_register_handlers():
    """Yield the additional unblob handler classes contributed by this plugin."""
    yield ZlibCarvingHandler


class ZlibCarvingHandler(ZlibHandler):
    """Variant of unblob's ``ZlibHandler`` with its own name and start patterns.

    Only ``NAME`` and ``PATTERNS`` are overridden; everything else is inherited from ``ZlibHandler``.
    """

    NAME = 'zlib_carver'

    # Two-byte zlib stream headers (CMF 0x78 followed by FLG); per RFC 1950 the FLG byte encodes the
    # compression level hint (FLEVEL) the stream was written with.
    PATTERNS = [  # noqa: RUF012
        HexString(header)
        for header in (
            '78 01',  # FLEVEL 0: fastest / low compression
            '78 9c',  # FLEVEL 2: default compression
            '78 da',  # FLEVEL 3: best compression
            '78 5e',  # FLEVEL 1: fast compression
        )
    ]
11/11] feat: added xiaomi hdr unpacker based on unblob --- .../plugins/unpacking/xiaomi_hdr/__init__.py | 0 .../unpacking/xiaomi_hdr/code/__init__.py | 0 .../unpacking/xiaomi_hdr/code/xiaomi_hdr.py | 50 ++++++++++++++++++ .../unpacking/xiaomi_hdr/test/__init__.py | 0 .../unpacking/xiaomi_hdr/test/data/test.hdr1 | Bin 0 -> 574 bytes .../unpacking/xiaomi_hdr/test/test_hdr.py | 22 ++++++++ 6 files changed, 72 insertions(+) create mode 100644 fact_extractor/plugins/unpacking/xiaomi_hdr/__init__.py create mode 100644 fact_extractor/plugins/unpacking/xiaomi_hdr/code/__init__.py create mode 100644 fact_extractor/plugins/unpacking/xiaomi_hdr/code/xiaomi_hdr.py create mode 100644 fact_extractor/plugins/unpacking/xiaomi_hdr/test/__init__.py create mode 100644 fact_extractor/plugins/unpacking/xiaomi_hdr/test/data/test.hdr1 create mode 100644 fact_extractor/plugins/unpacking/xiaomi_hdr/test/test_hdr.py diff --git a/fact_extractor/plugins/unpacking/xiaomi_hdr/__init__.py b/fact_extractor/plugins/unpacking/xiaomi_hdr/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/fact_extractor/plugins/unpacking/xiaomi_hdr/code/__init__.py b/fact_extractor/plugins/unpacking/xiaomi_hdr/code/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/fact_extractor/plugins/unpacking/xiaomi_hdr/code/xiaomi_hdr.py b/fact_extractor/plugins/unpacking/xiaomi_hdr/code/xiaomi_hdr.py new file mode 100644 index 00000000..56e7a89f --- /dev/null +++ b/fact_extractor/plugins/unpacking/xiaomi_hdr/code/xiaomi_hdr.py @@ -0,0 +1,50 @@ +""" +This plugin uses unblob to unpack Xiaomi HDR1/2 images. 
+""" + +from __future__ import annotations + +import logging +from pathlib import Path + +import structlog +from structlog.testing import capture_logs +from unblob.handlers.archive.xiaomi.hdr import HDRExtractor + +NAME = 'Xiaomi HDR' +MIME_PATTERNS = ['firmware/xiaomi-hdr1', 'firmware/xiaomi-hdr2'] +VERSION = '0.1.0' + +structlog.configure( + wrapper_class=structlog.make_filtering_bound_logger(logging.DEBUG), +) + + +def unpack_function(file_path: str, tmp_dir: str) -> dict: + path = Path(file_path) + with path.open('rb') as fp: + magic = fp.read(4) + if magic in [b'HDR1', b'HDR2']: + extractor = HDRExtractor(f'{magic.decode().lower()}_header_t') + else: + return {'output': ''} + + # unblob uses structlog for logging, but we can capture the logs with this convenient testing function + with capture_logs() as log_list: + extractor.extract(path, Path(tmp_dir)) + return {'output': _format_logs(log_list)} + + +def _format_logs(logs: list[dict]) -> str: + output = '' + for entry in logs: + output += '\n'.join(f'{key}: {value}' for key, value in entry.items() if key not in {'_verbosity', 'log_level'}) + return output + + +# ----> Do not edit below this line <---- + + +def setup(unpack_tool): + for item in MIME_PATTERNS: + unpack_tool.register_plugin(item, (unpack_function, NAME, VERSION)) diff --git a/fact_extractor/plugins/unpacking/xiaomi_hdr/test/__init__.py b/fact_extractor/plugins/unpacking/xiaomi_hdr/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/fact_extractor/plugins/unpacking/xiaomi_hdr/test/data/test.hdr1 b/fact_extractor/plugins/unpacking/xiaomi_hdr/test/data/test.hdr1 new file mode 100644 index 0000000000000000000000000000000000000000..3d97faa7cea5eb2a8135262780f7f51cd5a1c7fc GIT binary patch literal 574 zcmeZp2{P1UWMHszGXKiJ;Lc#cz`#%f#E*a&8`!stf#E+8hyW%3|7T!GPt8j$%1l;B z%g;$kEz&PZEiO?=%gjktFpk$NsVHFx$w*Zw$;d29QGiP+!pyLPngNml=`qA+Ay5pi z(~2uOKd&S;uf$5hw=x8%E2tD`lcAoGo-tQgYEf}!ex8*=Kv8~rQDUxwfgTug!EBR3 
class TestXiaomiHdrUnpacker(TestUnpackerBase):
    """Tests for the unblob-based Xiaomi HDR unpacking plugin."""

    def test_unpacker_selection_generic(self):
        """Every MIME pattern registered by the plugin must select the 'Xiaomi HDR' unpacker."""
        for mime_type in MIME_PATTERNS:
            self.check_unpacker_selection(mime_type, 'Xiaomi HDR')

    def test_extraction_hdr(self):
        """Unpack the HDR1 sample and check that the captured log mentions the contained file."""
        sample = TEST_DATA_DIR / 'test.hdr1'
        assert sample.is_file(), 'test file is missing'

        meta_data = self.check_unpacking_of_standard_unpack_set(sample, output=True)

        assert 'output' in meta_data
        assert 'testfile1' in meta_data['output']