Skip to content

Commit

Permalink
Add additional file types to patool extraction (fkie-cad#115)
Browse files Browse the repository at this point in the history
* added types, tests and test files to patool plugin
* added missing black and isort configurations to pyproject.toml
  • Loading branch information
jstucke authored May 3, 2023
1 parent 5e109f7 commit ed65a74
Show file tree
Hide file tree
Showing 27 changed files with 352 additions and 35 deletions.
62 changes: 43 additions & 19 deletions fact_extractor/install/unpacker.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
import hashlib
import logging
import os
from getpass import getuser
from pathlib import Path
from shlex import split
from subprocess import CalledProcessError, run
from tempfile import TemporaryDirectory

from common_helper_process import execute_shell_command_get_return_code

from helperFunctions.install import (
apt_install_packages,
install_github_project,
InstallationError,
OperateInDirectory,
pip_install_packages,
apt_install_packages,
apt_remove_packages,
install_github_project,
pip_install_packages,
)

BIN_DIR = Path(__file__).parent.parent / 'bin'
Expand Down Expand Up @@ -102,28 +105,29 @@
'liblzo2-dev',
'xvfb',
'libcapstone-dev',
# patool and unpacking backends
'lrzip',
# patool
'arj',
'cabextract',
'cpio',
'unadf',
'rpm2cpio',
'lzop',
'flac',
'gzip',
'lhasa',
'cabextract',
'zpaq',
'libchm-dev',
'arj',
'xdms',
'rzip',
'lrzip',
'lzip',
'unalz',
'unrar',
'gzip',
'lzop',
'ncompress',
'nomarch',
'flac',
'unace',
'rpm2cpio',
'rzip',
'sharutils',
'unace',
'unadf',
'unalz',
'unar',
'unrar',
'xdms',
'zpaq',
# Freetz
'autoconf',
'automake',
Expand Down Expand Up @@ -194,7 +198,7 @@
'rampageX/firmware-mod-kit',
[
'(cd src && make untrx && make -C tpl-tool/src && make -C yaffs2utils)',
'cp src/untrx src/yaffs2utils/unyaffs2 src/tpl-tool/src/tpl-tool ../../bin/'
'cp src/untrx src/yaffs2utils/unyaffs2 src/tpl-tool/src/tpl-tool ../../bin/',
],
),
],
Expand Down Expand Up @@ -228,6 +232,7 @@ def main(distribution):

# install plug-in dependencies
_install_plugins()
_install_patool_deps()

# configure environment
_edit_sudoers()
Expand Down Expand Up @@ -262,6 +267,25 @@ def _edit_sudoers():
raise InstallationError('Editing sudoers file did not succeed\n{chown_output}\n{mv_output}')


def _install_patool_deps():
    '''
    Install additional dependencies of patool.

    Currently this covers the zoo unpacker: its .deb package is downloaded with wget into a temporary
    build directory, its SHA-256 checksum is compared to a pinned value and it is then installed with dpkg.

    :raises InstallationError: if the download or the dpkg call fails or if the checksum does not match.
    '''
    file_name = 'zoo_2.10-28_amd64.deb'
    expected_sha = '953f4f94095ef3813dfd30c8977475c834363aaabce15ab85ac5195e52fd816a'
    with TemporaryDirectory(prefix='patool') as build_directory:
        with OperateInDirectory(build_directory):
            # install zoo unpacker
            try:
                run(split(f'wget http://launchpadlibrarian.net/230277773/{file_name}'), capture_output=True, check=True)
                # The download uses plain HTTP, so the pinned checksum is the only integrity check before the
                # package is installed with root privileges. It must be an explicit comparison: an `assert`
                # would be stripped when Python runs with -O and the check would silently disappear.
                actual_sha = _sha256_hash_file(Path(file_name))
                if actual_sha != expected_sha:
                    raise InstallationError(
                        'Error during zoo unpacker installation: '
                        f'checksum mismatch for {file_name} (expected {expected_sha}, got {actual_sha})'
                    )
                run(split(f'sudo dpkg -i {file_name}'), capture_output=True, check=True)
            except CalledProcessError as error:
                raise InstallationError('Error during zoo unpacker installation') from error


def _sha256_hash_file(file_path: Path) -> str:
    '''
    Calculate the SHA-256 checksum of a file.

    :param file_path: path of the file whose contents are hashed (the file is read completely into memory).
    :return: the digest as a lowercase hexadecimal string.
    '''
    digest = hashlib.sha256()
    digest.update(file_path.read_bytes())
    return digest.hexdigest()


def _install_freetz():
logging.info('Installing FREETZ')
current_user = getuser()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ def remove_false_positive_archives(self) -> str:

return '\n'.join(self.screening_logs)


@staticmethod
def _is_possible_tar(file_type: str, file_path: Path) -> bool:
# broken tar archives may be identified as octet-stream by newer versions of libmagic
Expand Down
8 changes: 3 additions & 5 deletions fact_extractor/plugins/unpacking/patool/code/patool.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
MIME_PATTERNS = [
'application/gzip',
'application/java-archive',
'application/rar',
'application/vnd.debian.binary-package',
'application/vnd.ms-cab-compressed',
'application/x-ace',
Expand All @@ -23,15 +22,14 @@
'application/x-debian-package',
'application/x-dms',
'application/x-gzip',
'application/x-iso9660-image',
'application/x-lha',
'application/x-lrzip',
'application/x-lzh',
'application/x-lzh-compressed',
'application/x-lzip',
'application/x-lzo',
'application/x-lzop',
'application/x-rar',
'application/x-redhat-package-manager',
'application/x-rpm',
'application/x-rzip',
'application/x-shar',
'application/x-tar',
Expand All @@ -40,7 +38,7 @@
'application/zpaq',
'audio/flac',
]
VERSION = '0.5.3'
VERSION = '0.6.0'

TOOL_PATH = execute_shell_command('which patool').strip()

Expand Down
3 changes: 3 additions & 0 deletions fact_extractor/plugins/unpacking/patool/test/data/test.a
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
!<arch>
test.data/ 0 0 0 644 128 `
d34bb4669ac6e2b15b07f2489a22c289974a3dcfddeec68fc003d0109186e34dbc4c419546211e534c4be6ae657397959ae8b4a5e2c095c5bba3e0bca38f0b51
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
227 changes: 227 additions & 0 deletions fact_extractor/plugins/unpacking/patool/test/data/test.shar
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
#!/bin/sh
# This is a shell archive (produced by GNU sharutils 4.15.2).
# To extract the files from this archive, save it to some FILE, remove
# everything before the '#!/bin/sh' line above, then type 'sh FILE'.
#
lock_dir=_sh99321
# Made on 2023-02-10 12:53 CET by <foo@bar-123>.
# Source directory was '/foo/bar'.
#
# Existing files will *not* be overwritten, unless '-c' is specified.
#
# This shar contains:
# length mode name
# ------ ---------- ------------------------------------------
# 20 -rw-r--r-- get_files_test/generic folder/test file 3_.txt
# 62 -rw-r--r-- get_files_test/testfile1
# 28 -rw-r--r-- get_files_test/testfile2
#
MD5SUM=${MD5SUM-md5sum}
f=`${MD5SUM} --version | egrep '^md5sum .*(core|text)utils'`
test -n "${f}" && md5check=true || md5check=false
${md5check} || \
echo 'Note: not verifying md5sums. Consider installing GNU coreutils.'
if test "X$1" = "X-c"
then keep_file=''
else keep_file=true
fi
echo=echo
save_IFS="${IFS}"
IFS="${IFS}:"
gettext_dir=
locale_dir=
set_echo=false

for dir in $PATH
do
if test -f $dir/gettext \
&& ($dir/gettext --version >/dev/null 2>&1)
then
case `$dir/gettext --version 2>&1 | sed 1q` in
*GNU*) gettext_dir=$dir
set_echo=true
break ;;
esac
fi
done

if ${set_echo}
then
set_echo=false
for dir in $PATH
do
if test -f $dir/shar \
&& ($dir/shar --print-text-domain-dir >/dev/null 2>&1)
then
locale_dir=`$dir/shar --print-text-domain-dir`
set_echo=true
break
fi
done

if ${set_echo}
then
TEXTDOMAINDIR=$locale_dir
export TEXTDOMAINDIR
TEXTDOMAIN=sharutils
export TEXTDOMAIN
echo="$gettext_dir/gettext -s"
fi
fi
IFS="$save_IFS"
if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null
then if (echo -n test; echo 1,2,3) | grep n >/dev/null
then shar_n= shar_c='
'
else shar_n=-n shar_c= ; fi
else shar_n= shar_c='\c' ; fi
f=shar-touch.$$
st1=200112312359.59
st2=123123592001.59
st2tr=123123592001.5 # old SysV 14-char limit
st3=1231235901

if touch -am -t ${st1} ${f} >/dev/null 2>&1 && \
test ! -f ${st1} && test -f ${f}; then
shar_touch='touch -am -t $1$2$3$4$5$6.$7 "$8"'

elif touch -am ${st2} ${f} >/dev/null 2>&1 && \
test ! -f ${st2} && test ! -f ${st2tr} && test -f ${f}; then
shar_touch='touch -am $3$4$5$6$1$2.$7 "$8"'

elif touch -am ${st3} ${f} >/dev/null 2>&1 && \
test ! -f ${st3} && test -f ${f}; then
shar_touch='touch -am $3$4$5$6$2 "$8"'

else
shar_touch=:
echo
${echo} 'WARNING: not restoring timestamps. Consider getting and
installing GNU '\''touch'\'', distributed in GNU coreutils...'
echo
fi
rm -f ${st1} ${st2} ${st2tr} ${st3} ${f}
#
if test ! -d ${lock_dir} ; then :
else ${echo} "lock directory ${lock_dir} exists"
exit 1
fi
if mkdir ${lock_dir}
then ${echo} "x - created lock directory ${lock_dir}."
else ${echo} "x - failed to create lock directory ${lock_dir}."
exit 1
fi
# ============= get_files_test/generic folder/test file 3_.txt ==============
if test ! -d 'get_files_test'; then
mkdir 'get_files_test'
if test $? -eq 0
then ${echo} "x - created directory get_files_test."
else ${echo} "x - failed to create directory get_files_test."
exit 1
fi
fi
if test ! -d 'get_files_test/generic folder'; then
mkdir 'get_files_test/generic folder'
if test $? -eq 0
then ${echo} "x - created directory get_files_test/generic folder."
else ${echo} "x - failed to create directory get_files_test/generic folder."
exit 1
fi
fi
if test -n "${keep_file}" && test -f 'get_files_test/generic folder/test file 3_.txt'
then
${echo} "x - SKIPPING get_files_test/generic folder/test file 3_.txt (file already exists)"

else
${echo} "x - extracting get_files_test/generic folder/test file 3_.txt (Text)"
sed 's/^X//' << 'SHAR_EOF' | uudecode &&
begin 600 get_files_test/generic folder/test file 3_.txt
45&AE('1H:7)D('1E<W0@9FEL92$`
`
end
SHAR_EOF
(set 20 15 09 15 12 20 40 'get_files_test/generic folder/test file 3_.txt'
eval "${shar_touch}") && \
chmod 0644 'get_files_test/generic folder/test file 3_.txt'
if test $? -ne 0
then ${echo} "restore of get_files_test/generic folder/test file 3_.txt failed"
fi
if ${md5check}
then (
${MD5SUM} -c >/dev/null 2>&1 || ${echo} 'get_files_test/generic folder/test file 3_.txt': 'MD5 check failed'
) << \SHAR_EOF
dfb79b49698fbae3d6eaca69b211b79d get_files_test/generic folder/test file 3_.txt
SHAR_EOF

else
test `LC_ALL=C wc -c < 'get_files_test/generic folder/test file 3_.txt'` -ne 20 && \
${echo} "restoration warning: size of 'get_files_test/generic folder/test file 3_.txt' is not 20"
fi
fi
# ============= get_files_test/testfile1 ==============
if test -n "${keep_file}" && test -f 'get_files_test/testfile1'
then
${echo} "x - SKIPPING get_files_test/testfile1 (file already exists)"

else
${echo} "x - extracting get_files_test/testfile1 (text)"
sed 's/^X//' << 'SHAR_EOF' > 'get_files_test/testfile1' &&
test file:
content: MyTestRule 1.2.3
Version: Program 0.0.0.0
SHAR_EOF
(set 20 15 09 15 12 20 40 'get_files_test/testfile1'
eval "${shar_touch}") && \
chmod 0644 'get_files_test/testfile1'
if test $? -ne 0
then ${echo} "restore of get_files_test/testfile1 failed"
fi
if ${md5check}
then (
${MD5SUM} -c >/dev/null 2>&1 || ${echo} 'get_files_test/testfile1': 'MD5 check failed'
) << \SHAR_EOF
e802ca22f6cd2d9357cf3da1d191879e get_files_test/testfile1
SHAR_EOF

else
test `LC_ALL=C wc -c < 'get_files_test/testfile1'` -ne 62 && \
${echo} "restoration warning: size of 'get_files_test/testfile1' is not 62"
fi
fi
# ============= get_files_test/testfile2 ==============
if test -n "${keep_file}" && test -f 'get_files_test/testfile2'
then
${echo} "x - SKIPPING get_files_test/testfile2 (file already exists)"

else
${echo} "x - extracting get_files_test/testfile2 (Text)"
sed 's/^X//' << 'SHAR_EOF' | uudecode &&
begin 600 get_files_test/testfile2
<5&AI<R!I<R!T:&4@<V5C;VYD('1E<W0@9FEL94Q_
`
end
SHAR_EOF
(set 20 15 09 15 12 20 40 'get_files_test/testfile2'
eval "${shar_touch}") && \
chmod 0644 'get_files_test/testfile2'
if test $? -ne 0
then ${echo} "restore of get_files_test/testfile2 failed"
fi
if ${md5check}
then (
${MD5SUM} -c >/dev/null 2>&1 || ${echo} 'get_files_test/testfile2': 'MD5 check failed'
) << \SHAR_EOF
08dd892caa120da5985fb0c0ec739295 get_files_test/testfile2
SHAR_EOF

else
test `LC_ALL=C wc -c < 'get_files_test/testfile2'` -ne 28 && \
${echo} "restoration warning: size of 'get_files_test/testfile2' is not 28"
fi
fi
if rm -fr ${lock_dir}
then ${echo} "x - removed lock directory ${lock_dir}."
else ${echo} "x - failed to remove lock directory ${lock_dir}."
exit 1
fi
exit 0
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit ed65a74

Please sign in to comment.