From 51a3c5caf9b684ef38fcc4b03e87cc6697c24d27 Mon Sep 17 00:00:00 2001 From: Vadim Zhukov Date: Mon, 21 Jun 2021 05:41:06 +0300 Subject: [PATCH 01/89] use native C integral types in get_config, avoids confusion with size_t (#155) --- plugins/plugin_test.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/plugins/plugin_test.cpp b/plugins/plugin_test.cpp index 1aafbb37..ae65108b 100644 --- a/plugins/plugin_test.cpp +++ b/plugins/plugin_test.cpp @@ -162,13 +162,14 @@ feature_recorder_set::feature_recorder_set(uint32_t f,const feature_recorder_set void scanner_info::get_config(const scanner_info::config_t &c, const std::string &n,std::string *val,const std::string &help){} void scanner_info::get_config(const std::string &n,std::string *val,const std::string &help) {} -void scanner_info::get_config(const std::string &n,uint64_t *val,const std::string &help) {} -void scanner_info::get_config(const std::string &n,int32_t *val,const std::string &help) {} -void scanner_info::get_config(const std::string &n,uint32_t *val,const std::string &help) {} -void scanner_info::get_config(const std::string &n,uint16_t *val,const std::string &help) {} -void scanner_info::get_config(const std::string &n,uint8_t *val,const std::string &help) {} -#ifdef __APPLE__ -void scanner_info::get_config(const std::string &n,size_t *val,const std::string &help) {} -#define HAVE_GET_CONFIG_SIZE_T -#endif +void scanner_info::get_config(const std::string &n,signed char *val,const std::string &help) {} +void scanner_info::get_config(const std::string &n,short *val,const std::string &help) {} +void scanner_info::get_config(const std::string &n,int *val,const std::string &help) {} +void scanner_info::get_config(const std::string &n,long *val,const std::string &help) {} +void scanner_info::get_config(const std::string &n,long long *val,const std::string &help) {} +void scanner_info::get_config(const std::string &n,unsigned char *val,const std::string &help) {} +void 
scanner_info::get_config(const std::string &n,unsigned short *val,const std::string &help) {} +void scanner_info::get_config(const std::string &n,unsigned int *val,const std::string &help) {} +void scanner_info::get_config(const std::string &n,unsigned long *val,const std::string &help) {} +void scanner_info::get_config(const std::string &n,unsigned long long *val,const std::string &help) {} void scanner_info::get_config(const std::string &n,bool *val,const std::string &help) {} From 1413582d295053c5107f5f33122b1282df500df0 Mon Sep 17 00:00:00 2001 From: "Simson L. Garfinkel" Date: Sun, 20 Jun 2021 22:45:52 -0400 Subject: [PATCH 02/89] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e2081353..17ee478d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![codecov](https://codecov.io/gh/simsong/bulk_extractor/branch/master/graph/badge.svg?token=3w691sdgLu)](https://codecov.io/gh/simsong/bulk_extractor) + Welcome to bulk_extractor. Note: bulk_extractor version 2.0 is now under development. For information, please see [Release 2.0 roadmap in the release-2.0-dev branch](https://github.com/simsong/bulk_extractor/blob/release-2.0-dev/doc/ROADMAP_2.0.md). 
From f0623a7fefaca2a9182ebb03b9ce9005246a2ab8 Mon Sep 17 00:00:00 2001 From: Jan Gruber Date: Tue, 22 Jun 2021 18:05:53 +0000 Subject: [PATCH 03/89] Add a license exception in regard to linking with openssl (#169) --- LICENSE.md | 4 ++ licenses/LICENSE.openssl | 125 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 licenses/LICENSE.openssl diff --git a/LICENSE.md b/LICENSE.md index 37bdf7b8..fe564a73 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -116,3 +116,7 @@ Licensed under General Public License version 3 with the Autoconf exception: * m4/ac_check_classpath.m4 is Copyright (C) 2000 Stephane Bortzmeyer + +Licensed under OpenSSL License + +Because of the fact, that the GPL (including version 3) is incompatible with some terms of the OpenSSL license, the author hereby explicitly states a license exception, which permits the linking of bulk_extractor with OpenSSL. diff --git a/licenses/LICENSE.openssl b/licenses/LICENSE.openssl new file mode 100644 index 00000000..e953f590 --- /dev/null +++ b/licenses/LICENSE.openssl @@ -0,0 +1,125 @@ + + LICENSE ISSUES + ============== + + The OpenSSL toolkit stays under a double license, i.e. both the conditions of + the OpenSSL License and the original SSLeay license apply to the toolkit. + See below for the actual license texts. + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2018 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + From 154dfea6db843515772baa0e25fcd85cff38405f Mon Sep 17 00:00:00 2001 From: Jan Gruber Date: Thu, 24 Jun 2021 16:23:54 +0000 Subject: [PATCH 04/89] Modify scan_json.cpp to use the libary json-c (#170) Add test data and rework scan_json.cpp to use json-c instead of code under a non-free MIT license. 
--- Makefile.am | 12 +- configure.ac | 8 +- etc/CONFIGURE_AMAZON_LINUX.bash | 1 + etc/CONFIGURE_CENTOS7.bash | 1 + etc/CONFIGURE_DEBIAN9.bash | 2 +- etc/CONFIGURE_FEDORA31.bash | 1 + etc/CONFIGURE_FEDORA32.bash | 1 + etc/CONFIGURE_UBUNTU16LTS.bash | 2 +- etc/CONFIGURE_UBUNTU18LTS.bash | 2 +- src/scan_json.cpp | 502 ++++---------------------------- tests/Data/json_log.txt | 26 ++ tests/Data/json_plain.txt | 15 + 12 files changed, 118 insertions(+), 455 deletions(-) create mode 100644 tests/Data/json_log.txt create mode 100644 tests/Data/json_plain.txt diff --git a/Makefile.am b/Makefile.am index f2d93de4..dbce4582 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,5 @@ # bulk_extractor Makefile.am -# This file is compiled with automake to create Makefile.in. +# This file is compiled with automake to create Makefile.in. # Makefile.in is transformed by "configure" to create Makefile # @@ -8,7 +8,7 @@ SRC_WIN=src_win/ include src_win/Makefile -# These two conditionals are set by configure. If they, +# These two conditionals are set by configure. If they, # they cause a "make" in the top-level to automatically run make in the subdirs if WANT_BE_VIEWER BE_VIEWER_DIR = java_gui @@ -20,7 +20,7 @@ endif ## SRC_WIN_DIR=src_win ## endif -SUBDIRS = doc doc/latex_manuals src man plugins python specfiles tests $(BE_VIEWER_DIR) +SUBDIRS = doc doc/latex_manuals src man plugins python specfiles tests $(BE_VIEWER_DIR) # don't include bootstrap. People run it, and they shoudln't # It's only for people who check out the git repo @@ -51,7 +51,7 @@ EXTRA_DIST = \ ACLOCAL_AMFLAGS = -I m4 -distclean2: +distclean2: @echo Deleting: @find . \ \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ @@ -103,10 +103,10 @@ winlibs: if [ -r Makefile ]; then $(MAKE) distclean ; fi if [ ! -d win32 ]; then mkdir win32 ; fi if [ ! 
-r win32/Makefile ]; then (cd win32;mingw32-configure --disable-libewf --disable-afflib) ; fi - (cd win32/src;$(MAKE) lib;mv libbulkextractor.so ../../libbulkextractor32.DLL) + (cd win32/src;$(MAKE) lib;mv libbulkextractor.so ../../libbulkextractor32.DLL) if [ ! -d win64 ]; then mkdir win64 ; fi if [ ! -r win64/Makefile ]; then (cd win64;mingw64-configure --disable-libewf --disable-afflib) ; fi - (cd win64/src;$(MAKE) lib;mv libbulkextractor.so ../../libbulkextractor64.DLL) + (cd win64/src;$(MAKE) lib;mv libbulkextractor.so ../../libbulkextractor64.DLL) ls -l libbulk*.DLL diff --git a/configure.ac b/configure.ac index f9a236bc..4feafd5a 100644 --- a/configure.ac +++ b/configure.ac @@ -474,7 +474,13 @@ AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Wsuggest-attribute=noreturn"],[ AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Wdeprecated-register"],[return 0;], [AC_DEFINE(HAVE_DIAGNOSTIC_DEPRECATED_REGISTER,1,[define 1 if GCC supports -Wdeprecated-register])]) - + +################################################################ +## Enable json-c >= 0.15 +AC_CHECK_LIB([json-c], [json_tokener_parse],[LIBS="-ljson-c $LIBS"], + [AC_MSG_WARN([Could not find libjson-c library])]) + + ################################################################ ## sceadan support ## Enabled for 1.5 release diff --git a/etc/CONFIGURE_AMAZON_LINUX.bash b/etc/CONFIGURE_AMAZON_LINUX.bash index 21a54a6c..ef367a70 100755 --- a/etc/CONFIGURE_AMAZON_LINUX.bash +++ b/etc/CONFIGURE_AMAZON_LINUX.bash @@ -35,6 +35,7 @@ MPKGS+="md5deep wget bison zlib-devel " MPKGS+="libewf libewf-devel java-1.8.0-openjdk-devel " MPKGS+="libxml2-devel libxml2-static openssl-devel " MPKGS+="sqlite-devel expat-devel " +MPKGS+="libjson-c-devel " if [ ! 
-r /etc/os-release ]; then echo This requires Amazon Linux diff --git a/etc/CONFIGURE_CENTOS7.bash b/etc/CONFIGURE_CENTOS7.bash index 202454bf..ff097919 100755 --- a/etc/CONFIGURE_CENTOS7.bash +++ b/etc/CONFIGURE_CENTOS7.bash @@ -35,6 +35,7 @@ MPKGS+="md5deep wget bison zlib-devel " MPKGS+="libewf libewf-devel java-1.8.0-openjdk-devel " MPKGS+="libxml2-devel libxml2-static openssl-devel " MPKGS+="sqlite-devel expat-devel " +MPKGS+="libjson-c-devel " if [ ! -r /etc/os-release ]; then echo This requires Amazon Linux diff --git a/etc/CONFIGURE_DEBIAN9.bash b/etc/CONFIGURE_DEBIAN9.bash index 14f1e8ad..e779df8a 100755 --- a/etc/CONFIGURE_DEBIAN9.bash +++ b/etc/CONFIGURE_DEBIAN9.bash @@ -27,7 +27,7 @@ fi cd $DIR MPKGS="autoconf automake flex gcc git libtool " -MPKGS+="md5deep openssl patch wget bison g[+][+] libssl-dev zlib1g-dev libxml2-dev" +MPKGS+="md5deep openssl patch wget bison g[+][+] libssl-dev zlib1g-dev libxml2-dev libjson-c-dev" if [ ! -r /etc/os-release ]; then echo This requires Debian Linux. diff --git a/etc/CONFIGURE_FEDORA31.bash b/etc/CONFIGURE_FEDORA31.bash index e087f4a9..cfa90e0b 100755 --- a/etc/CONFIGURE_FEDORA31.bash +++ b/etc/CONFIGURE_FEDORA31.bash @@ -35,6 +35,7 @@ MPKGS+="md5deep wget bison zlib-devel " MPKGS+="libewf libewf-devel java-1.8.0-openjdk-devel " MPKGS+="libxml2-devel libxml2-static openssl-devel " MPKGS+="sqlite-devel expat-devel " +MPKGS+="libjson-c-devel " if [ ! -r /etc/os-release ]; then echo This requires Fedora Linux diff --git a/etc/CONFIGURE_FEDORA32.bash b/etc/CONFIGURE_FEDORA32.bash index a7a1a11f..23391bd9 100755 --- a/etc/CONFIGURE_FEDORA32.bash +++ b/etc/CONFIGURE_FEDORA32.bash @@ -36,6 +36,7 @@ MPKGS+="md5deep wget bison zlib-devel " MPKGS+="libewf libewf-devel java-1.8.0-openjdk-devel " MPKGS+="libxml2-devel libxml2-static openssl-devel " MPKGS+="sqlite-devel expat-devel " +MPKGS+="libjson-c-devel " if [ ! 
-r /etc/os-release ]; then echo This requires Fedora Linux diff --git a/etc/CONFIGURE_UBUNTU16LTS.bash b/etc/CONFIGURE_UBUNTU16LTS.bash index fc524ee5..f167aac9 100755 --- a/etc/CONFIGURE_UBUNTU16LTS.bash +++ b/etc/CONFIGURE_UBUNTU16LTS.bash @@ -30,7 +30,7 @@ cd $DIR #MPKGS="autoconf automake flex gcc git libtool " #MPKGS+="openssl patch wget bison g++ libssl-dev zlib1g-dev libxml2-dev" -MKPGS="autoconf automake flex gcc g++ libssl-dev zlib1g-dev libxml2-dev dpkg-dev openssl patch wget bison" +MKPGS="autoconf automake flex gcc g++ libssl-dev zlib1g-dev libxml2-dev dpkg-dev openssl patch wget bison libjson-c-dev" if [ ! -r /etc/os-release ]; then echo This requires Debian Linux. diff --git a/etc/CONFIGURE_UBUNTU18LTS.bash b/etc/CONFIGURE_UBUNTU18LTS.bash index abfe5f44..61c69166 100755 --- a/etc/CONFIGURE_UBUNTU18LTS.bash +++ b/etc/CONFIGURE_UBUNTU18LTS.bash @@ -30,7 +30,7 @@ cd $DIR #MPKGS="autoconf automake flex gcc git libtool " #MPKGS+="openssl patch wget bison g++ libssl-dev zlib1g-dev libxml2-dev" -MKPGS="autoconf automake flex gcc g++ libssl-dev zlib1g-dev libxml2-dev dpkg-dev openssl patch wget bison" +MKPGS="autoconf automake flex gcc g++ libssl-dev zlib1g-dev libxml2-dev dpkg-dev openssl patch wget bison libjson-c-dev" if [ ! -r /etc/os-release ]; then echo This requires Debian Linux. diff --git a/src/scan_json.cpp b/src/scan_json.cpp index d81ba12d..dd0c6149 100644 --- a/src/scan_json.cpp +++ b/src/scan_json.cpp @@ -1,432 +1,10 @@ -/* - * - * 2011-08-21 - moved to C++ by Simson Garfinkel. 
- * - * 2007-08-24 - other changes - * - * -Copyright (c) 2005 JSON.org - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -The Software shall be used for Good, not Evil. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - #include "config.h" #include "be13_api/bulk_extractor_i.h" #include #include +#include -class json_checker { - static const int stacksize=256; // max stack - uint8_t state; - int maxtop; // how far did we get - int top; - uint8_t stack[stacksize]; - bool reject; // in reject mode - int push(uint8_t mode); - int pop(uint8_t mode); -public: - uint32_t comma_count; // number of commas, a measure of complexity - json_checker(); - virtual ~json_checker(){}; - int check_char(int next_char); - int done(); - bool check_if_done(); -}; - - -#define __ -1 /* the universal error code */ - -/* - Characters are mapped into these 31 character classes. This allows for - a significant reduction in the size of the state transition table. 
-*/ - -enum classes { - C_SPACE, /* space */ - C_WHITE, /* other whitespace */ - C_LCURB, /* { */ - C_RCURB, /* } */ - C_LSQRB, /* [ */ - C_RSQRB, /* ] */ - C_COLON, /* : */ - C_COMMA, /* , */ - C_QUOTE, /* " */ - C_BACKS, /* \ */ - C_SLASH, /* / */ - C_PLUS, /* + */ - C_MINUS, /* - */ - C_POINT, /* . */ - C_ZERO , /* 0 */ - C_DIGIT, /* 123456789 */ - C_LOW_A, /* a */ - C_LOW_B, /* b */ - C_LOW_C, /* c */ - C_LOW_D, /* d */ - C_LOW_E, /* e */ - C_LOW_F, /* f */ - C_LOW_L, /* l */ - C_LOW_N, /* n */ - C_LOW_R, /* r */ - C_LOW_S, /* s */ - C_LOW_T, /* t */ - C_LOW_U, /* u */ - C_ABCDF, /* ABCDF */ - C_E, /* E */ - C_ETC, /* everything else */ - NR_CLASSES -}; - -static int ascii_class[128] = { -/* - This array maps the 128 ASCII characters into character classes. - The remaining Unicode characters should be mapped to C_ETC. - Non-whitespace control characters are errors. -*/ - __, __, __, __, __, __, __, __, - __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __, - __, __, __, __, __, __, __, __, - __, __, __, __, __, __, __, __, - - C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH, - C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, - C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - - C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC, - C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC, - - C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC, - C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC, - C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC, - C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC -}; - - -/* Strangely , IN is defined on mingw */ -#ifdef IN -#undef IN -#endif - -/* - The state codes. 
-*/ -enum states { - GO=0, /* start */ - OK, /* ok */ - OB, /* object */ - KE, /* key */ - CO, /* colon */ - VA, /* value */ - AR, /* array */ - ST, /* string */ - ES, /* escape */ - U1, /* u1 */ - U2, /* u2 */ - U3, /* u3 */ - U4, /* u4 */ - MI, /* minus */ - ZE, /* zero */ - IN, /* integer */ - FR, /* fraction */ - E1, /* e */ - E2, /* ex */ - E3, /* exp */ - T1, /* tr */ - T2, /* tru */ - T3, /* true */ - F1, /* fa */ - F2, /* fal */ - F3, /* fals */ - F4, /* false */ - N1, /* nu */ - N2, /* nul */ - N3, /* null */ - NR_STATES -}; - - -static int state_transition_table[NR_STATES][NR_CLASSES] = { -/* - The state transition table takes the current state and the current symbol, - and returns either a new state or an action. An action is represented as a - negative number. A JSON text is accepted if at the end of the text the - state is OK and if the mode is MODE_DONE. - - white 1-9 ABCDF etc - space | { } [ ] : , " \ / + - . 0 | a b c d e f l n r s t u | E |*/ -/*start GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*object OB*/ {OB,OB,__,-9,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*key KE*/ {KE,KE,__,__,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*value VA*/ {VA,VA,-6,__,-5,__,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__}, -/*array AR*/ {AR,AR,-6,__,-5,-7,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__}, -/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,ES,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST}, -/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__}, -/*u1 U1*/ 
{__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__}, -/*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__}, -/*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__}, -/*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ST,ST,ST,ST,ST,ST,ST,ST,__,__,__,__,__,__,ST,ST,__}, -/*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IN,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*int IN*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,IN,IN,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__}, -/*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__}, -/*e E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*ex E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__}, -/*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__}, -/*true T3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__}, -/*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__}, -/*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__}, -/*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__}, -/*false F4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__}, -/*nu N1*/ 
{__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__}, -/*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__}, -/*null N3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__}, -}; - - -/* - These modes can be pushed on the stack. -*/ -enum modes { - MODE_ARRAY, - MODE_DONE, - MODE_KEY, - MODE_OBJECT, -}; - -json_checker::json_checker(): - state(GO),maxtop(0),top(-1),stack(),reject(false),comma_count(0) -{ - /* constructor. Takes stacksize parameter that restricts the level - * of maximum nesting. - * - * To continue the process, call JSON_checker_char for each - * character in the JSON text, and then call JSON_checker_done to - * obtain the final result. These functions are fully reentrant. - * - */ - push(MODE_DONE); // we know we are done when this is the top of stack -} - -bool json_checker::check_if_done() -{ - return state==OK && top==0; -} - -int json_checker::done() -{ -/* - The JSON_checker_done function should be called after all of the characters - have been processed, but only if every call to JSON_checker_char returned - true. This function deletes the JSON_checker and returns true if the JSON - text was accepted. -*/ - int result = state == OK && pop( MODE_DONE); - reject = true; - return result; -} - - -int json_checker::push(uint8_t mode) -{ - /* - * Push a mode onto the stack. Return false if there is overflow. - */ - top += 1; - if (top >= stacksize) { - return false; - } - stack[top] = mode; - if(top>maxtop) maxtop=top; - return true; -} - - -int json_checker::pop(uint8_t mode) -{ -/* - Pop the stack, assuring that the current mode matches the expectation. - Return false if there is underflow or if the modes mismatch. -*/ - if (top < 0 || stack[top] != mode) { - return false; - } - top -= 1; - return true; -} - - - -/* - * Check the next character. Return 0 if okay, -1 if not. 
- * - * Call this function for each character (or - * partial character) in your JSON text. It can accept UTF-8, UTF-16, or - * UTF-32. It returns 0 if things are looking ok so far, -1 if error. - * - */ -int json_checker::check_char(int next_char) -{ - int next_class, next_state; -/* - Determine the character's class. -*/ - if(reject) return -1; // in rejecting mode - - if (next_char < 0) { - reject = true; - return -1; - } - if (next_char >= 128) { - next_class = C_ETC; - } else { - next_class = ascii_class[next_char]; - if (next_class <= __) { - reject = true; - return -1; - } - } -/* - Get the next state from the state transition table. -*/ - next_state = state_transition_table[state][next_class]; - if (next_state >= 0) { -/* - Change the state. -*/ - state = next_state; - } else { -/* - Or perform one of the actions. -*/ - switch (next_state) { -/* empty } */ - case -9: - if (!pop( MODE_KEY)) { - reject = true; - return -1; - } - state = OK; - break; - -/* } */ case -8: - if (!pop( MODE_OBJECT)) { - reject = true; - return -1; - } - state = OK; - break; - -/* ] */ case -7: - if (!pop( MODE_ARRAY)) { - reject = true; - return -1; - } - state = OK; - break; - -/* { */ case -6: - if (!push( MODE_KEY)) { - reject = true; - return -1; - } - state = OB; - break; - -/* [ */ case -5: - if (!push( MODE_ARRAY)) { - reject = true; - return -1; - } - state = AR; - break; - -/* " */ case -4: - switch (stack[top]) { - case MODE_KEY: - state = CO; - break; - case MODE_ARRAY: - case MODE_OBJECT: - state = OK; - break; - default: - reject = true; - return -1; - } - break; - -/* , */ case -3: - comma_count++; - switch (stack[top]) { - case MODE_OBJECT: -/* - * A comma causes a flip from object mode to key mode. 
- */ - if (!pop( MODE_OBJECT) || !push( MODE_KEY)) { - reject = true; - return -1; - } - state = KE; - break; - case MODE_ARRAY: - state = VA; - break; - default: - reject = true; - return -1; - } - break; - -/* : */ case -2: -/* - * A colon causes a flip from key mode to object mode. -*/ - if (!pop( MODE_KEY) || !push( MODE_OBJECT)) { - reject = true; - return -1; - } - state = VA; - break; -/* - Bad action. -*/ - default: - reject = true; - return -1; - } - } - return 0; // not bad data -} - - -/**************************************************************** - ** Make the JSON validator work with bulk_extractor - */ - +#define MIN_SIZE 16 static bool is_json_second_char[256]; // shared between all threads static const char *json_second_chars = "0123456789.-{[ \t\n\r\""; @@ -437,7 +15,7 @@ void scan_json(const class scanner_params &sp,const recursion_control_block &rcb if(sp.phase==scanner_params::PHASE_STARTUP){ assert(sp.info->si_version==scanner_info::CURRENT_SI_VERSION); sp.info->name = "json"; - sp.info->author = "Simson Garfinkel"; + sp.info->author = "Simson Garfinkel & Jan Gruber"; sp.info->description = "Scans for JSON-encoded data"; sp.info->scanner_version= "1.1"; sp.info->feature_names.insert("json"); @@ -458,29 +36,63 @@ void scan_json(const class scanner_params &sp,const recursion_control_block &rcb } if(sp.phase==scanner_params::PHASE_SHUTDOWN) return; + if(sp.phase==scanner_params::PHASE_SCAN){ - for(size_t pos = 0;pos+1 2 ){ - sbuf_t json(sbuf,pos,i-pos+1); - std::string json_hash = (*fr->fs.hasher.func)(json.buf,json.bufsize); - fr->write(sbuf.pos0+i,json.asString(),json_hash);; - } - pos = i; // skip to the end - break; - } - } + json_object *jo = NULL; + json_tokener* jt = NULL; + char* js = NULL; + size_t end = 0; + enum json_tokener_error je; + + + for(size_t pos = 0;pos+1 MIN_SIZE){ + + /* Constructs output buffer */ + sbuf_t jbuf(sbuf, pos, end+1); + std::string json_hash = (*fr->fs.hasher.func)(jbuf.buf, jbuf.bufsize); + + /* Stores 
result */ + fr->write(sbuf.pos0 + pos, jbuf.asString(), json_hash); + pos += end + 1; } + } + /* Clean up */ + json_tokener_free(jt); + + if(jo){ + free(jo); + jo = NULL; + } + + if(js){ + free(js); + js = NULL; + } + } + } + } + } diff --git a/tests/Data/json_log.txt b/tests/Data/json_log.txt new file mode 100644 index 00000000..e7464dea --- /dev/null +++ b/tests/Data/json_log.txt @@ -0,0 +1,26 @@ +2021-06-18 21:00:05.4521|INFO|Technical|Starting process| +2021-06-18 21:00:08.8740|INFO|Technical|Got entries from 2021-06-18 00:00 : +{ +"name": "Turing", +"surname": "Alan", +"inventions": [{ + "name": "Turing Maching", + "year": "1936" +}, { + "name": "Bombe", + "year": "1940" +}] +} + +2021-06-10 21:00:05.4521|INFO|Technical|Another batch Started| +2021-06-19 21:00:08.8740|INFO|Technical|Got entries from 2021-06-19 12:00 : +{ +"name": "Murdock", +"surname": "Ian", +"inventions": [{ + "name": "Debian", + "year": "1993" +}] +} +2021-06-10 21:00:05.4521|INFO|Technical|Another batch Started| +2021-06-19 21:00:08.8740|INFO|Technical|Got entries from 2021-06-19 12:00 : diff --git a/tests/Data/json_plain.txt b/tests/Data/json_plain.txt new file mode 100644 index 00000000..2cada4ae --- /dev/null +++ b/tests/Data/json_plain.txt @@ -0,0 +1,15 @@ +{ + "firstName": "Konrad", + "lastName": "Zuse", + "gender": "man", + "age": 85, + "address": { + "streetAddress": "Straße des 17. 
Juni 135", + "city": "Berlin", + "country": "Germany", + "postalCode": "10623" + }, + "inventions": [ + { "type": "computer", "model": "Zuse Z1" } + ] +} From 55ee1a7fb0f6c30985da24e4248c36a0bcdb0765 Mon Sep 17 00:00:00 2001 From: Jan Gruber Date: Fri, 25 Jun 2021 08:07:57 +0000 Subject: [PATCH 05/89] Fix src/scan_json.cpp to compile with libjson-c versions < 0.15 (#171) Fix src/scan_json.cpp to compile with libjson-c versions < 0.15 and modify LICENSE.md to resemble newly added changes --- LICENSE.md | 7 ++----- src/scan_json.cpp | 11 +++++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/LICENSE.md b/LICENSE.md index fe564a73..b46cc28c 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -74,10 +74,7 @@ Licensed under the MIT License * src/old_scanners/scan_ascii85.cpp is Copyright (C) 2011 Remy Oukaour - -Licensed under the JSON License - -* src/scan_json.cpp is Copyright (C) 2005 JSON.org +* src/scan_json.cpp is Copyright (C) 2021 Simson L. Garfinkel and Jan Gruber Licensed under General Public License version 3 and later @@ -117,6 +114,6 @@ Licensed under General Public License version 3 with the Autoconf exception: * m4/ac_check_classpath.m4 is Copyright (C) 2000 Stephane Bortzmeyer -Licensed under OpenSSL License +Licensed under OpenSSL License Because of the fact, that the GPL (including version 3) is incompatible with some terms of the OpenSSL license, the author hereby explicitly states a license exception, which permits the linking of bulk_extractor with OpenSSL. 
diff --git a/src/scan_json.cpp b/src/scan_json.cpp index dd0c6149..cc1b6490 100644 --- a/src/scan_json.cpp +++ b/src/scan_json.cpp @@ -5,7 +5,9 @@ #include #define MIN_SIZE 16 -static bool is_json_second_char[256]; // shared between all threads +#define IS_STRICT 1 + +static bool is_json_second_char[256]; /* shared between all threads */ static const char *json_second_chars = "0123456789.-{[ \t\n\r\""; extern "C" @@ -44,7 +46,6 @@ void scan_json(const class scanner_params &sp,const recursion_control_block &rcb char* js = NULL; size_t end = 0; enum json_tokener_error je; - for(size_t pos = 0;pos+1 0.15 */ + end = jt->char_offset; js = (char*) json_object_to_json_string_ext(jo, JSON_C_TO_STRING_PLAIN); /* Discard very short matches */ From 6c8398308bede0eac6cbd564aeab86f6ae294034 Mon Sep 17 00:00:00 2001 From: "Simson L. Garfinkel" Date: Fri, 25 Jun 2021 06:13:30 -0400 Subject: [PATCH 06/89] Be15 migrate to GitHub action (#172) * moved to GitHub Actions * changed json-c to libjson-c-dev * changed json-c to libjson-c-dev * on MacOS it is called json-c * added openssl to brew * added openssl to brew * try just ubuntu-20.04 * manually applied patches to get back to 2db7b97e7b7baaaec05a716557e9da20c5c8c8a4 --- .../workflows/continuous-integration-pip.yml | 54 +++++++++++++++++++ .travis.yml | 33 ------------ 2 files changed, 54 insertions(+), 33 deletions(-) create mode 100644 .github/workflows/continuous-integration-pip.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/continuous-integration-pip.yml b/.github/workflows/continuous-integration-pip.yml new file mode 100644 index 00000000..f4887e70 --- /dev/null +++ b/.github/workflows/continuous-integration-pip.yml @@ -0,0 +1,54 @@ +# This file based on https://gist.github.com/mwouts/9842452d020c08faf9e84a3bba38a66f +# See: https://help.github.com/en/actions/reference/software-installed-on-github-hosted-runners +# 2020-06-22 - slg - customized +# 2020-06-27 - slg - expanded to G++ for MacOS +# 2020-07-03 - 
slg - ported to be13_api; removed python (be13_api doesn't use python) +# 2021-05-07 - slg - Started work again + +name: BE2 CI (c++17) +on: [push] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + #os: ['macos-10.15'] + #os: ['macos-10.15', 'ubuntu-18.04', 'ubuntu-20.04'] + os: ['ubuntu-20.04'] + + steps: + # https://github.com/actions/checkout + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Install C++ dependencies on MacOS + if: startsWith(matrix.os, 'macos') + run: | + brew install autoconf automake libtool json-c openssl@1.1 + cp /usr/local/opt/openssl@1.1/lib/pkgconfig/*.pc /usr/local/lib/pkgconfig/ + + - name: Install C++ dependencies on Ubuntu + if: startsWith(matrix.os, 'ubuntu') + run: | + sudo apt install -y libtool autoconf automake libssl-dev pkg-config libjson-c-dev + + - name: make check + run: | + bash bootstrap.sh && ./configure + cd src + make check || (echo ==error== ; cat test-suite.log; exit 1) + + - uses: ammaraskar/gcc-problem-matcher@0.1 + name: GCC Problem Matcher + + - name: codecov report + if: startsWith(matrix.os, 'ubuntu') + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + run: | + bash bootstrap.sh + ./configure CFLAGS='-g -O0 -fprofile-arcs -ftest-coverage' CXXFLAGS='-g -O0 -fprofile-arcs -ftest-coverage' LIBS='-lgcov' + make clean && cd src && make check && gcov-9 -n -o . 
*cpp && bash <(curl -s https://codecov.io/bash) diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 77a39cc0..00000000 --- a/.travis.yml +++ /dev/null @@ -1,33 +0,0 @@ -version: ~> 1.0 -language: c -os: linux -dist: bionic -jobs: - include: - - name: "Linux with gcc" - env: - - CFLAGS="--coverage" - - CXXFLAGS="--coverage" - - LDFLAGS="--coverage" - - PYTHON3="python3" - compiler: gcc - group: edge - - name: "Mac OS with clang" - env: - - PYTHON3="/usr/local/bin/python3" - compiler: clang - os: osx - osx_image: xcode10 -before_install: -- if test ${TRAVIS_OS_NAME} = "linux"; then sudo apt-get install -y autoconf automake autopoint build-essential libtool pkg-config; fi -- if test ${TRAVIS_OS_NAME} = "osx"; then brew update; fi -install: -- if test ${TRAVIS_OS_NAME} = "osx"; then export CPPFLAGS="-I/usr/local/opt/openssl@1.1/include ${CPPFLAGS}"; export LDFLAGS="-L/usr/local/opt/openssl@1.1/lib ${LDFLAGS}"; fi -- sh bootstrap.sh -script: -- ./configure -- make >/dev/null -- make dist -- cd tests && ${PYTHON3} regress.py --datacheck -after_success: -- if test ${TRAVIS_COMPILER} = "gcc"; then curl -o codecov.sh -s https://codecov.io/bash && /bin/bash ./codecov.sh; fi From 5705c9211b2d0f83b03d6f5ab62b1e5999c2f722 Mon Sep 17 00:00:00 2001 From: Jan Gruber Date: Fri, 25 Jun 2021 19:01:53 +0000 Subject: [PATCH 07/89] Fix spelling errors (patch from S. Brun) (#174) --- src/exif_entry.cpp | 2 +- src/main.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/exif_entry.cpp b/src/exif_entry.cpp index aa835193..fb9ff820 100644 --- a/src/exif_entry.cpp +++ b/src/exif_entry.cpp @@ -101,7 +101,7 @@ const std::string exif_entry::get_full_name() const { case IFD1_INTEROPERABILITY: return "ifd1.interoperability." 
+ name; default: - std::cerr << "Program state errror: Invalid ifd type " << ifd_type << "\n"; + std::cerr << "Program state error: Invalid ifd type " << ifd_type << "\n"; assert(0); } return "ERROR"; // required to avoid compiler warning diff --git a/src/main.cpp b/src/main.cpp index 4565c6f3..4c099b44 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -826,7 +826,7 @@ int main(int argc,char **argv) { std::vector params = split(optarg,'='); if(params.size()!=2){ - std::cerr << "Invalid paramter: " << optarg << "\n"; + std::cerr << "Invalid parameter: " << optarg << "\n"; exit(1); } s_config.namevals[params[0]] = params[1]; From b3c7758c80b4a5d55c9119acba8e733e8a1a3eb6 Mon Sep 17 00:00:00 2001 From: Quintin Walters Date: Fri, 25 Jun 2021 15:02:19 -0400 Subject: [PATCH 08/89] Duplicate autoremove in Ubuntu 18 config script (#175) The apt autoremove on line 58 had a duplicate autoremove, this is interpreted as a package by apt and throws an error. --- etc/CONFIGURE_UBUNTU18LTS.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/CONFIGURE_UBUNTU18LTS.bash b/etc/CONFIGURE_UBUNTU18LTS.bash index 61c69166..2fcebe01 100755 --- a/etc/CONFIGURE_UBUNTU18LTS.bash +++ b/etc/CONFIGURE_UBUNTU18LTS.bash @@ -55,7 +55,7 @@ echo Will now try to install # I use emacs. 
Installing it may install requiremnts sudo apt update -y sudo apt install -y emacs -sudo apt autoremove -y autoremove +sudo apt autoremove -y # Now install what is required echo apt install -y $MKPGS From 20f9c553eb49c25ddf501253b777e1e333a3f8f9 Mon Sep 17 00:00:00 2001 From: Quintin Walters Date: Fri, 25 Jun 2021 15:45:20 -0400 Subject: [PATCH 09/89] REmoved erroneous exit statements (#177) --- etc/CONFIGURE_UBUNTU18LTS.bash | 3 --- 1 file changed, 3 deletions(-) diff --git a/etc/CONFIGURE_UBUNTU18LTS.bash b/etc/CONFIGURE_UBUNTU18LTS.bash index 2fcebe01..31cd49d4 100755 --- a/etc/CONFIGURE_UBUNTU18LTS.bash +++ b/etc/CONFIGURE_UBUNTU18LTS.bash @@ -60,14 +60,11 @@ sudo apt autoremove -y echo apt install -y $MKPGS sudo apt install -y $MKPGS -exit 1 if [ $? != 0 ]; then echo "Could not install some of the packages. Will not proceed." exit 1 fi -exit 0 - # ICU requires patching and a special build sequence # From a9e283f8a3fb17830688d8b8015ab2f7696ee8fd Mon Sep 17 00:00:00 2001 From: "Simson L. 
Garfinkel" Date: Wed, 30 Jun 2021 23:42:09 -0400 Subject: [PATCH 10/89] updated to work with new dfxml (#180) * updated to work with new dfxml * updated CI file * here, dfxml is not a child of be13_api * commented-out build-aux * added .make-codecov --- .../workflows/continuous-integration-pip.yml | 41 +- .gitignore | 9 + .make-codecov | 14 + configure.ac | 330 ++---- m4/ax_cxx_compile_stdcxx.m4 | 962 ++++++++++++++++++ m4/ax_cxx_compile_stdcxx_11.m4 | 142 --- m4/ax_cxx_compile_stdcxx_17.m4 | 35 + plugins/plugin_test.cpp | 24 +- src/be13_api | 2 +- src/bulk_extractor_api.cpp | 31 +- src/dfxml | 2 +- src/main.cpp | 47 +- src/phase1.cpp | 36 +- src/phase1.h | 10 +- src/rar/extract.cpp | 37 +- src/scan_ccns2.cpp | 14 +- src/scan_evtx.cpp | 24 +- src/scan_json.cpp | 28 +- src/scan_rar.cpp | 10 +- 19 files changed, 1299 insertions(+), 499 deletions(-) create mode 100644 .make-codecov create mode 100644 m4/ax_cxx_compile_stdcxx.m4 delete mode 100644 m4/ax_cxx_compile_stdcxx_11.m4 create mode 100644 m4/ax_cxx_compile_stdcxx_17.m4 diff --git a/.github/workflows/continuous-integration-pip.yml b/.github/workflows/continuous-integration-pip.yml index f4887e70..6bdbd0a1 100644 --- a/.github/workflows/continuous-integration-pip.yml +++ b/.github/workflows/continuous-integration-pip.yml @@ -14,8 +14,7 @@ jobs: strategy: matrix: #os: ['macos-10.15'] - #os: ['macos-10.15', 'ubuntu-18.04', 'ubuntu-20.04'] - os: ['ubuntu-20.04'] + os: ['macos-10.15', 'ubuntu-20.04'] steps: # https://github.com/actions/checkout @@ -27,17 +26,38 @@ jobs: - name: Install C++ dependencies on MacOS if: startsWith(matrix.os, 'macos') run: | - brew install autoconf automake libtool json-c openssl@1.1 - cp /usr/local/opt/openssl@1.1/lib/pkgconfig/*.pc /usr/local/lib/pkgconfig/ + brew install autoconf automake libtool - name: Install C++ dependencies on Ubuntu if: startsWith(matrix.os, 'ubuntu') run: | - sudo apt install -y libtool autoconf automake libssl-dev pkg-config libjson-c-dev + sudo apt install -y 
libtool autoconf automake libssl-dev pkg-config - - name: make check + - name: Update autoconf on Ubuntu + if: startsWith(matrix.os, 'ubuntu') + run: | + wget https://ftpmirror.gnu.org/autoconf/autoconf-2.71.tar.gz + tar xfz autoconf-2.71.tar.gz && cd autoconf-2.71 && ./configure && make && sudo make install + + - name: Update automake on Ubuntu + if: startsWith(matrix.os, 'ubuntu') + run: | + wget https://ftpmirror.gnu.org/automake/automake-1.16.3.tar.gz + tar xfz automake-1.16.3.tar.gz && cd automake-1.16.3 && ./configure && make && sudo make install + + - name: Report automake and autoconf version + run: | + automake --version + autoconf --version + + - name: make run: | bash bootstrap.sh && ./configure + cd src + make + + - name: make check + run: | cd src make check || (echo ==error== ; cat test-suite.log; exit 1) @@ -49,6 +69,9 @@ jobs: env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} run: | - bash bootstrap.sh - ./configure CFLAGS='-g -O0 -fprofile-arcs -ftest-coverage' CXXFLAGS='-g -O0 -fprofile-arcs -ftest-coverage' LIBS='-lgcov' - make clean && cd src && make check && gcov-9 -n -o . *cpp && bash <(curl -s https://codecov.io/bash) + bash .make-codecov + +# - name: regresion test +# run: | +# cd tests +# python3 regress.py --datacheck diff --git a/.gitignore b/.gitignore index 401bd3c0..3e6d69c0 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,12 @@ tests/Data-*/ tests/Makefile win32 win64 +ar-lib +.idea +build-aux +out??/ +out?/ +src/be_tests +src/etc +src/x.cpp +*.trs diff --git a/.make-codecov b/.make-codecov new file mode 100644 index 00000000..a17c19d8 --- /dev/null +++ b/.make-codecov @@ -0,0 +1,14 @@ +#!/bin/bash +# +# 2020-10-29 - slg - compile for codecov, run self-test, and upload results. +# +bash bootstrap.sh +./configure CFLAGS='-g -O0 -fprofile-arcs -ftest-coverage' \ + CXXFLAGS='-g -O0 -fprofile-arcs -ftest-coverage' \ + LIBS='-lgcov' +make clean \ + && cd src \ + && make check \ + && gcov-9 -n -o . 
*cpp \ + && bash <(curl -s https://codecov.io/bash) +make distclean diff --git a/configure.ac b/configure.ac index 4feafd5a..adda7c48 100644 --- a/configure.ac +++ b/configure.ac @@ -5,13 +5,23 @@ # Order is largely irrevellant, although it must start with AC_INIT and end with AC_OUTPUT # See http://autotoolset.sourceforge.net/tutorial.html # and http://www.openismus.com/documents/linux/automake/automake.shtml +# and http://www.bioinf.uni-freiburg.de/~mmann/HowTo/automake.html -AC_PREREQ(2.57) -AC_INIT(BULK_EXTRACTOR, 1.6.0, bugs@digitalcorpora.org) - +AC_PREREQ([2.71]) +AC_INIT([BULK_EXTRACTOR],[1.6.1],[bugs@digitalcorpora.org]) AC_CONFIG_MACRO_DIR(m4) +#AC_CONFIG_AUX_DIR([build-aux]) +################################################################ +## Includes + +m4_include([m4/slg_mingw_support.m4]) +m4_include([m4/slg_searchdirs.m4]) +m4_include([m4/slg_gcc_all_warnings.m4]) +m4_include([m4/ax_cxx_compile_stdcxx.m4]) +m4_include([src/dfxml/src/dfxml_configure.m4]) +m4_include([src/be13_api/be13_configure.m4]) -# +# # NOTE 1 - plugins/Makefile is referenced so that the plugins Makefile is created. # However, plugins is not listed as a SUBDIR for Makefile.am in this directory. # That prevents the plugins from automatically being built each time. 
@@ -22,8 +32,8 @@ AC_CONFIG_FILES([Makefile src/Makefile doc/Makefile doc/latex_manuals/Makefile \ man/Makefile python/Makefile \ plugins/Makefile \ specfiles/Makefile specfiles/bulk_extractor.spec.m4 \ - java_gui/Makefile tests/Makefile ]) -AC_CONFIG_HEADER([config.h]) + java_gui/Makefile tests/Makefile ]) +AC_CONFIG_HEADERS([config.h]) AM_INIT_AUTOMAKE AM_MAINTAINER_MODE AC_PREFIX_PROGRAM(bulk_extractor) dnl build for same location @@ -31,31 +41,28 @@ AC_PREFIX_PROGRAM(bulk_extractor) dnl build for same location # Programs we will be using AC_PROG_CC AC_PROG_CXX -AC_PROG_CC_C99 AM_PROG_CC_C_O dnl allow per-produce flags AC_PROG_INSTALL -AC_PROG_LEX +AC_PROG_LEX([noyywrap]) # LT_INIT([disable-shared]) -# use C++11 mode if available; HAVE_CXX11 is defined in config.h if so. Don't -# use the GNU C++11 extensions for portability's sake (noext). -# https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_11.html +# Must use C++17 mode. -m4_include([m4/ax_cxx_compile_stdcxx_11.m4]) -AX_CXX_COMPILE_STDCXX_11(noext, optional) +#AC_LANG_PUSH(C++) +AX_CXX_COMPILE_STDCXX([17],[noext], [mandatory]) +#AC_LANG_POP() +# These are needed for bulk_extractor. They are not needed for tcpflow +AC_DEFINE([BULK_EXTRACTOR],1,[We are compiling bulk_extractor]) # Endian check is required for MD5 implementation AC_C_BIGENDIAN -# These are needed for bulk_extractor. 
They are not needed for tcpflow -AC_DEFINE([BULK_EXTRACTOR],1,[We are compiling bulk_extractor]) - -## +## ## BEViewer AC_ARG_ENABLE(BEViewer, - [AS_HELP_STRING([--disable-BEViewer],[disable BEViewer even if components for building it are available])], - use_BEViewer=no, + [AS_HELP_STRING([--disable-BEViewer],[disable BEViewer even if components for building it are available])], + use_BEViewer=no, use_BEViewer=yes) if test "x$use_BEViewer" == "xyes"; then # validate availability of requisite programs @@ -90,8 +97,8 @@ AM_CONDITIONAL([WANT_BE_VIEWER], [test x$has_javac == xtrue -a x$has_jar == xtru ## ## NSIS Windows Installer AC_ARG_ENABLE(win_installer, - [AS_HELP_STRING([--enable-win_installer],[enable genration of the Windows installer, this also requires BEViewer])], - request_win_installer=yes, + [AS_HELP_STRING([--enable-win_installer],[enable genration of the Windows installer, this also requires BEViewer])], + request_win_installer=yes, request_win_installer=no) if test x"$request_win_installer" == "xyes" ; then AC_CHECK_PROG(has_nsis, makensis, true, false) @@ -101,9 +108,11 @@ if test x"$request_win_installer" == "xyes" ; then fi AM_CONDITIONAL([WANT_WIN_INSTALLER], [test x"$has_nsis" == xtrue -a x"$has_javac" == xtrue -a x"$has_jar" == xtrue]) + + if test x"$LEX" == x ; then AC_MSG_ERROR([flex not required.]) -fi +fi if test `basename "$LEX"` != flex; then AC_MSG_ERROR([gnu flex is required.]) @@ -111,28 +120,25 @@ fi ## Determine if we can use flex -R if eval "echo %% | $LEX -R -t > /dev/null 2>&1" -then +then AC_MSG_NOTICE([$LEX supports -R]) else AC_MSG_ERROR([$LEX does not support -R. 
Please get a modern version of gnu flex]) fi - -m4_include([m4/slg_mingw_support.m4]) -m4_include([m4/slg_searchdirs.m4]) -m4_include([m4/slg_gcc_all_warnings.m4]) - ################################################################ -## Includes +## compile with pthread if its available +SAVE_CXXFLAGS="$CXXFLAGS" +CXXFLAGS="$CXXFLAGS -pthread" +AC_MSG_CHECKING([whether C++ compiler understands $option]) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[]])], + [AC_MSG_RESULT([Adding -pthread to CXXFLAGS])], + [AC_MSG_RESULT([Compiler does not understand -pthread]); CXXFLAGS="$SAVE_CXXFLAGS"]) +unset SAVE_CXXFLAGS -CFLAGS="$CFLAGS" - -m4_include([src/dfxml/src/dfxml_configure.m4]) -m4_include([src/be13_api/be13_configure.m4]) ################################################################ ## rar support -## Enabled for 1.4 release! AC_ARG_ENABLE([rar], AS_HELP_STRING([--disable-rar], [Disable RAR decompression]), [], @@ -165,36 +171,39 @@ AM_CONDITIONAL([FLEXSCANNERS_ENABLED], [test "yes" = "$flexscanners"]) ## e.g. "export MALLOC_TRACE=~/malloc_outfile.txt" ## 3. improve readability of malloc_outfile.txt using mtrace, ## e.g. "mtrace /usr/local/bin/bulk_extractor ~/malloc_outfile.txt -AC_ARG_ENABLE([mcheck], AS_HELP_STRING([--enable-mcheck], \ - [enable memory analysis support for detecting memory leaks \ - and detecting multiply deallocated memory resources]), \ - AC_DEFINE(HAVE_MCHECK, 1, \ - [define the output file using global variable MALLOC_TRACE, \ - for example "MALLOC_TRACE=./mcheck.out; export MALLOC_TRACE". \ - Then post-process unreadable file mcheck.out using the mtrace command line tool, \ - for example "mtrace /usr/local/bin/bulk_extractor mcheck.out". 
\ - Unfortunately, only addresses with written code are looked up, \ - in otherwords, use of malloc is looked up, but use of new is not.])) +AC_ARG_ENABLE([mcheck], + [AS_HELP_STRING([--enable-mcheck], + [enable memory analysis support for detecting memory leaks + and detecting multiply deallocated memory resources])], + [AC_DEFINE(HAVE_MCHECK, 1, + [define the output file using global variable MALLOC_TRACE, + for example "MALLOC_TRACE=./mcheck.out; export MALLOC_TRACE". + Then post-process unreadable file mcheck.out using the mtrace command line tool, + for example "mtrace /usr/local/bin/bulk_extractor mcheck.out". + Unfortunately, only addresses with written code are looked up, + in otherwords, use of malloc is looked up, but use of new is not.])] + ) ################################################################ ## AddressSanitizer support -AC_ARG_ENABLE([address-sanitizer], AS_HELP_STRING([--enable-address-sanitizer], \ - [enabled AddressSanitizer support for detecting a wide variety of \ - memory allocation and deallocation errors]), \ +AC_ARG_ENABLE([address-sanitizer], + [AS_HELP_STRING([--enable-address-sanitizer], + [enabled AddressSanitizer support for detecting a wide variety of + memory allocation and deallocation errors])], [AC_DEFINE(HAVE_ADDRESS_SANITIZER, 1, [enable AddressSanitizer]) + address_sanitizer="yes" CFLAGS="$CFLAGS -fsanitize=address" CXXFLAGS="$CXXFLAGS -fsanitize=address" - AC_TRY_COMPILE([],[const int i=0;],[AC_MSG_NOTICE([Address Sanitizer Enabled])], - [AC_MSG_ERROR([Address Sanitizer not available])]) - ]) + ], + []) ################################################################ # PTHREAD support # With special nods to compiling under mingw -case $host in +case $host in *mingw*) AC_MSG_NOTICE([mingw pthreads support]) CFLAGS="$CFLAGS -mthreads -pthread " @@ -235,22 +244,26 @@ AC_CHECK_FUNCS([pthread_win32_process_attach_np pthread_win32_process_detach_np # end PTHREAD SUPPORT 
################################################################ + # Specific include files and functions for bulk-extractor -AC_HEADER_STDC +# Autoupdate added the next two lines to ensure that your configure +# script's behavior did not change. They are probably safe to remove. +AC_CHECK_INCLUDES_DEFAULT +AC_PROG_EGREP + AC_TYPE_INT64_T AC_SYS_LARGEFILE AC_SYS_POSIX_TERMIOS ## Check for headers used by bulk Extractor -## do not put pthread here -AC_CHECK_HEADERS([alloca.h dirent.h dlfcn.h err.h errno.h fcntl.h inttypes.h libgen.h limits.h malloc.h mmap.h pwd.h signal.h stdarg.h stdint.h stdio.h strings.h string.h stdlib.h sys/cdefs.h sys/disk.h sys/fcntl.h sys/ioctl.h sys/mman.h sys/mmap.h sys/mount.h sys/param.h sys/socket.h sys/stat.h sys/types.h sys/time.h sys/resource.h time.h unistd.h windows.h CoreServices/CoreServices.h]) +AC_CHECK_HEADERS([alloca.h dirent.h dlfcn.h err.h errno.h fcntl.h inttypes.h libgen.h limits.h malloc.h mmap.h pwd.h regex.h signal.h stdarg.h stdint.h stdio.h strings.h string.h stdlib.h sys/cdefs.h sys/disk.h sys/fcntl.h sys/ioctl.h sys/mman.h sys/mmap.h sys/mount.h sys/param.h sys/socket.h sys/stat.h sys/types.h sys/time.h sys/resource.h time.h unistd.h windows.h CoreServices/CoreServices.h ]) AC_CHECK_FUNCS([err errx getuid getpwuid gethostname getrusage gmtime_r isxdigit ishexnumber le64toh localtime_r _lseeki64 inet_ntop pread64 pread printf mmap munmap MD5 mkstemp mktemp random srandom srandomdev sleep SleepEx strptime usleep vasprintf warn warnx]) AC_CHECK_FUNCS([CreateProcess LoadLibrary IncrementAtomic InterlockedIncrement]) -## dlopen is now itself in a different library +## dlopen is now itself in a different library ## Explicitly check for dlopen library before checking for dlopen ## -ldl (see http://stackoverflow.com/questions/956640/linux-c-error-undefined-reference-to-dlopen) -AC_CHECK_LIB([dl],[dlopen]) +AC_CHECK_LIB([dl],[dlopen]) AC_CHECK_FUNCS([dlopen dlopen_preflight]) # Test for sin_len @@ -275,18 +288,18 @@ 
AC_CHECK_MEMBER([struct sockaddr_in.sin_len], ]) # Should we disable optimization? -AC_ARG_WITH([opt], AC_HELP_STRING([--without-opt], [Drop all -O C flags])) +AC_ARG_WITH([opt], AS_HELP_STRING([--without-opt],[Drop all -O C flags])) # Or maybe just tone it down a bit? -AC_ARG_WITH([o3], AC_HELP_STRING([--without-o3], [Do not force O3 optimization; use default level])) +AC_ARG_WITH([o3], AS_HELP_STRING([--without-o3],[Do not force O3 optimization; use default level])) # # ZLIB is required: -# Note you cannot put comments in the AC_MSG_ERROR for some reason +# Note you cannot put comments in the AC_MSG_ERROR for some reason # Must be before ewf AC_CHECK_LIB([z],[uncompress],, AC_MSG_ERROR([zlib libraries not installed; try installing zlib-devel zlib-dev zlib-devel zlib1g-dev or libz-dev])) -## EXPAT is now a main line requirements for restarting +## EXPAT is required for reading the dfxml file for restrarting. AC_CHECK_HEADERS([expat.h]) AC_CHECK_LIB([expat],[XML_ParserCreate]) @@ -294,6 +307,15 @@ AC_CHECK_LIB([expat],[XML_ParserCreate]) ## regex support ## there are several options ## tre is better than regex + +AC_CHECK_HEADERS([regex.h tre/tre.h]) +AC_CHECK_LIB([regex],[regerror]) +AC_CHECK_LIB([tre],[tre_regcomp]) +AC_CHECK_FUNCS([regcomp tre_regcomp tre_version]) + +################################################################ +## Lightgrep support +## AC_CHECK_LIB([stdc++],[main]) if test x"$lightgrep" == x"yes"; then @@ -322,34 +344,6 @@ if test x"$lightgrep" == x"yes"; then LDFLAGS="$LDFLAGS `$PKG_CONFIG --libs-only-L --libs-only-other lightgrep`" fi -AC_CHECK_HEADERS([regex.h tre/tre.h]) -AC_CHECK_LIB([regex],[regcomp]) -AC_CHECK_LIB([tre],[tre_regcomp]) -AC_CHECK_FUNCS([regcomp tre_regcomp tre_version]) - -################################################################ -## AFFLIB support -## AFFLIB requires OpenSSL -## Link in openssl if it is available, because afflib requires it. 
-## (Note that bulk_extractor itself no longer requires openssl) -AC_DEFINE([HAVE_STL],1,[We have the Standard Template Library]) -AC_CHECK_HEADERS([openssl/x509.h openssl/pem.h]) - -AC_ARG_ENABLE([afflib], - [AS_HELP_STRING([--disable-afflib], [disable afflib support])], - [afflib=no], - [afflib=yes]) -AC_MSG_NOTICE([afflib is $afflib]) - -if test x"$afflib" == x"yes" ; then - AC_CHECK_HEADER([afflib/afflib.h], - [AC_DEFINE(HAVE_AFFLIB_AFFLIB_H,1,[Do we have afflib/afflib.h?])] - [AC_CHECK_LIB([afflib],af_get_pagesize,, - AC_MSG_WARN([AFFLIB not found or the version of AFFLIB installed does not support af_get_pagesize; no AFF support]))], - []) -fi - - ################################################################ ## LIBEWF support @@ -371,46 +365,6 @@ fi AC_MSG_NOTICE([libewf is now $libewf]) -################################################################ -## hashdb support -AC_ARG_ENABLE([hashdb], - [AS_HELP_STRING([--disable-hashdb], [disable hashdb scanner support])], - [hashdb=no], - [hashdb=yes]) - -if test x"$hashdb" == x"yes" ; then - if test x"$mingw" = x"yes"; then - # the win32 configuration of libxml2 requires zlib and iconv libraries - AC_CHECK_LIB([z], [gzdopen],[LIBS="-lz $LIBS"], [AC_MSG_ERROR([Could not find zlib library])]) - AC_CHECK_LIB([iconv], [iconv],[LIBS="-liconv $LIBS"], [AC_MSG_ERROR([Could not find win-iconv library])]) - fi - - AC_CHECK_LIB([xml2], [xmlSAXUserParseFile],[LIBS="-lxml2 $LIBS"], - [AC_MSG_WARN([Could not find libxml2 library required to build the hashdb scanner]) - hashdb=no]) - - if test x"$hashdb" == xyes ; then - AC_LANG_PUSH(C++) - AC_CHECK_HEADER([hashdb.hpp], [], - [AC_MSG_WARN([Could not find hashdb header file required to build the hashdb scanner]) - hashdb=no]) - AC_CHECK_LIB([hashdb], [hashdb_version], [], - [AC_MSG_WARN([Could not find hashdb library required to build the hashdb scanner]) - hashdb=no]) - AC_LANG_POP() - fi - -fi -if test x"$hashdb" == xyes ; then - AC_DEFINE(HAVE_HASHDB,1,[define 1 to build 
the hashdb scanner]) - if [[ $(hashdb -v | cut -d' ' -f2) == "3.1.0" ]]; then - AC_DEFINE(HAVE_HASHDB_3_1,1,[using hashdb-3.1.0 version]) - fi -else - AC_MSG_NOTICE([the hashdb scanner will not be built]) -fi - - ################################################################ ## exiv2 support ## now that exif is in place, exiv2 is an optional scanner. @@ -433,12 +387,14 @@ fi if test x"$exiv2" == x"yes" ; then AC_LANG_PUSH(C++) AC_CHECK_HEADERS([exiv2/image.hpp exiv2/exif.hpp exiv2/error.hpp]) - AC_TRY_COMPILE([#include - #include - #include ], - [Exiv2::ImageFactory::open(0,0);], - exiv2=yes, - exiv2=no) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + #include + #include + #include + ]], + [[Exiv2::ImageFactory::open(0,0);]])], + [exiv2=yes], + [exiv2=no]) if test "${exiv2}" = yes; then AC_DEFINE(HAVE_EXIV2,1,[define 1 if EXIV2 exists and works]) echo Using EXIV2 @@ -448,76 +404,38 @@ if test x"$exiv2" == x"yes" ; then LIBS="$LIBS -lexiv2.dll" # static version does not work on mingw fi dnl see if we have the error feature - AC_TRY_COMPILE([#include - #include - #include ], - [Exiv2::LogMsg::setLevel(Exiv2::LogMsg::mute);], - [AC_DEFINE(HAVE_EXIV2__LOGMSG__SETLEVEL,1,[define 1 if EXIV2 exists and works])]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ + #include + #include + #include + ]], + [[Exiv2::LogMsg::setLevel(Exiv2::LogMsg::mute);]], + [AC_DEFINE(HAVE_EXIV2__LOGMSG__SETLEVEL,1,[define 1 if EXIV2 exists and works])])]) fi - AC_LANG_POP() + AC_LANG_POP() fi -AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Wshadow"],[return 0;], - [AC_DEFINE(HAVE_DIAGNOSTIC_SHADOW,1,[define 1 if GCC supports -Wshadow])]) - -AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Wundef"],[return 0;], - [AC_DEFINE(HAVE_DIAGNOSTIC_UNDEF,1,[define 1 if GCC supports -Wundef])]) +################################################################ +## Enable json-c >= 0.15 +AC_CHECK_LIB([json-c], [json_tokener_parse], + [LIBS="-ljson-c $LIBS"; AC_DEFINE_UNQUOTED([HAVE_JSON_C],[1],[we have json-c]) ]) 
-AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Wcast-qual"],[return 0;], - [AC_DEFINE(HAVE_DIAGNOSTIC_CAST_QUAL,1,[define 1 if GCC supports -Wcast-qual])]) +# this is uglyu and should be added to our existing macro list +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#pragma GCC diagnostic ignored "-Wshadow"]], [[return 0;]])],[AC_DEFINE(HAVE_DIAGNOSTIC_SHADOW,1,[define 1 if GCC supports -Wshadow])],[]) -AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Weffcpp"],[return 0;], - [AC_DEFINE(HAVE_DIAGNOSTIC_EFFCPP,1,[define 1 if GCC supports -Weffc++])]) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#pragma GCC diagnostic ignored "-Wundef"]], [[return 0;]])],[AC_DEFINE(HAVE_DIAGNOSTIC_UNDEF,1,[define 1 if GCC supports -Wundef])],[]) -AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Wsuggest-attribute=noreturn"],[return 0;], - [AC_DEFINE(HAVE_DIAGNOSTIC_SUGGEST_ATTRIBUTE,1,[define 1 if GCC supports -Wsuggest-attribute=noreturn])]) - -AC_TRY_COMPILE([#pragma GCC diagnostic ignored "-Wdeprecated-register"],[return 0;], - [AC_DEFINE(HAVE_DIAGNOSTIC_DEPRECATED_REGISTER,1,[define 1 if GCC supports -Wdeprecated-register])]) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#pragma GCC diagnostic ignored "-Wcast-qual"]], [[return 0;]])],[AC_DEFINE(HAVE_DIAGNOSTIC_CAST_QUAL,1,[define 1 if GCC supports -Wcast-qual])],[]) -################################################################ -## Enable json-c >= 0.15 -AC_CHECK_LIB([json-c], [json_tokener_parse],[LIBS="-ljson-c $LIBS"], - [AC_MSG_WARN([Could not find libjson-c library])]) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#pragma GCC diagnostic ignored "-Weffcpp"]], [[return 0;]])],[AC_DEFINE(HAVE_DIAGNOSTIC_EFFCPP,1,[define 1 if GCC supports -Weffc++])],[]) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#pragma GCC diagnostic ignored "-Wsuggest-attribute=noreturn"]], [[return 0;]])],[AC_DEFINE(HAVE_DIAGNOSTIC_SUGGEST_ATTRIBUTE,1,[define 1 if GCC supports -Wsuggest-attribute=noreturn])],[]) -################################################################ -## sceadan 
support -## Enabled for 1.5 release -# m4_include([src/sceadan/sceadan_configure.m4]) -# https://www.gnu.org/software/autoconf/manual/autoconf-2.60/html_node/External-Software.html -AC_ARG_WITH([sceadan], - AS_HELP_STRING([--with-sceadan], [specify location of UTSA SCEADAN file type identification]), - [AC_DEFINE(USE_SCEADAN, 1, [Use SCEADAN FILE TYPE CLASSIFIER]) - sceadan="yes" - SCEADAN_DIR=$withval - CPPFLAGS="$CPPFLAGS -I$SCEADAN_DIR/src -I../$SCEADAN_DIR/src -I../../$SCEADAN_DIR/src" - LDFLAGS="$LDFLAGS -L$SCEADAN_DIR/ -L$SCEADAN_DIR/src/.libs -L../$SCEADAN_DIR/src/.libs" - AC_CHECK_LIB([m],[fmax],,AC_MSG_ERROR([missing -lm])) - AC_CHECK_LIB([sceadan],[sceadan_open],,AC_MSG_ERROR([missing -lsceadan])) - ], - []) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#pragma GCC diagnostic ignored "-Wdeprecated-register"]], [[return 0;]])],[AC_DEFINE(HAVE_DIAGNOSTIC_DEPRECATED_REGISTER,1,[define 1 if GCC supports -Wdeprecated-register])],[]) -AC_SUBST(SCEADAN_DIR) -AM_CONDITIONAL([SCEADAN_ENABLED], [test "yes" = "$sceadan"]) - -################################################################ -# do we have a broken strchr? -# This prevents errors in base64_forensics.cpp -AC_LANG_PUSH(C++) -AC_TRY_COMPILE([#include ], - [static const char Base64[] = "ABCDEFGH";int ch=3;char *pos = strchr(Base64, ch);], - conforming_strchr=yes, - conforming_strchr=no) -if test "$conforming_strchr" = yes; then - AC_MSG_NOTICE([strchr is conforming]) - AC_DEFINE(HAVE_CONFORMING_STRCHR, 1,[define to 1 if strchr takes const char*]) -else - AC_MSG_NOTICE([strchr is not conforming]) -fi -AC_LANG_POP() +#m4_include([m4/slg_check_gcc_diagnostics.m4]) ## If compiling on mingw, add another -lgdi32 to be safe ## This is required becuase some libraries reference functions in gdi32 and, for whatever reason, @@ -537,7 +455,7 @@ if test x"${AFF_NOOPT}" != "x" ; then fi if test "${with_opt}" = "no" ; then - CFLAGS=`echo -g "$CFLAGS" | sed s/-O[[0-9]]//` # note the double quoting! 
+ CFLAGS=`echo -g "$CFLAGS" | sed s/-O[[0-9]]//` # note the double quoting! CXXFLAGS=`echo -g "$CXXFLAGS" | sed s/-O[[0-9]]//` else # If we are not stripping the optimizer, add fortify source @@ -546,7 +464,7 @@ else # and increase optimizer from -O2 to -O3 if not explicitly forbidden if test "${with_o3}" != "no" ; then - CFLAGS=`echo -g "$CFLAGS" | sed s/-O2/-O3/` # note the double quoting! + CFLAGS=`echo -g "$CFLAGS" | sed s/-O2/-O3/` # note the double quoting! CXXFLAGS=`echo -g "$CXXFLAGS" | sed s/-O2/-O3/` fi fi @@ -569,33 +487,10 @@ AC_MSG_NOTICE([ LDFLAGS: $LDFLAGS]) if test x"${libewf}" != "xyes" ; then AC_MSG_WARN([libewf-devel must be installed for E01 support.]) -fi - -# if making BEViewer is not disabled then warn about any missing requisite components -if test "x$use_BEViewer" == "xyes"; then - if test x$has_javac == xfalse -o x$has_jar == xfalse ; then - AC_MSG_WARN([javac for BEViewer is unavailable because one or more requisite components are not available:]) - fi - if test "x$has_javac" == "xfalse" ; then - echo javac Version 6 or later must be installed to allow javac support for BEViewer. - fi - if test "x$has_jar" == "xfalse" ; then - echo jar must be installed to allow javac support for BEViewer. - fi -else - AC_MSG_NOTICE([NOTE: BEViewer is disabled.]) fi AC_OUTPUT -## Create the BEViewer Java config file -echo "// This file is generated by autoconf. Do not edit this file." 
> $srcdir/java_gui/src/Config.java -echo "public final class Config {" >> $srcdir/java_gui/src/Config.java -echo " private Config() { }" // no constructor >> $srcdir/java_gui/src/Config.java -echo " /** BEViewer Version {@value} */" >> $srcdir/java_gui/src/Config.java -echo " public static final String VERSION = \"$PACKAGE_VERSION\";" >> $srcdir/java_gui/src/Config.java -echo "}" >> $srcdir/java_gui/src/Config.java - ## Finally, record the values of CFLAGS, CPPFLAGS, and CXXFLAGS for DFXML echo "#define CPPFLAGS \"$CPPFLAGS\"" >> config.h echo "#define CFLAGS \"$CFLAGS\"" >> config.h @@ -605,4 +500,3 @@ echo "#define LDFLAGS \"$LDFLAGS\"" >> config.h if test x"$GIT_COMMIT" != "x" ; then echo "#define GIT_COMMIT \"$GIT_COMMIT\"" >> config.h fi - diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4 new file mode 100644 index 00000000..9413da62 --- /dev/null +++ b/m4/ax_cxx_compile_stdcxx.m4 @@ -0,0 +1,962 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional]) +# +# DESCRIPTION +# +# Check for baseline language coverage in the compiler for the specified +# version of the C++ standard. If necessary, add switches to CXX and +# CXXCPP to enable support. VERSION may be '11' (for the C++11 standard) +# or '14' (for the C++14 standard). +# +# The second argument, if specified, indicates whether you insist on an +# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. +# -std=c++11). If neither is specified, you get whatever works, with +# preference for no added switch, and then for an extended mode. 
+# +# The third argument, if specified 'mandatory' or if left unspecified, +# indicates that baseline support for the specified C++ standard is +# required and that the macro should error out if no mode with that +# support is found. If specified 'optional', then configuration proceeds +# regardless, after defining HAVE_CXX${VERSION} if and only if a +# supporting mode is found. +# +# LICENSE +# +# Copyright (c) 2008 Benjamin Kosnik +# Copyright (c) 2012 Zack Weinberg +# Copyright (c) 2013 Roy Stogner +# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov +# Copyright (c) 2015 Paul Norman +# Copyright (c) 2015 Moritz Klammler +# Copyright (c) 2016, 2018 Krzesimir Nowak +# Copyright (c) 2019 Enji Cooper +# Copyright (c) 2020 Jason Merrill +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 12 + +dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro +dnl (serial version number 13). 
+ +AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl + m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"], + [$1], [14], [ax_cxx_compile_alternatives="14 1y"], + [$1], [17], [ax_cxx_compile_alternatives="17 1z"], + [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl + m4_if([$2], [], [], + [$2], [ext], [], + [$2], [noext], [], + [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl + m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true], + [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true], + [$3], [optional], [ax_cxx_compile_cxx$1_required=false], + [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) + AC_LANG_PUSH([C++])dnl + ac_success=no + + m4_if([$2], [], [dnl + AC_CACHE_CHECK(whether $CXX supports C++$1 features by default, + ax_cv_cxx_compile_cxx$1, + [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [ax_cv_cxx_compile_cxx$1=yes], + [ax_cv_cxx_compile_cxx$1=no])]) + if test x$ax_cv_cxx_compile_cxx$1 = xyes; then + ac_success=yes + fi]) + + m4_if([$2], [noext], [], [dnl + if test x$ac_success = xno; then + for alternative in ${ax_cxx_compile_alternatives}; do + switch="-std=gnu++${alternative}" + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, + $cachevar, + [ac_save_CXX="$CXX" + CXX="$CXX $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [eval $cachevar=yes], + [eval $cachevar=no]) + CXX="$ac_save_CXX"]) + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + fi]) + + m4_if([$2], [ext], [], [dnl + if test x$ac_success = xno; then + dnl HP's aCC needs +std=c++11 according to: + dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf + dnl Cray's crayCC needs "-h std=c++11" + for alternative in ${ax_cxx_compile_alternatives}; do 
+ for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, + $cachevar, + [ac_save_CXX="$CXX" + CXX="$CXX $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [eval $cachevar=yes], + [eval $cachevar=no]) + CXX="$ac_save_CXX"]) + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + if test x$ac_success = xyes; then + break + fi + done + fi]) + AC_LANG_POP([C++]) + if test x$ax_cxx_compile_cxx$1_required = xtrue; then + if test x$ac_success = xno; then + AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.]) + fi + fi + if test x$ac_success = xno; then + HAVE_CXX$1=0 + AC_MSG_NOTICE([No compiler with C++$1 support was found]) + else + HAVE_CXX$1=1 + AC_DEFINE(HAVE_CXX$1,1, + [define if the compiler supports basic C++$1 syntax]) + fi + AC_SUBST(HAVE_CXX$1) +]) + + +dnl Test body for checking C++11 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 +) + + +dnl Test body for checking C++14 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 +) + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 +) + +dnl Tests for new features in C++11 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ + +// If the compiler admits that it is not ready for C++11, why torture it? +// Hopefully, this will speed up the test. 
+ +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201103L + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual ~Base() {} + virtual void f() {} + }; + + struct Derived : public Base + { + virtual ~Derived() override {} + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? 
sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + 
const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) {} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + +]]) + + +dnl Tests for new features in C++14 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[ + +// If the compiler admits that it is not ready for C++14, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201402L + +#error "This is not a C++14 compiler" + +#else + +namespace cxx14 +{ + + namespace test_polymorphic_lambdas + { + + int + test() + { + const auto lambda = [](auto&&... args){ + const auto istiny = [](auto x){ + return (sizeof(x) == 1UL) ? 
1 : 0; + }; + const int aretiny[] = { istiny(args)... }; + return aretiny[0]; + }; + return lambda(1, 1L, 1.0f, '1'); + } + + } + + namespace test_binary_literals + { + + constexpr auto ivii = 0b0000000000101010; + static_assert(ivii == 42, "wrong value"); + + } + + namespace test_generalized_constexpr + { + + template < typename CharT > + constexpr unsigned long + strlen_c(const CharT *const s) noexcept + { + auto length = 0UL; + for (auto p = s; *p; ++p) + ++length; + return length; + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("x") == 1UL, ""); + static_assert(strlen_c("test") == 4UL, ""); + static_assert(strlen_c("another\0test") == 7UL, ""); + + } + + namespace test_lambda_init_capture + { + + int + test() + { + auto x = 0; + const auto lambda1 = [a = x](int b){ return a + b; }; + const auto lambda2 = [a = lambda1(x)](){ return a; }; + return lambda2(); + } + + } + + namespace test_digit_separators + { + + constexpr auto ten_million = 100'000'000; + static_assert(ten_million == 100000000, ""); + + } + + namespace test_return_type_deduction + { + + auto f(int& x) { return x; } + decltype(auto) g(int& x) { return x; } + + template < typename T1, typename T2 > + struct is_same + { + static constexpr auto value = false; + }; + + template < typename T > + struct is_same + { + static constexpr auto value = true; + }; + + int + test() + { + auto x = 0; + static_assert(is_same::value, ""); + static_assert(is_same::value, ""); + return x; + } + + } + +} // namespace cxx14 + +#endif // __cplusplus >= 201402L + +]]) + + +dnl Tests for new features in C++17 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[ + +// If the compiler admits that it is not ready for C++17, why torture it? +// Hopefully, this will speed up the test. 
+ +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201703L + +#error "This is not a C++17 compiler" + +#else + +#include +#include +#include + +namespace cxx17 +{ + + namespace test_constexpr_lambdas + { + + constexpr int foo = [](){return 42;}(); + + } + + namespace test::nested_namespace::definitions + { + + } + + namespace test_fold_expression + { + + template + int multiply(Args... args) + { + return (args * ... * 1); + } + + template + bool all(Args... args) + { + return (args && ...); + } + + } + + namespace test_extended_static_assert + { + + static_assert (true); + + } + + namespace test_auto_brace_init_list + { + + auto foo = {5}; + auto bar {5}; + + static_assert(std::is_same, decltype(foo)>::value); + static_assert(std::is_same::value); + } + + namespace test_typename_in_template_template_parameter + { + + template typename X> struct D; + + } + + namespace test_fallthrough_nodiscard_maybe_unused_attributes + { + + int f1() + { + return 42; + } + + [[nodiscard]] int f2() + { + [[maybe_unused]] auto unused = f1(); + + switch (f1()) + { + case 17: + f1(); + [[fallthrough]]; + case 42: + f1(); + } + return f1(); + } + + } + + namespace test_extended_aggregate_initialization + { + + struct base1 + { + int b1, b2 = 42; + }; + + struct base2 + { + base2() { + b3 = 42; + } + int b3; + }; + + struct derived : base1, base2 + { + int d; + }; + + derived d1 {{1, 2}, {}, 4}; // full initialization + derived d2 {{}, {}, 4}; // value-initialized bases + + } + + namespace test_general_range_based_for_loop + { + + struct iter + { + int i; + + int& operator* () + { + return i; + } + + const int& operator* () const + { + return i; + } + + iter& operator++() + { + ++i; + return *this; + } + }; + + struct sentinel + { + int i; + }; + + bool operator== (const iter& i, const sentinel& s) + { + return i.i == s.i; + } + + bool operator!= (const iter& i, const sentinel& s) + { + return !(i == s); + } + + struct range + { + iter begin() const + { 
+ return {0}; + } + + sentinel end() const + { + return {5}; + } + }; + + void f() + { + range r {}; + + for (auto i : r) + { + [[maybe_unused]] auto v = i; + } + } + + } + + namespace test_lambda_capture_asterisk_this_by_value + { + + struct t + { + int i; + int foo() + { + return [*this]() + { + return i; + }(); + } + }; + + } + + namespace test_enum_class_construction + { + + enum class byte : unsigned char + {}; + + byte foo {42}; + + } + + namespace test_constexpr_if + { + + template + int f () + { + if constexpr(cond) + { + return 13; + } + else + { + return 42; + } + } + + } + + namespace test_selection_statement_with_initializer + { + + int f() + { + return 13; + } + + int f2() + { + if (auto i = f(); i > 0) + { + return 3; + } + + switch (auto i = f(); i + 4) + { + case 17: + return 2; + + default: + return 1; + } + } + + } + + namespace test_template_argument_deduction_for_class_templates + { + + template + struct pair + { + pair (T1 p1, T2 p2) + : m1 {p1}, + m2 {p2} + {} + + T1 m1; + T2 m2; + }; + + void f() + { + [[maybe_unused]] auto p = pair{13, 42u}; + } + + } + + namespace test_non_type_auto_template_parameters + { + + template + struct B + {}; + + B<5> b1; + B<'a'> b2; + + } + + namespace test_structured_bindings + { + + int arr[2] = { 1, 2 }; + std::pair pr = { 1, 2 }; + + auto f1() -> int(&)[2] + { + return arr; + } + + auto f2() -> std::pair& + { + return pr; + } + + struct S + { + int x1 : 2; + volatile double y1; + }; + + S f3() + { + return {}; + } + + auto [ x1, y1 ] = f1(); + auto& [ xr1, yr1 ] = f1(); + auto [ x2, y2 ] = f2(); + auto& [ xr2, yr2 ] = f2(); + const auto [ x3, y3 ] = f3(); + + } + + namespace test_exception_spec_type_system + { + + struct Good {}; + struct Bad {}; + + void g1() noexcept; + void g2(); + + template + Bad + f(T*, T*); + + template + Good + f(T1*, T2*); + + static_assert (std::is_same_v); + + } + + namespace test_inline_variables + { + + template void f(T) + {} + + template inline T g(T) + { + return T{}; + } + + 
template<> inline void f<>(int) + {} + + template<> int g<>(int) + { + return 5; + } + + } + +} // namespace cxx17 + +#endif // __cplusplus < 201703L + +]]) diff --git a/m4/ax_cxx_compile_stdcxx_11.m4 b/m4/ax_cxx_compile_stdcxx_11.m4 deleted file mode 100644 index 163a4c64..00000000 --- a/m4/ax_cxx_compile_stdcxx_11.m4 +++ /dev/null @@ -1,142 +0,0 @@ -# ============================================================================ -# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_11.html -# ============================================================================ -# -# SYNOPSIS -# -# AX_CXX_COMPILE_STDCXX_11([ext|noext],[mandatory|optional]) -# -# DESCRIPTION -# -# Check for baseline language coverage in the compiler for the C++11 -# standard; if necessary, add switches to CXXFLAGS to enable support. -# -# The first argument, if specified, indicates whether you insist on an -# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. -# -std=c++11). If neither is specified, you get whatever works, with -# preference for an extended mode. -# -# The second argument, if specified 'mandatory' or if left unspecified, -# indicates that baseline C++11 support is required and that the macro -# should error out if no mode with that support is found. If specified -# 'optional', then configuration proceeds regardless, after defining -# HAVE_CXX11 if and only if a supporting mode is found. -# -# LICENSE -# -# Copyright (c) 2008 Benjamin Kosnik -# Copyright (c) 2012 Zack Weinberg -# Copyright (c) 2013 Roy Stogner -# Copyright (c) 2014 Alexey Sokolov -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. 
- -#serial 4 - -m4_define([_AX_CXX_COMPILE_STDCXX_11_testbody], [[ - template - struct check - { - static_assert(sizeof(int) <= sizeof(T), "not big enough"); - }; - - struct Base { - virtual void f() {} - }; - struct Child : public Base { - virtual void f() override {} - }; - - typedef check> right_angle_brackets; - - int a; - decltype(a) b; - - typedef check check_type; - check_type c; - check_type&& cr = static_cast(c); - - auto d = a; - auto l = [](){}; -]]) - -AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl - m4_if([$1], [], [], - [$1], [ext], [], - [$1], [noext], [], - [m4_fatal([invalid argument `$1' to AX_CXX_COMPILE_STDCXX_11])])dnl - m4_if([$2], [], [ax_cxx_compile_cxx11_required=true], - [$2], [mandatory], [ax_cxx_compile_cxx11_required=true], - [$2], [optional], [ax_cxx_compile_cxx11_required=false], - [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX_11])]) - AC_LANG_PUSH([C++])dnl - ac_success=no - AC_CACHE_CHECK(whether $CXX supports C++11 features by default, - ax_cv_cxx_compile_cxx11, - [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], - [ax_cv_cxx_compile_cxx11=yes], - [ax_cv_cxx_compile_cxx11=no])]) - if test x$ax_cv_cxx_compile_cxx11 = xyes; then - ac_success=yes - fi - - m4_if([$1], [noext], [], [dnl - if test x$ac_success = xno; then - for switch in -std=gnu++11 -std=gnu++0x; do - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch]) - AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch, - $cachevar, - [ac_save_CXXFLAGS="$CXXFLAGS" - CXXFLAGS="$CXXFLAGS $switch" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], - [eval $cachevar=yes], - [eval $cachevar=no]) - CXXFLAGS="$ac_save_CXXFLAGS"]) - if eval test x\$$cachevar = xyes; then - CXXFLAGS="$CXXFLAGS $switch" - ac_success=yes - break - fi - done - fi]) - - m4_if([$1], [ext], [], [dnl - if test x$ac_success = xno; then - for switch in -std=c++11 -std=c++0x; do - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch]) - 
AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch, - $cachevar, - [ac_save_CXXFLAGS="$CXXFLAGS" - CXXFLAGS="$CXXFLAGS $switch" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])], - [eval $cachevar=yes], - [eval $cachevar=no]) - CXXFLAGS="$ac_save_CXXFLAGS"]) - if eval test x\$$cachevar = xyes; then - CXXFLAGS="$CXXFLAGS $switch" - ac_success=yes - break - fi - done - fi]) - AC_LANG_POP([C++]) - if test x$ax_cxx_compile_cxx11_required = xtrue; then - if test x$ac_success = xno; then - AC_MSG_ERROR([*** A compiler with support for C++11 language features is required.]) - fi - else - if test x$ac_success = xno; then - HAVE_CXX11=0 - AC_MSG_NOTICE([No compiler with C++11 support was found]) - else - HAVE_CXX11=1 - AC_DEFINE(HAVE_CXX11,1, - [define if the compiler supports basic C++11 syntax]) - fi - - AC_SUBST(HAVE_CXX11) - fi -]) diff --git a/m4/ax_cxx_compile_stdcxx_17.m4 b/m4/ax_cxx_compile_stdcxx_17.m4 new file mode 100644 index 00000000..a6834171 --- /dev/null +++ b/m4/ax_cxx_compile_stdcxx_17.m4 @@ -0,0 +1,35 @@ +# ============================================================================= +# https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_17.html +# ============================================================================= +# +# SYNOPSIS +# +# AX_CXX_COMPILE_STDCXX_17([ext|noext], [mandatory|optional]) +# +# DESCRIPTION +# +# Check for baseline language coverage in the compiler for the C++17 +# standard; if necessary, add switches to CXX and CXXCPP to enable +# support. +# +# This macro is a convenience alias for calling the AX_CXX_COMPILE_STDCXX +# macro with the version set to C++17. The two optional arguments are +# forwarded literally as the second and third argument respectively. +# Please see the documentation for the AX_CXX_COMPILE_STDCXX macro for +# more information. If you want to use this macro, you also need to +# download the ax_cxx_compile_stdcxx.m4 file. 
+# +# LICENSE +# +# Copyright (c) 2015 Moritz Klammler +# Copyright (c) 2016 Krzesimir Nowak +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 2 + +AX_REQUIRE_DEFINED([AX_CXX_COMPILE_STDCXX]) +AC_DEFUN([AX_CXX_COMPILE_STDCXX_17], [AX_CXX_COMPILE_STDCXX([17], [$1], [$2])]) diff --git a/plugins/plugin_test.cpp b/plugins/plugin_test.cpp index ae65108b..a954b95f 100644 --- a/plugins/plugin_test.cpp +++ b/plugins/plugin_test.cpp @@ -1,6 +1,6 @@ /** * plugin_test.cpp: - * + * * This program will load a bulk_extractor .so or .dll plug-in and * perform a rudimentary test. */ @@ -23,20 +23,20 @@ #ifdef HAVE_WINDOWS_H #include -typedef int (__cdecl *MYPROC)(LPWSTR); +typedef int (__cdecl *MYPROC)(LPWSTR); #endif static std::string hash_name("md5"); static std::string hash_func(const uint8_t *buf,size_t bufsize) { if(hash_name=="md5" || hash_name=="MD5"){ - return md5_generator::hash_buf(buf,bufsize).hexdigest(); + return dfxml::md5_generator::hash_buf(buf,bufsize).hexdigest(); } if(hash_name=="sha1" || hash_name=="SHA1" || hash_name=="sha-1" || hash_name=="SHA-1"){ - return sha1_generator::hash_buf(buf,bufsize).hexdigest(); + return dfxml::sha1_generator::hash_buf(buf,bufsize).hexdigest(); } if(hash_name=="sha256" || hash_name=="SHA256" || hash_name=="sha-256" || hash_name=="SHA-256"){ - return sha256_generator::hash_buf(buf,bufsize).hexdigest(); + return dfxml::sha256_generator::hash_buf(buf,bufsize).hexdigest(); } std::cerr << "Invalid hash name: " << hash_name << "\n"; std::cerr << "This version of bulk_extractor only supports MD5, SHA1, and SHA256\n"; @@ -44,7 +44,7 @@ static std::string hash_func(const uint8_t *buf,size_t bufsize) } static feature_recorder_set::hash_def my_hasher(hash_name,hash_func); -scanner_params::PrintOptions scanner_params::no_options; 
+scanner_params::PrintOptions scanner_params::no_options; int main(int argc,char **argv) { if(argc!=2){ @@ -62,7 +62,7 @@ int main(int argc,char **argv) fprintf(stderr,"%s: cannot strip extension\n",name.c_str()); exit(1); } - name = name.substr(0,dot); + name = name.substr(0,dot); /* Strip dir */ size_t slash = name.rfind('.'); @@ -156,17 +156,17 @@ feature_recorder_set::feature_recorder_set(uint32_t f,const feature_recorder_set } -/* http://stackoverflow.com/questions/9406580/c-undefined-reference-to-vtable-and-inheritance +/* http://stackoverflow.com/questions/9406580/c-undefined-reference-to-vtable-and-inheritance * Must provide definitions for all virtual functions */ void scanner_info::get_config(const scanner_info::config_t &c, const std::string &n,std::string *val,const std::string &help){} void scanner_info::get_config(const std::string &n,std::string *val,const std::string &help) {} -void scanner_info::get_config(const std::string &n,signed char *val,const std::string &help) {} -void scanner_info::get_config(const std::string &n,short *val,const std::string &help) {} +//void scanner_info::get_config(const std::string &n,signed char *val,const std::string &help) {} +//void scanner_info::get_config(const std::string &n,short *val,const std::string &help) {} void scanner_info::get_config(const std::string &n,int *val,const std::string &help) {} -void scanner_info::get_config(const std::string &n,long *val,const std::string &help) {} -void scanner_info::get_config(const std::string &n,long long *val,const std::string &help) {} +//void scanner_info::get_config(const std::string &n,long *val,const std::string &help) {} +//void scanner_info::get_config(const std::string &n,long long *val,const std::string &help) {} void scanner_info::get_config(const std::string &n,unsigned char *val,const std::string &help) {} void scanner_info::get_config(const std::string &n,unsigned short *val,const std::string &help) {} void scanner_info::get_config(const std::string 
&n,unsigned int *val,const std::string &help) {} diff --git a/src/be13_api b/src/be13_api index 5dee606a..d79cf186 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit 5dee606a2002013fc0e3257725149d7fbf5bcc4f +Subproject commit d79cf18684a1efc407f521144b579746d3bced81 diff --git a/src/bulk_extractor_api.cpp b/src/bulk_extractor_api.cpp index bd7de966..258d8ad5 100644 --- a/src/bulk_extractor_api.cpp +++ b/src/bulk_extractor_api.cpp @@ -43,20 +43,20 @@ class callback_feature_recorder_set; * Typically we will instantiate a single object called the 'cfs' for each BEFILE. * It creates multiple named callback_feature_recorders, but they all callback through the same * callback function using the same set of locks - */ + */ static std::string hash_name("md5"); static std::string hash_func(const uint8_t *buf,size_t bufsize) { if(hash_name=="md5" || hash_name=="MD5"){ - return md5_generator::hash_buf(buf,bufsize).hexdigest(); + return dfxml::md5_generator::hash_buf(buf,bufsize).hexdigest(); } if(hash_name=="sha1" || hash_name=="SHA1" || hash_name=="sha-1" || hash_name=="SHA-1"){ - return sha1_generator::hash_buf(buf,bufsize).hexdigest(); + return dfxml::sha1_generator::hash_buf(buf,bufsize).hexdigest(); } if(hash_name=="sha256" || hash_name=="SHA256" || hash_name=="sha-256" || hash_name=="SHA-256"){ - return sha256_generator::hash_buf(buf,bufsize).hexdigest(); + return dfxml::sha256_generator::hash_buf(buf,bufsize).hexdigest(); } std::cerr << "Invalid hash name: " << hash_name << "\n"; std::cerr << "This version of bulk_extractor only supports MD5, SHA1, and SHA256\n"; @@ -69,7 +69,7 @@ class callback_feature_recorder_set: public feature_recorder_set { // neither copying nor assignment are implemented callback_feature_recorder_set(const callback_feature_recorder_set &cfs); callback_feature_recorder_set &operator=(const callback_feature_recorder_set &cfs); - histogram_defs_t histogram_defs; + histogram_defs_t histogram_defs; public: void *user; @@ 
-136,7 +136,7 @@ class callback_feature_recorder: public feature_recorder { class feature_recorder_set &fs_,const std::string &name_): feature_recorder(fs_,name_),cb(cb_){ } - virtual std::string carve(const sbuf_t &sbuf,size_t pos,size_t len, + virtual std::string carve(const sbuf_t &sbuf,size_t pos,size_t len, const std::string &ext){ // appended to forensic path return(""); // no file created } @@ -171,7 +171,7 @@ struct BEFILE_t { }; typedef struct BEFILE_t BEFILE; -extern "C" +extern "C" BEFILE *bulk_extractor_open(void *user,be_callback_t cb) { histogram_defs_t histograms; @@ -192,7 +192,7 @@ BEFILE *bulk_extractor_open(void *user,be_callback_t cb) BEFILE *bef = new BEFILE_t(user,cb); return bef; } - + extern "C" void bulk_extractor_config(BEFILE *bef,uint32_t cmd,const char *name,int64_t arg) { switch(cmd){ @@ -225,7 +225,7 @@ extern "C" void bulk_extractor_config(BEFILE *bef,uint32_t cmd,const char *name, bef->cfs.set_flag(feature_recorder_set::MEM_HISTOGRAM); break; - case BEAPI_MEMHIST_LIMIT:{ + case BEAPI_MEMHIST_LIMIT:{ feature_recorder *fr = bef->cfs.get_name(name); assert(fr); fr->set_memhist_limit(arg); @@ -251,7 +251,7 @@ extern "C" void bulk_extractor_config(BEFILE *bef,uint32_t cmd,const char *name, } -extern "C" +extern "C" int bulk_extractor_analyze_buf(BEFILE *bef,uint8_t *buf,size_t buflen) { pos0_t pos0(""); @@ -260,7 +260,7 @@ int bulk_extractor_analyze_buf(BEFILE *bef,uint8_t *buf,size_t buflen) return 0; } -extern "C" +extern "C" int bulk_extractor_analyze_dev(BEFILE *bef,const char *fname,float frac,int pagesize) { bool sampling_mode = frac < 1.0; // are we in sampling mode or full-disk mode? @@ -268,7 +268,7 @@ int bulk_extractor_analyze_dev(BEFILE *bef,const char *fname,float frac,int page /* A single-threaded sampling bulk_extractor. * It may be better to do this with two threads---one that does the reading (and seeking), * the other that doe the analysis. - * + * * This looks like the code in phase1.cpp. 
*/ BulkExtractor_Phase1::blocklist_t blocks_to_sample; @@ -313,14 +313,11 @@ int bulk_extractor_analyze_dev(BEFILE *bef,const char *fname,float frac,int page return 0; } -extern "C" +extern "C" int bulk_extractor_close(BEFILE *bef) { bef->cfs.dump_histograms((void *)&bef->cfs, - callback_feature_recorder_set::histogram_dump_callback,0); + callback_feature_recorder_set::histogram_dump_callback,0); delete bef; return 0; } - - - diff --git a/src/dfxml b/src/dfxml index b080f43f..21acab94 160000 --- a/src/dfxml +++ b/src/dfxml @@ -1 +1 @@ -Subproject commit b080f43fa108983a1f612e5b9107e6b3203ee4ae +Subproject commit 21acab94191c5ad93fbe5ee3c4c39ed0e47c8213 diff --git a/src/main.cpp b/src/main.cpp index 4c099b44..2a6ee7aa 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,7 @@ * feature_recorder_set fs - the collection of feature recorders. * xml xreport - the DFXML output. * image_process p - the image being processed. - * + * * Note that all of the singletons are passed to the phase1() function. 
*/ @@ -49,14 +49,14 @@ #include #endif -#ifdef WIN32 -// Allows us to open standard input in binary mode by default -// See http://gnuwin32.sourceforge.net/compile.html for more +#ifdef WIN32 +// Allows us to open standard input in binary mode by default +// See http://gnuwin32.sourceforge.net/compile.html for more int _CRT_fmode = _O_BINARY; #endif /* Debug help */ -__attribute__((noreturn)) +__attribute__((noreturn)) void debug_help() { puts("#define DEBUG_PEDANTIC 0x0001 // check values more rigorously"); @@ -90,10 +90,10 @@ static void usage(const char *progname) #endif #ifdef HAVE_LIBAFFLIB std::cout << " HAS SUPPORT FOR AFF FILES\n"; -#endif +#endif #ifdef HAVE_EXIV2 std::cout << " EXIV2 ENABLED\n"; -#endif +#endif #ifdef HAVE_LIBLIGHTGREP std::cout << " LIGHTGREP ENABLED\n"; #endif @@ -185,13 +185,13 @@ static std::string be_hash_name("md5"); static std::string be_hash_func(const uint8_t *buf,size_t bufsize) { if(be_hash_name=="md5" || be_hash_name=="MD5"){ - return md5_generator::hash_buf(buf,bufsize).hexdigest(); + return dfxml::md5_generator::hash_buf(buf,bufsize).hexdigest(); } if(be_hash_name=="sha1" || be_hash_name=="SHA1" || be_hash_name=="sha-1" || be_hash_name=="SHA-1"){ - return sha1_generator::hash_buf(buf,bufsize).hexdigest(); + return dfxml::sha1_generator::hash_buf(buf,bufsize).hexdigest(); } if(be_hash_name=="sha256" || be_hash_name=="SHA256" || be_hash_name=="sha-256" || be_hash_name=="SHA-256"){ - return sha256_generator::hash_buf(buf,bufsize).hexdigest(); + return dfxml::sha256_generator::hash_buf(buf,bufsize).hexdigest(); } std::cerr << "Invalid hash name: " << be_hash_name << "\n"; std::cerr << "This version of bulk_extractor only supports MD5, SHA1, and SHA256\n"; @@ -321,7 +321,7 @@ class path_printer_finished: public std::exception { static std::string HTTP_EOL = "\r\n"; // stdout is in binary form void process_path_printer(const scanner_params &sp) { - /* 1. Get next token + /* 1. Get next token * 2. 
if prefix part is a number, skip forward that much in sbuf and repeat. * if the prefix is PRINT, print the buffer * if next part is a string, strip it and run that decoder. @@ -337,7 +337,7 @@ void process_path_printer(const scanner_params &sp) uint64_t print_start = 0; uint64_t print_len = 4096; - + /* Check for options */ scanner_params::PrintOptions::iterator it; @@ -503,7 +503,7 @@ static void process_path(const char *fn,std::string path,size_t pagesize,size_t std::string line; // the specific query scanner_params::PrintOptions po; scanner_params::setPrintMode(po,scanner_params::MODE_HTTP); // options for this query - + getline(std::cin,line); truncate_at(line,'\r'); if(line.substr(0,4)!="GET "){ @@ -583,7 +583,7 @@ class bulk_extractor_restarter { if(self.thisElement=="provided_filename") self.provided_filename = self.cdata.str(); self.cdata.str(""); } - static void characterDataHandler(void *userData,const XML_Char *s,int len){ + static void characterDataHandler(void *userData,const XML_Char *s,int len){ class bulk_extractor_restarter &self = *(bulk_extractor_restarter *)userData; self.cdata.write(s,len); } @@ -603,7 +603,7 @@ public:; std::cerr << "Cannot continue.\n"; exit(1); } - + XML_Parser parser = XML_ParserCreate(NULL); XML_SetUserData(parser, this); XML_SetElementHandler(parser, startElement, endElement); @@ -644,7 +644,7 @@ public:; * Create the dfxml output */ -static void dfxml_create(dfxml_writer &xreport,const std::string &command_line,const BulkExtractor_Phase1::Config &cfg) +static void dfxml_create(dfxml_writer &xreport,int argc,char * const *argv,const BulkExtractor_Phase1::Config &cfg) { xreport.push("dfxml","xmloutputversion='1.0'"); xreport.push("metadata", @@ -653,7 +653,7 @@ static void dfxml_create(dfxml_writer &xreport,const std::string &command_line,c "\n xmlns:dc='http://purl.org/dc/elements/1.1/'" ); xreport.xmlout("dc:type","Feature Extraction","",false); xreport.pop(); - 
xreport.add_DFXML_creator(PACKAGE_NAME,PACKAGE_VERSION,svn_revision_clean(),command_line); + xreport.add_DFXML_creator(PACKAGE_NAME,PACKAGE_VERSION,svn_revision_clean(),argc,argv); xreport.push("configuration"); xreport.xmlout("threads",cfg.num_threads); xreport.xmlout("pagesize",cfg.opt_pagesize); @@ -718,6 +718,9 @@ int main(int argc,char **argv) mtrace(); #endif + auto argc_ = argc; + auto argv_ = argv; + /* setup */ feature_recorder::set_main_threadid(); const char *progname = argv[0]; @@ -777,7 +780,7 @@ int main(int argc,char **argv) if(strcmp(optarg,"h")==0) debug_help(); int d = atoi(optarg); switch(d){ - case DEBUG_ALLOCATE_512MiB: + case DEBUG_ALLOCATE_512MiB: if(calloc(1024*1024*512,1)){ std::cerr << "-d1002 -- Allocating 512MB of RAM; may be repeated\n"; } else { @@ -906,7 +909,7 @@ int main(int argc,char **argv) /* Load all the scanners and enable the ones we care about */ be13::plugin::load_scanner_directories(scanner_dirs,s_config); - be13::plugin::load_scanners(scanners_builtin,s_config); + be13::plugin::load_scanners(scanners_builtin,s_config); be13::plugin::scanners_process_enable_disable_commands(); /* Print usage if necessary */ @@ -999,7 +1002,7 @@ int main(int argc,char **argv) /* Open the image file (or the device) now */ p = image_process::open(image_fname,opt_recurse,cfg.opt_pagesize,cfg.opt_marginsize); if(!p) err(1,"Cannot open %s: ",image_fname.c_str()); - + /*** *** Create the feature recording set. *** Initialize the scanners. 
@@ -1040,7 +1043,7 @@ int main(int argc,char **argv) /* Store the configuration in the XML file */ dfxml_writer *xreport = new dfxml_writer(reportfilename,false); - dfxml_create(*xreport,command_line,cfg); + dfxml_create(*xreport,argc_,argv_,cfg); xreport->xmlout("provided_filename",image_fname); // save this information /* provide documentation to the user; the DFXML information comes from elsewhere */ @@ -1120,7 +1123,7 @@ int main(int argc,char **argv) std::cout.precision(4); printf("Elapsed time: %g sec.\n",timer.elapsed_seconds()); printf("Total MB processed: %d\n",int(phase1.total_bytes / 1000000)); - + printf("Overall performance: %g MBytes/sec (%g MBytes/sec/thread)\n", mb_per_sec,mb_per_sec/cfg.num_threads); if (fs.has_name("email")) { diff --git a/src/phase1.cpp b/src/phase1.cpp index 23acaf81..2b6ca76d 100644 --- a/src/phase1.cpp +++ b/src/phase1.cpp @@ -58,7 +58,7 @@ sbuf_t *BulkExtractor_Phase1::get_sbuf(image_process::iterator &it) << " reading " << it.get_pos0() << " (retry_count=" << retry_count << " of " << config.max_bad_alloc_errors << ")\n"; - + std::stringstream ss; ss << "name='bad_alloc' " << "pos0='" << it.get_pos0() << "' " << "retry_count='" << retry_count << "' "; @@ -89,12 +89,12 @@ void BulkExtractor_Phase1::run(image_process &p,feature_recorder_set &fs, seen_page_ids_t &seen_page_ids) { p.set_report_read_errors(config.opt_report_read_errors); - md5g = new md5_generator(); // keep track of MD5 + md5g = new dfxml::md5_generator(); // keep track of MD5 uint64_t md5_next = 0; // next byte to hash if(config.debug & DEBUG_PRINT_STEPS) std::cout << "DEBUG: CREATING THREAD POOL\n"; - - tp = new threadpool(config.num_threads,fs,xreport); + + tp = new threadpool(config.num_threads,fs,xreport); uint64_t page_ctr=0; xreport.push("runtime","xmlns:debug=\"http://www.github.com/simsong/bulk_extractor/issues\""); @@ -102,7 +102,7 @@ void BulkExtractor_Phase1::run(image_process &p,feature_recorder_set &fs, /* A single loop with two iterators. 
* * it -- the regular image_iterator; it knows how to read blocks. - * + * * si -- the sampling iterator. It is a iterator for an STL set. * * If sampling, si is used to ask for a specific page from it. @@ -132,7 +132,7 @@ void BulkExtractor_Phase1::run(image_process &p,feature_recorder_set &fs, break; } } - + if(config.opt_offset_end!=0 && config.opt_offset_end <= it.raw_offset ){ break; // passed the offset } @@ -143,10 +143,10 @@ void BulkExtractor_Phase1::run(image_process &p,feature_recorder_set &fs, sbuf_t *sbuf = get_sbuf(it); if(sbuf==0) break; // eof? sbuf->page_number = page_ctr; - + /* compute the md5 hash */ if(md5g){ - if(sbuf->pos0.offset==md5_next){ + if(sbuf->pos0.offset==md5_next){ // next byte follows logically, so continue to compute hash md5g->update(sbuf->buf,sbuf->pagesize); md5_next += sbuf->pagesize; @@ -156,12 +156,12 @@ void BulkExtractor_Phase1::run(image_process &p,feature_recorder_set &fs, } } total_bytes += sbuf->pagesize; - + /*************************** **** SCHEDULE THE WORK **** ***************************/ - - tp->schedule_work(sbuf); + + tp->schedule_work(sbuf); if(!config.opt_quiet) notify_user(it); } catch (const std::exception &e) { @@ -187,7 +187,7 @@ void BulkExtractor_Phase1::run(image_process &p,feature_recorder_set &fs, } ++page_ctr; } - + if(!config.opt_quiet){ std::cout << "All data are read; waiting for threads to finish...\n"; } @@ -209,8 +209,8 @@ void BulkExtractor_Phase1::wait_for_workers(image_process &p,std::string *md5_st if(counter%60==0){ std::stringstream ss; ss << "Time elapsed waiting for " << num_remaining - << " thread" << (num_remaining>1 ? "s" : "") - << " to finish:\n " << minsec(time_waiting) + << " thread" << (num_remaining>1 ? 
"s" : "") + << " to finish:\n " << minsec(time_waiting) << " (timeout in " << minsec(time_remaining) << ".)\n"; if(config.opt_quiet==0){ std::cout << ss.str(); @@ -226,14 +226,14 @@ void BulkExtractor_Phase1::wait_for_workers(image_process &p,std::string *md5_st } } if(config.opt_quiet==0) std::cout << "All Threads Finished!\n"; - + xreport.pop(); // pop runtime /* We can write out the source info now, since we (might) know the hash */ xreport.push("source"); xreport.xmlout("image_filename",p.image_fname()); - xreport.xmlout("image_size",p.image_size()); + xreport.xmlout("image_size",p.image_size()); if(md5g){ - md5_t md5 = md5g->final(); + auto md5 = md5g->digest(); if(md5_string) *md5_string = md5.hexdigest(); xreport.xmlout("hashdigest",md5.hexdigest(),"type='MD5'",false); delete md5g; @@ -244,7 +244,7 @@ void BulkExtractor_Phase1::wait_for_workers(image_process &p,std::string *md5_st tp->fs.dump_name_count_stats(xreport); if(config.opt_quiet==0) std::cout << "Producer time spent waiting: " << tp->waiting.elapsed_seconds() << " sec.\n"; - + xreport.xmlout("thread_wait",dtos(tp->waiting.elapsed_seconds()),"thread='0'",false); double worker_wait_average = 0; for(threadpool::worker_vector::const_iterator ij=tp->workers.begin();ij!=tp->workers.end();ij++){ diff --git a/src/phase1.h b/src/phase1.h index c12e721f..eb7f21a6 100644 --- a/src/phase1.h +++ b/src/phase1.h @@ -36,12 +36,12 @@ class BulkExtractor_Phase1 { max_wait_time(3600), opt_quiet(0), retry_seconds(60), - num_threads(1), // + num_threads(1), // sampling_fraction(1.0), sampling_passes(1), opt_report_read_errors(true) {} - - uint64_t debug; // debug + + uint64_t debug; // debug size_t opt_pagesize; size_t opt_marginsize; uint32_t max_bad_alloc_errors; @@ -90,8 +90,8 @@ class BulkExtractor_Phase1 { aftimer &timer; Config &config; u_int notify_ctr; /* for random sampling */ - uint64_t total_bytes; // - md5_generator *md5g; + uint64_t total_bytes; // + dfxml::md5_generator *md5g; /* Get the sbuf from current 
image iterator location, with retries */ sbuf_t *get_sbuf(image_process::iterator &it); diff --git a/src/rar/extract.cpp b/src/rar/extract.cpp index 111b69ef..071b1d08 100644 --- a/src/rar/extract.cpp +++ b/src/rar/extract.cpp @@ -56,7 +56,7 @@ CmdExtract::~CmdExtract() } -void CmdExtract::DoExtract(CommandData *Cmd, byte* ptrlocation, int64 ptrlength, std::string& xml) +void CmdExtract::DoExtract(CommandData *Cmd, unsigned char* ptrlocation, int64 ptrlength, std::string& xml) { PasswordCancelled=false; DataIO.SetCurrentCommand(*Cmd->Command); @@ -70,8 +70,8 @@ void CmdExtract::DoExtract(CommandData *Cmd, byte* ptrlocation, int64 ptrlength, wcscpy(PrevCmdPassword,Cmd->Password); EXTRACT_ARC_CODE Code=ExtractArchive(Cmd, ptrlocation, ptrlength, xml); //this is where the files are extracted - - + + // Restore Cmd->Password, which could be changed in IsArchive() call // for next header encrypted archive. wcscpy(Cmd->Password,PrevCmdPassword); @@ -142,7 +142,7 @@ void CmdExtract::ExtractArchiveInit(CommandData *Cmd,Archive &Arc) } -EXTRACT_ARC_CODE CmdExtract::ExtractArchive(CommandData *Cmd, byte *ptrlocation, int64 ptrlength, std::string& xml) +EXTRACT_ARC_CODE CmdExtract::ExtractArchive(CommandData *Cmd, unsigned char *ptrlocation, int64 ptrlength, std::string& xml) { Archive Arc(Cmd); Arc.InitArc(ptrlocation, ptrlength); //initialize the pointer location @@ -198,7 +198,7 @@ EXTRACT_ARC_CODE CmdExtract::ExtractArchive(CommandData *Cmd, byte *ptrlocation, while (true) { - // First volume is already added to DataIO.TotalArcSize + // First volume is already added to DataIO.TotalArcSize // in initial TotalArcSize calculation in DoExtract. // So we skip it and start from second volume. 
NextVolumeName(NextName,NextNameW,ASIZE(NextName),(Arc.NewMhd.Flags & MHD_NEWNUMBERING)==0 || Arc.OldFormat); @@ -236,7 +236,7 @@ EXTRACT_ARC_CODE CmdExtract::ExtractArchive(CommandData *Cmd, byte *ptrlocation, #endif std::ostringstream ss; #if 0 - + /*Array CmtBuf; if(Arc.GetComment(&CmtBuf,NULL)) { @@ -716,20 +716,20 @@ bool CmdExtract::ExtractCurrentFile(CommandData *Cmd,Archive &Arc,size_t HeaderS else theos = "Other OS"; - ss << "\n\n" << Arc.NewLhd.FileName << "\n" - << Arc.NewLhd.NameSize << "\n" + ss << "\n\n" << Arc.NewLhd.FileName << "\n" + << Arc.NewLhd.NameSize << "\n" << Arc.NewLhd.UnpSize << "\n" << Arc.NewLhd.DataSize << "" << Arc.NewLhd.DataSize << "\n" << Arc.NewLhd.HighPackSize << "\n" - << Arc.NewLhd.HighUnpSize << "\n" - << theos << "\n" - << Arc.NewLhd.Method << "\n" - << Arc.NewLhd.FileCRC << "\n" - << Arc.NewLhd.FileTime << "\n" + << Arc.NewLhd.HighUnpSize << "\n" + << theos << "\n" + << Arc.NewLhd.Method << "\n" + << Arc.NewLhd.FileCRC << "\n" + << Arc.NewLhd.FileTime << "\n" << Arc.NewLhd.UnpVer << "\n"; - ss << "" << Arc.NewLhd.mtime.GetLocalTimeXML() - << "\n" << Arc.NewLhd.ctime.GetLocalTimeXML() + ss << "" << Arc.NewLhd.mtime.GetLocalTimeXML() + << "\n" << Arc.NewLhd.ctime.GetLocalTimeXML() << "\n" << Arc.NewLhd.atime.GetLocalTimeXML() << "\n" << Arc.NewLhd.arctime.GetLocalTimeXML() << "\n"; @@ -765,7 +765,7 @@ bool CmdExtract::ExtractCurrentFile(CommandData *Cmd,Archive &Arc,size_t HeaderS else #endif wcscpy(FilePassword,Password); - + DataIO.SetEncryption( (Arc.NewLhd.Flags & LHD_PASSWORD)!=0 ? Arc.NewLhd.UnpVer:0,FilePassword, (Arc.NewLhd.Flags & LHD_SALT)!=0 ? 
Arc.NewLhd.Salt:NULL,false, @@ -826,7 +826,7 @@ bool CmdExtract::ExtractCurrentFile(CommandData *Cmd,Archive &Arc,size_t HeaderS else if (Arc.NewLhd.Method!=0x30 && Arc.NewLhd.FullUnpSize>0 && ValidCRC) AnySolidDataUnpackedWell=true; - + bool BrokenFile=false; if (!SkipSolid) { @@ -900,7 +900,7 @@ bool CmdExtract::ExtractCurrentFile(CommandData *Cmd,Archive &Arc,size_t HeaderS void CmdExtract::UnstoreFile(ComprDataIO &DataIO,int64 DestUnpSize) { - Array Buffer(0x10000); + Array Buffer(0x10000); while (1) { uint Code=DataIO.UnpRead(&Buffer[0],Buffer.Size()); @@ -917,4 +917,3 @@ void CmdExtract::SetComprDataIO(ComprDataIO dataio) { DataIO = dataio; } - diff --git a/src/scan_ccns2.cpp b/src/scan_ccns2.cpp index f0db116b..184be650 100644 --- a/src/scan_ccns2.cpp +++ b/src/scan_ccns2.cpp @@ -92,7 +92,7 @@ static int ccv1_test(const char *digits) int len = strlen(digits); int i; int doubled[] = { 0,2,4,6,8,1,3,5,7,9 }; /* what are number when "doubled" */ - + for(i=len-1;i>=0;i--){ int val = digit_val(digits[i]); if(double_flag==0){ @@ -118,7 +118,7 @@ static int ccv1_test(const char *digits) static int histogram_test(const char *digits) { int cntscore = 0; - int digit_counts[10]; // count of each character + int digit_counts[10]; // count of each character memset((void*)digit_counts,0,sizeof(digit_counts)); while(*digits){ @@ -126,7 +126,7 @@ static int histogram_test(const char *digits) digits++; } - /* If we have more than 7 of one digit, + /* If we have more than 7 of one digit, * or two digits with more than 5, * this isn't a valid number. */ @@ -171,7 +171,7 @@ static int pattern_test(const char *digits) int b = int4(digits+4); int c = int4(digits+8); int d = int4(digits+12); - + if(b-a == c-d) return -1; /* something fishy going on... */ return 0; } @@ -377,13 +377,13 @@ bool unbase58(const char *s,uint8_t *out,size_t len) return true; } -// A bitcoin address uses a base58 encoding, which uses an alphabet of the characters 0 .. 9, A ..Z, a .. 
z, +// A bitcoin address uses a base58 encoding, which uses an alphabet of the characters 0 .. 9, A ..Z, a .. z, // but without the four characters 0, O, I and l. bool valid_bitcoin_address(const char *s,size_t len){ uint8_t dec[32]; if (unbase58(s,dec,len)==false) return false; - sha256_t d1 = sha256_generator::hash_buf(dec,21); - sha256_t d2 = sha256_generator::hash_buf(d1.digest,d1.size()); + dfxml::sha256_t d1 = dfxml::sha256_generator::hash_buf(dec,21); + dfxml::sha256_t d2 = dfxml::sha256_generator::hash_buf(d1.digest,d1.size()); if (memcmp(dec+21, d2.digest, 4)!=0){ return false; } diff --git a/src/scan_evtx.cpp b/src/scan_evtx.cpp index e2f1ef29..f3ce22d4 100644 --- a/src/scan_evtx.cpp +++ b/src/scan_evtx.cpp @@ -2,7 +2,7 @@ * Plugin: scan_evtx * Purpose: Find all of evtx component, carve out and reconstruct evtx file * Reference: https://github.com/libyal/libevtx/blob/master/documentation/Windows%20XML%20Event%20Log%20(EVTX).asciidoc - * Teru Yamazaki(@4n6ist) - https://github.com/4n6ist/bulk_extractor-rec + * Teru Yamazaki(@4n6ist) - https://github.com/4n6ist/bulk_extractor-rec **/ #include "config.h" #include "be13_api/bulk_extractor_i.h" @@ -78,9 +78,9 @@ struct crc32 { }; // check EVTX Header Signature -// return: > 0 - valid header and number of chunks, 0 - not header, -1 - valid header but invalid num of chunk +// return: > 0 - valid header and number of chunks, 0 - not header, -1 - valid header but invalid num of chunk int64_t check_evtxheader_signature(size_t offset, const sbuf_t &sbuf) { - int16_t num_of_chunks; + int16_t num_of_chunks; // \x45\x6c\x66\x46\x69\x6c\x65 ElfFile if (sbuf[offset] == 0x45 && sbuf[offset + 1] == 0x6c && @@ -126,7 +126,7 @@ int64_t check_evtxchunk_signature(size_t offset, const sbuf_t &sbuf) { } // check EVTX Record Signature -// return: > 0 - record size, 0 - not record, +// return: > 0 - record size, 0 - not record, int64_t check_evtxrecord_signature(size_t offset, const sbuf_t &sbuf) { int64_t record_size; // 
\x2a\x2a\x00\x00 @@ -188,8 +188,8 @@ void scan_evtx(const class scanner_params &sp,const recursion_control_block &rcb result_last_record_id = check_evtxchunk_signature(offset+total_size, sbuf); } std::string filename = (sbuf.pos0+offset).str() + "_valid_header_" + - std::to_string(result_num_of_chunks) + "chunks_" + - std::to_string(actual_num_of_chunk) + "actual.evtx"; + std::to_string(result_num_of_chunks) + "chunks_" + + std::to_string(actual_num_of_chunk) + "actual.evtx"; evtx_recorder->carve_records(sbuf, offset, total_size, filename); } else if (result_last_record_id == -1) { // If valid ElfChnk and invalid record then skip @@ -197,19 +197,19 @@ void scan_evtx(const class scanner_params &sp,const recursion_control_block &rcb } offset += total_size; continue; - } + } result_last_record_id = check_evtxchunk_signature(offset, sbuf); // ElfChnk if (result_last_record_id > 0) { int32_t last_chunk = 0; - last_record_id = result_last_record_id; + last_record_id = result_last_record_id; int64_t first_record_id = sbuf.get64i(offset + 24); // First Record ID int64_t num_of_records = last_record_id - first_record_id +1; total_size += ELFCHNK_SIZE; result_last_record_id = check_evtxchunk_signature(offset+total_size, sbuf); while (result_last_record_id > 0 && offset+total_size < stop) { first_record_id = sbuf.get64i(offset+ total_size + 24); // First Record ID - last_record_id = result_last_record_id; + last_record_id = result_last_record_id; num_of_records += last_record_id - first_record_id +1; ++last_chunk; total_size += ELFCHNK_SIZE; @@ -231,11 +231,11 @@ void scan_evtx(const class scanner_params &sp,const recursion_control_block &rcb uint32_t table[256]; crc32::generate_table(table); // CRC32 of the first 120 bytes == header.part struct - header.crc32 = crc32::update(table, 0, &header.part, 120); + header.crc32 = crc32::update(table, 0, &header.part, 120); memset(header.unknown2,'\0', sizeof(header.unknown2)); std::string filename = (sbuf.pos0+offset).str() + "_" + 
std::to_string(header.part.number_of_chunks) + "chunks_" + - std::to_string(num_of_records) + "records.evtx"; + std::to_string(num_of_records) + "records.evtx"; // generate evtx header based on elfchnk information evtx_recorder->write_data((unsigned char *)&header,sizeof(elffile),filename); evtx_recorder->carve_records(sbuf, offset, total_size, filename); @@ -257,5 +257,3 @@ void scan_evtx(const class scanner_params &sp,const recursion_control_block &rcb } // end while } // end PHASE_SCAN } - - diff --git a/src/scan_json.cpp b/src/scan_json.cpp index cc1b6490..0a383cb4 100644 --- a/src/scan_json.cpp +++ b/src/scan_json.cpp @@ -2,7 +2,10 @@ #include "be13_api/bulk_extractor_i.h" #include #include + +#ifdef HAVE_JSON_C #include +#endif #define MIN_SIZE 16 #define IS_STRICT 1 @@ -18,6 +21,9 @@ void scan_json(const class scanner_params &sp,const recursion_control_block &rcb assert(sp.info->si_version==scanner_info::CURRENT_SI_VERSION); sp.info->name = "json"; sp.info->author = "Simson Garfinkel & Jan Gruber"; +#ifndef HAVE_JSON_C + sp.info->description = "(disabled; json-c not installed)"; +#else sp.info->description = "Scans for JSON-encoded data"; sp.info->scanner_version= "1.1"; sp.info->feature_names.insert("json"); @@ -27,8 +33,10 @@ void scan_json(const class scanner_params &sp,const recursion_control_block &rcb for(int i=0;json_second_chars[i];i++){ is_json_second_char[(uint8_t)json_second_chars[i]] = true; } - return; +#endif + return; } +#ifdef HAVE_JSON_C const sbuf_t &sbuf = sp.sbuf; feature_recorder *fr = sp.fs.get_name("json"); @@ -42,11 +50,11 @@ void scan_json(const class scanner_params &sp,const recursion_control_block &rcb if(sp.phase==scanner_params::PHASE_SCAN){ json_object *jo = NULL; - json_tokener* jt = NULL; - char* js = NULL; + json_tokener* jt = NULL; + char* js = NULL; size_t end = 0; enum json_tokener_error je; - + for(size_t pos = 0;pos+1 0.15 */ @@ -70,7 +78,7 @@ void scan_json(const class scanner_params &sp,const recursion_control_block 
&rcb /* Discard very short matches */ if (strlen(js) > MIN_SIZE){ - + /* Constructs output buffer */ sbuf_t jbuf(sbuf, pos, end+1); std::string json_hash = (*fr->fs.hasher.func)(jbuf.buf, jbuf.bufsize); @@ -87,15 +95,15 @@ void scan_json(const class scanner_params &sp,const recursion_control_block &rcb free(jo); jo = NULL; } - + if(js){ free(js); js = NULL; } - + } } } - +#endif } diff --git a/src/scan_rar.cpp b/src/scan_rar.cpp index ede23927..78bf49f2 100644 --- a/src/scan_rar.cpp +++ b/src/scan_rar.cpp @@ -200,10 +200,10 @@ class RarComponentInfo { uint8_t days = (dos_date & DOS_MASK_DAY) >> DOS_SHIFT_DAY; uint8_t months = (dos_date & DOS_MASK_MONTH) >> DOS_SHIFT_MONTH; uint16_t years = (dos_date & DOS_MASK_YEAR) >> DOS_SHIFT_YEAR; - + years += DOS_OFFSET_YEAR; seconds *= 2; - + char buf[STRING_BUF_LEN]; snprintf(buf,sizeof(buf),"%04d-%02d-%02dT%02d:%02d:%02dZ", years, months, days, hours, minutes, seconds); @@ -539,7 +539,7 @@ static void unpack_buf(const uint8_t* input, size_t input_len, uint8_t* output, CmdExtract extract; //from the extract.cpp file; allows the extraction to occur - byte *startingaddress = (byte*) input; + unsigned char *startingaddress = (unsigned char*) input; ComprDataIO mydataio; mydataio.SetSkipUnpCRC(true); //skip checking the CRC to allow more processing to occur @@ -587,8 +587,8 @@ static bool is_mark_block(const uint8_t* buf, size_t buf_len, size_t offset) //25 50 44 46 2D //25 25 45 4F 46 size_t sz = component.uncompressed_size; -assert(dbuf.buf[0] == 0x25); assert(dbuf.buf[1] == 0x50); assert(dbuf.buf[2] == 0x44); assert(dbuf.buf[3] == 0x46); assert(dbuf.buf[4] == 0x2D); -assert(dbuf.buf[sz-5] == 0x25); assert(dbuf.buf[sz-4] == 0x25); assert(dbuf.buf[sz-3] == 0x45); assert(dbuf.buf[sz-2] == 0x4F); assert(dbuf.buf[sz-1] == 0x46); +assert(dbuf.buf[0] == 0x25); assert(dbuf.buf[1] == 0x50); assert(dbuf.buf[2] == 0x44); assert(dbuf.buf[3] == 0x46); assert(dbuf.buf[4] == 0x2D); +assert(dbuf.buf[sz-5] == 0x25); assert(dbuf.buf[sz-4] == 
0x25); assert(dbuf.buf[sz-3] == 0x45); assert(dbuf.buf[sz-2] == 0x4F); assert(dbuf.buf[sz-1] == 0x46); #endif From 1c67a759e12939ae9ccd0c3f1b7e275fdff74947 Mon Sep 17 00:00:00 2001 From: "Simson L. Garfinkel" Date: Fri, 13 Aug 2021 12:43:35 -0400 Subject: [PATCH 11/89] Be16 slg dev (#219) * fixes for Fedora 32 * added Config.java because for some reason it was not being auto-generated --- COPYING | 120 ++++++++++++++++++++++++++++++++++++++- README | 78 ++++++++++++++++++++++++- configure.ac | 3 +- java_gui/src/Config.java | 6 ++ plugins/plugin_test.cpp | 2 +- 5 files changed, 204 insertions(+), 5 deletions(-) mode change 120000 => 100644 COPYING mode change 120000 => 100644 README create mode 100644 java_gui/src/Config.java diff --git a/COPYING b/COPYING deleted file mode 120000 index f0c42986..00000000 --- a/COPYING +++ /dev/null @@ -1 +0,0 @@ -LICENSE.md \ No newline at end of file diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..b46cc28c --- /dev/null +++ b/COPYING @@ -0,0 +1,119 @@ +## Copyright/Non-Copyright Statements + +**bulk_extractor** was originally developed by Simson Garfinkel while at +the Naval Postgraduate School. As a work of the US Government this +work is not subject to copyright law. + +Simson Garfinkel left the Naval Postgraduate School in January 2015 +and continued to work on **bulk_extractor** in his personal +capacity. Those modifications are covered under the MIT license. Other +components are licensed as noted. + +## MIT License + +Copyright (C) 2020, Simson L. 
Garfinkel {{ organization }} + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +## CC0 Original Summary + +Except as otherwise noted, bulk_extractor source code files are public domain +software. + +That software provided here is released by the Naval Postgraduate +School, an agency of the U.S. Department of Navy. The software bears +no warranty, either expressed or implied. NPS does not assume legal +liability nor responsibility for a User's use of the software or the +results of such use. + +Please note that within the United States, copyright protection, under +Section 105 of the United States Code, Title 17, is not available for +any work of the United States Government and/or for any works created +by United States Government employees. + +However, because some bulk_extractor source modules (e.g. pyxpress.c) +are covered under the GNU Public License, the compiled bulk_extractor +executable is covered under the GPL copyright. 
This means that binary +distributions of bulk_extractor must include the full source code (or +have the source code be made easily available.) + +## Other materials + +bulk_extractor provides other materials with different licenses than the +ones mentioned previously. Copies of the licenses mentioned below can be +found under the licenses/ directory. + +Licensed under the Common Public License 1.0 + +* src/tsk3 includes SleuthKit 3 include files that are part of +SleuthKit 3. These files are Copyright (C) 2010 Brian Carrier + + +Licensed under the MIT License + +* src/base64_forensic.cpp is Copyright (C) 1996-1999 by Internet Software Consortium, with + portions Copyright (C) 1995 by International Business Machines, Inc. + +* src/be13_api/utf8.h is Copyright 2006 Nemanja Trifunovic + +* src/old_scanners/scan_ascii85.cpp is Copyright (C) 2011 Remy Oukaour + +* src/scan_json.cpp is Copyright (C) 2021 Simson L. Garfinkel and Jan Gruber + + +Licensed under General Public License version 3 and later + +* src/pyxpress.c is Copyright 2008 (C) Matthieu Suiche. + + +Licensed under Lesser General Public License version 2.1 + +* src/scan_elf.cpp + + +Licensed under Lesser General Public License version 3 and later + +* src/dfxml is Copyright 2012 (C) Simson L. Garfinkel + +* src/scan_outlook.h is Copyright (C) 2008-2014, Joachim Metz + + +Licensed under General Public License version 3 with the Autoconf exception: + +* m4/ac_prog_java.m4 is Copyright (C) 2000 Stephane Bortzmeyer + +* m4/ac_prog_java_works.m4 is Copyright (C) 2000 Stephane Bortzmeyer + +* m4/ac_check_rqrd_class.m4 is Copyright (C) 2000 Stephane Bortzmeyer + +* m4/ax_pthread.m4 is Copyright (C) 2008 Steven G. Johnson , + 2011 Daniel Richard G.
+ +* m4/ac_prog_java_cc.m4 is Copyright (C) 2002 Nic Ferrier + +* m4/ac_prog_javac.m4 is Copyright (C) 2000 Stephane Bortzmeyer + +* m4/ac_prog_javac_works.m4 is Copyright (C) 2000 Stephane Bortzmeyer + +* m4/ac_check_classpath.m4 is Copyright (C) 2000 Stephane Bortzmeyer + + +Licensed under OpenSSL License + +Because of the fact, that the GPL (including version 3) is incompatible with some terms of the OpenSSL license, the author hereby explicitly states a license exception, which permits the linking of bulk_extractor with OpenSSL. diff --git a/README b/README deleted file mode 120000 index 42061c01..00000000 --- a/README +++ /dev/null @@ -1 +0,0 @@ -README.md \ No newline at end of file diff --git a/README b/README new file mode 100644 index 00000000..17ee478d --- /dev/null +++ b/README @@ -0,0 +1,77 @@ +[![codecov](https://codecov.io/gh/simsong/bulk_extractor/branch/master/graph/badge.svg?token=3w691sdgLu)](https://codecov.io/gh/simsong/bulk_extractor) + +Welcome to bulk_extractor. + +Note: bulk_extractor version 2.0 is now under development. For information, please see [Release 2.0 roadmap in the release-2.0-dev branch](https://github.com/simsong/bulk_extractor/blob/release-2.0-dev/doc/ROADMAP_2.0.md). + +To build bulk_extractor in Linux or Mac OS: + +1. Make sure required packages have been installed. **You can do this by going into the etc/ directory and looking for a script that installs the necessary packages for your platform.** + +2. Then run these commands: + +``` +./configure +make +make install +``` + +For detailed instructions on installing packages and building bulk_extractor, read the wiki page here: +https://github.com/simsong/bulk_extractor/wiki/Installing-bulk_extractor + +The Windows version of bulk_extractor must be built on Fedora. 
+ +To download the Windows installer and/or other releases of bulk_extractor, visit the downloads page here: +http://digitalcorpora.org/downloads/bulk_extractor + +For more information on bulk_extractor, visit: https://forensicswiki.xyz/wiki/index.php?title=Bulk_extractor + + +Tested Configurations +===================== +This release of bulk_extractor has been tested to compile on the following platforms: + +* Amazon Linux as of 2019-11-09 +* Fedora 32 +* Ubuntu 16.04LTS +* Ubuntu 18.04LTS + +To configure your operating system, please run the appropriate scripts in the [etc/](/etc) directory. + + +RECOMMENDED CITATION +==================== +If you are writing a scientific paper and using bulk_extractor, please cite it with: + +Garfinkel, Simson, Digital media triage with bulk data analysis and bulk_extractor. Computers and Security 32: 56-72 (2013) +* [Science Direct](https://www.sciencedirect.com/science/article/pii/S0167404812001472) +* [Bibliometrics](https://plu.mx/plum/a/?doi=10.1016/j.cose.2012.09.011&theme=plum-sciencedirect-theme&hideUsage=true) +* [Author's website](https://simson.net/clips/academic/2013.COSE.bulk_extractor.pdf) +``` +@article{10.5555/2748150.2748581, +author = {Garfinkel, Simson L.}, +title = {Digital Media Triage with Bulk Data Analysis and Bulk_extractor}, +year = {2013}, +issue_date = {February 2013}, +publisher = {Elsevier Advanced Technology Publications}, +address = {GBR}, +volume = {32}, +number = {C}, +issn = {0167-4048}, +journal = {Comput. Secur.}, +month = feb, +pages = {56–72}, +numpages = {17}, +keywords = {Digital forensics, Bulk data analysis, bulk_extractor, Stream-based forensics, Windows hibernation files, Parallelized forensic analysis, Optimistic decompression, Forensic path, Margin, EnCase} +} +``` + +BULK_EXTRACTOR 2.0 STATUS REPORT +================================ +I continue to port bulk_extractor, tcpflow, be13_api and dfxml to modern C++. 
After surveying the standards I’ve decided to go with C++17 and not C++14, as support for 17 is now widespread. (I probably don’t need 20). I am sticking with autotools, although there seems to be a strong reason to move to CMake. I am keeping be13_api and dfxml as modules that are included, python-style, rather than making them stand-alone libraries that are linked against. I’m not 100% sure that’s the correct decision, though. + +The project is taking longer than anticipated because I am also doing a general code refactoring. The main thing that is taking time is figuring out how to detangle all of the C++ objects having to do with parser options and configuration. + +Given that tcpflow and bulk_extractor both use be13_api, my attention has shifted to using tcpflow to get be13_api operational, as it is a simpler program. I’m about three quarters of the way through now. I anticipate having something finished before the end of 2020. + +--- Simson Garfinkel, October 18, 2020 diff --git a/configure.ac b/configure.ac index adda7c48..fbb83502 100644 --- a/configure.ac +++ b/configure.ac @@ -7,7 +7,7 @@ # and http://www.openismus.com/documents/linux/automake/automake.shtml # and http://www.bioinf.uni-freiburg.de/~mmann/HowTo/automake.html -AC_PREREQ([2.71]) +AC_PREREQ([2.69]) AC_INIT([BULK_EXTRACTOR],[1.6.1],[bugs@digitalcorpora.org]) AC_CONFIG_MACRO_DIR(m4) #AC_CONFIG_AUX_DIR([build-aux]) @@ -248,7 +248,6 @@ AC_CHECK_FUNCS([pthread_win32_process_attach_np pthread_win32_process_detach_np # Specific include files and functions for bulk-extractor # Autoupdate added the next two lines to ensure that your configure # script's behavior did not change. They are probably safe to remove. -AC_CHECK_INCLUDES_DEFAULT AC_PROG_EGREP AC_TYPE_INT64_T diff --git a/java_gui/src/Config.java b/java_gui/src/Config.java new file mode 100644 index 00000000..f0a62aa9 --- /dev/null +++ b/java_gui/src/Config.java @@ -0,0 +1,6 @@ +// This file is generated by autoconf. Do not edit this file.
+public final class Config { + private Config() { } // no constructor + /** BEViewer Version {@value} */ + public static final String VERSION = "1.5.5"; +} diff --git a/plugins/plugin_test.cpp b/plugins/plugin_test.cpp index a954b95f..387a2fe6 100644 --- a/plugins/plugin_test.cpp +++ b/plugins/plugin_test.cpp @@ -171,5 +171,5 @@ void scanner_info::get_config(const std::string &n,unsigned char *val,const std: void scanner_info::get_config(const std::string &n,unsigned short *val,const std::string &help) {} void scanner_info::get_config(const std::string &n,unsigned int *val,const std::string &help) {} void scanner_info::get_config(const std::string &n,unsigned long *val,const std::string &help) {} -void scanner_info::get_config(const std::string &n,unsigned long long *val,const std::string &help) {} +//void scanner_info::get_config(const std::string &n,unsigned long long *val,const std::string &help) {} void scanner_info::get_config(const std::string &n,bool *val,const std::string &help) {} From df8a5eec6f69393c24c3f4abeabe04d6c1186609 Mon Sep 17 00:00:00 2001 From: "Simson L. 
Garfinkel" Date: Sun, 5 Sep 2021 06:43:09 -0400 Subject: [PATCH 12/89] revising scan_info (again) --- src/be13_api | 2 +- src/main.cpp | 2 ++ src/scan_aes.cpp | 10 ++++++++-- src/scan_base16_lg.cpp | 14 +++++++------- src/scan_base64.cpp | 2 +- src/scan_elf.cpp | 2 +- src/scan_evtx.cpp | 2 +- src/scan_exif.cpp | 2 +- src/scan_exiv2.cpp | 2 +- src/scan_facebook.cpp | 2 +- src/scan_find.cpp | 2 +- src/scan_gzip.cpp | 2 +- src/scan_hiberfile.cpp | 2 +- src/scan_httplogs.cpp | 2 +- src/scan_json.cpp | 2 +- src/scan_kml.cpp | 2 +- src/scan_lightgrep.cpp | 2 +- src/scan_msxml.cpp | 3 +-- src/scan_net.cpp | 2 +- src/scan_ntfsindx.cpp | 2 +- src/scan_ntfslogfile.cpp | 2 +- src/scan_ntfsmft.cpp | 2 +- src/scan_ntfsusn.cpp | 2 +- src/scan_outlook.cpp | 2 +- src/scan_pdf.cpp | 2 +- src/scan_rar.cpp | 2 +- src/scan_sqlite.cpp | 2 +- src/scan_utmp.cpp | 2 +- src/scan_vcard.cpp | 2 +- src/scan_windirs.cpp | 2 +- src/scan_winlnk.cpp | 2 +- src/scan_winpe.cpp | 2 +- src/scan_winprefetch.cpp | 2 +- src/scan_wordlist.cpp | 2 +- src/scan_xor.cpp | 2 +- src/scan_zip.cpp | 2 +- 36 files changed, 50 insertions(+), 43 deletions(-) diff --git a/src/be13_api b/src/be13_api index 4d28aa69..b30b3add 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit 4d28aa69f7c3a8ce0a413bd2db8317c18d844040 +Subproject commit b30b3add2c426b16edad2d19ec869e34a1c1a8d4 diff --git a/src/main.cpp b/src/main.cpp index d18581e5..ce99faac 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -472,12 +472,14 @@ int main(int argc,char **argv) /* Create a configuration that will be used to initialize the scanners */ /* Make individual configuration options appear on the command line interface. 
*/ +#if 0 sc.get_config("debug_histogram_malloc_fail_frequency",&AtomicUnicodeHistogram::debug_histogram_malloc_fail_frequency, "Set >0 to make histogram maker fail with memory allocations"); sc.get_config("hash_alg",&be_hash_name,"Specifies hash algorithm to be used for all hash calculations"); sc.get_config("write_feature_files",&opt_write_feature_files,"Write features to flat files"); sc.get_config("write_feature_sqlite3",&opt_write_sqlite3,"Write feature files to report.sqlite3"); sc.get_config("report_read_errors",&cfg.opt_report_read_errors,"Report read errors"); +#endif /* Load all the scanners and enable the ones we care about */ diff --git a/src/scan_aes.cpp b/src/scan_aes.cpp index a22dd388..c47b9c34 100644 --- a/src/scan_aes.cpp +++ b/src/scan_aes.cpp @@ -373,16 +373,23 @@ static std::string key_to_string(const uint8_t * key, uint64_t sz) return ret; } +int scan_aes_128 = 1; +int scan_aes_192 = 0; +int scan_aes_256 = 1; + extern "C" void scan_aes(struct scanner_params &sp) { if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_aes,"aes"); + sp.info.set_name("aes"); sp.info->author = "Sam Trenholme, Jesse Kornblum and Simson Garfinkel"; sp.info->description = "Search for AES key schedules"; sp.info->scanner_version = "1.1"; sp.info->feature_defs.push_back( feature_recorder_def("aes_keys")); sp.info->min_sbuf_size = WINDOW_SIZE; + sp.get_config("scan_aes_128", &scan_aes_128, "Scan for 128-bit AES keys; 0=No, 1=Yes"); + sp.get_config("scan_aes_192", &scan_aes_192, "Scan for 192-bit AES keys; 0=No, 1=Yes"); + sp.get_config("scan_aes_256", &scan_aes_256, "Scan for 256-bit AES keys; 0=No, 1=Yes"); rcon_setup(); sbox_setup(); return; @@ -411,7 +418,6 @@ void scan_aes(struct scanner_params &sp) } } for (size_t pos = 0 ; pos < sp.sbuf->bufsize-WINDOW_SIZE && pos < sp.sbuf->pagesize; pos++){ - /* add value at end of 128 bits to sliding window */ { const uint8_t val = (*sp.sbuf)[pos+AES256_KEY_SCHEDULE_SIZE]; diff --git 
a/src/scan_base16_lg.cpp b/src/scan_base16_lg.cpp index 5c4f6bbe..c5eed272 100644 --- a/src/scan_base16_lg.cpp +++ b/src/scan_base16_lg.cpp @@ -126,13 +126,13 @@ namespace base16 { void Scanner::startup(const scanner_params& sp) { sp.check_version(); - sp.info->std::make_unique(scan_base16_lg,"base16_lg") - sp.info->name = "base16_lg"; - sp.info->author = "Simson L. Garfinkel"; - sp.info->description = "Base16 (hex) scanner"; - sp.info->scanner_version = "1.0"; - sp.info->flags = scanner_info::SCANNER_RECURSE; - sp.info->feature_names.insert("hex"); // notable hex values + sp.info.set_name("base16_lg"); + sp.info->name = "base16_lg"; + sp.info->author = "Simson L. Garfinkel"; + sp.info->description = "Base16 (hex) scanner"; + sp.info->scanner_version = "1.0"; + sp.info->flags = scanner_info::SCANNER_RECURSE; + sp.info->feature_names.insert("hex"); // notable hex values } void Scanner::init(const scanner_params& sp) { diff --git a/src/scan_base64.cpp b/src/scan_base64.cpp index 035c1902..3a4ce071 100644 --- a/src/scan_base64.cpp +++ b/src/scan_base64.cpp @@ -143,7 +143,7 @@ void scan_base64(scanner_params &sp) sp.check_version(); if ( sp.phase == scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_base64,"base64"); + sp.info.set_name("base64"); sp.info->author = "Simson L. 
Garfinkel"; sp.info->description = "scans for Base64-encoded data"; sp.info->scanner_version= "1.1"; diff --git a/src/scan_elf.cpp b/src/scan_elf.cpp index 33d1c936..07dd6c63 100644 --- a/src/scan_elf.cpp +++ b/src/scan_elf.cpp @@ -796,7 +796,7 @@ void scan_elf (scanner_params &sp) sp.check_version(); if (sp.phase == scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_elf,"elf"); + sp.info.set_name("elf"); sp.info->author = "Alex Eubanks"; sp.info->scanner_version = "1.1"; sp.info->feature_defs.push_back( feature_recorder_def("elf") ); diff --git a/src/scan_evtx.cpp b/src/scan_evtx.cpp index 26a6e5d3..f15c4672 100644 --- a/src/scan_evtx.cpp +++ b/src/scan_evtx.cpp @@ -145,7 +145,7 @@ void scan_evtx(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_evtx,"evtx"); + sp.info.set_name("evtx"); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for EVTX Chunks and generates valid EVTX file"; sp.info->scanner_version = "1.0"; diff --git a/src/scan_exif.cpp b/src/scan_exif.cpp index d31f3510..df6031a6 100644 --- a/src/scan_exif.cpp +++ b/src/scan_exif.cpp @@ -506,7 +506,7 @@ void scan_exif (scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_exif,"exif"); + sp.info.set_name("exif"); sp.info->author = "Bruce Allen"; sp.info->scanner_version = "1.1"; sp.info->description = "Search for EXIF sections in JPEG files"; diff --git a/src/scan_exiv2.cpp b/src/scan_exiv2.cpp index 8a2f4b2d..51e88770 100644 --- a/src/scan_exiv2.cpp +++ b/src/scan_exiv2.cpp @@ -106,7 +106,7 @@ void scan_exiv2(struct scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique( scan_exiv2, "exiv2" ); + sp.info.set_name("exiv2" ); sp.info->author = "Simson L. Garfinkel"; sp.info->description = "Searches for EXIF information using exiv2. 
Use exif scanner if this is not available or if this crashes."; sp.info->scanner_flags.default_enabled = false; diff --git a/src/scan_facebook.cpp b/src/scan_facebook.cpp index 10b6f53e..f86b68f6 100644 --- a/src/scan_facebook.cpp +++ b/src/scan_facebook.cpp @@ -52,7 +52,7 @@ void scan_facebook(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT) { - sp.info = std::make_unique(scan_facebook,"facebook"); + sp.info.set_name("facebook"); sp.info->author = ""; sp.info->description = "Searches for facebook html and json tags"; sp.info->scanner_version = "2.0"; diff --git a/src/scan_find.cpp b/src/scan_find.cpp index 0d9d64ca..aa93b34a 100644 --- a/src/scan_find.cpp +++ b/src/scan_find.cpp @@ -43,7 +43,7 @@ void scan_find(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT) { - sp.info = std::make_unique(scan_find,"find"); + sp.info.set_name("find"); sp.info->name = "find"; sp.info->author = "Simson Garfinkel"; sp.info->description = "Simple search for patterns"; diff --git a/src/scan_gzip.cpp b/src/scan_gzip.cpp index 6a658887..10ad5beb 100644 --- a/src/scan_gzip.cpp +++ b/src/scan_gzip.cpp @@ -16,7 +16,7 @@ void scan_gzip(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique( scan_gzip, "gzip" ); + sp.info.set_name("gzip" ); sp.info->author = "Simson Garfinkel"; sp.info->description = "Searches for GZIP-compressed data"; sp.info->scanner_version= "1.1"; diff --git a/src/scan_hiberfile.cpp b/src/scan_hiberfile.cpp index d72af95b..7bd4c2a2 100644 --- a/src/scan_hiberfile.cpp +++ b/src/scan_hiberfile.cpp @@ -38,7 +38,7 @@ void scan_hiberfile(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique( scan_hiberfile, "hiberfile" ); + sp.info.set_name("hiberfile" ); sp.info->author = "Simson Garfinkel and Matthieu Suiche"; sp.info->description = "Scans for Microsoft-XPress compressed data"; 
sp.info->scanner_version= "1.0"; diff --git a/src/scan_httplogs.cpp b/src/scan_httplogs.cpp index 574f9135..22bef0f1 100644 --- a/src/scan_httplogs.cpp +++ b/src/scan_httplogs.cpp @@ -61,7 +61,7 @@ void scan_httplogs(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_httplogs,"httplogs"); + sp.info.set_name("httplogs"); sp.info->author = "Maxim Suhanov"; sp.info->description = "Extract various web server access logs"; sp.info->feature_defs.push_back( feature_recorder_def("httplogs")); diff --git a/src/scan_json.cpp b/src/scan_json.cpp index 7225b48a..1692c450 100644 --- a/src/scan_json.cpp +++ b/src/scan_json.cpp @@ -436,7 +436,7 @@ void scan_json(struct scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_json,"json"); + sp.info.set_name("json"); sp.info->author = "Simson Garfinkel"; sp.info->description = "Scans for JSON-encoded data"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_kml.cpp b/src/scan_kml.cpp index 58df0dca..54bf967a 100644 --- a/src/scan_kml.cpp +++ b/src/scan_kml.cpp @@ -28,7 +28,7 @@ void scan_kml(scanner_params &sp) std::string myString; sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_kml,"kml"); + sp.info.set_name("kml"); sp.info->author = "Simson Garfinkel "; sp.info->description = "Scans for KML files"; sp.info->scanner_version= "1.0"; diff --git a/src/scan_lightgrep.cpp b/src/scan_lightgrep.cpp index 1a93ff0d..7f70eab9 100644 --- a/src/scan_lightgrep.cpp +++ b/src/scan_lightgrep.cpp @@ -29,7 +29,7 @@ namespace { // local namespace hides these from other translation units }; virtual void startup(const scanner_params& sp) { - sp.info->name = std::make_unique(scan_lightgrep, "scan_lightgrep"); + sp.info.set_name("scan_lightgrep"); sp.info->author = "Jon Stewart"; sp.info->description = "Advanced search for patterns"; sp.info->scanner_version = "0.2"; diff 
--git a/src/scan_msxml.cpp b/src/scan_msxml.cpp index 0a2e1dbc..ff87d2cf 100644 --- a/src/scan_msxml.cpp +++ b/src/scan_msxml.cpp @@ -56,8 +56,7 @@ void scan_msxml(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique( scan_msxml, SCANNER_NAME ); - sp.info->author = "Simson Garfinkel"; + sp.info.set_name("Simson Garfinkel"; sp.info->description = "Extracts text from Microsoft XML files"; sp.info->scanner_version = "1.0"; sp.info->scanner_flags.recurse = true; diff --git a/src/scan_net.cpp b/src/scan_net.cpp index a795effd..084e9452 100644 --- a/src/scan_net.cpp +++ b/src/scan_net.cpp @@ -1011,7 +1011,7 @@ void scan_net(scanner_params &sp) sp.get_config("carve_net_memory",&opt_carve_net_memory,"Carve network memory structures"); assert(sizeof(struct be13::ip4)==20); // we've had problems on some systems - sp.info = std::make_unique(scan_net,"net"); + sp.info.set_name("net"); sp.info->author = "Simson Garfinkel and Rob Beverly"; sp.info->description = "Scans for IP packets"; sp.info->scanner_version= "1.0"; diff --git a/src/scan_ntfsindx.cpp b/src/scan_ntfsindx.cpp index a7a9519a..9ffa50ae 100644 --- a/src/scan_ntfsindx.cpp +++ b/src/scan_ntfsindx.cpp @@ -77,7 +77,7 @@ void scan_ntfsindx(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_ntfsindx,"ntfsindx"); + sp.info.set_name("ntfsindx"); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for NTFS $INDEX_ALLOCATION INDX record"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_ntfslogfile.cpp b/src/scan_ntfslogfile.cpp index 52d0fe1b..2830402d 100644 --- a/src/scan_ntfslogfile.cpp +++ b/src/scan_ntfslogfile.cpp @@ -79,7 +79,7 @@ void scan_ntfslogfile(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_ntfslogfile,"ntfslogfile"); + sp.info.set_name("ntfslogfile"); sp.info->author = "Teru Yamazaki"; 
sp.info->description = "Scans for NTFS $LogFile RCRD record"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_ntfsmft.cpp b/src/scan_ntfsmft.cpp index ae1ac306..16c03dfa 100644 --- a/src/scan_ntfsmft.cpp +++ b/src/scan_ntfsmft.cpp @@ -61,7 +61,7 @@ void scan_ntfsmft(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_ntfsmft,"ntfsmft"); + sp.info.set_name("ntfsmft"); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for NTFS MFT record"; sp.info->scanner_version = "1.0"; diff --git a/src/scan_ntfsusn.cpp b/src/scan_ntfsusn.cpp index 098d5b3d..5cd00451 100644 --- a/src/scan_ntfsusn.cpp +++ b/src/scan_ntfsusn.cpp @@ -72,7 +72,7 @@ void scan_ntfsusn(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_ntfsusn,"ntfsusn"); + sp.info.set_name("ntfsusn"); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for USN_RECORD v2/v4 record"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_outlook.cpp b/src/scan_outlook.cpp index 7fc4c9a4..604b2ba0 100644 --- a/src/scan_outlook.cpp +++ b/src/scan_outlook.cpp @@ -61,7 +61,7 @@ void scan_outlook(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT) { - sp.info = std::make_unique( scan_outlook, "outlook" ); + sp.info.set_name("outlook" ); sp.info->scanner_flags.default_enabled = false; sp.info->scanner_flags.depth0_only = true; // only run depth 0 sp.info->scanner_flags.recurse = true; diff --git a/src/scan_pdf.cpp b/src/scan_pdf.cpp index 2ceb6f07..b82c0468 100644 --- a/src/scan_pdf.cpp +++ b/src/scan_pdf.cpp @@ -223,7 +223,7 @@ void scan_pdf(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique( scan_pdf, "pdf" ); + sp.info.set_name("pdf" ); sp.info->author = "Simson Garfinkel"; sp.info->description = "Extracts text from PDF files"; sp.info->scanner_version= "1.0"; diff 
--git a/src/scan_rar.cpp b/src/scan_rar.cpp index 199b2612..bfac0d5c 100644 --- a/src/scan_rar.cpp +++ b/src/scan_rar.cpp @@ -587,7 +587,7 @@ void scan_rar(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique( scan_rar, "rar" ); + sp.info.set_name("rar" ); sp.info->author = "Michael Shick"; sp.info->scanner_version = "1.1"; sp.info->scanner_flags.recurse = true; diff --git a/src/scan_sqlite.cpp b/src/scan_sqlite.cpp index e747f99a..94a4461d 100644 --- a/src/scan_sqlite.cpp +++ b/src/scan_sqlite.cpp @@ -38,7 +38,7 @@ void scan_sqlite(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique( scan_sqlite, "sqlite" ); + sp.info.set_name("sqlite" ); sp.info->author = "Simson Garfinkel"; sp.info->description = "Scans for SQLITE3 data"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_utmp.cpp b/src/scan_utmp.cpp index 0fc9e866..0f5f4ffc 100644 --- a/src/scan_utmp.cpp +++ b/src/scan_utmp.cpp @@ -81,7 +81,7 @@ void scan_utmp(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique( scan_utmp, "utmp" ); + sp.info.set_name("utmp" ); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for utmp record"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_vcard.cpp b/src/scan_vcard.cpp index 8f4dd82b..6b3f8daf 100644 --- a/src/scan_vcard.cpp +++ b/src/scan_vcard.cpp @@ -79,7 +79,7 @@ void scan_vcard(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_vcard,"vcard"); + sp.info.set_name("vcard"); sp.info->author = "Simson Garfinkel and Tony Melaragno"; sp.info->description = "Scans for VCARD data"; sp.info->scanner_version= "1.1"; diff --git a/src/scan_windirs.cpp b/src/scan_windirs.cpp index 2775f902..17105b04 100644 --- a/src/scan_windirs.cpp +++ b/src/scan_windirs.cpp @@ -487,7 +487,7 @@ void 
scan_windirs(scanner_params &sp) gmtime_r(&t,&now); opt_last_year = now.tm_year + 1900 + 5; // allow up to 5 years in the future - sp.info = std::make_unique( scan_windirs, "windirs" ); + sp.info.set_name("windirs" ); sp.info->author = "Simson Garfinkel and Maxim Suhanov"; sp.info->description = "Scans Microsoft directory structures"; diff --git a/src/scan_winlnk.cpp b/src/scan_winlnk.cpp index 01b4f8ed..6f6abfd6 100644 --- a/src/scan_winlnk.cpp +++ b/src/scan_winlnk.cpp @@ -301,7 +301,7 @@ void scan_winlnk(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_winlnk,"winlnk"); + sp.info.set_name("winlnk"); sp.info->author = "Simson Garfinkel"; sp.info->description = "Search for Windows LNK files"; sp.info->feature_defs.push_back( feature_recorder_def("winlnk")); diff --git a/src/scan_winpe.cpp b/src/scan_winpe.cpp index a1b98753..dd8197af 100644 --- a/src/scan_winpe.cpp +++ b/src/scan_winpe.cpp @@ -1015,7 +1015,7 @@ void scan_winpe (scanner_params &sp) std::string xml; if (sp.phase == scanner_params::PHASE_INIT){ - sp.info = std::make_unique( scan_winpe, "winpe" ); + sp.info.set_name("winpe" ); sp.info->description = "Scan for Windows PE headers"; sp.info->scanner_version = "1.1.0"; sp.info->feature_defs.push_back( feature_recorder_def("winpe")); diff --git a/src/scan_winprefetch.cpp b/src/scan_winprefetch.cpp index 1975a92c..227285ef 100644 --- a/src/scan_winprefetch.cpp +++ b/src/scan_winprefetch.cpp @@ -250,7 +250,7 @@ void scan_winprefetch(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_winprefetch,"winprefetch"); + sp.info.set_name("winprefetch"); sp.info->name = "winprefetch"; sp.info->author = "Bruce Allen"; sp.info->description = "Search for Windows Prefetch files"; diff --git a/src/scan_wordlist.cpp b/src/scan_wordlist.cpp index ffa96c09..cb23a92b 100644 --- a/src/scan_wordlist.cpp +++ b/src/scan_wordlist.cpp @@ -201,7 
+201,7 @@ void scan_wordlist(scanner_params &sp) if (sp.phase==scanner_params::PHASE_INIT){ sp.check_version(); - sp.info = std::make_unique( scan_wordlist, "wordlist" ); + sp.info.set_name("wordlist" ); sp.info->scanner_flags.default_enabled = false; // = scanner_info::SCANNER_DISABLED; //sp.get_config("word_min",&word_min,"Minimum word size"); //sp.get_config("word_max",&word_max,"Maximum word size"); diff --git a/src/scan_xor.cpp b/src/scan_xor.cpp index 82a77ff8..e5ffae3a 100644 --- a/src/scan_xor.cpp +++ b/src/scan_xor.cpp @@ -13,7 +13,7 @@ void scan_xor(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT) { - sp.info = std::make_unique( scan_xor, "xor" ); + sp.info.set_name("xor" ); sp.info->author = "Michael Shick"; sp.info->description = "optimistic XOR deobfuscator"; sp.info->scanner_flags.default_enabled = false; diff --git a/src/scan_zip.cpp b/src/scan_zip.cpp index d7afb84b..8fcaa031 100644 --- a/src/scan_zip.cpp +++ b/src/scan_zip.cpp @@ -159,7 +159,7 @@ void scan_zip(scanner_params &sp) if (sp.phase==scanner_params::PHASE_INIT){ feature_recorder_def::flags_t xml; xml.xml = true; - sp.info = std::make_unique( scan_zip, "zip" ); + sp.info.set_name("zip" ); sp.info->scanner_flags.recurse = true; sp.info->feature_defs.push_back( feature_recorder_def(ZIP_RECORDER_NAME, xml )); sp.get_config("zip_min_uncompr_size",&zip_min_uncompr_size,"Minimum size of a ZIP uncompressed object"); From 0530320f9b11c8c1f0fb1e8d1acc1b3746d05044 Mon Sep 17 00:00:00 2001 From: "Simson L. 
Garfinkel" Date: Sun, 5 Sep 2021 10:48:59 -0400 Subject: [PATCH 13/89] more cleanup of options; distinct character count for sbuf; scan_aes now disables AES192 by default --- src/be13_api | 2 +- src/main.cpp | 15 +++++---- src/sbuf_flex_scanner.h | 1 + src/scan_accts.flex | 8 ++--- src/scan_aes.cpp | 68 ++++++++++++---------------------------- src/scan_base16.flex | 2 +- src/scan_base64.cpp | 2 +- src/scan_elf.cpp | 2 +- src/scan_email.flex | 2 +- src/scan_evtx.cpp | 4 +-- src/scan_exif.cpp | 7 +++-- src/scan_facebook.cpp | 6 ++-- src/scan_find.cpp | 2 +- src/scan_gps.flex | 2 +- src/scan_gzip.cpp | 4 +-- src/scan_hiberfile.cpp | 2 +- src/scan_httplogs.cpp | 2 +- src/scan_json.cpp | 6 ++-- src/scan_kml.cpp | 2 +- src/scan_msxml.cpp | 2 +- src/scan_net.cpp | 4 +-- src/scan_ntfsindx.cpp | 2 +- src/scan_ntfslogfile.cpp | 2 +- src/scan_ntfsmft.cpp | 2 +- src/scan_ntfsusn.cpp | 2 +- src/scan_outlook.cpp | 2 +- src/scan_pdf.cpp | 6 ++-- src/scan_rar.cpp | 6 ++-- src/scan_sqlite.cpp | 2 +- src/scan_utmp.cpp | 2 +- src/scan_vcard.cpp | 2 +- src/scan_windirs.cpp | 16 +++++----- src/scan_winlnk.cpp | 2 +- src/scan_winpe.cpp | 2 +- src/scan_winprefetch.cpp | 2 +- src/scan_wordlist.cpp | 30 ++++++++++++------ src/scan_wordlist.h | 10 ++++-- src/scan_xor.cpp | 4 +-- src/scan_zip.cpp | 8 ++--- src/stand.cpp | 1 - src/test_be.cpp | 3 +- 41 files changed, 119 insertions(+), 132 deletions(-) diff --git a/src/be13_api b/src/be13_api index b30b3add..233b7d55 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit b30b3add2c426b16edad2d19ec869e34a1c1a8d4 +Subproject commit 233b7d558b8d82c41155a608a5a5bcef29d40d95 diff --git a/src/main.cpp b/src/main.cpp index ce99faac..13cae4b1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -339,12 +339,11 @@ int main(int argc,char **argv) int opt_h = 0; int opt_H = 0; std::string opt_sampling_params; - bool opt_write_feature_files = true; - bool opt_write_sqlite3 = false; + //bool opt_write_feature_files = true; + //bool 
opt_write_sqlite3 = false; /* Startup */ setvbuf(stdout,0,_IONBF,0); // don't buffer stdout - //std::string command_line = dfxml_writer::make_command_line(argc,argv); std::vector scanner_dirs; // where to look for scanners /* Add the default plugin_path */ @@ -473,12 +472,12 @@ int main(int argc,char **argv) /* Create a configuration that will be used to initialize the scanners */ /* Make individual configuration options appear on the command line interface. */ #if 0 - sc.get_config("debug_histogram_malloc_fail_frequency",&AtomicUnicodeHistogram::debug_histogram_malloc_fail_frequency, + sc.get_global_config("debug_histogram_malloc_fail_frequency",&AtomicUnicodeHistogram::debug_histogram_malloc_fail_frequency, "Set >0 to make histogram maker fail with memory allocations"); - sc.get_config("hash_alg",&be_hash_name,"Specifies hash algorithm to be used for all hash calculations"); - sc.get_config("write_feature_files",&opt_write_feature_files,"Write features to flat files"); - sc.get_config("write_feature_sqlite3",&opt_write_sqlite3,"Write feature files to report.sqlite3"); - sc.get_config("report_read_errors",&cfg.opt_report_read_errors,"Report read errors"); + sc.get_global_config("hash_alg",&be_hash_name,"Specifies hash algorithm to be used for all hash calculations"); + sc.get_global_config("write_feature_files",&opt_write_feature_files,"Write features to flat files"); + sc.get_global_config("write_feature_sqlite3",&opt_write_sqlite3,"Write feature files to report.sqlite3"); + sc.get_global_config("report_read_errors",&cfg.opt_report_read_errors,"Report read errors"); #endif /* Load all the scanners and enable the ones we care about */ diff --git a/src/sbuf_flex_scanner.h b/src/sbuf_flex_scanner.h index f5f5e454..0512120a 100644 --- a/src/sbuf_flex_scanner.h +++ b/src/sbuf_flex_scanner.h @@ -19,6 +19,7 @@ #define YY_SKIP_YYWRAP /* Never wrap */ #define YY_NO_INPUT +#include "config.h" #include "be13_api/sbuf.h" #include "be13_api/scanner_params.h" #include 
"be13_api/scanner_set.h" diff --git a/src/scan_accts.flex b/src/scan_accts.flex index 66a22ccc..96c147c8 100644 --- a/src/scan_accts.flex +++ b/src/scan_accts.flex @@ -14,7 +14,7 @@ * http://en.wikipedia.org/wiki/List_of_Bank_Identification_Numbers */ -size_t min_phone_digits=7; +uint8_t min_phone_digits=7; static int ssn_mode=0; class accts_scanner : public sbuf_scanner { @@ -383,7 +383,7 @@ void scan_accts( struct scanner_params &sp ) if(sp.phase==scanner_params::PHASE_INIT){ //assert(sp.info->si_version==scanner_info::CURRENT_SI_VERSION); build_unbase58(); - sp.info = std::make_unique(scan_accts,"accts"); + sp.info->set_name("accts"); sp.info->author = "Simson L. Garfinkel, modified by Tim Walsh"; sp.info->description = "scans for CCNs, track 2, PII (including SSN and Canadian SIN), and phone #s"; sp.info->scanner_version= "1.1"; @@ -406,8 +406,8 @@ void scan_accts( struct scanner_params &sp ) "", "teamviewer", flag_numeric)); /* This modifies the scanner_config by adding informaton about the help strings, so scanner_config can't be const */ - sp.get_config("ssn_mode", &ssn_mode,"0=Normal; 1=No `SSN' required; 2=No dashes required"); - sp.get_config("min_phone_digits",&min_phone_digits,"Min. digits required in a phone"); + sp.get_scanner_config("ssn_mode", &ssn_mode,"0=Normal; 1=No `SSN' required; 2=No dashes required"); + sp.get_scanner_config("min_phone_digits",&min_phone_digits,"Min. 
digits required in a phone"); //scan_ccns2_debug = sp.ss.sc.debug; // get debug value return; } diff --git a/src/scan_aes.cpp b/src/scan_aes.cpp index c47b9c34..02067d65 100644 --- a/src/scan_aes.cpp +++ b/src/scan_aes.cpp @@ -32,12 +32,11 @@ */ +#include "config.h" #include #include #include - -#include "config.h" #include "be13_api/scanner_params.h" #include "be13_api/scanner_set.h" @@ -51,10 +50,6 @@ static const u_int AES128_KEY_SCHEDULE_SIZE = 176; // Size of a 128-bit AES static const u_int AES192_KEY_SCHEDULE_SIZE = 208; // Size of a 128-bit AES key schedule, in bytes static const u_int AES256_KEY_SCHEDULE_SIZE = 240; // Size of a 128-bit AES key schedule, in bytes -static const u_int REQUIRED_DISTINCT_COUNTS = 10; // number of unique bytes to require in AES key -static const u_int WINDOW_SIZE = AES256_KEY_SCHEDULE_SIZE; - - // Determines whether or not data represents valid // AES key schedules. In reality, this is very efficient code for @@ -377,81 +372,58 @@ int scan_aes_128 = 1; int scan_aes_192 = 0; int scan_aes_256 = 1; + + extern "C" void scan_aes(struct scanner_params &sp) { if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("aes"); + sp.info->set_name("aes"); sp.info->author = "Sam Trenholme, Jesse Kornblum and Simson Garfinkel"; sp.info->description = "Search for AES key schedules"; sp.info->scanner_version = "1.1"; sp.info->feature_defs.push_back( feature_recorder_def("aes_keys")); - sp.info->min_sbuf_size = WINDOW_SIZE; - sp.get_config("scan_aes_128", &scan_aes_128, "Scan for 128-bit AES keys; 0=No, 1=Yes"); - sp.get_config("scan_aes_192", &scan_aes_192, "Scan for 192-bit AES keys; 0=No, 1=Yes"); - sp.get_config("scan_aes_256", &scan_aes_256, "Scan for 256-bit AES keys; 0=No, 1=Yes"); + sp.info->min_sbuf_size = AES128_KEY_SIZE; + sp.get_scanner_config("scan_aes_128", &scan_aes_128, "Scan for 128-bit AES keys; 0=No, 1=Yes"); + sp.get_scanner_config("scan_aes_192", &scan_aes_192, "Scan for 192-bit AES keys; 0=No, 1=Yes"); + 
sp.get_scanner_config("scan_aes_256", &scan_aes_256, "Scan for 256-bit AES keys; 0=No, 1=Yes"); rcon_setup(); sbox_setup(); return; } - /* We don't need to check for phase 2 of if sbuf isn't big enough to hold a KEY_SCHEDULE - */ - if(sp.phase==scanner_params::PHASE_SCAN){ auto &aes_recorder = sp.named_feature_recorder("aes_keys"); /* Simple mod: Keep a rolling window of the entropy and don't * scan if we see fewer than 10 distinct characters in window. This will - * eliminate checks on many kinds of bulk data that are unlikely to have a key in the block. + * eliminate checks on many kinds of bulk data that are unlikely to have a key in the block. + * + * Note that we now compute and re-compute the histogram many times, rather than just having a sliding window. + * This is less efficient than before, but the code is simpler, and now the code is correctly computing the histogram + * for the 128, 192 and 256-byte cases. */ - uint32_t counts[256]; - memset(counts,0,sizeof(counts)); - uint32_t distinct_counts = 0; // how many distinct counts do we have? 
- - /* Initialize the sliding window */ - for (size_t pos = 0; pos < WINDOW_SIZE ; pos++) { - const uint8_t val = (*sp.sbuf)[pos]; - counts[val]++; - if (counts[val] == 1) { - distinct_counts++; - } - } - for (size_t pos = 0 ; pos < sp.sbuf->bufsize-WINDOW_SIZE && pos < sp.sbuf->pagesize; pos++){ - /* add value at end of 128 bits to sliding window */ - { - const uint8_t val = (*sp.sbuf)[pos+AES256_KEY_SCHEDULE_SIZE]; - counts[val]++; - if(counts[val]==1) { // we have one more distinct count - distinct_counts++; - } - } - + for (size_t pos = 0 ; pos < sp.sbuf->bufsize && pos < sp.sbuf->pagesize; pos++){ /* TODO: Remove direct memory access with mediated access */ - if (distinct_counts > REQUIRED_DISTINCT_COUNTS){ - const uint8_t *p2 = sp.sbuf->get_buf() + pos; + const uint8_t *p2 = sp.sbuf->get_buf() + pos; + if (scan_aes_128 && sp.sbuf->distinct_characters( pos, AES128_KEY_SIZE) > AES128_KEY_SIZE/4){ if (valid_aes128_schedule(p2)) { std::string key = key_to_string(p2, AES128_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES128")); } + } + if (scan_aes_192 && sp.sbuf->distinct_characters( pos, AES192_KEY_SIZE) > AES192_KEY_SIZE/4){ if (valid_aes192_schedule(p2)) { std::string key = key_to_string(p2, AES192_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES192")); } + } + if (scan_aes_256 && sp.sbuf->distinct_characters( pos, AES256_KEY_SIZE) > AES256_KEY_SIZE/4){ if (valid_aes256_schedule(p2)) { std::string key = key_to_string(p2, AES256_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES256")); } } - /* remove current byte being analyzed */ - if(pos>WINDOW_SIZE){ - const uint8_t val = (*sp.sbuf)[pos]; - counts[val]--; - if(counts[val]==0){ - distinct_counts--; // we have one fewer - } - assert(distinct_counts>0); // we must have at least one distinct count... 
- } } } } diff --git a/src/scan_base16.flex b/src/scan_base16.flex index 388b84a6..33de069b 100644 --- a/src/scan_base16.flex +++ b/src/scan_base16.flex @@ -122,7 +122,7 @@ void scan_base16(struct scanner_params &sp) static const u_char *ignore_string = (const u_char *)"\r\n \t"; sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_base16,"base16"); + sp.info->set_name("base16"); sp.info->scanner_flags.recurse = true; sp.info->scanner_flags.default_enabled = false; sp.info->author = "Simson L. Garfinkel"; diff --git a/src/scan_base64.cpp b/src/scan_base64.cpp index 3a4ce071..2e2ebd08 100644 --- a/src/scan_base64.cpp +++ b/src/scan_base64.cpp @@ -143,7 +143,7 @@ void scan_base64(scanner_params &sp) sp.check_version(); if ( sp.phase == scanner_params::PHASE_INIT){ - sp.info.set_name("base64"); + sp.info->set_name("base64"); sp.info->author = "Simson L. Garfinkel"; sp.info->description = "scans for Base64-encoded data"; sp.info->scanner_version= "1.1"; diff --git a/src/scan_elf.cpp b/src/scan_elf.cpp index 07dd6c63..033b0b91 100644 --- a/src/scan_elf.cpp +++ b/src/scan_elf.cpp @@ -796,7 +796,7 @@ void scan_elf (scanner_params &sp) sp.check_version(); if (sp.phase == scanner_params::PHASE_INIT){ - sp.info.set_name("elf"); + sp.info->set_name("elf"); sp.info->author = "Alex Eubanks"; sp.info->scanner_version = "1.1"; sp.info->feature_defs.push_back( feature_recorder_def("elf") ); diff --git a/src/scan_email.flex b/src/scan_email.flex index 19d61928..77a7d84d 100644 --- a/src/scan_email.flex +++ b/src/scan_email.flex @@ -345,7 +345,7 @@ void scan_email(struct scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_email,"email"); + sp.info->set_name("email"); sp.info->author = "Simson L. 
Garfinkel"; sp.info->description = "Scans for email addresses, domains, URLs, RFC822 headers, etc."; sp.info->scanner_version = "1.1"; diff --git a/src/scan_evtx.cpp b/src/scan_evtx.cpp index f15c4672..fdc1c07d 100644 --- a/src/scan_evtx.cpp +++ b/src/scan_evtx.cpp @@ -5,6 +5,7 @@ * Teru Yamazaki(@4n6ist) - https://github.com/4n6ist/bulk_extractor-rec **/ +#include "config.h" #include #include #include @@ -13,7 +14,6 @@ #include #include -#include "config.h" #include "utf8.h" #include "be13_api/scanner_params.h" @@ -145,7 +145,7 @@ void scan_evtx(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("evtx"); + sp.info->set_name("evtx"); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for EVTX Chunks and generates valid EVTX file"; sp.info->scanner_version = "1.0"; diff --git a/src/scan_exif.cpp b/src/scan_exif.cpp index df6031a6..3007a1be 100644 --- a/src/scan_exif.cpp +++ b/src/scan_exif.cpp @@ -7,6 +7,8 @@ * 2011-dec-12 bda - Ported from file scan_exif.cpp. 
*/ +#include "config.h" + #include #include #include @@ -15,7 +17,6 @@ #include #include -#include "config.h" #include "scan_exif.h" #include "be13_api/scanner_params.h" @@ -506,7 +507,7 @@ void scan_exif (scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("exif"); + sp.info->set_name("exif"); sp.info->author = "Bruce Allen"; sp.info->scanner_version = "1.1"; sp.info->description = "Search for EXIF sections in JPEG files"; @@ -516,7 +517,7 @@ void scan_exif (scanner_params &sp) sp.info->feature_defs.push_back( feature_recorder_def("exif", xml_flag)); sp.info->feature_defs.push_back( feature_recorder_def("gps")); sp.info->feature_defs.push_back( feature_recorder_def("jpeg_carved")); - sp.get_config("exif_debug",&exif_debug,"debug exif decoder"); + sp.get_scanner_config("exif_debug",&exif_debug,"debug exif decoder"); return; } if (sp.phase==scanner_params::PHASE_INIT2) { diff --git a/src/scan_facebook.cpp b/src/scan_facebook.cpp index f86b68f6..083c91c9 100644 --- a/src/scan_facebook.cpp +++ b/src/scan_facebook.cpp @@ -1,12 +1,12 @@ +#include "config.h" + #include #include #include #include #include -//#include #include -#include "config.h" #include "be13_api/scanner_params.h" #include "be13_api/scanner_set.h" @@ -52,7 +52,7 @@ void scan_facebook(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT) { - sp.info.set_name("facebook"); + sp.info->set_name("facebook"); sp.info->author = ""; sp.info->description = "Searches for facebook html and json tags"; sp.info->scanner_version = "2.0"; diff --git a/src/scan_find.cpp b/src/scan_find.cpp index aa93b34a..ff03911c 100644 --- a/src/scan_find.cpp +++ b/src/scan_find.cpp @@ -43,7 +43,7 @@ void scan_find(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT) { - sp.info.set_name("find"); + sp.info->set_name("find"); sp.info->name = "find"; sp.info->author = "Simson Garfinkel"; sp.info->description = "Simple search 
for patterns"; diff --git a/src/scan_gps.flex b/src/scan_gps.flex index 047c6df8..6d8f2434 100644 --- a/src/scan_gps.flex +++ b/src/scan_gps.flex @@ -166,7 +166,7 @@ void scan_gps(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info = std::make_unique(scan_gps,"gps"); + sp.info->set_name("gps"); sp.info->author = "Simson L. Garfinkel"; sp.info->description = "Garmin Trackpt XML info"; sp.info->scanner_version= "1.1"; diff --git a/src/scan_gzip.cpp b/src/scan_gzip.cpp index 10ad5beb..212ff967 100644 --- a/src/scan_gzip.cpp +++ b/src/scan_gzip.cpp @@ -16,12 +16,12 @@ void scan_gzip(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("gzip" ); + sp.info->set_name("gzip" ); sp.info->author = "Simson Garfinkel"; sp.info->description = "Searches for GZIP-compressed data"; sp.info->scanner_version= "1.1"; sp.info->scanner_flags.recurse = true; - sp.get_config("gzip_max_uncompr_size",&gzip_max_uncompr_size,"maximum size for decompressing GZIP objects"); + sp.get_scanner_config("gzip_max_uncompr_size",&gzip_max_uncompr_size,"maximum size for decompressing GZIP objects"); return ; /* no features */ } if (sp.phase==scanner_params::PHASE_SCAN){ diff --git a/src/scan_hiberfile.cpp b/src/scan_hiberfile.cpp index 7bd4c2a2..af088b73 100644 --- a/src/scan_hiberfile.cpp +++ b/src/scan_hiberfile.cpp @@ -38,7 +38,7 @@ void scan_hiberfile(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("hiberfile" ); + sp.info->set_name("hiberfile" ); sp.info->author = "Simson Garfinkel and Matthieu Suiche"; sp.info->description = "Scans for Microsoft-XPress compressed data"; sp.info->scanner_version= "1.0"; diff --git a/src/scan_httplogs.cpp b/src/scan_httplogs.cpp index 22bef0f1..415c13fc 100644 --- a/src/scan_httplogs.cpp +++ b/src/scan_httplogs.cpp @@ -61,7 +61,7 @@ void scan_httplogs(scanner_params &sp) { sp.check_version(); 
if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("httplogs"); + sp.info->set_name("httplogs"); sp.info->author = "Maxim Suhanov"; sp.info->description = "Extract various web server access logs"; sp.info->feature_defs.push_back( feature_recorder_def("httplogs")); diff --git a/src/scan_json.cpp b/src/scan_json.cpp index 1692c450..1aa9a477 100644 --- a/src/scan_json.cpp +++ b/src/scan_json.cpp @@ -28,9 +28,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include - #include "config.h" + +#include #include "be13_api/scanner_params.h" #include "be13_api/scanner_set.h" @@ -436,7 +436,7 @@ void scan_json(struct scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("json"); + sp.info->set_name("json"); sp.info->author = "Simson Garfinkel"; sp.info->description = "Scans for JSON-encoded data"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_kml.cpp b/src/scan_kml.cpp index 54bf967a..5f6f809d 100644 --- a/src/scan_kml.cpp +++ b/src/scan_kml.cpp @@ -28,7 +28,7 @@ void scan_kml(scanner_params &sp) std::string myString; sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("kml"); + sp.info->set_name("kml"); sp.info->author = "Simson Garfinkel "; sp.info->description = "Scans for KML files"; sp.info->scanner_version= "1.0"; diff --git a/src/scan_msxml.cpp b/src/scan_msxml.cpp index ff87d2cf..a4f837c0 100644 --- a/src/scan_msxml.cpp +++ b/src/scan_msxml.cpp @@ -56,7 +56,7 @@ void scan_msxml(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("Simson Garfinkel"; + sp.info->set_name("Simson Garfinkel"); sp.info->description = "Extracts text from Microsoft XML files"; sp.info->scanner_version = "1.0"; sp.info->scanner_flags.recurse = true; diff --git a/src/scan_net.cpp b/src/scan_net.cpp index 084e9452..d51d898f 100644 --- a/src/scan_net.cpp +++ b/src/scan_net.cpp @@ -1008,10 +1008,10 @@ 
void scan_net(scanner_params &sp) sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.get_config("carve_net_memory",&opt_carve_net_memory,"Carve network memory structures"); + sp.get_scanner_config("carve_net_memory",&opt_carve_net_memory,"Carve network memory structures"); assert(sizeof(struct be13::ip4)==20); // we've had problems on some systems - sp.info.set_name("net"); + sp.info->set_name("net"); sp.info->author = "Simson Garfinkel and Rob Beverly"; sp.info->description = "Scans for IP packets"; sp.info->scanner_version= "1.0"; diff --git a/src/scan_ntfsindx.cpp b/src/scan_ntfsindx.cpp index 9ffa50ae..2350273d 100644 --- a/src/scan_ntfsindx.cpp +++ b/src/scan_ntfsindx.cpp @@ -77,7 +77,7 @@ void scan_ntfsindx(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("ntfsindx"); + sp.info->set_name("ntfsindx"); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for NTFS $INDEX_ALLOCATION INDX record"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_ntfslogfile.cpp b/src/scan_ntfslogfile.cpp index 2830402d..c5bd2957 100644 --- a/src/scan_ntfslogfile.cpp +++ b/src/scan_ntfslogfile.cpp @@ -79,7 +79,7 @@ void scan_ntfslogfile(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("ntfslogfile"); + sp.info->set_name("ntfslogfile"); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for NTFS $LogFile RCRD record"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_ntfsmft.cpp b/src/scan_ntfsmft.cpp index 16c03dfa..5e223b7f 100644 --- a/src/scan_ntfsmft.cpp +++ b/src/scan_ntfsmft.cpp @@ -61,7 +61,7 @@ void scan_ntfsmft(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("ntfsmft"); + sp.info->set_name("ntfsmft"); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for NTFS MFT record"; sp.info->scanner_version = "1.0"; diff --git a/src/scan_ntfsusn.cpp 
b/src/scan_ntfsusn.cpp index 5cd00451..1d39aac0 100644 --- a/src/scan_ntfsusn.cpp +++ b/src/scan_ntfsusn.cpp @@ -72,7 +72,7 @@ void scan_ntfsusn(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("ntfsusn"); + sp.info->set_name("ntfsusn"); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for USN_RECORD v2/v4 record"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_outlook.cpp b/src/scan_outlook.cpp index 604b2ba0..257ceb94 100644 --- a/src/scan_outlook.cpp +++ b/src/scan_outlook.cpp @@ -61,7 +61,7 @@ void scan_outlook(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT) { - sp.info.set_name("outlook" ); + sp.info->set_name("outlook" ); sp.info->scanner_flags.default_enabled = false; sp.info->scanner_flags.depth0_only = true; // only run depth 0 sp.info->scanner_flags.recurse = true; diff --git a/src/scan_pdf.cpp b/src/scan_pdf.cpp index b82c0468..e487ed2b 100644 --- a/src/scan_pdf.cpp +++ b/src/scan_pdf.cpp @@ -223,13 +223,13 @@ void scan_pdf(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("pdf" ); + sp.info->set_name("pdf" ); sp.info->author = "Simson Garfinkel"; sp.info->description = "Extracts text from PDF files"; sp.info->scanner_version= "1.0"; sp.info->scanner_flags.recurse = true; - sp.get_config("pdf_dump_hex" , &pdf_extractor::pdf_dump_hex, "Dump the contents of PDF buffers as hex"); - sp.get_config("pdf_dump_text", &pdf_extractor::pdf_dump_text, "Dump the contents of PDF buffers showing extracted text"); + sp.get_scanner_config("pdf_dump_hex" , &pdf_extractor::pdf_dump_hex, "Dump the contents of PDF buffers as hex"); + sp.get_scanner_config("pdf_dump_text", &pdf_extractor::pdf_dump_text, "Dump the contents of PDF buffers showing extracted text"); if (getenv("DEBUG_PDF_DUMP_HEX")) pdf_extractor::pdf_dump_hex=true; if (getenv("DEBUG_PDF_DUMP_TEXT")) pdf_extractor::pdf_dump_text=true; return; /* No 
features recorded */ diff --git a/src/scan_rar.cpp b/src/scan_rar.cpp index bfac0d5c..2f7cb88d 100644 --- a/src/scan_rar.cpp +++ b/src/scan_rar.cpp @@ -587,7 +587,7 @@ void scan_rar(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("rar" ); + sp.info->set_name("rar" ); sp.info->author = "Michael Shick"; sp.info->scanner_version = "1.1"; sp.info->scanner_flags.recurse = true; @@ -603,8 +603,8 @@ void scan_rar(scanner_params &sp) auto unrar_def = feature_recorder_def(UNRAR_RECORDER_NAME, flags); unrar_def.default_carve_mode = feature_recorder_def::carve_mode_t::CARVE_ENCODED; sp.info->feature_defs.push_back( unrar_def ); - sp.get_config("rar_find_components",&record_components,"Search for RAR components"); - sp.get_config("rar_find_volumes",&record_volumes,"Search for RAR volumes"); + sp.get_scanner_config("rar_find_components",&record_components,"Search for RAR components"); + sp.get_scanner_config("rar_find_volumes",&record_volumes,"Search for RAR volumes"); #else sp.info->description = "(disabled in configure)"; sp.info->flags.default_enabled = false; diff --git a/src/scan_sqlite.cpp b/src/scan_sqlite.cpp index 94a4461d..cd7d0515 100644 --- a/src/scan_sqlite.cpp +++ b/src/scan_sqlite.cpp @@ -38,7 +38,7 @@ void scan_sqlite(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("sqlite" ); + sp.info->set_name("sqlite" ); sp.info->author = "Simson Garfinkel"; sp.info->description = "Scans for SQLITE3 data"; sp.info->scanner_version = "1.1"; diff --git a/src/scan_utmp.cpp b/src/scan_utmp.cpp index 0f5f4ffc..bd7201b8 100644 --- a/src/scan_utmp.cpp +++ b/src/scan_utmp.cpp @@ -81,7 +81,7 @@ void scan_utmp(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("utmp" ); + sp.info->set_name("utmp" ); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for utmp record"; sp.info->scanner_version = "1.1"; diff --git 
a/src/scan_vcard.cpp b/src/scan_vcard.cpp index 6b3f8daf..00e3874b 100644 --- a/src/scan_vcard.cpp +++ b/src/scan_vcard.cpp @@ -79,7 +79,7 @@ void scan_vcard(scanner_params &sp) { sp.check_version(); if(sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("vcard"); + sp.info->set_name("vcard"); sp.info->author = "Simson Garfinkel and Tony Melaragno"; sp.info->description = "Scans for VCARD data"; sp.info->scanner_version= "1.1"; diff --git a/src/scan_windirs.cpp b/src/scan_windirs.cpp index 17105b04..046a7b77 100644 --- a/src/scan_windirs.cpp +++ b/src/scan_windirs.cpp @@ -487,7 +487,7 @@ void scan_windirs(scanner_params &sp) gmtime_r(&t,&now); opt_last_year = now.tm_year + 1900 + 5; // allow up to 5 years in the future - sp.info.set_name("windirs" ); + sp.info->set_name("windirs" ); sp.info->author = "Simson Garfinkel and Maxim Suhanov"; sp.info->description = "Scans Microsoft directory structures"; @@ -499,14 +499,14 @@ void scan_windirs(scanner_params &sp) sp.info->scanner_version= "1.0"; sp.info->feature_defs.push_back( feature_recorder_def("windirs")); - sp.get_config("opt_weird_file_size",&opt_weird_file_size,"Threshold for FAT32 scanner"); - sp.get_config("opt_weird_file_size2",&opt_weird_file_size2,"Threshold for FAT32 scanner"); - sp.get_config("opt_weird_cluster_count",&opt_weird_cluster_count,"Threshold for FAT32 scanner"); - sp.get_config("opt_weird_cluster_count2",&opt_weird_cluster_count2,"Threshold for FAT32 scanner"); - sp.get_config("opt_max_bits_in_attrib",&opt_max_bits_in_attrib, + sp.get_scanner_config("opt_weird_file_size",&opt_weird_file_size,"Threshold for FAT32 scanner"); + sp.get_scanner_config("opt_weird_file_size2",&opt_weird_file_size2,"Threshold for FAT32 scanner"); + sp.get_scanner_config("opt_weird_cluster_count",&opt_weird_cluster_count,"Threshold for FAT32 scanner"); + sp.get_scanner_config("opt_weird_cluster_count2",&opt_weird_cluster_count2,"Threshold for FAT32 scanner"); + 
sp.get_scanner_config("opt_max_bits_in_attrib",&opt_max_bits_in_attrib, "Ignore FAT32 entries with more attributes set than this"); - sp.get_config("opt_max_weird_count",&opt_max_weird_count,"Number of 'weird' counts to ignore a FAT32 entry"); - sp.get_config("opt_last_year",&opt_last_year,"Ignore FAT32 entries with a later year than this"); + sp.get_scanner_config("opt_max_weird_count",&opt_max_weird_count,"Number of 'weird' counts to ignore a FAT32 entry"); + sp.get_scanner_config("opt_last_year",&opt_last_year,"Ignore FAT32 entries with a later year than this"); //debug = sp.info->config->debug; return; diff --git a/src/scan_winlnk.cpp b/src/scan_winlnk.cpp index 6f6abfd6..a236d395 100644 --- a/src/scan_winlnk.cpp +++ b/src/scan_winlnk.cpp @@ -301,7 +301,7 @@ void scan_winlnk(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("winlnk"); + sp.info->set_name("winlnk"); sp.info->author = "Simson Garfinkel"; sp.info->description = "Search for Windows LNK files"; sp.info->feature_defs.push_back( feature_recorder_def("winlnk")); diff --git a/src/scan_winpe.cpp b/src/scan_winpe.cpp index dd8197af..9767a051 100644 --- a/src/scan_winpe.cpp +++ b/src/scan_winpe.cpp @@ -1015,7 +1015,7 @@ void scan_winpe (scanner_params &sp) std::string xml; if (sp.phase == scanner_params::PHASE_INIT){ - sp.info.set_name("winpe" ); + sp.info->set_name("winpe" ); sp.info->description = "Scan for Windows PE headers"; sp.info->scanner_version = "1.1.0"; sp.info->feature_defs.push_back( feature_recorder_def("winpe")); diff --git a/src/scan_winprefetch.cpp b/src/scan_winprefetch.cpp index 227285ef..3c7164b3 100644 --- a/src/scan_winprefetch.cpp +++ b/src/scan_winprefetch.cpp @@ -250,7 +250,7 @@ void scan_winprefetch(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - sp.info.set_name("winprefetch"); + sp.info->set_name("winprefetch"); sp.info->name = "winprefetch"; sp.info->author = "Bruce Allen"; 
sp.info->description = "Search for Windows Prefetch files"; diff --git a/src/scan_wordlist.cpp b/src/scan_wordlist.cpp index cb23a92b..ae50bcb2 100644 --- a/src/scan_wordlist.cpp +++ b/src/scan_wordlist.cpp @@ -165,8 +165,8 @@ void Scan_Wordlist::shutdown(scanner_params &sp) } /* Read all of the words and uniquify them */ + uint64_t outfilesize = 0; while(!f2.eof()){ - /* Create the first file (of2==0) or roll-over if outfilesize>100M */ std::string line; getline(f2,line); if (line[0]=='#') continue; // ignore comments @@ -184,6 +184,12 @@ void Scan_Wordlist::shutdown(scanner_params &sp) std::cerr << er.what() << std::endl; std::cerr << "scan_wordlist:bad_alloc: Dumping current dataset; will then restart dedup." << std::endl; dump_seen_wordlist(); + outfilesize = 0; + } + + if (outfilesize > max_output_file_size ) { + dump_seen_wordlist(); + outfilesize = 0; } } dump_seen_wordlist(); @@ -200,15 +206,18 @@ void scan_wordlist(scanner_params &sp) bool wordlist_use_flatfiles = true; if (sp.phase==scanner_params::PHASE_INIT){ + uint32_t word_min = Scan_Wordlist::WORD_MIN_DEFAULT; + uint32_t word_max = Scan_Wordlist::WORD_MAX_DEFAULT; + uint64_t max_output_file_size = Scan_Wordlist::MAX_OUTPUT_FILE_SIZE; sp.check_version(); - sp.info.set_name("wordlist" ); + sp.info->set_name("wordlist" ); sp.info->scanner_flags.default_enabled = false; // = scanner_info::SCANNER_DISABLED; - //sp.get_config("word_min",&word_min,"Minimum word size"); - //sp.get_config("word_max",&word_max,"Maximum word size"); - //sp.get_config("max_word_outfile_size",&max_word_outfile_size, "Maximum size of the words output file"); - sp.get_config("wordlist_use_flatfiles",&wordlist_use_flatfiles,"Use flatfiles for wordlist"); - //sp.get_config("wordlist_use_sql",&wordlist_use_sql,"Use SQL DB for wordlist"); - sp.get_config("strings",&wordlist_strings,"Scan for strings instead of words"); + sp.get_scanner_config("word_min",&word_min,"Minimum word size"); + 
sp.get_scanner_config("word_max",&word_max,"Maximum word size"); + sp.get_scanner_config("max_output_file_size",&max_output_file_size, "Maximum size of the words output file"); + //sp.get_scanner_config("wordlist_use_flatfiles",&wordlist_use_flatfiles,"Use flatfiles for wordlist"); + //sp.get_scanner_config("wordlist_use_sql",&wordlist_use_sql,"Use SQL DB for wordlist"); + sp.get_scanner_config("strings",&wordlist_strings,"Scan for strings instead of words"); if (wordlist_use_flatfiles){ auto def = feature_recorder_def(Scan_Wordlist::WORDLIST); @@ -217,13 +226,14 @@ void scan_wordlist(scanner_params &sp) def.flags.no_alertlist = true; sp.info->feature_defs.push_back( def ); } -#if 0 if (word_min > word_max){ std::cerr << "ERROR: word_min (" << word_min << ") > word_max (" << word_max << ")\n"; throw std::runtime_error("word_min > word_max"); } -#endif wordlist = new Scan_Wordlist(sp, wordlist_strings); + wordlist->word_min = word_min; + wordlist->word_max = word_max; + wordlist->max_output_file_size = max_output_file_size; #if 0 #ifdef USE_SQLITE3 diff --git a/src/scan_wordlist.h b/src/scan_wordlist.h index 28836be6..3f207965 100644 --- a/src/scan_wordlist.h +++ b/src/scan_wordlist.h @@ -42,10 +42,14 @@ public:; std::filesystem::path flat_wordlist_path {}; // feature_recorder *flat_wordlist = nullptr; + static const inline uint32_t WORD_MIN_DEFAULT = 6; + static const inline uint32_t WORD_MAX_DEFAULT = 16; + static const inline uint64_t MAX_OUTPUT_FILE_SIZE = 100*1000*1000; + bool strings {false}; // report all strings, not words. Do not uniquify - uint32_t word_min {6}; - uint32_t word_max {14}; - uint64_t max_word_outfile_size {100 * 1000 * 1000}; + uint32_t word_min {WORD_MIN_DEFAULT}; + uint32_t word_max {WORD_MAX_DEFAULT}; + uint64_t max_output_file_size {MAX_OUTPUT_FILE_SIZE}; /* wordlist support for SQL. Note that the SQL-based wordlist is * faster than the file-based wordlist. 
diff --git a/src/scan_xor.cpp b/src/scan_xor.cpp index e5ffae3a..a8e9e20c 100644 --- a/src/scan_xor.cpp +++ b/src/scan_xor.cpp @@ -13,13 +13,13 @@ void scan_xor(scanner_params &sp) { sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT) { - sp.info.set_name("xor" ); + sp.info->set_name("xor" ); sp.info->author = "Michael Shick"; sp.info->description = "optimistic XOR deobfuscator"; sp.info->scanner_flags.default_enabled = false; sp.info->scanner_flags.recurse = true; sp.info->scanner_flags.recurse_always = true; - sp.get_config("xor_mask",&xor_mask,"XOR mask value, in decimal"); + sp.get_scanner_config("xor_mask",&xor_mask,"XOR mask value, in decimal"); return; } if (sp.phase==scanner_params::PHASE_SCAN) { diff --git a/src/scan_zip.cpp b/src/scan_zip.cpp index 8fcaa031..6c88a6ca 100644 --- a/src/scan_zip.cpp +++ b/src/scan_zip.cpp @@ -159,12 +159,12 @@ void scan_zip(scanner_params &sp) if (sp.phase==scanner_params::PHASE_INIT){ feature_recorder_def::flags_t xml; xml.xml = true; - sp.info.set_name("zip" ); + sp.info->set_name("zip" ); sp.info->scanner_flags.recurse = true; sp.info->feature_defs.push_back( feature_recorder_def(ZIP_RECORDER_NAME, xml )); - sp.get_config("zip_min_uncompr_size",&zip_min_uncompr_size,"Minimum size of a ZIP uncompressed object"); - sp.get_config("zip_max_uncompr_size",&zip_max_uncompr_size,"Maximum size of a ZIP uncompressed object"); - sp.get_config("zip_name_len_max",&zip_name_len_max,"Maximum name of a ZIP component filename"); + sp.get_scanner_config("zip_min_uncompr_size",&zip_min_uncompr_size,"Minimum size of a ZIP uncompressed object"); + sp.get_scanner_config("zip_max_uncompr_size",&zip_max_uncompr_size,"Maximum size of a ZIP uncompressed object"); + sp.get_scanner_config("zip_name_len_max",&zip_name_len_max,"Maximum name of a ZIP component filename"); return; } diff --git a/src/stand.cpp b/src/stand.cpp index f7ef719c..aec334a0 100644 --- a/src/stand.cpp +++ b/src/stand.cpp @@ -18,7 +18,6 @@ #include #include #include 
-//#include #include #include diff --git a/src/test_be.cpp b/src/test_be.cpp index 22b1fad1..2d21146a 100644 --- a/src/test_be.cpp +++ b/src/test_be.cpp @@ -1,5 +1,7 @@ // https://github.com/catchorg/Catch2/blob/master/docs/tutorial.md#top +#include "config.h" + #include #include #include @@ -12,7 +14,6 @@ #define CATCH_CONFIG_MAIN #define CATCH_CONFIG_CONSOLE_WIDTH 120 -#include "config.h" #include "be13_api/catch.hpp" #ifdef HAVE_MACH_O_DYLD_H From bf0a49ea213160a79e68d42e303e795d0560c8ec Mon Sep 17 00:00:00 2001 From: "Simson L. Garfinkel" Date: Sun, 5 Sep 2021 17:30:44 -0400 Subject: [PATCH 14/89] updated --- src/be13_api | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/be13_api b/src/be13_api index 233b7d55..e18151ca 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit 233b7d558b8d82c41155a608a5a5bcef29d40d95 +Subproject commit e18151ca2b1082baf180702fcc357ef55654dd0c From 2f69e339f4b171b51336c29ac9c2131a2ce800db Mon Sep 17 00:00:00 2001 From: "Simson L. Garfinkel" Date: Sun, 5 Sep 2021 17:39:32 -0400 Subject: [PATCH 15/89] Addresses #234 --- src/pyxpress.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/pyxpress.c b/src/pyxpress.c index 37c7aeb6..a6904a33 100644 --- a/src/pyxpress.c +++ b/src/pyxpress.c @@ -109,24 +109,30 @@ unsigned long Xpress_Decompress(const unsigned char *InputBuffer, | (InputBuffer[InputIndex + 1] << 8) | InputBuffer[InputIndex]); InputIndex += sizeof(uint32_t); - IndicatorBit = 32; + IndicatorBit = 32; + if (InputIndex >= InputSize) break; } IndicatorBit--; - //* check whether the bit specified by IndicatorBit is set or not - //* set in Indicator. For example, if IndicatorBit has value 4 - //* check whether the 4th bit of the value in Indicator is set + //* check whether the bit specified by IndicatorBit is set or not + //* set in Indicator. 
For example, if IndicatorBit has value 4 + //* check whether the 4th bit of the value in Indicator is set if (((Indicator >> IndicatorBit) & 1) == 0) { if(OutputIndex>=OutputSize) return OutputIndex; - OutputBuffer[OutputIndex] = InputBuffer[InputIndex]; + OutputBuffer[OutputIndex] = InputBuffer[InputIndex]; InputIndex += sizeof(UCHAR); OutputIndex += sizeof(UCHAR); + if (InputIndex >= InputSize) break; + if (OutputIndex >= OutputSize) { + OutputIndex -= sizeof(UCHAR); /* undo the addition and return */ + break; + } } else { - if(InputIndex+1 >= InputSize) return OutputIndex; + if (InputIndex+1 >= InputSize) return OutputIndex; Length = (unsigned)((InputBuffer[InputIndex + 1] << 8) | InputBuffer[InputIndex]); - + /* if ((OutputIndex > 0xD0) && (OutputIndex < 0xF0)) { @@ -134,7 +140,9 @@ unsigned long Xpress_Decompress(const unsigned char *InputBuffer, } */ - InputIndex += sizeof(USHORT); + InputIndex += sizeof(USHORT); + if (InputIndex >= InputSize) break; + Offset = Length / 8; Length = Length % 8; @@ -142,10 +150,11 @@ unsigned long Xpress_Decompress(const unsigned char *InputBuffer, if (Length == 7) { if (NibbleIndex == 0) { NibbleIndex = InputIndex; - if(InputIndex>=InputSize) return OutputIndex; - Length = InputBuffer[InputIndex] % 16; + if (InputIndex>=InputSize) break; + Length = InputBuffer[InputIndex] % 16; //if ((OutputIndex > 0xD0) && (OutputIndex < 0xF0)) printf("--2 Len: %02X (%d)\n", Length, Length); InputIndex += sizeof(UCHAR); + if (InputIndex >= InputSize) break; } else { Length = InputBuffer[NibbleIndex] / 16; @@ -154,17 +163,19 @@ unsigned long Xpress_Decompress(const unsigned char *InputBuffer, } if (Length == 15) { - if (InputIndex>=InputSize) return OutputIndex; + if (InputIndex>=InputSize) break; Length = InputBuffer[InputIndex]; //if ((OutputIndex > 0xD0) && (OutputIndex < 0xF0)) printf("--4 Len: %02X (%d)\n", Length, Length); - InputIndex += sizeof(UCHAR); + InputIndex += sizeof(UCHAR); + if (InputIndex>= InputSize) break; if (Length == 
255) { - if(InputIndex+2>=InputSize) return OutputIndex; + if (InputIndex+2 >= InputSize) break;; Length = (unsigned)((InputBuffer[InputIndex + 1] << 8)) | InputBuffer[InputIndex]; InputIndex += sizeof(USHORT); - Length -= (15 + 7); + if (InputIndex >= InputSize) break; + Length -= (15 + 7); } - Length += 15; + Length += 15; //if ((OutputIndex > 0xD0) && (OutputIndex < 0xF0)) printf("--5 Len: %02X (%d)\n", Length, Length); } Length += 7; @@ -173,12 +184,13 @@ unsigned long Xpress_Decompress(const unsigned char *InputBuffer, Length += 3; //if ((OutputIndex > 0xD0) && (OutputIndex < 0xF0)) printf("--7 Len: %02X (%d)\n", Length, Length); - //if (Length > 280) printf("DECOMP DEBUG: [0x%08X]->[0x%08X] Len: %d Offset: %08X\n", + //if (Length > 280) printf("DECOMP DEBUG: [0x%08X]->[0x%08X] Len: %d Offset: %08X\n", // OutputIndex, InputIndex, Length, Offset); while (Length != 0) { if ((OutputIndex >= OutputSize) || ((Offset + 1) >= OutputIndex)) break; OutputBuffer[OutputIndex] = OutputBuffer[OutputIndex - Offset - 1]; OutputIndex += sizeof(UCHAR); + if (OutputIndex >= OutputSize) break; Length -= sizeof(UCHAR); } } @@ -208,7 +220,7 @@ static PyObject *xpress_decode(PyObject *self, PyObject *args) { outsize = Xpress_Decompress(inbuff, outbuff, outsize); // Truncate buffer back to outsize: - if(_PyString_Resize(&result, outsize) < 0) + if(_PyString_Resize(&result, outsize) < 0) return NULL; return result; From f0b325c51fa0227dc20be5ab912c479eae21960d Mon Sep 17 00:00:00 2001 From: "Simson L. 
Garfinkel" Date: Mon, 6 Sep 2021 08:04:27 -0400 Subject: [PATCH 16/89] added hello world --- src/tests/hello.c | 7 +++++++ src/tests/hello_elf | Bin 0 -> 8304 bytes src/tests/hello_mach-o | Bin 0 -> 49424 bytes 3 files changed, 7 insertions(+) create mode 100644 src/tests/hello.c create mode 100755 src/tests/hello_elf create mode 100755 src/tests/hello_mach-o diff --git a/src/tests/hello.c b/src/tests/hello.c new file mode 100644 index 00000000..352541a7 --- /dev/null +++ b/src/tests/hello.c @@ -0,0 +1,7 @@ +#include + +int main(int argc, char **argv ) +{ + printf("hello, world\n"); + return(0); +} diff --git a/src/tests/hello_elf b/src/tests/hello_elf new file mode 100755 index 0000000000000000000000000000000000000000..8868dc235c667358657344679489dbf9ab64bfbb GIT binary patch literal 8304 zcmeHMZETa*6~2xW2#`1i0!^VEUI!Aa#7QWGK*<_AAH12u$7pD~uHl^%JBc@cAp3Qj zux`r~sN{BZOsX_Aty;DHVIP_pnzX2!G*D2r{+LvLRIS!dVpF@N)w%_>MRXhS&bjY- zoY$|NQK|dMMb5eBxz9QG`@ZL1zS%p_SLN|2B(M6MV(oB^jdV#t>z$Gz(WN@n3i^Au zx=Sr1y;@^-d6zAy)J`|VHDs;eUXpCrQDp;ed6)1Y(L$EeG+~sAN2CHsr2?A3cHGu; z^;#tv&>6^aX~%Jquw#|&X2fnr>>w5RBGEtY8~+9ccgr3s;{?fEPKV#NqMUDBuGKQR z&xREw?7l#D+|G}i{6ah;^({3nk4XC=F%KY8}IF;u#Lq%+3ljn|sL_PrM`4FCGcBfsk2@%)8rSAO<7+u}#rkDVSR zW}pIl6=7>C;G34fA16Fefq!ZVocnuE1-_zj8sTASSI2W!R+;9=M8q78ry_~?i5RI| zZf31W#xj$UcuMUb=nr?BJAylcyHv|~IvH!p#*^7}YOE!Z9*HDchT|z7`@PESA3R`2 zW0}}kJZr@=g9o}3=~QemGMr$yW65+%+?qDEB2l-vN@j-V$D{u#h4qo!5MRMF*Cu_# ze!)6FEQ{{E>kT%}G@fUqs}7vk2hyAa$C~G|c?a%17j8Ik=e~GF zaNb^6kL<^o{(G(QOwFq+1Ijpc(W)xEVoYDIomGWh-zTWBOlqE7bxOgz0Stlex|AbaD| zg4f8;8<*xDFg!0AR~D=$a_A97M+_3j&$tCrN=|k^~5`%Lzs{Z`f|6}BH^VHDih&T6}g+gJ5Ene0= z2|XNos6YQ&=xB(>YRmfNdMKJ%J{Zd1?9X2x*v4LJjY~JZa}SfPaq2xQ(Dnw}Kal^| zK>m79{-2>jwnJM}9lWpOf52a*$*n_3$#u1j)BZzDaUBO>raTIeAVT zQJx9EXJg&++B2Tze#ZG*cJ*eVa4!=iRpu+TnaY_zMf?DBef~aQ)8`vjJyAQQ9@udI zXYbprj~lk{q2C1YYMCdAZ6BlG32LtkFK&4*Rx#yT$NpOr%&vDcR+Xp?HxjFhirGNm8dLah?nByycd2*iFf60hOg z0zST*a!pcqh~a#-odMn|I@*0vcofv?1b?3r9lU=R?EcaCYKT5>y1#z$N1uDGWBlhu 
z*Owpi^^xf4hq(X0iQ@g!<*^+T;lgjFA}-(G-Mu%k_2_UeW#t09gS&#Q&G+TB*7l9I zj$rG~Z8mxY6%!T`~117##Srz%!#dTK7uPLstQhb@hIxNL$Rml96 z;uObJ#8)V+pHluxHD8gxN@2g1^6ONf!n}A@z3Qrn`-<-`rG6TU`?nOQ+Zqgmm7k)+_8=XQ7vOsoV!k@h0W3h~t!6 zWb=NjR@VxPh@Io}u1fjTlxsYFf?ssupB5bFsqICn-IVdD#r9x-_i4O*9FpP(=QI3F zYJT~6o+Z3e`cAQr|9Lt1v5gxE55TEIensQu>!DTqaS$aNCfrZ!$GuK`nqR&z`j_xCE`HqW zFh_WxyjpdV@YPj!D4dU=e3x;){m4GTaqt4+JPuPb4lyx!k))s2Rg-I-r-YC9R@@EG z6V7~he}60dN5%g=;{Oui+)g+D4}=GV;}m}*9JWsUr-ZMeuXEWsHj=TjR&I1OIHDFG zolGlfj_|V+KWs(Q=2#*<97&i_E1k)jk=%qDNhgmdVpc2~+}*yXqcVwK!{TNnlZi~4 zv6PjWRHKb!NI5d-(RtZ(#-KkBc*>@y#px5j@*kjUzS1B>7%$_eE3?1n2 zE-lf|ZUoUIq}gkT2czeRGWQQ03Wo;FLw$V@_70kZq3}R2d)1G1BiWp8^8b8+8u7c6Y6#28Im9a}6k7cs)bgD!#sVoyqMA$%b#}k$c z>TU)p8yriMYQ-k#FE1q`rS(t;W8<=T#-ro}nr&*YBHIFasv(k$k5HR{b4^N8OU=|FCTB*ci1CNe~2)SsbP;e4-)qa zqUhuvzhi`Q><;_l07*wVUf_iGg9rIlD&xBVbi|F20XC-OZoiGvdmLrBD%c~wguEz3 z)bH;9NwE(LZ(iCBiE(24_4ISwKT8;oiF`ha=RD4P+@px1lemYf@|1Jh2c)4zS!CK7 z{67NZb1wTXv4=!^;RZV7^DcXQHz9xInsC$$e?NBF<30?Ddm;Gl{{KYmF@N~}L#~FQ zNVokjUH0z(=i7G+dpnL>TtA58A!}Xz$GE}m%aq|Yj(aKMKK%cmn-3lGRkCex*yCPs zULF|0BZ^MYA^%J$uRYX{`vd-e6&s_tK5aJ~Y3aoAGk39Q-w@FF3^$6BVT78 zl{oE-aiCOydf)(d(B~<`80ts7C`IOJ-s> z2PIkrBNX;n4_-WY?$KjJ!Ftl8cTp--@F*(A-*4WV-DJ9ZwFkdHczN@i_vX!SJ~P?d zWWWFOw}0+8rZ8=cnUXdy?ZT`vADNPD8M7d*D$Ux}<<}|~D{q|By^~RxO!v%mogZbQ zwd<7&>yuN`lI~g8fAC9&y4z~&ynAub={Wc>he3a z>*wl{JdLvJvbwTswzgTbac`sEZraVHv+sI4b@eCflJC}ppS3|Pskw9MXTH8--_@6` zr@7X&U+jKA&z@O%y>j|Y_T3uhu2XW2d5@&sxYrXRb6h73jnQ)#rGHrZEJ{OnpN>nf z^VCPj8WTrZnk1vONU#C9TC24cgmb!6gsTtLmf?hL_ zT;ow`TE|actepDn(5X*9`0K6stKP!G^Rllb&0V8ZHGLr6PAwn!Wa?QN)8{UWXXN^I z9sPST9(vvvrRkW~KO4-+u#M9Dr{db1W^Qh0&i$DMY z1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0009IL zKmY**5I_I{1Q0*~0R#~E?*!hg4t}m)yHonM^0%IJ@ZKNQLHGCS^_NaccGKL}_gPPT zH>lhh+^Sytru6pL+oQ#;vN30`Kh~3Od2O^9$&~Gee3)N<8I4BrDtWX0THhoWBL$~#v*Euu00IagfB*srAbh=F z4nJwmE5PA$&}#-ZPP%nlk6J<4G3PsG+r%k6$+cgf|wTT8-poqqvG(%G*7 literal 0 HcmV?d00001 From 7f8f9306fad5748ab189a75bc90a8fa4abea681c Mon Sep 17 00:00:00 
2001 From: "Simson L. Garfinkel" Date: Tue, 7 Sep 2021 10:27:09 -0400 Subject: [PATCH 17/89] fixed conflict --- src/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 57672d03..bb893810 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -474,8 +474,8 @@ int main(int argc,char **argv) sc.get_global_config("debug_histogram_malloc_fail_frequency",&AtomicUnicodeHistogram::debug_histogram_malloc_fail_frequency, "Set >0 to make histogram maker fail with memory allocations"); sc.get_global_config("hash_alg",&be_hash_name,"Specifies hash algorithm to be used for all hash calculations"); - sc.get_global_config("write_feature_files",&opt_write_feature_files,"Write features to flat files"); - sc.get_global_config("write_feature_sqlite3",&opt_write_sqlite3,"Write feature files to report.sqlite3"); + //sc.get_global_config("write_feature_files",&opt_write_feature_files,"Write features to flat files"); + //sc.get_global_config("write_feature_sqlite3",&opt_write_sqlite3,"Write feature files to report.sqlite3"); sc.get_global_config("report_read_errors",&cfg.opt_report_read_errors,"Report read errors"); /* Load all the scanners and enable the ones we care about */ From 554246b7b3b832574943c21c32b28ea06ece24e3 Mon Sep 17 00:00:00 2001 From: "Simson L. 
Garfinkel" Date: Tue, 7 Sep 2021 14:50:13 -0400 Subject: [PATCH 18/89] overflow bug in scan_aes --- m4/slg_gcc_all_warnings.m4 | 2 +- src/scan_aes.cpp | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/m4/slg_gcc_all_warnings.m4 b/m4/slg_gcc_all_warnings.m4 index 9d73d92e..0059ec04 100644 --- a/m4/slg_gcc_all_warnings.m4 +++ b/m4/slg_gcc_all_warnings.m4 @@ -82,7 +82,7 @@ if test x"${mingw}" != "xyes" ; then CXX_WARNINGS_TO_TEST="$CXX_WARNINGS_TO_TEST -Weffc++" fi -echo "C++ Warnings to test: $CXX_WARNINGS_TO_TEST" +AC_MSG_NOTICE([C++ Warnings to test: $CXX_WARNINGS_TO_TEST]) for option in $CXX_WARNINGS_TO_TEST do diff --git a/src/scan_aes.cpp b/src/scan_aes.cpp index 02067d65..2e07cf4a 100644 --- a/src/scan_aes.cpp +++ b/src/scan_aes.cpp @@ -406,19 +406,22 @@ void scan_aes(struct scanner_params &sp) for (size_t pos = 0 ; pos < sp.sbuf->bufsize && pos < sp.sbuf->pagesize; pos++){ /* TODO: Remove direct memory access with mediated access */ const uint8_t *p2 = sp.sbuf->get_buf() + pos; - if (scan_aes_128 && sp.sbuf->distinct_characters( pos, AES128_KEY_SIZE) > AES128_KEY_SIZE/4){ + if (scan_aes_128 && (sp.sbuf->bufsize >= AES128_KEY_SCHEDULE_SIZE) && + (sp.sbuf->distinct_characters( pos, AES128_KEY_SIZE) > AES128_KEY_SIZE/4)){ if (valid_aes128_schedule(p2)) { std::string key = key_to_string(p2, AES128_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES128")); } } - if (scan_aes_192 && sp.sbuf->distinct_characters( pos, AES192_KEY_SIZE) > AES192_KEY_SIZE/4){ + if (scan_aes_192 && (sp.sbuf->bufsize>= AES192_KEY_SCHEDULE_SIZE) && + (sp.sbuf->distinct_characters( pos, AES192_KEY_SIZE) > AES192_KEY_SIZE/4)){ if (valid_aes192_schedule(p2)) { std::string key = key_to_string(p2, AES192_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES192")); } } - if (scan_aes_256 && sp.sbuf->distinct_characters( pos, AES256_KEY_SIZE) > AES256_KEY_SIZE/4){ + if (scan_aes_256 && (sp.sbuf->bufsize>=AES256_KEY_SCHEDULE_SIZE) && + 
(sp.sbuf->distinct_characters( pos, AES256_KEY_SIZE) > AES256_KEY_SIZE/4)){ if (valid_aes256_schedule(p2)) { std::string key = key_to_string(p2, AES256_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES256")); From 5043ba712935a45ab11d546b12365e06928812aa Mon Sep 17 00:00:00 2001 From: "Simson L. Garfinkel" Date: Tue, 7 Sep 2021 16:05:29 -0400 Subject: [PATCH 19/89] expanded ETA --- src/be13_api | 2 +- src/main.cpp | 2 +- src/scan_aes.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/be13_api b/src/be13_api index e18151ca..2c5f5c26 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit e18151ca2b1082baf180702fcc357ef55654dd0c +Subproject commit 2c5f5c26d95355ee66a0d48f2a7550e9037b2f17 diff --git a/src/main.cpp b/src/main.cpp index bb893810..0aac6223 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -272,7 +272,7 @@ struct notify_opts { double done = *o->fraction_done; stats["fraction_read"] = std::to_string(done * 100) + std::string(" %"); stats["estimated_time_remaining"] = o->master_timer->eta_text(done); - stats["estimated_time_completion"] = o->master_timer->eta_time(done); + stats["estimated_date_completion"] = o->master_timer->eta_date(done); // print the legacy status if(o->opt_legacy) { diff --git a/src/scan_aes.cpp b/src/scan_aes.cpp index 2e07cf4a..22dde2b7 100644 --- a/src/scan_aes.cpp +++ b/src/scan_aes.cpp @@ -383,7 +383,7 @@ void scan_aes(struct scanner_params &sp) sp.info->description = "Search for AES key schedules"; sp.info->scanner_version = "1.1"; sp.info->feature_defs.push_back( feature_recorder_def("aes_keys")); - sp.info->min_sbuf_size = AES128_KEY_SIZE; + sp.info->min_sbuf_size = AES128_KEY_SCHEDULE_SIZE; sp.get_scanner_config("scan_aes_128", &scan_aes_128, "Scan for 128-bit AES keys; 0=No, 1=Yes"); sp.get_scanner_config("scan_aes_192", &scan_aes_192, "Scan for 192-bit AES keys; 0=No, 1=Yes"); sp.get_scanner_config("scan_aes_256", &scan_aes_256, "Scan for 256-bit AES keys; 0=No, 
1=Yes"); From 861c7d93bdb22c5288397800b39c2fa5ec9365f9 Mon Sep 17 00:00:00 2001 From: "Simson L. Garfinkel" Date: Tue, 7 Sep 2021 17:14:24 -0400 Subject: [PATCH 20/89] fixed conflict --- src/scan_aes.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scan_aes.cpp b/src/scan_aes.cpp index 22dde2b7..c3ba0ad5 100644 --- a/src/scan_aes.cpp +++ b/src/scan_aes.cpp @@ -406,21 +406,21 @@ void scan_aes(struct scanner_params &sp) for (size_t pos = 0 ; pos < sp.sbuf->bufsize && pos < sp.sbuf->pagesize; pos++){ /* TODO: Remove direct memory access with mediated access */ const uint8_t *p2 = sp.sbuf->get_buf() + pos; - if (scan_aes_128 && (sp.sbuf->bufsize >= AES128_KEY_SCHEDULE_SIZE) && + if (scan_aes_128 && (sp.sbuf->bufsize-pos >= AES128_KEY_SCHEDULE_SIZE) && (sp.sbuf->distinct_characters( pos, AES128_KEY_SIZE) > AES128_KEY_SIZE/4)){ if (valid_aes128_schedule(p2)) { std::string key = key_to_string(p2, AES128_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES128")); } } - if (scan_aes_192 && (sp.sbuf->bufsize>= AES192_KEY_SCHEDULE_SIZE) && + if (scan_aes_192 && (sp.sbuf->bufsize-pos >= AES192_KEY_SCHEDULE_SIZE) && (sp.sbuf->distinct_characters( pos, AES192_KEY_SIZE) > AES192_KEY_SIZE/4)){ if (valid_aes192_schedule(p2)) { std::string key = key_to_string(p2, AES192_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES192")); } } - if (scan_aes_256 && (sp.sbuf->bufsize>=AES256_KEY_SCHEDULE_SIZE) && + if (scan_aes_256 && (sp.sbuf->bufsize-pos >= AES256_KEY_SCHEDULE_SIZE) && (sp.sbuf->distinct_characters( pos, AES256_KEY_SIZE) > AES256_KEY_SIZE/4)){ if (valid_aes256_schedule(p2)) { std::string key = key_to_string(p2, AES256_KEY_SIZE); From 7750005302ce98acfb572d7736f2534730f531ed Mon Sep 17 00:00:00 2001 From: "Simson L. 
Garfinkel" Date: Tue, 7 Sep 2021 17:18:22 -0400 Subject: [PATCH 21/89] fixed conflict --- src/be13_api | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/be13_api b/src/be13_api index 2c5f5c26..5abde376 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit 2c5f5c26d95355ee66a0d48f2a7550e9037b2f17 +Subproject commit 5abde3763dce495896019b67a7c45138763390fb From c1845c282202014134e5e611949b2bb2685ebdf6 Mon Sep 17 00:00:00 2001 From: "Simson L. Garfinkel" Date: Tue, 7 Sep 2021 17:18:52 -0400 Subject: [PATCH 22/89] removed POINT1 --- src/scan_email.flex | 1 - 1 file changed, 1 deletion(-) diff --git a/src/scan_email.flex b/src/scan_email.flex index 77a7d84d..bfa0fcc5 100644 --- a/src/scan_email.flex +++ b/src/scan_email.flex @@ -324,7 +324,6 @@ h\0t\0t\0p\0(s\0)?:\0([a-zA-Z0-9_%/\-+@:=&\?#~.;]\0){1,128}/[^a-zA-Z0-9_%\/\-+@: s.url_recorder.write_buf(SBUF,POS,yyleng); ssize_t domain_start = find_host_in_email(SBUF.slice(POS,yyleng)); if (domain_start >= 0){ - std::cerr << "POINT1\n"; s.domain_recorder.write_buf(SBUF,POS+domain_start,yyleng-domain_start); } s.pos += yyleng; From ebe4969be819698cb9cec77214e43134beb20065 Mon Sep 17 00:00:00 2001 From: Simson Garfinkel Date: Tue, 7 Sep 2021 20:03:35 -0400 Subject: [PATCH 23/89] updated readme --- README.md | 24 ++++++++++++------------ src/be13_api | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e2081353..c8cc63bd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ -Welcome to bulk_extractor. +[![codecov](https://codecov.io/gh/simsong/bulk_extractor/branch/main/graph/badge.svg?token=3w691sdgLu)](https://codecov.io/gh/simsong/bulk_extractor) -Note: bulk_extractor version 2.0 is now under development. For information, please see [Release 2.0 roadmap in the release-2.0-dev branch](https://github.com/simsong/bulk_extractor/blob/release-2.0-dev/doc/ROADMAP_2.0.md). +Welcome to `bulk_extractor` 2.0 development branch! 
For information +about the `bulk_extractor` update, please see [Release 2.0 roadmap](https://github.com/simsong/bulk_extractor/blob/main/doc/ROADMAP_2.0.md). To build bulk_extractor in Linux or Mac OS: @@ -27,12 +28,12 @@ For more information on bulk_extractor, visit: https://forensicswiki.xyz/wiki/in Tested Configurations ===================== -This release of bulk_extractor has been tested to compile on the following platforms: +This release of bulk_extractor requires C++17 and has been tested to compile on the following platforms: * Amazon Linux as of 2019-11-09 * Fedora 32 -* Ubuntu 16.04LTS -* Ubuntu 18.04LTS +* Ubuntu 20.04LTS +* MacOS 11.5.2 To configure your operating system, please run the appropriate scripts in the [etc/](/etc) directory. @@ -41,7 +42,7 @@ RECOMMENDED CITATION ==================== If you are writing a scientific paper and using bulk_extractor, please cite it with: -Garfinkel, Simson, Digital media triage with bulk data analysis and bulk_extractor. Computers and Security 32: 56-72 (2013) +Garfinkel, Simson, Digital media triage with bulk data analysis and bulk_extractor. Computers and Security 32: 56-72 (2013) * [Science Direct](https://www.sciencedirect.com/science/article/pii/S0167404812001472) * [Bibliometrics](https://plu.mx/plum/a/?doi=10.1016/j.cose.2012.09.011&theme=plum-sciencedirect-theme&hideUsage=true) * [Author's website](https://simson.net/clips/academic/2013.COSE.bulk_extractor.pdf) @@ -66,10 +67,9 @@ keywords = {Digital forensics, Bulk data analysis, bulk_extractor, Stream-based BULK_EXTRACTOR 2.0 STATUS REPORT ================================ -I continue to port bulk_extractor, tcpflow, be13_api and dfxml to modern C++. After surveying the standards I’ve decided to go with C++17 and not C++14, as support for 17 is now widespread. (I probably don’t need 20). I am sticking with autotools, although there seems a strong reason to move to CMake. 
I am keeping be13_api and dfxml as a modules that are included, python-style, rather than making them stand-alone libraries that are linked against. I’m not 100% sure that’s the correct decision, though. +`bulk_extractor` 2.0 is now operational for development use. It +requires C++17 to compile. I am keeping be13_api and dfxml as a modules that are included, python-style, rather than making them stand-alone libraries that are linked against. -The project is taking longer than anticipated because I am also doing a general code refactoring. The main thing that is taking time is figuring out how to detangle all of the C++ objects having to do with parser options and configuration. - -Given that tcpflow and bulk_extractor both use be13_api, my attention has shifted to using tcpflow to get be13_api operational, as it is a simpler program. I’m about three quarters of the way through now. I anticipate having something finished before the end of 2020. - ---- Simson Garfinkel, October 18, 2020 +The project took longer than anticipated. In addition to updating to +C++17, I used this as an opportunity for massive code refactoring and +general increase in reliability. 
diff --git a/src/be13_api b/src/be13_api index e18151ca..69e6010c 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit e18151ca2b1082baf180702fcc357ef55654dd0c +Subproject commit 69e6010cec74ebba63e33460e740f7a6859e9098 From ffe55254a46ea4f387a533694debc279e22c46a1 Mon Sep 17 00:00:00 2001 From: Simson Garfinkel Date: Tue, 7 Sep 2021 20:08:38 -0400 Subject: [PATCH 24/89] added hello_win64.exe --- src/tests/hello_win64.exe | Bin 0 -> 73519 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100755 src/tests/hello_win64.exe diff --git a/src/tests/hello_win64.exe b/src/tests/hello_win64.exe new file mode 100755 index 0000000000000000000000000000000000000000..55540e8b9c9f436771c027465a2d7123c774168a GIT binary patch literal 73519 zcmeFa3tUv!wLgB&JYay483PziNE}m4B`PuuDg&s);~*#_1Q<0Bf-g{R5G~C3NP=Px zBpi>En%4BDz2;YKlGe7ly|y(CF(w(53Z}LtY7=}VB++Kb*akK66`T2g*FI+$G$xI4IgZmI1cMycjwgK_?E9;~nvoqldUq_h zE9&=S+WF$&kEvKz+h|$YuwrS$!sV953+wAwxGjsSEe)P}OKrU+zogu>5e!?{DBPhH5pz;WaC9M{K1@G-n*EH?*8%Mgql_ro|m%?Jw+h&zgF6ux;x ziGTK4iBtr~ty#gNT@EBH3N96(GWe(uC9#>~wxP~1{rEVpg#`2^pZJv=H}VE~<|jGs z$PM5jP>Q>HjT?zhC%%XwYAg62`dNV1rYu?LUI<_XWdJ~JRuXus9|yQIAUx{ZAs+|fHB{HFSggw2hfM0&e?u7Zad2GjBvv8%nczJLM6aF_ z;g5sxtab)Clz>Mc!UzPC&ybIU@N8BF=_5KE)Q0MPjDZHf9IW2SEG(teU6SLFkAv{0rZUKj^qs2WeTRVtzZ`^@q3~B91F!>l7!%xY9o+~z80wU{)`ggj{|t&)W2f?j1t)AvL2F7i)TjG zY?ubc;+*$(=?j}395jOK(&rDx9D}h8HRdUmhh=jPZo|t|j_kO11I0WUav`W)j^W)= z^w7A62gZtP+_9@hi|u}rVZbc5x8NDOYM9u5i6Ew@_L_ckNSsyy+MJXSkB2*|OP8cU zULX7gfd zCYt4h7~Q2es{obI?Nq`*sJbK$55E9BrI-fvTaSAp9%p=QH?MSXtv+`m3XtU>A?XN( z3gQ#&5ziR?PCWx09OVG-8Q{I_*}7&2!!d*S#saYXb2O|=ZvdE-!5^Q?an?gJ(Y9?g zG+(x?i*CMrhbK-v#|L6t4!Gl*-{;-2yQvY5z&p)XEbAj38_g|;JfA$P0}WNK?)Y}H zV;6)T-mNpeBpi8`TY$c!4zeDXge_iOE7@ZSa*fL&{h%I{gw6KKE$h-`-47&J8|A(0 zfRYySk}*kgZu0jdC3`l@t~S}ZN$Qex|3^;$LEic20tW!K`oyL!oT*6IB28A*flUVf zr@-lU(iJ&jJ_I0XpfiT;tU5@HaYQw}2X4e^q{rN|Bk}Dq5cDTViqoi%IFH_c0FUjI z>8&2P=e`H3D!mJ@?(5Ri7$9-`RFX(>{cDkAT0Bw~7`1Pq`~mS2Kgzdn^X3}$ 
zpMrs28;FDS4+eulDDSR2iGtUCC$|}>*L;9qE}lv93Pwp7kShmJk}BVxu>gJCBi)~5 zkX;6;B1uR5H)8UUgimDrue^GIjj2-u;u*7lJ&KT~K#BZ6L!IqZ-vO#?yAFh?`#roe z1`SXpJppy$tTT)j_=uNp+gXWT|19V+S$)zzhnU?;0M2v1^|(7a82=2VW(UrpvIX&| zEbw_SD0ar*SGiA|HV%U0F!J>jp+*FLwr_KA?EUDb&FIg4n?uqwikGx*9atKH&eO>* zGYPaJi3!ya3C7#0^j;&U2qzvL34S%QV02@Mu{iR@OB(m+LeCUgFw?HpJh+f){ZZS)bBzBR8&xXJHb~_<2!YX)Fg-B4pU)~Z zlWqkQMk0Y~ejBQ03#tj~Ngm$nPiVl?h?qAQdY`?0KG778wE9e~y-=ktJpo9k{7ulL z2YvCP@74~#uA@)q??B6Ur>_NVMhiIHhL0$n-096v8ZGt>JWCvkS0YVMYyyexl=ljK ziC1FQ#mDF~5RK7ilS)y+*!;+jmrr*pq zfS~&p5OYr!FB{yg)Ei*s_6Bfk0nHjw<>e*Di%D7qPQVWs>VFVXnSuKkAnMN~d`SVU zAwV)QrKFc3{h)9pUaM!CZEii}9^-YMVy2@w$tagHLn%ARRt6JPRR{`4WZ_5_>#9dO zLLINzqKxfJnlVZR^GN}t(*pA_=I9$3TQtfm{gSI!b{R3eCD#eBtC#evClGP6m;Nz> z2{TdoEK%rxA`^=XqDjhRkt+?dR_3OC7waigix1*kJ#N}5`1I%DTJO;IV+e*am#*~ySYguPI0G0NaDPta^--wD0a-L2~N2kg683Km~ z38VtG)LVUttnYTWQu_jRrlD5Q!VbvPV*k1f7_)uUj^pmB=x&vWq{Un$<>?e|O1Bd? z65~nA3*iJ4s3NPcj7`?k1TjpK4OvcuXLRc!PYmgE0+9|x$b|-J z7MmYCr=laGD7{ze4*Y(Q4r`UwcVM3~m$A8AfMw8_T4c0jbp+YZsbvERpVrK3 z^O?V^NOv6ADeKacSXh@Y4M|AvZ-CulsY1rDrx@&-iY|Qq~Kmmo~>EVtT1P0uS$2^GH1G z7gvoTwZN1{8Z9LzvFZBb$?)xcZbN~%Z(QIwYUxardP5VA()+B6I8-6+wfbPl$f;Co zo=AC&P9pLZH;-uyEAuX-1=>c&mg%Kt;wJ3AIYUJJFVQri&`y^Vs12r<9#vZK1J;5T z4Yj~}$bNX$@C%M_THN~91MaAzT#{0Nh9G%$kkb%;7lY<*>Qh(;xR@ZBzxO&6hr-^t z8#TunX2~GuCSC05@Q-G!lhriCYKBDr8QFY=cPDG7ytA2j^Jg(;~6Y+ns4 z@QK)&P6B zbwMEgLZ{9>?n0--9or=gXh5NT-|HAfx5H}!fQD+#*x>q_iPIfV2ciH00OGP95Ss>c z?r78N&b|%ram`@7$s(D&&QCB!C(6!5Or2)0YXGT{vU4O-@Zx%rif41G;!z|nz+h+^ z3Hfw7RG+C_I!tSYSvu41_hK6ia+$?mp0q9zsbO+lV=M}@<;pl4Y*mCj+tQ|s4(3?~ zHHuo^BmIFJHa2(g@?fXn6y>J-qz3Zd83Je$$!N50@ zd!+)yg#x|KlZY&DVNjZ5fFhWrbL|$8@XsTx)3aNTugevMPAnZdy-rq%=_SYKwEjff z{Q0P))wj-qEWTS3d|6JP`=IGHUOJZ}IrsQ!bjr>>0soL1*2RWu_!P<`BUW=5D&GbJ`fRBgZf)N$BPG!WXGV&%?l&=&vIN*3Efu=^<2W}+iCy-?< zHJfx;V@1y}Z7xPkc0xImpp(7#RraT$)yz7gEwGMVIYt12@>;Cd+!6bQQxgNXns$F^ zXr%b~fXVARb+O94xK5I+VI56jUKK2Ty#J$?&z5YXB z7xqwLRv8R~Lje`uFAF2jKaY_sdwS)j6Rkc;=r$E~0}8)R?6}#qyALa6U*iZsYQ!~( 
zoZBdMzk)pv?)w)%<2x9h!H(YSG&#GyIU%2Y*j4zx&}Kk$E6fL0c$zl7qp)oAmoIYswf2`@JQp(%Wsb zgMt4Lo4UDmqh)6|BbVIKdcgDqEp4TDccz~Ine;&P$^G{?p&gK+aO(u z(Y03|G_}S<8T%vf74qa=iYh}0s&&BNPU35 z#C8ac1F!H7oA*-zaAD!M&QK#e->H#i#Y?$nxgov=lg)BD*DO^Zkz75Z&!jH@_QHK( z)S*vJeOBN3pL?-%#ljv$4W^g&hsM~SX=J&2T7BMh?Yac}dH1cR*R-IN=ihw=1IpDC z2(*JS?ATGV`bZ#JNp3|l+&GL4)u<%OO$JFgCC@ap9$06V_9nk!?{25DX>#rj=)pfa zccy`|eQP4+c}9B=jLuBO=)^%2ka1{z#D$r91E~R0$Ja+O?DZ1^c>tT;XKTdu7*HR1 z^E59jBkFRYOkM1LyYG&}Gu zU=LB)z)-sHpbWZ^^Dkg@r7QkM^*AW~oHJx~!tj6~*lAEK= z30x2#9vAdPcIAyD{g(5_**i?m4tX|YtdY8~P_e(cPA7GvyYsLUc7)M^*-A{B>Gkyb z{}XJZjFe}jrO{ro&$8hS>P4|zpv{zHehWC|A2ujv`9q8u^ZDofbD@w!=efD%SQ4En zCO>O!WS7H;-sg@l$7&1CLl3IK^gq(;I-(MBAQ?{*IhG4{)0*D)sTFW+#6mfDcUQQz^Voy?_!FDKMY!+v-P{=l#J;l~4KF$NN9q^5Ssxl^j# z6~F<}N5L6XR2Fv1u3hpvOs>tc&@Q=JYNXb6ur6XScJ)JIQwz5yR(3Yal`Xupd3a?@ z;LOYD7iDYCddPY_`Etj58q@Q85BFW@u$Y7%ldw-ZEOp4P7FlREz0kwI`4Qhe{8;Ou zH4(gT_@O{#O^}g)*#s(~;{Vxm_#;+)uSqxzM!5 zk5GB`tv0tX1QeRZ0X{}(agQF_28F4>LFSv1#f6W0z++Ns8A&y)y(ut4yd1SEM(mgx z&G-vw#g5cA)679Sx{im#JSuq$w-dQ!ZuH}`8&y$|sFliK_u4D~^WoF(zW9nu`pRpx<$QUd2YXMWGZF>C~5*vmsN&5RdI_6EROU+)7 zujBG))G5J(a~@PopsN!`XZWW;9~{5x#xvQ``5O{rl)+8s}pDE zLboV%V?VB2^v$^F=|;OG*TL39Y@5PVA?ZzrRGx?v8gCF z9pvEqIuA+%fh5)Q)cL1GW~q%bq~aF|UOU?nCTL2>lsCUgTfbdc3c z&=U$2`zoi1h&Pv)gcj3l8roQ&h5aP)(pXcg6%kq3q*mm0c3M#>s~c9Hj>LT%HEqIX z6mD+Xj5@ZMUK2J;LYsT%Mq#t)w`AXIdb~!*;dsUuOln=$fE`8Go^}j9&cF4q7_a|{ z*z07s#g8>=C31xK0i77+R}Y6#+u?QYQ4QLTz`V9=(*6~YUwxIBD z({Bz)&K+F>`8b@|)aLODd%T_8YDfPU5T|GwAYg7-JY3B>F@WNmfA9h@V<=)A@ccE^wvM}89snHp*7;qsH(B*c) z=NZ;~ka~q}`9gOhRHmu!54(h&;ENk1i>^^aLs7p(_WPy31Myu#eB$pwTo3EpU;TF=-ib-8KO2a$a6%rz zI#ch7Y}549>Vw2DF#X*rTR(AGJQHqllkoH>-yn;4y{bL!Psr5EI2^UJxKU{@HsW3eH{|02ZYOQp} zuF~x9S7GnrDLFS2*i0PmK$`Qvha*C-Ye#680yDdjcA(f+#gA#cJv|y6J2xv^6|Nt{ zZSTbd+5cUYs*ESv@92M&0X$JeA3O&<%;uzKN3EVLex_ZW3J=pLex~sl*}j>2^$DB* z&IgA_V|;ll%srw%RkY*!&0dKQOks8j16?}r5B9@k3oQ_L;G3`0G#VIY4F;l^p~3m! 
zfma|pE}$cE$*wK^7uin77U`Y-7im`mw&FD+0LS-v$`MX%=p-A?=6~$7vx9zYU(&fu zytmYp_JE2u9MT?O*d~e4sSD> zb6XC0K1Opjl1=PnTmLR*HKo&-3*^G9qF;CTeW_4KTUOd=Bv#ox z=|H1Ip%19IL2vYVI?3bnOr*JyVjNy_yK?%uI}kh<85>nSIO4IpYN>^Pyngvd!f=XP+% zoIMjKLj>=1(3Rbw+le}q^lil9X;^w z*^|Aiv?!<)A~8$n)Ul04SWlrOY+XB??b&q+TPZvFP06_n6w&maH74G<7565)gy$$L zFdW-_&RrK_MK)1d5}p&AUWi&9i*Nlpleqi^o%K*)l{<`${{}6ab`h%=)A#lQ zP3LwIvin>3hCj27$heD;(7LNnVm}yyV*3eQ!WPORSyXmH7C@y2cL|%J2GDTWC+7N! zK1|N10{Ln_UEpMg`lb@x;w1Zb3RgmD>bOJ2fQYn|wb_Ae94Ql3AQL9^NP21+sTnNL z2XI(qC53G&q3!5jaX^Fx7zDP%oyay8aZ_}M_#y1V4tGpD4W{vMD*EVYM_vT_>;Y)3RpDc6{-FoLTD#@ z#ing4;z`}_=cAOV1=qAVD0A<$B;r`uStGmNYW@SX+I8fe_f87!4yN1P=ug?zE$&Ti zD-9m2hL#>byJ`$taflRM**|XTG5r8La&LCLuajK9{&(?;%Ta&t`-FP(sOj-I5~=G{ zuH-spf5YUB07FOhB&HJw-g)Ebfs>W(fz4p#odf$>ONf`Q*7;~7b@vdiSShjTY0lI(9kig$P)GZkc+(~uk~C&c>i$MnV{IlW-}>~EN7>n4 z>#v~Iy~CJBkTFr62i0X@3L*XwDE(ddg1FI060~p%REJ9WsL#=Rjtynj^*AKGo4WpJ z$1!L3JN?H$dMLuwLM8$N%yM;_S~t>|ICc~_33hPq5!58acz0%(;G>D6|4rcb`i`Gn zoy06-p{JnvJ>H!Y7^Pg4Q=~v~si}vC2O_w;Gpjk%?uoU#c6!dsO*?}^&;H2N&tZdn z7NXe%nks!2In{lse<`53ir}FYAt|HdN%>7JKSoJqE|h6c{ORNYZz-SL0aSUO&i-Z- z4Dt)2Un}K8-YjNtOh5Fs>~-hB`tj?s$kl0jt}waFE@b^Nbw6enFN_}V*^0Kq>lFVynjernr`VROI>Yxdj$~o0Rv`FI zLwNYD%7bF4-S^S~g<9$vBRQvQXCh7B1nOlM9U%h_6Rp1v5E2^PB$|3U-Qd6bdPHhw z9uOftrV`&~J7Sm&ixTq?`qy5d3vlD`hNjqtLS?g#<|w>G zC~C(K6eR-$dvNKregsg&4!+#f?JYIRC1&Y!>1}bp(QEXQw9Y?=R_RDl3GsfEma&QI z+y+_5F4WEh?b=G6AvsU^-#rHtMpco5Jtz87C1IC;6m?G*F##+p0!q1n$tZ^#8Qa9Q zMmYI&6@rSno|c?j{imrW*V9mUx{V_|ExER`o7OUI%HUjVYgC744*Xj=3fSU%e2jC4 zXS&$5%Wi6U8r-%9*WUrxl2>JE`u1V~u}k}KJlqW}gSnr=ZXP>1@9KzpzsU7h|dq?Ol@BTL|dkG0<5Nw@%egzYg1(_JzsB9$DI zz)d89b~5j#d~8t*KD+~_nEQ>+UD(rXi|+p(WP-CP)D7=whsHgY$a!M+YwUpbPe3k2 z;%L)ImsG<2OB7h!O!649+%vUwpaO+)(!{}W>wgKuVThzcLX!63$iQRtKaFgtr?fvX z*1rjH^{3GBchgShW-=GEoy-RHsXuZ6(q-BVwg*|n zWw|e!75_*g3*nw}1YWjYAmwr2L^6=`%9n#IMK9bxn7C2mtGg6xJ8x zq)XoQ!HuQC<|}qjoV{tM=dYnz(b~TU=is?*H}~(Ni&J5hUym1b!s?Dbk}j1r&%ith zE2oaZiLelK*$47=PsD8&GlYtZ*4X5YLk^q5fZ9!>(kOs{dcB>*f~n*T^Xy=vy5(nR 
z{hTW{HFNvXmN>Uwu589NXk{r~>5Ibs)t%v0D|@Gfy;*@vJw{rupT~|#$7gy>=apw@ zSLI=|)HEPBov}9!n4b6%dS7lDz^s5d!JF6)xK!!)#<#nxU$!6*TP@y)bjfGsG|72l zqx00^zh*W08}E=uZ*-mzyL7Yxargu2D7Isw&Pubd0z+PX_3&Q0c;}g1*gC8# zgLao+ITp%&!y9jq(jb!Y;$B_gPTaY8E6d|=)IJ}$q!bZ9(&9Y(x&m+F22OHE$7PLF zVz%$aLEX%FFoy%Ik;(xUcCOG=Z)rTHJa6Ko*fu)W7$--=l{bh!3)S>j)1Cu-H#V=7 zd+l`oXWnku=)j?Z#*^^i@sS($S&)MGm8<3Dfm+<#E?^q_xSkM^94AkvYucE*2{-sw z8DbeMb~j=#&cY(P;sB0Z^h6XdOi?j(B&Sxyd+0Jf2BZ4^V`<>eKy*KRUnYhAJZk+2Iq}6l2~9ihj-l)PED@-X&N8>+@P}UGtG9w%=?(GyON4)? zd|Ubg2iMWBm3k-kH6wOO%Xd$+ph>Hv`D0Q7c1hkv&+k&o!}%*d{`H?&TtQIMBWV|C^!r3%6U3DzLl7j64?VSin|v$ z`;j@HHYKeG)Fx(=mP7aR%+f_n-Lw^sO>}HkH68gr^qJgJ;fU$K^5u9;o%p_3ZRD?L zg}yk~DfpQH>1UIO8!$^}1(a&(xl0Sf2(?95irupklNyO`dJW^zLoM zvbaZjdvJ1xCi(9~i)HLiqWY#j=@BvBfn6PJ33g_O)KvfbzCvth@pn%n(cY!Ce7oA<%i62qZ!;tO$YglQd6J(VM7y~75JhqqdywdU}b!kQs_P<^-c0QQ-?;IJILxD+|LF!|l`gAb`wTm+Z_*Z8Q_pH_#Ga5n zJ}Ev`6i(;@KN6Q?dlo0a*sF{RqG3&a-1-2TfksM&W@^thmzKmc6XYh`YQ;IP*r#=m zm36JzSmnVQ56)jS5xs&rieADqwREB#9dxKd8_e=;J}5fQCmv+B!2c>vj|O35FwnlG zLFZW^?q>;+bRK&hDA;#P}0D?azgAvHm{{ksU+bt=$~DYA2dG!W7? 
z<$b%s!O`)#UM|;RF(4dqkXGawvV;>&`dn=KU!W1(4Kf%m)SRDy85mc33&2#gUwRvQ zgpE$A!|H>n7T+I@`?0Pbj5n|DF|UA$*JG9uk6b@Q_@`#Pj9-1xoW6yu1^I#ON zL|&B(t{(Pd4}A4|;MF@jL2l|zJ|=#Y(D9MR?mWfL9=-8TF_p@FQ_|gxhGJQBMD$^& z^+X`zc`ABE8CkgSXr|%z5;a`t8#27+;z$Cfpj@77mUGO>J)WeF_cdZiyf<;Uw_-%R z7|@9y>EtxK%^isw|J{5*XFuA|PcJHj&LWD50uDLgDr2GN-$5hViHn(@yU_L2!TxPH zPf$urfbK0dT(tyAq|9(ufYDu>2_-{ zd0-UcrzGq=_0i-l{+Wkt=}^Se>rFS*0}gnY1-a>FqATPrU`Ovu~rb@9@XcWovN50cHRC zjJ@(yqgGyb$y=BpT_PRrJM53zE4>}K-x{R62+DillIV|d=SUr|SwQLV`^ozymtQ(7 zT{--b^rr0ct5rn3p>B)2&R7p+Ri0Xg_<(V7Aoc$2-dOa>md% zVkewr?<0``z}t}ioJ%3IMElazcUL+-j#sweH+&d`JL`@hie~IL^kMq6nfB~MZ2|NS zhRjWh$POoW9R45*{nc^!Bk>APhH~!6QHP{C@#0=hz--#{#^H~=ITmSdymCl4*Nio6 zoLp)4Zi)Oo*@3v-c(IDZu1k}7VjA>7iZs2N|2=cZ`+Ab~UOECY4>306Zj{r=fuTs` zH^yt);&}&t+Tde)(Ls5yhg7|r?owyDjE#oo49=}}1U{yB%kYj6^H(qtX3>l1HDY!y z%RS5Tf@4p>*V&Bs+k4gL5%t-lJ`bwTPW8D*eYUI5UFvhE`g}os?ogl4sn2cd^J(?D zS$(#s&t~=ci27WkKHchbrTVN>pUc$e67^ZZo?+h%j*AhFF5Rc`jKm3c3mFEn$;`E5 z!oUuCG?H?(={MRgOgZQS=cyLFZ&}j?9kBZP&j2JgCGw`nHX()|n8~rT?iwU#mdV->oxdSG!F(7(!mOR5i<7Q1B~sX_y2%$U37(*z0H|00Z3(K(5e(L z#e+>x%tmdShH)2IdBb|}s(j#kINZV=6K#zY$!4>Fa6KwxPknp;8kTTV`pigH2y?pt4bB?&Jfu$e6$mx=z2CEjt5)?4{!KW)iMUH(R>EW3cSY@>r353542 z+Edf8p=iXuijfvx}Ujh>TJ3x@mf{*yCSq+#NTfR+v zP3KH4k5Qz_fD@gpN^|4Gn6=cpP%Tx7WtkbTQBmCyR41*{;832&l#W$>v?v%f{60P+ z(BYZUh5aLpyQxa6LX_~z7Rn7$i@>h~fQ(o8Gm&{gA*18>1^x&}DE#^(x&Nsc)1Y+$ z^VSPc1Zls2KKO^U^sg)!6b;Ek6%GB^0XvcQ3KxiY35biGspGcCxip4Y}JU46e-_@qntgc<=B6VZP3{YW(#Pp6Ed}({ITXGp)2h z<08bSPdIn1|8Y1{gBlQajwdKyjneBM_}>II(@Pi7@effWA3+n;oio(n&%i7VH%Kp~9>zD>NOqN?5{JZEeH#Qtc5{0jI+1)AsMOP0>*|4ij9>f;f>K2)S_UB2zS|d2 zl-sv`C7OjNT1l%Xhz;np_9esSC!0mE6g-LVN9n1&&>EFXZtVsD!mqxf*ftNA@(GC` zjyt)Y$add7Si@$ZtUTEQAhzm3?l*+=*4=jl<2Fjafi#KWt!IfFW)%0gx1;2T?r&hJMS2o-TF0o}ZL0*CnOO?xb}2{-g}z zz$gwd$A7T|9rEbcGxU^~Y}P1FPb#L2OZBaeaj;i}yEZGiQ4Bl6Y~nr_eso z5P=Pn!0LCDA^NsjPCAO=6T@KELl@6W9nt`9+jJ)42F?EaP|?K)5W5nt2e0f{KsNcx z;XWD&SgrLQ_RGadhA5Q8ur^4Su{HGOVZU^l8Ii!@KO+^0Ixp?VW#rzAZeYA6?Yp=n 
z$!LG8VHB`W+ke}rv%lpYCiPlz`wf~wKiuQZ3nG<*6a+csqsV81oS)=?T=L~dl8Pax zLhxdij^q18L84BwJ{l?cFJ7c~L!{r5)S0C4 zVvhZAgTZsN9d@~K_;@fq>Iz+L_ts)B1ak}t3#4I#A29uf-dzSBu*|pKk2&HP?r@#8 zUO4PWzwTwIW{9T$Coo`}L+!`W$-h$?+2|O8LO5Kqf7F$JvY4v&)ocdd3c?G0`p5rf zND5;IpKX;6l*RrTJ}mNo@kP7;KF}jt?1MC!5Q;nSq}RNCGjPESar)3pH?-&FoA2O` zARI$*V#NFL+==Jk5dQV~-y?x#)pd0%CRkRlXsBC4x^P>91HaFSU*|$7Mn7A&A+3H~ z^$*ssSY2N7)VkMN#+4V%L|)0N>V}%S z6{{@`3+tCwhf0>r70L>VOXekW70dARvek{AI=2NsJ?mbv!qT{WVO8{SB;jj zD=o_wHd^XeSZbFqTv|P`v3jw)c11mBvCOS)aC;Wk-R-GvSj!56|C$vIma$7Li`KfU z8!ZdnmW6{ymD;ujlOIAQ|_^s82R44st zuY1Mf6?GQKyO986m!w30Wr;<#Zb+7jh%MXSB@dmJ=`4n$b>#+bg!-hikP#SP9JnaZ9T>j>ycD~kQL{+g?TF$|K#yQGk)E@KpwYo|BTPp z{;)JI|L_dmm8NG#Ox%*4cJE9Vr8&-$155AVIA#D?St@`1hwdSaLTI`^mFZb?Di_*vhZ7PDu|+yBm<)xZBeJuOw+G`V>CR~1gB=a07B zLO*}$IK4IXM|f_&y>ri7c&6X^>4U~RuDNgO+I=~B+_v7ZO~%X4gcam{W2m5^Wh@%wL`x@C6$cmCvz_~SrxKIpQ~9;x@=e|2bUw(Tc7{`8Ft zMfPbw`6T~ursvb1`S(qKH0-`_Y~G3&mW+$&CewN2k6QoD_4*fIZ2xY-rMDjL}bDJqYx<6VE9Kxd>$lix5^KG$TBLunl1s!d`@f2yY?0kMI$~ zIfSbS5#T!(VFZE&VH`plLMB2k!YqXO2#XNv5!N6)itq%&(+EF8_yq!e{u$epz~@)j zRlBS6;8?@Utt(gk%qhG8&$!`wFjuRYS?w-f*yt9Rqt7){+PPwhr>?qiVf~W2>UkXZ z8-l^zT&y~?xbsVpayQ&&&q{b7D{{Ey+9J3haH8QQUSEkLk`Tv}lb z$NgT5)T%F_;A)oxn6 zENocX$Ymi-?p1aDDsCp5c~{i0Vi3HugY@!6Rf|0hSRtWRB}lKVs#>^s`I5SN?rtTu zc*S!1OBnPg8dz?Pl6nxS#%g!f!j&tl+-p}Nzg~fO8mk-B^%f1Es)frNmsVA;sdZz7 zaaD_z*n3ZIBqe|n1LXcb4w9lv54dT$XCJJe~??PsajFb3KVOrJoOCrP2Ivp zD;nHfs&1isMJ>nKbWjuIaNG=?Lc3H~v$CPK-d)3$A?9%}=E~W#k(;NhX{ZLLPv~ed zQCGbfqQIXf(9trCD75R)A|;F4t)sQp!a9!IuWNKSEMC@tQB5(nBILL~P`s|Xp5xx6 zSpDMVE4fc~t3p+CX8r2LjVz!2Ed{bu|1ZOAp=^#&Y)ea7QU`8e!$9Zj5|%fvTHN3c zaYo$zfBW&99fK9{i1)>Z)3hIOW=MJv@n4aDN#I`+_}?!9Gkh%`e@-SfKK1t0aj9vk z(^KxyY)5e zAFaLC53CarTL)@S`P>-DVPX8kVfPg%#a`m?TNaoPH8OLlH{VfMOgDf`>m z-^<>f{a@KHX7^+t%RZeQ$i9;O)U;=(y)f<9({@k0G|e|zy?(l9 z`XkeKOy52IchldQ{=szfj5}tGn_-_(IAg&K&O(<~;De^lOFL`-MkbevKLdocKFysr zV81ETm^CkJQPzW5-^yyu>dX2;_AjS>JneQ+T|B*a`q~*U&)^*7alrRYO-)U+YBPst z-jX>Xb4up4%$b>YXD-gH%UqZF-OTNo|CzZb^ADMS%6vC7GAll7N|qz5B5POHD_K{k 
zB~PC=y=*#Hs{Adb-L}29H*AM&$87K0KC+#&U9oA>qtkCnza{;S^t;m2(lgU@(`TiZ zrQe&rDE;5k8`2+2e>8m<`fA(cPbX(gF;9JV>cNaZWxSt}XaBMNm-d(KhRoQ^k(uAf zoX9vT%bb^4omro`3S2#w`9$WfY1{&iO9cPA)b!Mh)a+CTw68F=IJK07uu}OOT4w0x z$kfDCORB?~Z!NSITT87K)@5mRX)7V6HEEBeHK(UC_ThX}+}1wC=QnX+3F2(t6WQr1hnpO7o|kNgGJ}B<)h#)igLfHiOM*Guz^A zBW;N`i!I4E&Nji8YD>3e*s^U7TfVK(R%|P^RoLd+?zb(lEwL@L)!A0s+_p8gN1)9u zwl>=)+h*Gq+taqKwr#fOY&&c(*ml}>+1hP;Y(873t=o3c)??$g!OvjrrB+!|lc=5l zzz@k@!|G=Tf!vzW4tBL9#1iqU;gayJ{j`-ha;fz!$Dx8{;?mInk~GW2P^ektsjXXrGm?pQwTqT6 zM!}l;6%!lX3l~2)k&Ys;Ld6l&ZDn9-0AW6A*6nvxg?K zaT(Symg_U?R@5(55Dj`34+C_OEPibX-Y}R}>N8?t0}@MCcox-FbK2T98B?mL(O6KH*=*6-$0T@+w?&gsSqqLLuK(EDVK}=9ED?>v!rOw%&MF+kcbK6l$Mngm&|ktRjzW%Mr#Gm z7|s?-i@uOOESybEEf#W6CN>N#b(I%ZIhmL^Qy5J2i_u%i#oqxO?DzZw^u%xV_s=zfTucD}A4i|rIZkbTwDw|WzC4|8RC9blnSuXTb zagnpAf*TRef}|_v<&=TfGNC{~CzE7yg*ie&Q6AKhyE%+noL8!9z{oJLOaMd9lKi5A zd$>_yFbS(@PF}u{$K4W!7R{*;W(s9h`2{E;uns8aZVl&`lvY&b%ye;yVNlN8lA?T) zdsRV6SyfJcei?ULIJX?fyj4Z$cYHZ*Y19+k?O&3e!Ho{*C}e?NRZ^UX#=3pq5Qd7!vA)S))T<(rA9!5@1F^;MQlF?iYd`1hh zE~yG>TRHcQFg}T`Jg2aLOS%@8S6q?LjlC9@TfyBKhA0gx$SDQ|m=D&XTxSvY%^@iH zvs~pBRjxT02F2XCa7GDCQc*!wUdf!f=(O@VTyi*16;uI?k-*&*h70pcY0T&3mUH97 zP~yL^h;?p35kxlO+N|;+Srf0#DkvxugiD^ARAr>vGNuw^usm}r%DGA5?3vKf zGR0n&SCy08Fno*gS270&5CL6tLh>&zfucgeik#4D>z84n2Qd~xXld7?mFDElMo+4G zL6RE8v|Wo?aZjmGER=&WqF2tPUz<(5q*mqLQ&l>@q>P&!&ZGfS2Bu-XNngsVDoU#6 zVhm0B3Y3Df5+_5NdP5}YRiUgLhN7Iy2xBNy#5Dt;pp>bAPzFKU!w<+8(IMYGa#?}5pt@l-S)f|?f2r+R1RR1^wj-1IP1DP>K| zC~)RfGlD*>!!dW@R^K$MfuPVSii&+`6slxE!W`+@zu}x;CSeU~VhBL|w ziwY{NTu~TuuRum~MBL4or#CZTJ(-gJW*U zbWk;~tf)fZoZ(Dn0-e^XxkWiu(EB;zoH-?Amr8Sr%5pG4awXxMkkdkLk>Y-o4#`w` zyqnn-?(Q$<6c#CNP+2%jZ4il%D-VN}4lFKF9fNYNBAiF6I6tp&W-jNt7Kj$5a&yBl zYKJmb$Odxr!a0iGs^bXLB4y7HXRE#w1e#yOW_8G&4BkCq6s3`@+qrwg;7~u^t+w>O zaF&3%zG^OYw5zlf!|48SRx!Gdt5p3}^2)FAS6ejsxR=SWbIRb-(bRZtx|GDBI;ScM z17+mG6(?zuG0T>&4^4?XSU2&>4dc|93uDe0;!*Ae>>vXvM zwGd9eL5m}km37r?EFpkSb;vd5!K@Hu4B~`=2D(U5wbG46 z#$DzskWZA3!`dfW*N?anu^3KYRmJRNaS;Yb6=o}18&y^1lkHStsRUz1@)0r8>VW4W 
zsJJmIozS&gn;l$)1<_$DU<&};!&rpTGV8xkAm)E?nT54ib?Ef0kuAjPC(`lH#4wb* z?M^-ZRyjgwoxhD*rLM^M>sDmEvLfSexFSO)>lEsE{`wUeD1A{0`0H0>{Ix4GurXA% z_@Sz$D=%W_Xw9YrU|5G$0ZVQe@iHazl~jxA9LrRbts|bml9T^ZrQ}0O$zQLOydk8N z{1=pxzp+yCLzR-hky0A-=iw;Kv)IiUG=JfvbFo`vz=cTemI_+083ICRyZ z`Gk*J1dOOg?gAfoGsSSeb&-#zACKf>^XGZ$>4u@^60apfCc-sWRe6@wu3A!S*NK+P@+3BSbb+!hm7%7etOyf_~%o zIq$#grCXq|HJJ_>fC6_`O zn)-U~w03wS*wZXo!u?r|HZ*V_YGX+9nkA{6UmLv$g*8j8h{m#LJwb0$q68hGMAHzx zg+;%Wiq@bXkd<0y(|>WFKu z6XGHBa~281*UkLQGT^=+LTDQ?1Je6nV1BgMm>=!6=0|&t`O&(*p5`a&i{>XHY5vjhr{$*Fh~ z^&1G9@Yrla@(}_Jj?q_G5*HIQJw^-iS{<)7Odk^&Ge%bw zmpO*d(%Z*G@NwXl=ZoSfqEAPu%rTm*82gyvMR7AmW#e9OL^?MU=vqRLE{fBPgqa-9 zk2S=_Xh+{V+I%xi%?K(MgHka?pb}+5l@y6i;Jyh0F=N1xaST@!H;GrS}zmyB2I!2W7!t%hI6*MFCVU$QcW0%hw^FgFSz%Wd0z?i>i36g;dqe3vX z63h?ONR;|cG~rN#V!{oAAS0+z#yChTCI-zyITFsWoB5&1ShdkARnwhlA12}ye#R&! z48XrRswj@gIYvG-iYN@fjkm^?@$I=WuAAq_+@qfd?rw_VU8pTC23Zt|PeHXxrBvL) zs!oW3K_3wk>z7MV6`l4v(dn)evwlzlirh#75g{oJkz8a5rnb@WMJYvHFQw>fr4)0m zl#Gg$hJBTkVik#)2Ali$kA(k!VpgEpdbp0{0r5QlS?n-+kUxNc$Y@w-_=5-<#7!Cp zWzlcd^PFBoXGPj4c>az%?wOssblN@lq)wWC#~m6T1@nPMbhGiiAzdxGaH%lfZ<7Hh z+B)*NxF}d4-i$9zB7JQ^RYb@3Wpp%4V10FDNr{H`X~eiGFj%~UzVyT$M`(MRX6d!@arB|@1bqauBO{98#`1by#2lb$pV1(y{h(=nO2llvR=b<$ zxUx?tjz*=Qhda$ zlvybelZuH|Aw?f=LzVH5rlf#tZT;pra63F3pquy<4ZK7?3cdH${nCa5#Bq2pLg|-1 z^oGQ}uvz>P`eOY;bG-J0Nc0NphPZfY@*3VvO^Z4Q?veCBMq03_jj&$VLyyhYM{0-j ztOtf`f6C9&M{D0+2k}A5acB|%pz}Nr-uVdPU;Fd*21}@W(cvV2L&P2+8&%{kZZ{>g z?dz$R4{+|ymJ|zh*K6x%UEiH3YIr)Hqy-7JfC##A*8d^oPuE@Bx9G5lVi9sje^Kuq zS_i43w>I(a1cm#1sUOg1!&B9W0eW~l_0y!;)RU9qsGf=FyV?4fi3DJ|!)LMnL!tE}hf zf(Wm-%+g1t>MaxW@ssqoOhi7|KqF5frY%1g2Zh=FmZA*k+JAlicS^tvw&`78*z89P zMxCw=_ST{{C3F=aC)Th*lM`!f*4D%td^xeY4&~0$_2rhKTm}N!Te=H^a;;SEacxel z`7vEiY`mz?i%q>RIySy&U#vNAf2=X5E7p+n+gM#;%xpmO0L>W!of)$RPytW@P+<@k zb=-*WQo!30LTiT`*YD62+@LBUrv3)deAudg?$6@2OIO#{r{LUe#Y*=ic1?TIs)Y@c zmM>hq61OQDC$Wnfld6zgwP+E`uR?wm1LLjX2df*Z7@`7EcYHd~Rl#D6#3hcz? 
z^fIh1Z6YpuOu8YE2&pw)pFc^tAi~j(L{7OSEyXtVpSwr?XIuA=?{+A~MCCG(+SQZr z20=~j(n+@qPX~8axRN`?Z8M-Xlw#P4hgA7e2vqZ=kqVdU zW<7wf2Wlx@`!J#pqlXY@gi1l^zmS~Iv0tR2RT3Ld4$ zQM4k$!>#6$eyBUo2l>b+xd@gB@-3RXkSKZ57;)3M2rkZGnH0~Zj=0G&JL#5jQJght z#5ed6lRgf{C6;^>gH+Gei+gQ%W5w1|7q{~qvWdU`^@g3sirj2D5*tQHT)2hH3^%LVq!PhUC7c6 zY_l5@V|rd@XJ&WD%+5?_W|K`$Y9quN3`juaV8N59sZbO_j2Layh*aVsVvZu_2-<+u zMx_`kTC5=beD3$&ci){myE~a~+kd<#nVt82-~0W1f8Ag2y}3_Fbj3VHs>>O=79CP0 zh;BJ$K_-?eqKOMHgarfHrulos+Fn?@qY1X}Xxi1(T>jfViCqAR^P(6F{B;9uz9D`S zjlGH9TO+6B{6EHjNN4;IR*T7d5;XTM(^~4YRRV9S(!lzwM)ZKDeOH~fIdmq*hw-$O_4~PITy{R zA4$0yPQD&#I4yB~{1F~L5`UI${Mq=mO*pz1d7w@=bxtAi+ z{W*NgQQsKkq80U`75!8l{*-#Jg5JL&^*8amAwHJ4yXkZIyEbu6^YzUj?`hu8Kl_^> zXy(}mXpPD?-=J#y-Y^%^=gvj4_U|On&iJ)#wbv>u6VT>+q<$~{B&knADW%M>_~%#g zk1@$V)pVyhAlHN!c5T9d?WOBllVO`cPUI?iU{>2){k+3R4(fkMS7vgQ%wl`QxFgB z_B7wqOrCq-BDPnh%4)<(#43RzP44?IP@ic(j@g+PY=RMtHd=;x6z8* zn(jcFChh*G!Tg245|CMw&nmE~<{7UAjz7ti+Uv!)Ldp5K2?p$GUv1Jku7&O9ga zrVA5`+Y`O-i=$0m7e`CHF|Njn#9j=gF?{|OXE4CTW<@KWjQ^}D@odxH=EPB?#uTde zdr|+(9%voR;zzC6_Z9zthbMH0;@Em7GnT=nVyC=UAH`!Wc--GP6&`#X-*`&#DC%eM z$p?bDzL2!|626_lf6H?iM|>h!cmiyxRKyW~Kdvd*acL8dS{w@1n;lZ{CH7tXKOgW` z@=rjSf&W?#cW=V3v`k!~;7jZ|{ND}LSWCvu1Z40 z{zh=QvlQbo+Q&1sa?E&)eiD!2ON~NHpQ*#rKSN!yRPBQ7qTX#%*1Ek3@K!KKX}5JhCUQ5`tWbTqn5KuB0qc|(#Nh+ zezuUuFZ9pMI{d?_qxdJEtYD`C?9i1m(oBXQS9Xn)y*gaeP)RbJ-`rw!o&z$qZI1gq ztU;Zz5}!1t%s$b1GqzhuB}?hXbY|Z&S*vUVkLWzUpO@63^TtqT{!$EE=vkF<20P2a zcJXry{G&+DhaU@oR_BI}g|*Aq*~DToYL91EYsaeo*=xHyIK~lWy#AT->aD}}&5$AS z!nSlA_o{6F1mv86GHPs^z;@xGK5X@FF>QxBPZm=?y(zZ4GRzU};`5i2^)2s31+gp0 zXEt-MLu;6c*E7m4>TIOjzm4fk&HX~1$Li4e6R0XWS90a@XwhmcGVg{Q(Yd9j&g41- zo;P45(sPL*7)uAq7+bf>7WRjrY?a?gfhy7ZE^56mtmXUXc3mHTbU79Ow4;v1l-chV z$>*NV*&r(oYs@z^Pe;o{JbG**g5(D0b1*4VL{8_rd7xRW2m`)qo@JrOYR#im8|+!f z5oBUHJ~-tb(KmdExaOPxcleLSx!7Dj7mX|F-ZI+q1 z?^KyUeH5(oaVl}|MNNr2Km9_T_tv5FS5Z}TPN^+Wqicn!*0~>8qO-5BgcKlaZ;jGDK%NQzDwKzTM)zkA&=y~0`*($|SzHgm=aSOe1xDIbTj%wl!gew>< z&Y$OcR5&;}3r(##G4!hdxu&Xt+3salQ zaCCvFH2Wvm)xuxw#9C^`f=o|%- 
zAB|CZ0OLc^hq=Mus-LT*nU7QPVW?re#Z-6xGN1eF@FCxiC;1E?j&SSlR4B*jJQKTp zfUTwveVyq^x0u!FZjdFP-Sg&%IuF;O^YTz<)*OJJZ8;U4XQ3M7S+kw$E1XtmiM&OU zZ`7H;j;0b5U75Tp)VYY^J{5Jd60`{d#LuH&vxAt+6MWaRjMPC_MKiMPe%Mm>JM8Pz7sjoo$l zIT!kQFu5^fbur&o5(I_UhqVJys(mZ?yf$)A0pf5KDYm&W`vBdj0&!nn@{LE=6H)oaI{(U!R17OftBrw?0j zBtB$j`>tkgFe4;4R^S^Fx2iB+jFQCbQM5n5uy5H{N6vo$eA0)jA1j%c8@5tnV}6Em zRmJq-`#|T}T74#~Q0D`6=zL3+&h|YSloE^G1rW&%^mX8^F1}qoU~%1O@`qPWn4z_F zJ{7x;)M3~DD!Yn!^&wtVx%QmiHn;omnaNFlyb0#8xvIMR^>Q z=N!sOP#$_slqZ2MsTb3DTWcmLA9g5j1LY-$@(xgzy*65F9VlnNE=nn~)|pYt`$4$} z&B0=Q1e8|nf?+9lf%1?;`3flii9y5i90lc7=R_&TL0R}lq1d}r;zP#t_0Z#N4dzUK+N6h)-b>zm^!`w)ZWXq|@OiM&Q2TP})uPp^{ ztrADKb&26o3}=8+bSP(oa#Kh#Ekz{LinFvmR=)9A8%R^pQl5XM@fcF8s+iG%uXvYU zYbaeQBpq+y+Oi^E@RsqtVeAqf+I0`wRhNS{Rd$(7IEFNkOz3rR%mFiD8ZPPy&-k|g z$l6{r6Xqa-#};6=lop3l0OiI@#2fTJX62xK-l5z8$}^h6ERV%L3d(T| zg6c?&1_J6M28u5H9XZqz_9okMK8IDXL{O0IHPTg=lVO-Ml)o z?hnJPvhkmRf^z9z{5fA+Z5B+Kn{dhy@V;=g09{NcS zJ@28PXb7D%Z^x?(4|?b)JoL35`pJgSa~}Eu5B+Hm{c;ceg@(|FJ#?2qfV{2Nqgw`dgxDh=$Clt(;7nG;GuuoLqF!BFYwT3G=#q1L%-8Qf7C<2z(b$a z5c*mVeV>PZ)I)Fa&|4Zp@AuI6dgw$=)Dc0&+*X5JoJ4Y`hE|6ZA0i6c<2=meXobU*F)dX5c+Hnz38Fe?xBCsLmzGk zy~RW49SsoeEVu1e4}H6bUTg@R+g3VQ3x_@Qn?3ZRhd$O2`YaDU?V<1X(0Tt)>#Td5 z?G2$%_t4jR=sP_0MN@=6%|q|^(6@W&=UH_9TbL8!3UOqhR4Ht(zRGwx*7!S>e_F8m zgTShS*MF;Ohr7&m05Ysatzol%kg2dn?2m&%jV3SJ}CVR-@X7zYcx488^fWBLk7E2b9BoTH7xZ+iUtz zS9?(NKpe^^QR}o7#$#q=6gjm0Z=fA62t~@N*q$?H`ig;8KGVDMh|MZ z&zuOwUZ)i-`e!D%q^{A(#C?fABfq4m z;_>`LANqxHzY`k3d|-`#Mf#l3KXMNdyCrUnru{HvoX~dFx*td63V85NH$*#nMOtbR z{V;ftrjcSSMTexXp+Q8oH?*b2Ix_qqs!wL=TSH5!$SnkP%GIFUYpj?c!-a=-QH0BA z?BYF1#;w+GD@?Cdk6lOWuxn{(S1uK(%`P*l=^Eoc1sMmlU9~cteBS_1sE?}nI4Flq zEk$`Al$RXJv_FW&QkPf9U9f2TXHk#O6WC(>(+g7l{_!*1TsPi}(Np5iKE*F=1y4io z$#P>`=tKYMk>PxDBm>dg@gH!IHCP!Q&2PkWR*)_f)BEL=H)uN;tBSF`A9S(3iUEfv zH!2h4yuRkSfc=L;S3L=+AiQ10MRehR}OG^xHl3!yfwS9{P@k&{ufqH+$%Z zJoFclAg<@HYY4sFL+8D5Em_C?fQSAw5B=tb(3g1VJ3aLM9{N*LgwCfZ97`8&@X)XG z&{tV>J*a<_^p8$$2im+4v{UC#l?G?*898 
zf$gTJ<@`tdYDl-a!Kz~7&MQ=s^Zqk|tnvoTCHAU5p=HA7zJ!M(EO%>n%T%=sO67Bd zyI{_%@EyPA#?g5$7>@Y7=sox=&kgeFWWQjqF;!;ZvpIe))5t4)wvh7|vNp$C@afL^ zL5MBZa=5B$j^n|J8imgv?gf21{x`>Yz1Uq>vI?I+-T?h}czVC5Ob=+Tt<&wmmF9-CW0ASqt z6u-p%7qHzgj60((alg8zpBeYHs5)8P8EuLCSw0Xbj5}X0CvoR95VhmZ7t&1@_vuiE z%GO#l5}a}8_%3lD#<$vW&wx+bJ2n|NJ91?5Y06-^3OIXLO5AJCmi7Elc&Lxmi+)aJ z+&6~$5DS5;TG)l2=iUe|db@|7^U#k^5&F{}`VtSFJ%OX)qGMBpe%wP}?4hSU^e;^j z`V$^{n}@!^L+9&Wry}mhJoI@U`g#xjx+y|`)I*=+p>zCl<32V;=tn(t_N@+98+$E> zvS|Gjp&#+kXM5YeV{*;517kJTt3lbVt&m!;2Sx6rn_B0AVlqK^mVm-y?Rpm|W>z3P zCC!5yHX7@_pv=LMMZXmk>cbjR>vQCB^!XMjX3Q0yr$8}168(csG4h3&Jab}nK22W; zu^OHYitG6WprjqFE>O67V;+jTc95EL`h6Wf0a3iHtV@H8$yP^_Fk0fj^>=dCy#bI_N9;$p1? zWdjW78pPPXRqeE9<*X-%z56&lIyD`7TX;ja^DCF`!Q?9U7+lAB;OB8 zo5S;UP#z6=@Vl+iN(#SI=+tXi3XXjTJSSG6xjv`F$SGK~QFa z$F`jxfbxVR`GhCu1yCMzcwT`UMH}EQ6L;(-76eo$qR$!nW|GzLd_`%G@GJslpCfrW zc^t2v{T%t96{axZ|{!X2(s=<8Ul?K#w%?MQwfC{H_-8$p5NBi{H2P}VyU)qPug-fDBGQE zSqq9h$z!~cgEwqDrGIikTK%rp`X_^0_pX#Ru}Ch1vLTcl1l%cSW&cd*W3%NN@Gvti z<$6%4uhscZP|O`Mk-Q%iwp?558=#;;>3DH>oov1`C{Ml83YN#fW3EvP<(Hr^7J?Pv zmlS+<TYEx!yN7HjEipzvWQ7UzIsu7Zl>w}aw(<8n~wd281?P!@L@tRR)lq1TGF zSV}O+jYeWzTiIF})MBx%V9cZJqGa$`|9l*jvmBidvKAPuoNt3-?j?y8KLW+9IE3;d zD7QOurs2}mL5K1>P#9(Fv2#J81%eguHuXmAHg>tO9j9WKfXB>sOdH%9o94(_q2)-< zW6aH`buU2)qTwp=tPSHR zlHUZ%QAhGTQ0{doOF_w@md&U&pzIBKg5&^RqKUapn1|Fp10J*T6+d4C3Zo-FB+u=j zuvmTW0!5xgH6FWP^H}t6fzltMv;I?>$4Y(?6t-?1ub`0UG3v<$$l)0oeVjC*<5`Gc-lb`4b`v4QX|E~PHtQUo+FOVgP^cj+phtItw3gR zJhumw{Z6e(#g94U_B7V8pF%pMe!g`Gb@aYvbVg0js) zAJTFnR(O=#Kw09{x(k$JA%z}eFM8DR*q6Xl40))}QBasaHWPjh3PNf;uU0acU4SG{ z7O_?{exc4{H9P|p+i&pPe2=FaltT{sI#671Y@;01W{X0f(3i_(itTNbXR$sE9`>tN z=Q}~EIFb*60z-dE$d_%lPSx8SHXof#M% z3X-K{ekfBmhuQpK!F*2+43sjP%uz0z&zSFnB|g`pB_L+zD|r~h@VJo5Wy_UNdMQ67 z$7nwC%vd(RDVfWrd!fcirjjYGNaoYIObNeZEoFjKz#SK3*qkDt9Ll7V#iFJci~5Y( zmssfxi52eRjEO=YH!B_m?{Kmlr1*BjSgQd4oaCJ7=)JqiwiV0o$=}ELNJso3?wnz;rkUSz>?G}7h}qZEbQN!NyEx5K@RV< z+@wtlu*09)3Z50W90;y0PyAt=wY1&cD@a2&~X*6HS zj$|Nr-<@}og!ER`uL 
zrSy}OXj7&J_U@tv!N$xMad2yqTU1+MGEjHJ5HRytJ-{oPMK`h})9H}K5uOrL{2Haq zNMTdPuN3t6^l=_Gki-Nfx_97QBICixAh?RTjve;9N086UlVvzLxHOwj_jYObRv$0l zas^wOF*>*`Q_-B47D~&~Y3u{3GD}=-&vq0LoN8fRV;L#|uHLSw2HoW=3V9Ru?tI5+ zsg%iA6o*C5?1)Mm+=eeDOIrdy-4?d(`g)G)9-^xIiS!*|0~6cA&0gROy+KNl z+N1(%4%KQpa(Xif*i&11D9TiW=I~@jvUrR+KNu8BurW_3nCe&=L=P2bDVffW;Z5MR z&JE@YNxw$MCwml+FqTUfMpeVK>J0f@S_|=-R+}o9HU(-L8gwpL#^Zc7f!E}?s|HX0 z^=JWUZdgm@;p||=997Vwb6e$z{4yhFBTS;V(Y==(p({bU#^jt_SkVWZ#RsWeCYfhP z9R$lGx=|5nti=&1f%zIdh(*~Zw44AG#ZM-q(a317!gj1J%Ptr&UwOuSFBUd0j2`D4 zt1wS9jOpy)p!q(qAQYRnVv(k)kX`@!AxOaxI6qUudH_FVz|d65v@SsBIZ#$Vk3fof zr_n8!LtfQ)>E8h;X;@RL-x8?gbe^q;RJEb%P=<-BhR^{0pNyeqyGHnFlJg=2eeZ-q zCO$tdyz22(oYgZ-=E_>2kI?fE1gX&y2N!wh7->*NqKJ|5CjP1b3%^v5!jiU#u2b`y zQ;{?9XQsmOD2M5T;T~p>3=`3+LmGdfz(lGc7MiBBo6^~f>Y2un6)Wv@wk=jQISACu zL*2A*Rr3^NdaJx^dC|g!7cGnhS7k9zN#=S>g$hR3_W6sb(5lr}^mni93VQpxR(D<* ztm^8yvLoo}?(4^D6*B?sVt^SIhB|c^boO7l%AQ64j6tTBRBwEh?YB9!s7iWSLu3WGG1U0z{?gNuKHJ35!(51vtSC5bs91Z5=oZj43c(Sh<7%sxkgP1r@5lQnV& zY#z9%X0t-oUz1H|I-pN~VI?}jEZx<95xOJuMuJ#S)0ZMGTP-a}j+BQmA>(vhBWCp_ z6dVs^hbozTS$CQWKH8RJoO3uiq;u4!hx9~jqPa>rjRp~56SR~~PyDC|!0Cc{uLlkk z40_@`x^5ieCm+>H%k_2WH-4Myx}wtw#&9NCj0Q>Df;M3rOi;<@;ms<0F_*?Y9NjM$ z)n4&NNm!3Ip3?I*rmRoVPAys+!m#xhOf1_MAZ>${-B(<;Cg{GRuYXxjPte)5eD!6) z%4Pj4x>l_+i27=z2~{x+l{ihVS>bHN3Pb=YRwz5UmE}Y=7{E-`CZN;{Mv z!Gw_#}YvF_)rO1yWDXewVgJzKbk5Wv zz#2BFY$;NVe%%Yc>7?15&2vo}wNbrLm?$Ct_lL(E==V2K2~? 
zXeKf4$r6G_Vy%%GBckdvQa0Jb*2D{*=zfi$jhN14au90e1ddtKa3f~14WkF-EY0ag z(G9Bn@UlQhv}%4M3f0UfP9QLsjijA0>+Aur5{W@#Q_bF8omF>kG^f5k8wR3C8z4q{ z-Kr+Rq*b}1S_GZSM-#Oq{6ST`(=8+*JwO|OhGv6`Dm6xPeK#T&ba84~Yp@at5x1KW zNA+dVaYdqDiKrh$*MW9;2?rsx240WT*0DcTNiIPz+N>;@Q>ReQ6* zI+~1ZbodoX#Z*xzh8)x57_7>p1$V5nJxDh=#+`lOWeCZ%ZD^4eYb;U^NoX(DHR)14 z%tXm12`!y^bP9n*DlTCKgSq68Oj=ysY$rhPrG___D;qH{$3e7(N1K5>N}@Wpapx`Y zBF_2BsC+h;P__Ay)xV^Bb!S@Dg${S;(MI!}KsdqTg)}{}=&vY9oW~4Ox9-#>qll( Date: Tue, 7 Sep 2021 20:30:49 -0400 Subject: [PATCH 25/89] added starts_with --- src/be13_api | 2 +- src/test_be.cpp | 76 ++++++++++-------- .../{hello_win64.exe => hello_win64_exe} | Bin 3 files changed, 44 insertions(+), 34 deletions(-) rename src/tests/{hello_win64.exe => hello_win64_exe} (100%) diff --git a/src/be13_api b/src/be13_api index 69e6010c..3692145c 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit 69e6010cec74ebba63e33460e740f7a6859e9098 +Subproject commit 3692145cec13d8cbb9bd40b463b27f16842202bd diff --git a/src/test_be.cpp b/src/test_be.cpp index 2d21146a..4ca81135 100644 --- a/src/test_be.cpp +++ b/src/test_be.cpp @@ -216,7 +216,18 @@ TEST_CASE("scan_email8", "[support]") { } } -TEST_CASE("scan_email16", "[support]") { +TEST_CASE("sbuf_decompress_zlib_new", "[support]") { + auto *sbufp = map_file("test_hello.gz"); + REQUIRE( sbuf_decompress::is_gzip_header( *sbufp, 0) == true); + REQUIRE( sbuf_decompress::is_gzip_header( *sbufp, 10) == false); + auto *decomp = sbuf_decompress::sbuf_new_decompress( *sbufp, 1024*1024, "GZIP", sbuf_decompress::mode_t::GZIP, 0 ); + REQUIRE( decomp != nullptr); + REQUIRE( decomp->asString() == "hello@world.com\n"); + delete decomp; + delete sbufp; +} + +TEST_CASE("scan_email16", "[scanners]") { /* utf-16 tests */ { uint8_t c[] {"h\000t\000t\000p\000:\000/\000/\000w\000w\000w\000.\000h\000h\000s\000.\000g\000o\000v\000/\000o\000" @@ -231,17 +242,6 @@ TEST_CASE("scan_email16", "[support]") { } } -TEST_CASE("sbuf_decompress_zlib_new", "[support]") { 
- auto *sbufp = map_file("test_hello.gz"); - REQUIRE( sbuf_decompress::is_gzip_header( *sbufp, 0) == true); - REQUIRE( sbuf_decompress::is_gzip_header( *sbufp, 10) == false); - auto *decomp = sbuf_decompress::sbuf_new_decompress( *sbufp, 1024*1024, "GZIP", sbuf_decompress::mode_t::GZIP, 0 ); - REQUIRE( decomp != nullptr); - REQUIRE( decomp->asString() == "hello@world.com\n"); - delete decomp; - delete sbufp; -} - TEST_CASE("scan_exif", "[scanners]") { auto *sbufp = map_file("1.jpg"); REQUIRE( sbufp->bufsize == 7323 ); @@ -258,20 +258,6 @@ TEST_CASE("scan_msxml","[scanners]") { delete sbufp; } -TEST_CASE("scan_pdf", "[scanners]") { - auto *sbufp = map_file("pdf_words2.pdf"); - pdf_extractor pe(*sbufp); - pe.find_streams(); - REQUIRE( pe.streams.size() == 4 ); - REQUIRE( pe.streams[1].stream_start == 2214); - REQUIRE( pe.streams[1].endstream_tag == 4827); - pe.decompress_streams_extract_text(); - REQUIRE( pe.texts.size() == 1 ); - REQUIRE( pe.texts[0].txt.substr(0,30) == "-rw-r--r-- 1 simsong staff"); - delete sbufp; -} - - TEST_CASE("scan_json1", "[scanners]") { /* Make a scanner set with a single scanner and a single command to enable all the scanners. 
*/ @@ -407,10 +393,22 @@ TEST_CASE("scan_net", "[scanners]") { /* Break the port and make sure that the header is no longer valid */ buf[frame_offset + ETHERNET_FRAME_SIZE] += 0x10; // increment header length REQUIRE( scan_net_t::sanityCheckIP46Header( sbufip, 0 , &h) == false ); +} - +TEST_CASE("scan_pdf", "[scanners]") { + auto *sbufp = map_file("pdf_words2.pdf"); + pdf_extractor pe(*sbufp); + pe.find_streams(); + REQUIRE( pe.streams.size() == 4 ); + REQUIRE( pe.streams[1].stream_start == 2214); + REQUIRE( pe.streams[1].endstream_tag == 4827); + pe.decompress_streams_extract_text(); + REQUIRE( pe.texts.size() == 1 ); + REQUIRE( pe.texts[0].txt.substr(0,30) == "-rw-r--r-- 1 simsong staff"); + delete sbufp; } + TEST_CASE("scan_vcard", "[scanners]") { /* Make a scanner set with a single scanner and a single command to enable all the scanners. */ @@ -559,12 +557,16 @@ std::filesystem::path validate(std::string image_fname, std::vector &expe if (ends_with(pos,"|0")) { pos = pos.substr(0,pos.size()-2); } - if (words.size()>=2 && - (words[0]==expected[i].feature.pos) && - (words[1]==expected[i].feature.feature) && - (words.size()==2 || - (words[2]==expected[i].feature.context || expected[i].feature.context.size()==0))) { - found = true; + if (words.size()==2 && (words[0]==expected[i].feature.pos) && (words[1]==expected[i].feature.feature)){ + found=true; + break; + } + if (words.size()==3 + && (words[0]==expected[i].feature.pos) + && (words[1]==expected[i].feature.feature) + && ((words[2]==expected[i].feature.context) || + starts_with(words[2],expected[i].feature.context))){ + found=true; break; } } @@ -658,6 +660,14 @@ TEST_CASE("test_base16json", "[phase1]") { validate("test_base16json.txt", ex2); } +TEST_CASE("test_elf", "[phase1]") { + std::vector ex { + Check("elf.txt", Feature( "0", "9e218cee3b190e8f59ef323b27f4d339481516e9", "")) + }; + validate("hello_elf", ex); + +} + TEST_CASE("test_gzip", "[phase1]") { std::vector ex3 { Check("email.txt", Feature( "0-GZIP-0", 
"hello@world.com", "hello@world.com\\012")) diff --git a/src/tests/hello_win64.exe b/src/tests/hello_win64_exe similarity index 100% rename from src/tests/hello_win64.exe rename to src/tests/hello_win64_exe From ebe848de975409b10cfd92fcc8266933bb9e133c Mon Sep 17 00:00:00 2001 From: "Simson L. Garfinkel" Date: Wed, 8 Sep 2021 21:53:20 -0400 Subject: [PATCH 26/89] properly records filename in feature recorder --- src/be13_api | 2 +- src/main.cpp | 53 +++++++++++++++++++++++++--------------------------- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/src/be13_api b/src/be13_api index 3692145c..5e86d460 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit 3692145cec13d8cbb9bd40b463b27f16842202bd +Subproject commit 5e86d460766db30c138a0bee02204ef9fdb2dfa3 diff --git a/src/main.cpp b/src/main.cpp index 0aac6223..4be8aa5a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -314,7 +314,6 @@ struct notify_opts { } } - int main(int argc,char **argv) { mtrace(); @@ -469,10 +468,22 @@ int main(int argc,char **argv) argc -= optind; argv += optind; + /* Get image or directory */ + if (argc==0 || *argv == nullptr) { + if (cfg.opt_recurse) { + std::cerr << "filedir not provided\n"; + } else { + std::cerr << "imagefile not provided\n"; + } + exit(1); + } + sc.input_fname = *argv; + /* Create a configuration that will be used to initialize the scanners */ /* Make individual configuration options appear on the command line interface. 
*/ - sc.get_global_config("debug_histogram_malloc_fail_frequency",&AtomicUnicodeHistogram::debug_histogram_malloc_fail_frequency, - "Set >0 to make histogram maker fail with memory allocations"); + sc.get_global_config("debug_histogram_malloc_fail_frequency", + &AtomicUnicodeHistogram::debug_histogram_malloc_fail_frequency, + "Set >0 to make histogram maker fail with memory allocations"); sc.get_global_config("hash_alg",&be_hash_name,"Specifies hash algorithm to be used for all hash calculations"); //sc.get_global_config("write_feature_files",&opt_write_feature_files,"Write features to flat files"); //sc.get_global_config("write_feature_sqlite3",&opt_write_sqlite3,"Write feature files to report.sqlite3"); @@ -495,7 +506,6 @@ int main(int argc,char **argv) ss.add_scanners(scanners_builtin); /* Print usage if necessary. Requires scanner set, but not commands applied. - * This would create the outdir if one was specified. */ if ( opt_h ) { usage(progname, ss); @@ -506,8 +516,17 @@ int main(int argc,char **argv) exit(1); } - /* Remember if directory is clean or not */ - bool clean_start = directory_empty(sc.outdir); + /* The zap option wipes the contents of a directory, useful for debugging */ + if (opt_zap){ + for (const auto &entry : std::filesystem::recursive_directory_iterator( sc.outdir ) ) { + if (! std::filesystem::is_directory(entry.path())){ + std::cout << "erasing " << entry.path().string() << "\n"; + std::filesystem::remove( entry ); + } + } + } + + bool clean_start = opt_zap || directory_empty(sc.outdir); /* Applying the scanner commands will create the alert recorder. */ try { @@ -531,34 +550,12 @@ int main(int argc,char **argv) } } - /* The zap option wipes the contents of a directory, useful for debugging */ - if (opt_zap){ - for (const auto &entry : std::filesystem::recursive_directory_iterator( sc.outdir ) ) { - if (! 
std::filesystem::is_directory(entry.path())){ - std::cout << "erasing " << entry.path().string() << "\n"; - std::filesystem::remove( entry ); - } - } - clean_start = true; - } - if (clean_start==false){ /* Restarting */ bulk_extractor_restarter r(sc,cfg); r.restart(); // load the restart file and rename report.xml } - /* Get image or directory */ - if (*argv == NULL) { - if (cfg.opt_recurse) { - std::cerr << "filedir not provided\n"; - } else { - std::cerr << "imagefile not provided\n"; - } - exit(1); - } - sc.input_fname = *argv; - image_process *p = image_process::open( sc.input_fname, cfg.opt_recurse, cfg.opt_pagesize, cfg.opt_marginsize); /* are we supposed to run the path printer? */ From d9377fd1c9800c71f2e4c7aa709e8c29c9f3526e Mon Sep 17 00:00:00 2001 From: "Simson L. Garfinkel" Date: Wed, 8 Sep 2021 22:06:01 -0400 Subject: [PATCH 27/89] refactored main() out from bulk_extractor.cpp, so that we can do end-to-end testing on the executable --- configure.ac | 2 +- src/Makefile.am | 1 + src/bulk_extractor.cpp | 732 +++++++++++++++++++++++++++++++++++++++++ src/bulk_extractor.h | 128 +------ src/main.cpp | 721 +--------------------------------------- 5 files changed, 741 insertions(+), 843 deletions(-) create mode 100644 src/bulk_extractor.cpp diff --git a/configure.ac b/configure.ac index 7d6c6fb0..3a0179b6 100644 --- a/configure.ac +++ b/configure.ac @@ -134,7 +134,7 @@ AC_SYS_LARGEFILE AC_SYS_POSIX_TERMIOS ## Check for headers used by bulk Extractor -AC_CHECK_HEADERS([dlfcn.h fcntl.h inttypes.h libgen.h limits.h mmap.h pwd.h signal.h stdint.h sys/cdefs.h sys/disk.h sys/fcntl.h sys/ioctl.h sys/mman.h sys/mmap.h sys/mount.h sys/param.h sys/socket.h sys/stat.h sys/types.h sys/time.h sys/resource.h sys/sysctl.h sys/vmmeter.h termcap.h time.h unistd.h windows.h CoreServices/CoreServices.h mach-o/dyld.h]) +AC_CHECK_HEADERS([dlfcn.h fcntl.h inttypes.h libgen.h limits.h mmap.h pwd.h signal.h stdint.h sys/cdefs.h curses.h sys/disk.h sys/fcntl.h sys/ioctl.h sys/mman.h 
sys/mmap.h sys/mount.h sys/param.h sys/socket.h sys/stat.h sys/types.h sys/time.h sys/resource.h sys/sysctl.h sys/vmmeter.h term.h time.h unistd.h windows.h CoreServices/CoreServices.h mach-o/dyld.h]) AC_CHECK_FUNCS([getuid getpwuid gethostname getrusage gmtime_r getprogname isxdigit ishexnumber le64toh localtime_r _lseeki64 inet_ntop ioctl isatty pread64 pread printf mmap munmap MD5 mkstemp mktemp sleep SleepEx strptime usleep vasprintf _NSGetExecutablePath]) AC_CHECK_FUNCS([CreateProcess LoadLibrary IncrementAtomic InterlockedIncrement]) diff --git a/src/Makefile.am b/src/Makefile.am index 7e5e344f..801ef7ae 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -111,6 +111,7 @@ bulk_extractor_parts = \ bulk_extractor_scanners.h \ base64_forensic.cpp \ base64_forensic.h \ + bulk_extractor.cpp \ bulk_extractor.h \ findopts.h \ image_process.cpp \ diff --git a/src/bulk_extractor.cpp b/src/bulk_extractor.cpp new file mode 100644 index 00000000..e60e7c20 --- /dev/null +++ b/src/bulk_extractor.cpp @@ -0,0 +1,732 @@ +/* + * main.cpp + * + * The main() for bulk_extractor. + * This has all of the code and global variables that aren't needed when BE is running as a library. + */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_MCHECK +#include +#else +void mtrace(){} +void muntrace(){} +#endif + +#ifdef HAVE_SYS_RESOURCE_H +#include +#endif + +#ifdef HAVE_CURSES_H +#include +#endif +#ifdef HAVE_TERM_H +#include +#endif + +// Open standard input in binary mode by default on Win32. 
+// See http://gnuwin32.sourceforge.net/compile.html for more +#ifdef WIN32 +int _CRT_fmode = _O_BINARY; +#endif + +#ifdef HAVE_SYS_IOCTL_H +#include +#endif + + +#include "dfxml_cpp/src/dfxml_writer.h" +#include "dfxml_cpp/src/hash_t.h" // needs config.h + +#include "be13_api/aftimer.h" +#include "be13_api/scanner_params.h" +#include "be13_api/scanner_set.h" +#include "be13_api/utils.h" // needs config.h +#include "be13_api/word_and_context_list.h" + +#include "findopts.h" +#include "image_process.h" +#include "phase1.h" +#include "path_printer.h" + +/* Bring in the definitions */ +#include "bulk_extractor_scanners.h" +#include "bulk_extractor_restarter.h" + +/** + * Output the #defines for our debug parameters. Used by the automake system. + */ +[[noreturn]] void debug_help() +{ + puts("#define DEBUG_PEDANTIC 0x0001 // check values more rigorously"); + puts("#define DEBUG_PRINT_STEPS 0x0002 // prints as each scanner is started"); + puts("#define DEBUG_SCANNER 0x0004 // dump all feature writes to stderr"); + puts("#define DEBUG_NO_SCANNERS 0x0008 // do not run the scanners "); + puts("#define DEBUG_DUMP_DATA 0x0010 // dump data as it is seen "); + puts("#define DEBUG_INFO 0x0040 // print extra info"); + puts("#define DEBUG_EXIT_EARLY 1000 // just print the size of the volume and exis "); + puts("#define DEBUG_ALLOCATE_512MiB 1002 // Allocate 512MiB, but don't set any flags "); + exit(1); +} + +/**************************************************************** + *** Usage for the stand-alone program + ****************************************************************/ + +void usage(const char *progname, scanner_set &ss) +{ + Phase1::Config cfg; // get a default config + + std::cout << "bulk_extractor version " PACKAGE_VERSION " " << /* svn_revision << */ "\n"; + std::cout << "Usage: " << progname << " [options] imagefile\n"; + std::cout << " runs bulk extractor and outputs to stdout a summary of what was found where\n"; + std::cout << "\n"; + std::cout << "Required 
parameters:\n"; + std::cout << " imagefile - the file to extract\n"; + std::cout << " or -R filedir - recurse through a directory of files\n"; + std::cout << " -o outdir - specifies output directory. Must not exist.\n"; + std::cout << " bulk_extractor creates this directory.\n"; + std::cout << "Options:\n"; + std::cout << " -i - INFO mode. Do a quick random sample and print a report.\n"; + std::cout << " -b banner.txt- Add banner.txt contents to the top of every output file.\n"; + std::cout << " -r alert_list.txt - a file containing the alert list of features to alert\n"; + std::cout << " (can be a feature file or a list of globs)\n"; + std::cout << " (can be repeated.)\n"; + std::cout << " -w stop_list.txt - a file containing the stop list of features (white list\n"; + std::cout << " (can be a feature file or a list of globs)s\n"; + std::cout << " (can be repeated.)\n"; + std::cout << " -F - Read a list of regular expressions from to find\n"; + std::cout << " -f - find occurrences of ; may be repeated.\n"; + std::cout << " results go into find.txt\n"; + std::cout << " -q - quiet - no status output (changed in v2.0).\n"; + std::cout << " -s frac[:passes] - Set random sampling parameters\n"; + std::cout << " -1 - bulk_extractor v1.x legacy mode\n"; + std::cout << "\nTuning parameters:\n"; + // std::cout << " -C NN - specifies the size of the context window (default " << feature_recorder::context_window_default << ")\n"; + std::cout << " -S fr::window=NN specifies context window for recorder to NN\n"; + std::cout << " -S fr::window_before=NN specifies context window before to NN for recorder\n"; + std::cout << " -S fr::window_after=NN specifies context window after to NN for recorder\n"; + std::cout << " -G NN - specify the page size (default " << cfg.opt_pagesize << ")\n"; + std::cout << " -g NN - specify margin (default " < - maximum number of minutes to wait after all data read\n"; + std::cout << " default is " << cfg.max_bad_alloc_errors << "\n"; + std::cout << 
"\nPath Processing Mode:\n"; + std::cout << " -p /f - print the value of with a given format.\n"; + std::cout << " formats: r = raw; h = hex.\n"; + std::cout << " Specify -p - for interactive mode.\n"; + std::cout << " Specify -p -http for HTTP mode.\n"; + std::cout << "\nParallelizing:\n"; + std::cout << " -Y - Start processing at o1 (o1 may be 1, 1K, 1M or 1G)\n"; + std::cout << " -Y - - Process o1-o2\n"; + std::cout << " -A - Add to all reported feature offsets\n"; + std::cout << "\nDebugging:\n"; + std::cout << " -h - print this message\n"; + std::cout << " -H - print detailed info on the scanners\n"; + std::cout << " -V - print version number\n"; + std::cout << " -z nn - start on page nn\n"; + std::cout << " -dN - debug mode (see source code)\n"; + std::cout << " -Z - zap (erase) output directory\n"; + std::cout << "\nControl of Scanners:\n"; + std::cout << " -P - Specifies a plugin directory\n"; + std::cout << " Default dirs include /usr/local/lib/bulk_extractor /usr/lib/bulk_extractor and\n"; + std::cout << " BE_PATH environment variable\n"; + std::cout << " -e enables -- -e all enables all\n"; + std::cout << " -x disable -- -x all disables all\n"; + std::cout << " -E - turn off all scanners except \n"; + std::cout << " (Same as -x all -e )\n"; + std::cout << " note: -e, -x and -E commands are executed in order\n"; + std::cout << " e.g.: '-E gzip -e facebook' runs only gzip and facebook\n"; + std::cout << " -S name=value - sets a bulk extractor option name to be value\n"; + std::cout << "\n"; + ss.info_scanners(std::cerr, false, true, 'e', 'x'); +#ifdef HAVE_LIBEWF + std::cout << " HAS SUPPORT FOR E01 FILES\n"; +#endif +#ifdef HAVE_EXIV2 + std::cout << " EXIV2 ENABLED\n"; +#endif +#ifdef HAVE_LIBLIGHTGREP + std::cout << " LIGHTGREP ENABLED\n"; +#endif + std::cout << "\n"; +} + +[[noreturn]] void throw_FileNotFoundError(const std::string &fname) +{ + std::cerr << "Cannot open: " << fname << "\n"; + throw std::runtime_error("Cannot open file"); +} + +/** + * 
scaled_stoi64: + * Like a normal stoi, except it can handle modifies k, m, and g + */ + + +/* + * Make sure that the filename provided is sane. + * That is, do not allow the analysis of a *.E02 file... + */ +void validate_path(const std::filesystem::path fn) +{ + if (!std::filesystem::exists(fn)){ + std::cerr << "file does not exist: " << fn << "\n"; + throw std::runtime_error("file not found."); + } + if (fn.extension()=="E02" || fn.extension()=="e02"){ + std::cerr << "Error: invalid file name\n"; + std::cerr << "Do not use bulk_extractor to process individual EnCase files.\n"; + std::cerr << "Instead, just run bulk_extractor with FILENAME.E01\n"; + std::cerr << "The other files in an EnCase multi-volume archive will be opened\n"; + std::cerr << "automatically.\n"; + throw std::runtime_error("run on E02."); + } +} + +/** + * Create the dfxml output + */ + +std::string be_hash_name {"sha1"}; +static void add_if_present(std::vector &scanner_dirs,const std::string &dir) +{ + if (access(dir.c_str(),O_RDONLY) == 0){ + scanner_dirs.push_back(dir); + } +} + +struct notify_opts { + scanner_set *ssp; + aftimer *master_timer; + std::atomic *fraction_done; + bool opt_legacy; +}; + +[[noreturn]] void notify_thread(struct notify_opts *o) +{ + assert(o->ssp != nullptr); + const char *cl=""; + const char *ho=""; + const char *ce=""; + const char *cd=""; + int cols = 80; +#ifdef HAVE_LIBTERMCAP + char buf[65536], *table=buf; + cols = tgetnum("co"); + if (!o->opt_legacy) { + const char *str = ::getenv("TERM"); + if (!str){ + std::cerr << "Warning: TERM environment variable not set." << std::endl; + } else { + switch (tgetent(buf, str)) { + case 0: + std::cerr << "Warning: No terminal entry '" << str << "'. " << std::endl; + break; + case -1: + std::cerr << "Warning: terminfo database culd not be found." 
<< std::endl; + break; + case 1: // success + ho = tgetstr("ho", &table); // home + cl = tgetstr("cl", &table); // clear screen + ce = tgetstr("ce", &table); // clear to end of line + cd = tgetstr("cd", &table); // clear to end of screen + break; + } + } + } +#endif + + std::cout << cl; // clear screen + while(true){ + + // get screen size change if we can! +#if defined(HAVE_IOCTL) && defined(HAVE_STRUCT_WINSIZE_WS_COL) + struct winsize ws; + if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws)==0){ + cols = ws.ws_col; + } +#endif + + time_t rawtime = time (0); + struct tm timeinfo = *(localtime(&rawtime)); + std::map stats = o->ssp->get_realtime_stats(); + + // get the times + o->master_timer->lap(); + stats["elapsed_time"] = o->master_timer->elapsed_text(); + if (o->fraction_done) { + double done = *o->fraction_done; + stats["fraction_read"] = std::to_string(done * 100) + std::string(" %"); + stats["estimated_time_remaining"] = o->master_timer->eta_text(done); + stats["estimated_date_completion"] = o->master_timer->eta_date(done); + + // print the legacy status + if(o->opt_legacy) { + char buf1[64], buf2[64]; + snprintf(buf1, sizeof(buf1), "%2d:%02d:%02d",timeinfo.tm_hour,timeinfo.tm_min,timeinfo.tm_sec); + snprintf(buf2, sizeof(buf2), "(%.2f%%)", done * 100); + uint64_t max_offset = strtoll( stats[ scanner_set::MAX_OFFSET ].c_str() , nullptr, 10); + std::cout << buf1 << " Offset " << max_offset / (1000*1000) << "MB " + << buf2 << " Done in " << stats["estimated_time_remaining"] + << " at " << stats["estimated_time_completion"] << std::endl; + } + } + if (!o->opt_legacy) { + std::cout << ho << "bulk_extractor " << asctime(&timeinfo) << " " << std::endl; + for(const auto &it : stats ){ + std::cout << it.first << ": " << it.second; + if (ce[0] ){ + std::cout << ce; + } else { + // Space out to the 50 column to erase any junk + int spaces = 50 - (it.first.size() + it.second.size()); + for(int i=0;ifraction_done ){ + if (cols>10){ + double done = *o->fraction_done; + int before 
= (cols - 3) * done; + int after = (cols - 3) * (1.0 - done); + std::cout << std::string(before,'=') << '>' << std::string(after,'.') << '|' << ce << std::endl; + } + } + std::cout << cd << std::endl << std::endl; + } + std::this_thread::sleep_for(std::chrono::seconds(1)); + } +} + +int bulk_extractor_main(int argc,char **argv) +{ + mtrace(); + + const char *progname = argv[0]; + const auto original_argc = argc; + const auto original_argv = argv; + + word_and_context_list alert_list; /* shold be flagged */ + word_and_context_list stop_list; /* should be ignored */ + std::atomic fraction_done = 0; /* a callback of sorts */ + aftimer master_timer; + + scanner_config sc; // config for be13_api + Phase1::Config cfg; // config for the image_processing system + + cfg.fraction_done = &fraction_done; + + /* Options */ + std::string opt_path {}; + int opt_zap = 0; + int opt_h = 0; + int opt_H = 0; + std::string opt_sampling_params; + //bool opt_write_feature_files = true; + //bool opt_write_sqlite3 = false; + + /* Startup */ + setvbuf(stdout,0,_IONBF,0); // don't buffer stdout + std::vector scanner_dirs; // where to look for scanners + + /* Add the default plugin_path */ + add_if_present(scanner_dirs,"/usr/local/lib/bulk_extractor"); + add_if_present(scanner_dirs,"/usr/lib/bulk_extractor"); + add_if_present(scanner_dirs,"."); + + if (getenv("BE_PATH")) { + std::vector dirs = split(getenv("BE_PATH"),':'); + for(std::vector::const_iterator it = dirs.begin(); it!=dirs.end(); it++){ + add_if_present(scanner_dirs,*it); + } + } + +#ifdef WIN32 + setmode(1,O_BINARY); // make stdout binary +#endif + + if (argc==1) opt_h=1; // generate help if no arguments provided + + /* Process options */ + const std::string ALL { "all" }; + int ch; + char *empty = strdup(""); + while ((ch = getopt(argc, argv, "A:B:b:C:d:E:e:F:f:G:g:HhiJj:M:m:o:P:p:qRr:S:s:VW:w:x:Y:z:Z1")) != -1) { + if (optarg==nullptr) optarg=empty; + std::string arg = optarg!=ALL ? 
optarg : scanner_config::scanner_command::ALL_SCANNERS; + switch (ch) { + case 'A': sc.offset_add = stoi64(optarg);break; + case 'b': sc.banner_file = optarg; break; + case 'C': sc.context_window_default = atoi(optarg);break; + case 'd': + { + if (strcmp(optarg,"h")==0) debug_help(); + cfg.debug = atoi(optarg); + if (cfg.debug==0) cfg.debug=1; + } + break; + case 'E': /* Enable all scanners */ + sc.push_scanner_command( scanner_config::scanner_command::ALL_SCANNERS, scanner_config::scanner_command::DISABLE); + sc.push_scanner_command( arg, scanner_config::scanner_command::ENABLE); + break; + case 'e': /* enable a spedcific scanner */ + sc.push_scanner_command(arg, scanner_config::scanner_command::ENABLE); + break; + case 'F': FindOpts::get().Files.push_back(optarg); break; + case 'f': FindOpts::get().Patterns.push_back(optarg); break; + case 'G': cfg.opt_pagesize = scaled_stoi64(optarg); break; + case 'g': cfg.opt_marginsize = scaled_stoi64(optarg); break; + case 'i': + std::cout << "info mode:\n"; + cfg.opt_info = true; + break; + case 'j': cfg.num_threads = atoi(optarg); break; + case 'J': cfg.num_threads = 0; break; + case 'M': sc.max_depth = atoi(optarg); break; + case 'm': cfg.max_bad_alloc_errors = atoi(optarg); break; + case 'o': sc.outdir = optarg;break; + case 'P': scanner_dirs.push_back(optarg);break; + case 'p': opt_path = optarg; break; + case 'q': cfg.opt_quiet = true; break; + case 'r': + if (alert_list.readfile(optarg)){ + throw_FileNotFoundError(optarg); + } + break; + case 'R': cfg.opt_recurse = true; break; + case 'S': + { + std::vector params = split(optarg,'='); + if (params.size()!=2){ + std::cerr << "Invalid paramter: " << optarg << "\n"; + exit(1); + } + sc.namevals[params[0]] = params[1]; + continue; + } + case 's': + opt_sampling_params = optarg; + break; + case 'V': std::cout << "bulk_extractor " << PACKAGE_VERSION << "\n"; exit (0); + case 'W': + fprintf(stderr,"-W has been deprecated. 
Specify with -S word_min=NN and -S word_max=NN\n"); + exit(1); + break; + case 'w': if (stop_list.readfile(optarg)){ + throw_FileNotFoundError(optarg); + } + break; + case 'x': + sc.push_scanner_command( arg, scanner_config::scanner_command::DISABLE); + break; + case 'Y': { + std::string optargs = optarg; + size_t dash = optargs.find('-'); + if (dash==std::string::npos){ + cfg.opt_offset_start = stoi64(optargs); + } else { + cfg.opt_offset_start = scaled_stoi64(optargs.substr(0,dash)); + cfg.opt_offset_end = scaled_stoi64(optargs.substr(dash+1)); + } + break; + } + case 'z': cfg.opt_page_start = stoi64(optarg);break; + case 'Z': opt_zap=true;break; + case '1': cfg.opt_legacy = true; break; + case 'H': + opt_H++; + continue; + case 'h': + opt_h++; + continue; + } + } + + /* Legacy mode if stdout is not a tty */ +#ifdef HAVE_ISATTY + if (!isatty(1)){ + cfg.opt_legacy = true; + } +#endif + + argc -= optind; + argv += optind; + + /* Get image or directory */ + if (argc==0 || *argv == nullptr) { + if (cfg.opt_recurse) { + std::cerr << "filedir not provided\n"; + } else { + std::cerr << "imagefile not provided\n"; + } + exit(1); + } + sc.input_fname = *argv; + + /* Create a configuration that will be used to initialize the scanners */ + /* Make individual configuration options appear on the command line interface. 
*/ + sc.get_global_config("debug_histogram_malloc_fail_frequency", + &AtomicUnicodeHistogram::debug_histogram_malloc_fail_frequency, + "Set >0 to make histogram maker fail with memory allocations"); + sc.get_global_config("hash_alg",&be_hash_name,"Specifies hash algorithm to be used for all hash calculations"); + //sc.get_global_config("write_feature_files",&opt_write_feature_files,"Write features to flat files"); + //sc.get_global_config("write_feature_sqlite3",&opt_write_sqlite3,"Write feature files to report.sqlite3"); + sc.get_global_config("report_read_errors",&cfg.opt_report_read_errors,"Report read errors"); + + /* Load all the scanners and enable the ones we care about */ + + //plugin::load_scanner_directories(scanner_dirs,sc); + if (opt_H || opt_h) { + sc.outdir = scanner_config::NO_OUTDIR; // don't create outdir if we are getting help. + } + + if (sc.outdir.empty()){ + std::cerr << "error: -o outdir must be specified\n"; + exit(1); + } + + struct feature_recorder_set::flags_t f; + scanner_set ss(sc, f, nullptr); // make a scanner_set but with no XML writer. We will create it below + ss.add_scanners(scanners_builtin); + + /* Print usage if necessary. Requires scanner set, but not commands applied. + */ + if ( opt_h ) { + usage(progname, ss); + exit(1); + } + if ( opt_H ) { + ss.info_scanners(std::cout, true, true, 'e', 'x'); + exit(1); + } + + /* The zap option wipes the contents of a directory, useful for debugging */ + if (opt_zap){ + for (const auto &entry : std::filesystem::recursive_directory_iterator( sc.outdir ) ) { + if (! std::filesystem::is_directory(entry.path())){ + std::cout << "erasing " << entry.path().string() << "\n"; + std::filesystem::remove( entry ); + } + } + } + + bool clean_start = opt_zap || directory_empty(sc.outdir); + + /* Applying the scanner commands will create the alert recorder. 
*/ + try { + ss.apply_scanner_commands(); + } + catch (const scanner_set::NoSuchScanner &e) { + std::cerr << "no such scanner: " << e.what() << "\n"; + exit(1); + } + + /* Give an error if a find list was specified + * but no scanner that uses the find list is enabled. + */ + + if (!FindOpts::get().empty()) { + /* Look through the enabled scanners and make sure that + * at least one of them is a FIND scanner + */ + if (!ss.is_find_scanner_enabled()){ + throw std::runtime_error("find words are specified with -F but no find scanner is enabled.\n"); + } + } + + if (clean_start==false){ + /* Restarting */ + bulk_extractor_restarter r(sc,cfg); + r.restart(); // load the restart file and rename report.xml + } + + image_process *p = image_process::open( sc.input_fname, cfg.opt_recurse, cfg.opt_pagesize, cfg.opt_marginsize); + + /* are we supposed to run the path printer? */ + if (opt_path.size() > 0){ + if (argc!=1) throw std::runtime_error("-p requires a single argument."); + path_printer pp(&ss, p, std::cout); + if (opt_path=="-http" || opt_path=="--http"){ + pp.process_http(std::cin); + } else if (opt_path=="-i" || opt_path=="-"){ + pp.process_interactive(std::cin); + } else { + pp.process_path(opt_path); + } + exit(0); + } + + /* Open the image file (or the device) now. + * We use *p because we don't know which subclass we will be getting. + */ + + dfxml_writer *xreport = new dfxml_writer(sc.outdir / Phase1::REPORT_FILENAME, false); // do not make DTD + ss.set_dfxml_writer( xreport ); + /* Start the clock */ + master_timer.start(); + + Phase1 phase1(cfg, *p, ss); + + /* Validate the args */ + if ( argc == 0 ) throw std::runtime_error("No disk image provided. Run with -h for help."); + if ( argc > 1 ){ + throw std::runtime_error("Too many arguments provided. Run with -h for help."); + } + validate_path(sc.input_fname); + + /* Create the DFXML file in the report directory. + * If we are restarting, the dfxml file was renamed. 
+ */ + + /* Determine the feature files that will be used from the scanners that were enabled */ + auto feature_file_names = ss.feature_file_list(); +#if 0 + uint32_t flags = 0; + if (stop_list.size()>0) flags |= feature_recorder_set::CREATE_STOP_LIST_RECORDERS; + if (opt_write_sqlite3) flags |= feature_recorder_set::ENABLE_SQLITE3_RECORDERS; + if (!opt_write_feature_files) flags |= feature_recorder_set::DISABLE_FILE_RECORDERS; + +#endif + + /* provide documentation to the user; the DFXML information comes from elsewhere */ + if (!cfg.opt_quiet){ + std::cout << "bulk_extractor version: " << PACKAGE_VERSION << "\n"; + std::cout << "Input file: " << sc.input_fname << "\n"; + std::cout << "Output directory: " << sc.outdir << "\n"; + std::cout << "Disk Size: " << p->image_size() << "\n"; + std::cout << "Scanners: "; + for (auto const &it : ss.get_enabled_scanners()){ + std::cout << it << " "; + } + std::cout << "\n"; + + if (cfg.num_threads>0){ + std::cout << "Threads: " << cfg.num_threads << "\n"; + } else { + std::cout << "Threading Disabled\n"; + } + } + + /*** PHASE 1 --- Run on the input image */ + struct notify_opts o; + o.ssp = &ss; + o.master_timer = &master_timer; + o.fraction_done = &fraction_done; + o.opt_legacy = cfg.opt_legacy; + new std::thread(¬ify_thread, &o); // launch the notify thread + ss.phase_scan(); + +#if 0 + if ( fs.flag_set(feature_recorder_set::ENABLE_SQLITE3_RECORDERS )) { + fs.db_transaction_begin(); + } +#endif + if (opt_sampling_params.size()>0){ + cfg.set_sampling_parameters(opt_sampling_params); + } + + /* Go multi-threaded if requested */ + if (cfg.num_threads > 0){ + std::cout << "going multi-threaded...(" << cfg.num_threads << ")\n"; + ss.launch_workers(cfg.num_threads); + } else { + std::cout << "running single-threaded (DEBUG)...\n"; + + } + + phase1.dfxml_write_create( original_argc, original_argv); + xreport->xmlout("provided_filename", sc.input_fname); // save this information + xreport->add_timestamp("phase1 start"); + + 
std::cerr << "Calling check_previously_processed at one\n"; + + try { + phase1.phase1_run(); + ss.join(); // wait for threads to come together + } + catch (const feature_recorder::DiskWriteError &e) { + std::cerr << "Disk write error during Phase 1 (scanning). Disk is probably full." << std::endl + << "Remove extra files and restart bulk_extractor with the exact same command line to continue." << std::endl; + exit(1); + } + +#if 0 + if ( fs.flag_set(feature_recorder_set::ENABLE_SQLITE3_RECORDERS )) { + fs.db_transaction_commit(); + } +#endif + xreport->add_timestamp("phase1 end"); + if (phase1.image_hash.size() > 0 ){ + std::cout << "Hash of Disk Image: " << phase1.image_hash << "\n"; + } + + /*** PHASE 2 --- Shutdown ***/ + if (!cfg.opt_quiet) std::cout << "Phase 2. Shutting down scanners\n"; + xreport->add_timestamp("phase2 start"); + try { + ss.shutdown(); + } + catch (const feature_recorder::DiskWriteError &e) { + std::cerr << "Disk write error during Phase 2 (histogram making). Disk is probably full." << std::endl + << "Remove extra files and restart bulk_extractor with the exact same command line to continue." 
<< std::endl; + exit(1); + } + + xreport->add_timestamp("phase2 end"); + master_timer.stop(); + + /*** PHASE 3 --- report and then print final usage information ***/ + xreport->push("report"); + xreport->xmlout("total_bytes",phase1.total_bytes); + xreport->xmlout("elapsed_seconds",master_timer.elapsed_seconds()); + xreport->xmlout("max_depth_seen",ss.get_max_depth_seen()); + xreport->xmlout("dup_bytes_encountered",ss.get_dup_bytes_encountered()); + ss.dump_scanner_stats(); + ss.dump_name_count_stats(); + xreport->pop("report"); + xreport->add_rusage(); + xreport->pop("dfxml"); // bulk_extractor + xreport->close(); + + if (cfg.opt_quiet==0){ + float mb_per_sec = (phase1.total_bytes / 1000000.0) / master_timer.elapsed_seconds(); + + std::cout << "All Threads Finished!\n"; + std::cout.precision(4); + std::cout << "Elapsed time: " << master_timer.elapsed_seconds() << " sec." << std::endl + << "Total MB processed: " << int(phase1.total_bytes / 1000000) << std::endl + << "Overall performance: " << mb_per_sec << " << MBytes/sec "; + if (cfg.num_threads>0){ + std::cout << mb_per_sec/cfg.num_threads << " (MBytes/sec/thread)\n"; + } + std::cout << "sbufs created: " << sbuf_t::sbuf_total << std::endl; + std::cout << "sbufs unaccounted: " << sbuf_t::sbuf_count << " (should be 0) " << std::endl; + } + + try { + feature_recorder &fr = ss.fs.named_feature_recorder("email"); + std::cout << "Total " << fr.name << " features found: " << fr.features_written << std::endl; + } + catch (const feature_recorder_set::NoSuchFeatureRecorder &e) { + std::cout << "Did not scan for email addresses." << std::endl; + } + + muntrace(); + exit(0); +} diff --git a/src/bulk_extractor.h b/src/bulk_extractor.h index f936cb38..60d406ac 100644 --- a/src/bulk_extractor.h +++ b/src/bulk_extractor.h @@ -1,133 +1,15 @@ /* * - * Bulk Extractor's master include file. - * This is being phased out. 
*/ #ifndef _BULK_EXTRACTOR_H_ #define _BULK_EXTRACTOR_H_ -/* Don't include config.h twice */ -#ifndef PACKAGE_NAME -#include "config.h" -#endif - -#ifdef WIN32 -# include -# include -# include -#endif - -#ifdef _WIN32 -/* For some reason this doesn't work properly with mingw */ -#undef HAVE_EXTERN_PROGNAME -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_UNISTD_H -# include -#endif - -//#ifdef HAVE_SYS_TIME_H -//#include -//#endif - -//#ifdef HAVE_SYS_TYPES_H -//# include -//#endif - -#ifdef HAVE_SYS_PARAM_H -# include -#endif - -#ifdef HAVE_SYS_STAT_H -# include -#endif - -#ifdef HAVE_SYS_IOCTL_H -# include -#endif - -#ifdef HAVE_SYS_MOUNT_H -# include -#endif - -#ifdef HAVE_SYS_DISK_H -# include -#endif - -#ifdef HAVE_LIBGEN_H -# include -#endif - -#ifdef HAVE_SYS_CDEFS_H -# include -#endif - -#ifdef HAVE_FCNTL_H -#include -#endif - -#ifdef HAVE_SYS_FCNTL_H -#include -#endif - -#ifdef HAVE_STDINT_H -#include -#endif - -#ifdef HAVE_SYS_RESOURCE_H -#include -#endif - -#ifdef HAVE_UNISTD_H -#include -#endif - -#ifdef HAVE_SIGNAL_H -#include -#endif - -#ifdef HAVE_MMAP_H -#include -#endif - -#ifdef HAVE_SYS_MMAP_H -#include -#endif - -#ifdef HAVE_SYS_MMAN_H -#include -#endif - -//#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7) -//# define ATTR_FORMAT(param,arg) __attribute__ ((__printf__,param,arg)) -//#else -//# define ATTR_FORMAT(spec) /* empty */ -//#endif - - -/* bulk_extractor.cpp */ +#include "be13_api/scanner_set.h" -//#include "be13_api/beregex.h" -//#include "word_and_context_list.h" +void debug_help(); +void usage(const char *progname, scanner_set &ss); +void validate_path(const std::filesystem::path fn); +int bulk_extractor_main(int argc,char **argv); -//extern scanner_t *scanners_builtin[]; #endif diff --git a/src/main.cpp b/src/main.cpp index 4be8aa5a..3ee11d15 100644 --- a/src/main.cpp +++ 
b/src/main.cpp @@ -6,727 +6,10 @@ */ #include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_MCHECK -#include -#else -void mtrace(){} -void muntrace(){} -#endif - -#ifdef HAVE_SYS_RESOURCE_H -#include -#endif - -#ifdef HAVE_TERMCAP_H -//#include -#include -#include -#endif - -// Open standard input in binary mode by default on Win32. -// See http://gnuwin32.sourceforge.net/compile.html for more -#ifdef WIN32 -int _CRT_fmode = _O_BINARY; -#endif - -#ifdef HAVE_SYS_IOCTL_H -#include -#endif - - -#include "dfxml_cpp/src/dfxml_writer.h" -#include "dfxml_cpp/src/hash_t.h" // needs config.h - -#include "be13_api/aftimer.h" -#include "be13_api/scanner_params.h" -#include "be13_api/scanner_set.h" -#include "be13_api/utils.h" // needs config.h -#include "be13_api/word_and_context_list.h" - -#include "findopts.h" -#include "image_process.h" -#include "phase1.h" -#include "path_printer.h" - -/* Bring in the definitions */ -#include "bulk_extractor_scanners.h" -#include "bulk_extractor_restarter.h" - -/** - * Output the #defines for our debug parameters. Used by the automake system. 
- */ -[[noreturn]] void debug_help() -{ - puts("#define DEBUG_PEDANTIC 0x0001 // check values more rigorously"); - puts("#define DEBUG_PRINT_STEPS 0x0002 // prints as each scanner is started"); - puts("#define DEBUG_SCANNER 0x0004 // dump all feature writes to stderr"); - puts("#define DEBUG_NO_SCANNERS 0x0008 // do not run the scanners "); - puts("#define DEBUG_DUMP_DATA 0x0010 // dump data as it is seen "); - puts("#define DEBUG_INFO 0x0040 // print extra info"); - puts("#define DEBUG_EXIT_EARLY 1000 // just print the size of the volume and exis "); - puts("#define DEBUG_ALLOCATE_512MiB 1002 // Allocate 512MiB, but don't set any flags "); - exit(1); -} - -/**************************************************************** - *** Usage for the stand-alone program - ****************************************************************/ - -static void usage(const char *progname, scanner_set &ss) -{ - Phase1::Config cfg; // get a default config - - std::cout << "bulk_extractor version " PACKAGE_VERSION " " << /* svn_revision << */ "\n"; - std::cout << "Usage: " << progname << " [options] imagefile\n"; - std::cout << " runs bulk extractor and outputs to stdout a summary of what was found where\n"; - std::cout << "\n"; - std::cout << "Required parameters:\n"; - std::cout << " imagefile - the file to extract\n"; - std::cout << " or -R filedir - recurse through a directory of files\n"; - std::cout << " -o outdir - specifies output directory. Must not exist.\n"; - std::cout << " bulk_extractor creates this directory.\n"; - std::cout << "Options:\n"; - std::cout << " -i - INFO mode. 
Do a quick random sample and print a report.\n"; - std::cout << " -b banner.txt- Add banner.txt contents to the top of every output file.\n"; - std::cout << " -r alert_list.txt - a file containing the alert list of features to alert\n"; - std::cout << " (can be a feature file or a list of globs)\n"; - std::cout << " (can be repeated.)\n"; - std::cout << " -w stop_list.txt - a file containing the stop list of features (white list\n"; - std::cout << " (can be a feature file or a list of globs)s\n"; - std::cout << " (can be repeated.)\n"; - std::cout << " -F - Read a list of regular expressions from to find\n"; - std::cout << " -f - find occurrences of ; may be repeated.\n"; - std::cout << " results go into find.txt\n"; - std::cout << " -q - quiet - no status output (changed in v2.0).\n"; - std::cout << " -s frac[:passes] - Set random sampling parameters\n"; - std::cout << " -1 - bulk_extractor v1.x legacy mode\n"; - std::cout << "\nTuning parameters:\n"; - // std::cout << " -C NN - specifies the size of the context window (default " << feature_recorder::context_window_default << ")\n"; - std::cout << " -S fr::window=NN specifies context window for recorder to NN\n"; - std::cout << " -S fr::window_before=NN specifies context window before to NN for recorder\n"; - std::cout << " -S fr::window_after=NN specifies context window after to NN for recorder\n"; - std::cout << " -G NN - specify the page size (default " << cfg.opt_pagesize << ")\n"; - std::cout << " -g NN - specify margin (default " < - maximum number of minutes to wait after all data read\n"; - std::cout << " default is " << cfg.max_bad_alloc_errors << "\n"; - std::cout << "\nPath Processing Mode:\n"; - std::cout << " -p /f - print the value of with a given format.\n"; - std::cout << " formats: r = raw; h = hex.\n"; - std::cout << " Specify -p - for interactive mode.\n"; - std::cout << " Specify -p -http for HTTP mode.\n"; - std::cout << "\nParallelizing:\n"; - std::cout << " -Y - Start processing at o1 (o1 
may be 1, 1K, 1M or 1G)\n"; - std::cout << " -Y - - Process o1-o2\n"; - std::cout << " -A - Add to all reported feature offsets\n"; - std::cout << "\nDebugging:\n"; - std::cout << " -h - print this message\n"; - std::cout << " -H - print detailed info on the scanners\n"; - std::cout << " -V - print version number\n"; - std::cout << " -z nn - start on page nn\n"; - std::cout << " -dN - debug mode (see source code)\n"; - std::cout << " -Z - zap (erase) output directory\n"; - std::cout << "\nControl of Scanners:\n"; - std::cout << " -P - Specifies a plugin directory\n"; - std::cout << " Default dirs include /usr/local/lib/bulk_extractor /usr/lib/bulk_extractor and\n"; - std::cout << " BE_PATH environment variable\n"; - std::cout << " -e enables -- -e all enables all\n"; - std::cout << " -x disable -- -x all disables all\n"; - std::cout << " -E - turn off all scanners except \n"; - std::cout << " (Same as -x all -e )\n"; - std::cout << " note: -e, -x and -E commands are executed in order\n"; - std::cout << " e.g.: '-E gzip -e facebook' runs only gzip and facebook\n"; - std::cout << " -S name=value - sets a bulk extractor option name to be value\n"; - std::cout << "\n"; - ss.info_scanners(std::cerr, false, true, 'e', 'x'); -#ifdef HAVE_LIBEWF - std::cout << " HAS SUPPORT FOR E01 FILES\n"; -#endif -#ifdef HAVE_EXIV2 - std::cout << " EXIV2 ENABLED\n"; -#endif -#ifdef HAVE_LIBLIGHTGREP - std::cout << " LIGHTGREP ENABLED\n"; -#endif - std::cout << "\n"; -} - - -[[noreturn]] void throw_FileNotFoundError(const std::string &fname) -{ - std::cerr << "Cannot open: " << fname << "\n"; - throw std::runtime_error("Cannot open file"); -} - -/** - * scaled_stoi64: - * Like a normal stoi, except it can handle modifies k, m, and g - */ - - -/* - * Make sure that the filename provided is sane. - * That is, do not allow the analysis of a *.E02 file... 
- */ -void validate_path(const std::filesystem::path fn) -{ - if (!std::filesystem::exists(fn)){ - std::cerr << "file does not exist: " << fn << "\n"; - throw std::runtime_error("file not found."); - } - if (fn.extension()=="E02" || fn.extension()=="e02"){ - std::cerr << "Error: invalid file name\n"; - std::cerr << "Do not use bulk_extractor to process individual EnCase files.\n"; - std::cerr << "Instead, just run bulk_extractor with FILENAME.E01\n"; - std::cerr << "The other files in an EnCase multi-volume archive will be opened\n"; - std::cerr << "automatically.\n"; - throw std::runtime_error("run on E02."); - } -} - -/** - * Create the dfxml output - */ - -std::string be_hash_name {"sha1"}; -static void add_if_present(std::vector &scanner_dirs,const std::string &dir) -{ - if (access(dir.c_str(),O_RDONLY) == 0){ - scanner_dirs.push_back(dir); - } -} - -struct notify_opts { - scanner_set *ssp; - aftimer *master_timer; - std::atomic *fraction_done; - bool opt_legacy; -}; - -[[noreturn]] void notify_thread(struct notify_opts *o) -{ - assert(o->ssp != nullptr); - const char *cl=""; - const char *ho=""; - const char *ce=""; - const char *cd=""; - int cols = 80; -#ifdef HAVE_LIBTERMCAP - char buf[65536], *table=buf; - cols = tgetnum("co"); - if (!o->opt_legacy) { - const char *str = ::getenv("TERM"); - if (!str){ - std::cerr << "Warning: TERM environment variable not set." << std::endl; - } else { - switch (tgetent(buf, str)) { - case 0: - std::cerr << "Warning: No terminal entry '" << str << "'. " << std::endl; - break; - case -1: - std::cerr << "Warning: terminfo database culd not be found." << std::endl; - break; - case 1: // success - ho = tgetstr("ho", &table); // home - cl = tgetstr("cl", &table); // clear screen - ce = tgetstr("ce", &table); // clear to end of line - cd = tgetstr("cd", &table); // clear to end of screen - break; - } - } - } -#endif - - std::cout << cl; // clear screen - while(true){ - - // get screen size change if we can! 
-#if defined(HAVE_IOCTL) && defined(HAVE_STRUCT_WINSIZE_WS_COL) - struct winsize ws; - if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws)==0){ - cols = ws.ws_col; - } -#endif - - time_t rawtime = time (0); - struct tm timeinfo = *(localtime(&rawtime)); - std::map stats = o->ssp->get_realtime_stats(); - - // get the times - o->master_timer->lap(); - stats["elapsed_time"] = o->master_timer->elapsed_text(); - if (o->fraction_done) { - double done = *o->fraction_done; - stats["fraction_read"] = std::to_string(done * 100) + std::string(" %"); - stats["estimated_time_remaining"] = o->master_timer->eta_text(done); - stats["estimated_date_completion"] = o->master_timer->eta_date(done); - - // print the legacy status - if(o->opt_legacy) { - char buf1[64], buf2[64]; - snprintf(buf1, sizeof(buf1), "%2d:%02d:%02d",timeinfo.tm_hour,timeinfo.tm_min,timeinfo.tm_sec); - snprintf(buf2, sizeof(buf2), "(%.2f%%)", done * 100); - uint64_t max_offset = strtoll( stats[ scanner_set::MAX_OFFSET ].c_str() , nullptr, 10); - std::cout << buf1 << " Offset " << max_offset / (1000*1000) << "MB " - << buf2 << " Done in " << stats["estimated_time_remaining"] - << " at " << stats["estimated_time_completion"] << std::endl; - } - } - if (!o->opt_legacy) { - std::cout << ho << "bulk_extractor " << asctime(&timeinfo) << " " << std::endl; - for(const auto &it : stats ){ - std::cout << it.first << ": " << it.second; - if (ce[0] ){ - std::cout << ce; - } else { - // Space out to the 50 column to erase any junk - int spaces = 50 - (it.first.size() + it.second.size()); - for(int i=0;ifraction_done ){ - if (cols>10){ - double done = *o->fraction_done; - int before = (cols - 3) * done; - int after = (cols - 3) * (1.0 - done); - std::cout << std::string(before,'=') << '>' << std::string(after,'.') << '|' << ce << std::endl; - } - } - std::cout << cd << std::endl << std::endl; - } - std::this_thread::sleep_for(std::chrono::seconds(1)); - } -} +#include "bulk_extractor.h" int main(int argc,char **argv) { - mtrace(); - - 
const char *progname = argv[0]; - const auto original_argc = argc; - const auto original_argv = argv; - - word_and_context_list alert_list; /* shold be flagged */ - word_and_context_list stop_list; /* should be ignored */ - std::atomic fraction_done = 0; /* a callback of sorts */ - aftimer master_timer; - - scanner_config sc; // config for be13_api - Phase1::Config cfg; // config for the image_processing system - - cfg.fraction_done = &fraction_done; - - /* Options */ - std::string opt_path {}; - int opt_zap = 0; - int opt_h = 0; - int opt_H = 0; - std::string opt_sampling_params; - //bool opt_write_feature_files = true; - //bool opt_write_sqlite3 = false; - - /* Startup */ - setvbuf(stdout,0,_IONBF,0); // don't buffer stdout - std::vector scanner_dirs; // where to look for scanners - - /* Add the default plugin_path */ - add_if_present(scanner_dirs,"/usr/local/lib/bulk_extractor"); - add_if_present(scanner_dirs,"/usr/lib/bulk_extractor"); - add_if_present(scanner_dirs,"."); - - if (getenv("BE_PATH")) { - std::vector dirs = split(getenv("BE_PATH"),':'); - for(std::vector::const_iterator it = dirs.begin(); it!=dirs.end(); it++){ - add_if_present(scanner_dirs,*it); - } - } - -#ifdef WIN32 - setmode(1,O_BINARY); // make stdout binary -#endif - - if (argc==1) opt_h=1; // generate help if no arguments provided - - /* Process options */ - const std::string ALL { "all" }; - int ch; - char *empty = strdup(""); - while ((ch = getopt(argc, argv, "A:B:b:C:d:E:e:F:f:G:g:HhiJj:M:m:o:P:p:qRr:S:s:VW:w:x:Y:z:Z1")) != -1) { - if (optarg==nullptr) optarg=empty; - std::string arg = optarg!=ALL ? 
optarg : scanner_config::scanner_command::ALL_SCANNERS; - switch (ch) { - case 'A': sc.offset_add = stoi64(optarg);break; - case 'b': sc.banner_file = optarg; break; - case 'C': sc.context_window_default = atoi(optarg);break; - case 'd': - { - if (strcmp(optarg,"h")==0) debug_help(); - cfg.debug = atoi(optarg); - if (cfg.debug==0) cfg.debug=1; - } - break; - case 'E': /* Enable all scanners */ - sc.push_scanner_command( scanner_config::scanner_command::ALL_SCANNERS, scanner_config::scanner_command::DISABLE); - sc.push_scanner_command( arg, scanner_config::scanner_command::ENABLE); - break; - case 'e': /* enable a spedcific scanner */ - sc.push_scanner_command(arg, scanner_config::scanner_command::ENABLE); - break; - case 'F': FindOpts::get().Files.push_back(optarg); break; - case 'f': FindOpts::get().Patterns.push_back(optarg); break; - case 'G': cfg.opt_pagesize = scaled_stoi64(optarg); break; - case 'g': cfg.opt_marginsize = scaled_stoi64(optarg); break; - case 'i': - std::cout << "info mode:\n"; - cfg.opt_info = true; - break; - case 'j': cfg.num_threads = atoi(optarg); break; - case 'J': cfg.num_threads = 0; break; - case 'M': sc.max_depth = atoi(optarg); break; - case 'm': cfg.max_bad_alloc_errors = atoi(optarg); break; - case 'o': sc.outdir = optarg;break; - case 'P': scanner_dirs.push_back(optarg);break; - case 'p': opt_path = optarg; break; - case 'q': cfg.opt_quiet = true; break; - case 'r': - if (alert_list.readfile(optarg)){ - throw_FileNotFoundError(optarg); - } - break; - case 'R': cfg.opt_recurse = true; break; - case 'S': - { - std::vector params = split(optarg,'='); - if (params.size()!=2){ - std::cerr << "Invalid paramter: " << optarg << "\n"; - exit(1); - } - sc.namevals[params[0]] = params[1]; - continue; - } - case 's': - opt_sampling_params = optarg; - break; - case 'V': std::cout << "bulk_extractor " << PACKAGE_VERSION << "\n"; exit (0); - case 'W': - fprintf(stderr,"-W has been deprecated. 
Specify with -S word_min=NN and -S word_max=NN\n"); - exit(1); - break; - case 'w': if (stop_list.readfile(optarg)){ - throw_FileNotFoundError(optarg); - } - break; - case 'x': - sc.push_scanner_command( arg, scanner_config::scanner_command::DISABLE); - break; - case 'Y': { - std::string optargs = optarg; - size_t dash = optargs.find('-'); - if (dash==std::string::npos){ - cfg.opt_offset_start = stoi64(optargs); - } else { - cfg.opt_offset_start = scaled_stoi64(optargs.substr(0,dash)); - cfg.opt_offset_end = scaled_stoi64(optargs.substr(dash+1)); - } - break; - } - case 'z': cfg.opt_page_start = stoi64(optarg);break; - case 'Z': opt_zap=true;break; - case '1': cfg.opt_legacy = true; break; - case 'H': - opt_H++; - continue; - case 'h': - opt_h++; - continue; - } - } - - /* Legacy mode if stdout is not a tty */ -#ifdef HAVE_ISATTY - if (!isatty(1)){ - cfg.opt_legacy = true; - } -#endif - - argc -= optind; - argv += optind; - - /* Get image or directory */ - if (argc==0 || *argv == nullptr) { - if (cfg.opt_recurse) { - std::cerr << "filedir not provided\n"; - } else { - std::cerr << "imagefile not provided\n"; - } - exit(1); - } - sc.input_fname = *argv; - - /* Create a configuration that will be used to initialize the scanners */ - /* Make individual configuration options appear on the command line interface. 
*/ - sc.get_global_config("debug_histogram_malloc_fail_frequency", - &AtomicUnicodeHistogram::debug_histogram_malloc_fail_frequency, - "Set >0 to make histogram maker fail with memory allocations"); - sc.get_global_config("hash_alg",&be_hash_name,"Specifies hash algorithm to be used for all hash calculations"); - //sc.get_global_config("write_feature_files",&opt_write_feature_files,"Write features to flat files"); - //sc.get_global_config("write_feature_sqlite3",&opt_write_sqlite3,"Write feature files to report.sqlite3"); - sc.get_global_config("report_read_errors",&cfg.opt_report_read_errors,"Report read errors"); - - /* Load all the scanners and enable the ones we care about */ - - //plugin::load_scanner_directories(scanner_dirs,sc); - if (opt_H || opt_h) { - sc.outdir = scanner_config::NO_OUTDIR; // don't create outdir if we are getting help. - } - - if (sc.outdir.empty()){ - std::cerr << "error: -o outdir must be specified\n"; - exit(1); - } - - struct feature_recorder_set::flags_t f; - scanner_set ss(sc, f, nullptr); // make a scanner_set but with no XML writer. We will create it below - ss.add_scanners(scanners_builtin); - - /* Print usage if necessary. Requires scanner set, but not commands applied. - */ - if ( opt_h ) { - usage(progname, ss); - exit(1); - } - if ( opt_H ) { - ss.info_scanners(std::cout, true, true, 'e', 'x'); - exit(1); - } - - /* The zap option wipes the contents of a directory, useful for debugging */ - if (opt_zap){ - for (const auto &entry : std::filesystem::recursive_directory_iterator( sc.outdir ) ) { - if (! std::filesystem::is_directory(entry.path())){ - std::cout << "erasing " << entry.path().string() << "\n"; - std::filesystem::remove( entry ); - } - } - } - - bool clean_start = opt_zap || directory_empty(sc.outdir); - - /* Applying the scanner commands will create the alert recorder. 
*/ - try { - ss.apply_scanner_commands(); - } - catch (const scanner_set::NoSuchScanner &e) { - std::cerr << "no such scanner: " << e.what() << "\n"; - exit(1); - } - - /* Give an error if a find list was specified - * but no scanner that uses the find list is enabled. - */ - - if (!FindOpts::get().empty()) { - /* Look through the enabled scanners and make sure that - * at least one of them is a FIND scanner - */ - if (!ss.is_find_scanner_enabled()){ - throw std::runtime_error("find words are specified with -F but no find scanner is enabled.\n"); - } - } - - if (clean_start==false){ - /* Restarting */ - bulk_extractor_restarter r(sc,cfg); - r.restart(); // load the restart file and rename report.xml - } - - image_process *p = image_process::open( sc.input_fname, cfg.opt_recurse, cfg.opt_pagesize, cfg.opt_marginsize); - - /* are we supposed to run the path printer? */ - if (opt_path.size() > 0){ - if (argc!=1) throw std::runtime_error("-p requires a single argument."); - path_printer pp(&ss, p, std::cout); - if (opt_path=="-http" || opt_path=="--http"){ - pp.process_http(std::cin); - } else if (opt_path=="-i" || opt_path=="-"){ - pp.process_interactive(std::cin); - } else { - pp.process_path(opt_path); - } - exit(0); - } - - /* Open the image file (or the device) now. - * We use *p because we don't know which subclass we will be getting. - */ - - dfxml_writer *xreport = new dfxml_writer(sc.outdir / Phase1::REPORT_FILENAME, false); // do not make DTD - ss.set_dfxml_writer( xreport ); - /* Start the clock */ - master_timer.start(); - - Phase1 phase1(cfg, *p, ss); - - /* Validate the args */ - if ( argc == 0 ) throw std::runtime_error("No disk image provided. Run with -h for help."); - if ( argc > 1 ){ - throw std::runtime_error("Too many arguments provided. Run with -h for help."); - } - validate_path(sc.input_fname); - - /* Create the DFXML file in the report directory. - * If we are restarting, the dfxml file was renamed. 
- */ - - /* Determine the feature files that will be used from the scanners that were enabled */ - auto feature_file_names = ss.feature_file_list(); -#if 0 - uint32_t flags = 0; - if (stop_list.size()>0) flags |= feature_recorder_set::CREATE_STOP_LIST_RECORDERS; - if (opt_write_sqlite3) flags |= feature_recorder_set::ENABLE_SQLITE3_RECORDERS; - if (!opt_write_feature_files) flags |= feature_recorder_set::DISABLE_FILE_RECORDERS; - -#endif - - /* provide documentation to the user; the DFXML information comes from elsewhere */ - if (!cfg.opt_quiet){ - std::cout << "bulk_extractor version: " << PACKAGE_VERSION << "\n"; - std::cout << "Input file: " << sc.input_fname << "\n"; - std::cout << "Output directory: " << sc.outdir << "\n"; - std::cout << "Disk Size: " << p->image_size() << "\n"; - std::cout << "Scanners: "; - for (auto const &it : ss.get_enabled_scanners()){ - std::cout << it << " "; - } - std::cout << "\n"; - - if (cfg.num_threads>0){ - std::cout << "Threads: " << cfg.num_threads << "\n"; - } else { - std::cout << "Threading Disabled\n"; - } - } - - /*** PHASE 1 --- Run on the input image */ - struct notify_opts o; - o.ssp = &ss; - o.master_timer = &master_timer; - o.fraction_done = &fraction_done; - o.opt_legacy = cfg.opt_legacy; - new std::thread(¬ify_thread, &o); // launch the notify thread - ss.phase_scan(); - -#if 0 - if ( fs.flag_set(feature_recorder_set::ENABLE_SQLITE3_RECORDERS )) { - fs.db_transaction_begin(); - } -#endif - if (opt_sampling_params.size()>0){ - cfg.set_sampling_parameters(opt_sampling_params); - } - - /* Go multi-threaded if requested */ - if (cfg.num_threads > 0){ - std::cout << "going multi-threaded...(" << cfg.num_threads << ")\n"; - ss.launch_workers(cfg.num_threads); - } else { - std::cout << "running single-threaded (DEBUG)...\n"; - - } - - phase1.dfxml_write_create( original_argc, original_argv); - xreport->xmlout("provided_filename", sc.input_fname); // save this information - xreport->add_timestamp("phase1 start"); - - 
std::cerr << "Calling check_previously_processed at one\n"; - - try { - phase1.phase1_run(); - ss.join(); // wait for threads to come together - } - catch (const feature_recorder::DiskWriteError &e) { - std::cerr << "Disk write error during Phase 1 (scanning). Disk is probably full." << std::endl - << "Remove extra files and restart bulk_extractor with the exact same command line to continue." << std::endl; - exit(1); - } - -#if 0 - if ( fs.flag_set(feature_recorder_set::ENABLE_SQLITE3_RECORDERS )) { - fs.db_transaction_commit(); - } -#endif - xreport->add_timestamp("phase1 end"); - if (phase1.image_hash.size() > 0 ){ - std::cout << "Hash of Disk Image: " << phase1.image_hash << "\n"; - } - - /*** PHASE 2 --- Shutdown ***/ - if (!cfg.opt_quiet) std::cout << "Phase 2. Shutting down scanners\n"; - xreport->add_timestamp("phase2 start"); - try { - ss.shutdown(); - } - catch (const feature_recorder::DiskWriteError &e) { - std::cerr << "Disk write error during Phase 2 (histogram making). Disk is probably full." << std::endl - << "Remove extra files and restart bulk_extractor with the exact same command line to continue." 
<< std::endl; - exit(1); - } - - xreport->add_timestamp("phase2 end"); - master_timer.stop(); - - /*** PHASE 3 --- report and then print final usage information ***/ - xreport->push("report"); - xreport->xmlout("total_bytes",phase1.total_bytes); - xreport->xmlout("elapsed_seconds",master_timer.elapsed_seconds()); - xreport->xmlout("max_depth_seen",ss.get_max_depth_seen()); - xreport->xmlout("dup_bytes_encountered",ss.get_dup_bytes_encountered()); - ss.dump_scanner_stats(); - ss.dump_name_count_stats(); - xreport->pop("report"); - xreport->add_rusage(); - xreport->pop("dfxml"); // bulk_extractor - xreport->close(); - - if (cfg.opt_quiet==0){ - float mb_per_sec = (phase1.total_bytes / 1000000.0) / master_timer.elapsed_seconds(); - - std::cout << "All Threads Finished!\n"; - std::cout.precision(4); - std::cout << "Elapsed time: " << master_timer.elapsed_seconds() << " sec." << std::endl - << "Total MB processed: " << int(phase1.total_bytes / 1000000) << std::endl - << "Overall performance: " << mb_per_sec << " << MBytes/sec "; - if (cfg.num_threads>0){ - std::cout << mb_per_sec/cfg.num_threads << " (MBytes/sec/thread)\n"; - } - std::cout << "sbufs created: " << sbuf_t::sbuf_total << std::endl; - std::cout << "sbufs unaccounted: " << sbuf_t::sbuf_count << " (should be 0) " << std::endl; - } - - try { - feature_recorder &fr = ss.fs.named_feature_recorder("email"); - std::cout << "Total " << fr.name << " features found: " << fr.features_written << std::endl; - } - catch (const feature_recorder_set::NoSuchFeatureRecorder &e) { - std::cout << "Did not scan for email addresses." 
<< std::endl; - } - - muntrace(); + bulk_extractor_main(argc,argv); exit(0); } From 14e8cb31647c7a31f5acd2e30f2dfc14c12e129c Mon Sep 17 00:00:00 2001 From: Simson Garfinkel Date: Fri, 10 Sep 2021 16:00:14 -0400 Subject: [PATCH 28/89] created distinct_character_counter --- src/be13_api | 2 +- src/scan_aes.cpp | 37 ++++++++++++++++++++++++++----------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/be13_api b/src/be13_api index 5e86d460..0153d2cc 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit 5e86d460766db30c138a0bee02204ef9fdb2dfa3 +Subproject commit 0153d2cc4c3854728b4150bc173784af70b75ca9 diff --git a/src/scan_aes.cpp b/src/scan_aes.cpp index c3ba0ad5..59232774 100644 --- a/src/scan_aes.cpp +++ b/src/scan_aes.cpp @@ -39,6 +39,7 @@ #include "be13_api/scanner_params.h" #include "be13_api/scanner_set.h" +#include "be13_api/distinct_character_counter.h" /* old aes.h file */ @@ -374,6 +375,13 @@ int scan_aes_256 = 1; +void preload(distinct_character_counter &dcc, const uint8_t *buf, size_t keysize) +{ + for (size_t i = 0; ibufsize && pos < sp.sbuf->pagesize; pos++){ /* TODO: Remove direct memory access with mediated access */ const uint8_t *p2 = sp.sbuf->get_buf() + pos; - if (scan_aes_128 && (sp.sbuf->bufsize-pos >= AES128_KEY_SCHEDULE_SIZE) && - (sp.sbuf->distinct_characters( pos, AES128_KEY_SIZE) > AES128_KEY_SIZE/4)){ - if (valid_aes128_schedule(p2)) { + if (scan_aes_128 && (sp.sbuf->bufsize-pos >= AES128_KEY_SCHEDULE_SIZE)){ + if (pos==0) preload(distinct128, p2, AES128_KEY_SIZE); + distinct128.add(p2[AES128_KEY_SIZE-1]); + if (distinct128.distinct_count > AES128_KEY_SIZE/4 && valid_aes128_schedule(p2)) { std::string key = key_to_string(p2, AES128_KEY_SIZE); - aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES128")); - } + aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES128")); + } + distinct128.remove(p2[0]); } - if (scan_aes_192 && (sp.sbuf->bufsize-pos >= AES192_KEY_SCHEDULE_SIZE) && - 
(sp.sbuf->distinct_characters( pos, AES192_KEY_SIZE) > AES192_KEY_SIZE/4)){ - if (valid_aes192_schedule(p2)) { + if (scan_aes_192 && (sp.sbuf->bufsize-pos >= AES192_KEY_SCHEDULE_SIZE)) { + distinct192.add(p2[AES192_KEY_SIZE-1]); + if (pos==0) preload(distinct192, p2, AES192_KEY_SIZE); + if (distinct192.distinct_count > AES192_KEY_SIZE/4 && valid_aes192_schedule(p2)) { std::string key = key_to_string(p2, AES192_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES192")); } + distinct192.remove(p2[0]); } - if (scan_aes_256 && (sp.sbuf->bufsize-pos >= AES256_KEY_SCHEDULE_SIZE) && - (sp.sbuf->distinct_characters( pos, AES256_KEY_SIZE) > AES256_KEY_SIZE/4)){ - if (valid_aes256_schedule(p2)) { + if (scan_aes_256 && (sp.sbuf->bufsize-pos >= AES256_KEY_SCHEDULE_SIZE)) { + if (pos==0) preload(distinct256, p2, AES256_KEY_SIZE); + distinct256.add(p2[AES256_KEY_SIZE-1]); + if (distinct256.distinct_count > AES256_KEY_SIZE/4 && valid_aes256_schedule(p2)) { std::string key = key_to_string(p2, AES256_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES256")); } + distinct256.remove(p2[0]); } } } From 952dec14bd89a2d426a7c52fed0a3ebb09a0b9ce Mon Sep 17 00:00:00 2001 From: Simson Garfinkel Date: Sat, 11 Sep 2021 09:31:51 -0400 Subject: [PATCH 29/89] speed up sbuf a bit --- src/be13_api | 2 +- src/bulk_extractor.cpp | 32 +++++++++++++++++--------------- src/phase1.cpp | 2 ++ src/scan_aes.cpp | 19 +++++++++++++------ src/scan_xor.cpp | 5 ++++- 5 files changed, 37 insertions(+), 23 deletions(-) diff --git a/src/be13_api b/src/be13_api index 0153d2cc..e1610e1b 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit 0153d2cc4c3854728b4150bc173784af70b75ca9 +Subproject commit e1610e1b51163bad8d6ebc229d54b039b4542146 diff --git a/src/bulk_extractor.cpp b/src/bulk_extractor.cpp index e60e7c20..7cfeb18a 100644 --- a/src/bulk_extractor.cpp +++ b/src/bulk_extractor.cpp @@ -468,17 +468,6 @@ int bulk_extractor_main(int argc,char **argv) argc -= 
optind; argv += optind; - /* Get image or directory */ - if (argc==0 || *argv == nullptr) { - if (cfg.opt_recurse) { - std::cerr << "filedir not provided\n"; - } else { - std::cerr << "imagefile not provided\n"; - } - exit(1); - } - sc.input_fname = *argv; - /* Create a configuration that will be used to initialize the scanners */ /* Make individual configuration options appear on the command line interface. */ sc.get_global_config("debug_histogram_malloc_fail_frequency", @@ -496,15 +485,28 @@ int bulk_extractor_main(int argc,char **argv) sc.outdir = scanner_config::NO_OUTDIR; // don't create outdir if we are getting help. } + struct feature_recorder_set::flags_t f; + scanner_set ss(sc, f, nullptr); // make a scanner_set but with no XML writer. We will create it below + ss.add_scanners(scanners_builtin); + + /* Get image or directory */ + if (argc==0 || *argv == nullptr) { + if (cfg.opt_recurse) { + std::cerr << "filedir not provided\n"; + } else { + std::cerr << "imagefile not provided\n"; + } + usage(progname, ss); + exit(1); + } + sc.input_fname = *argv; + if (sc.outdir.empty()){ std::cerr << "error: -o outdir must be specified\n"; + usage(progname, ss); exit(1); } - struct feature_recorder_set::flags_t f; - scanner_set ss(sc, f, nullptr); // make a scanner_set but with no XML writer. We will create it below - ss.add_scanners(scanners_builtin); - /* Print usage if necessary. Requires scanner set, but not commands applied. 
*/ if ( opt_h ) { diff --git a/src/phase1.cpp b/src/phase1.cpp index 56833b65..c9592019 100644 --- a/src/phase1.cpp +++ b/src/phase1.cpp @@ -54,6 +54,8 @@ void Phase1::Config::set_sampling_parameters(std::string param) } sampling_fraction = atof(params.at(0).c_str()); if (sampling_fraction<=0 || sampling_fraction>=1){ + std::cerr << "params.at(0): " << params.at(0) << std::endl; + std::cerr << "sampling_fraction: " << sampling_fraction << std::endl; throw std::runtime_error("error: sampling fraction f must be 0scanner_flags.recurse = true; sp.info->scanner_flags.recurse_always = true; sp.get_scanner_config("xor_mask",&xor_mask,"XOR mask value, in decimal"); + if (xor_mask<0 || xor_mask>255){ + throw std::runtime_error("invalid xor_mask"); + } return; } if (sp.phase==scanner_params::PHASE_SCAN) { From 933a1f8299e86bf7800d1c14a68d5a5accf34262 Mon Sep 17 00:00:00 2001 From: Simson Garfinkel Date: Sat, 11 Sep 2021 10:21:23 -0400 Subject: [PATCH 30/89] fixed carving extensions --- src/be13_api | 2 +- src/scan_aes.cpp | 25 +++++++++++-------------- src/scan_evtx.cpp | 2 +- src/scan_ntfsindx.cpp | 8 ++++---- src/scan_ntfslogfile.cpp | 8 ++++---- src/scan_ntfsmft.cpp | 4 ++-- src/scan_ntfsusn.cpp | 8 ++++---- 7 files changed, 27 insertions(+), 30 deletions(-) diff --git a/src/be13_api b/src/be13_api index e1610e1b..dee8002d 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit e1610e1b51163bad8d6ebc229d54b039b4542146 +Subproject commit dee8002d84dc6fddaafaa68fadf56ebd96cbecd4 diff --git a/src/scan_aes.cpp b/src/scan_aes.cpp index 6969f777..79ebe268 100644 --- a/src/scan_aes.cpp +++ b/src/scan_aes.cpp @@ -376,13 +376,6 @@ int scan_aes_256 = 1; -void preload(distinct_character_counter &dcc, const uint8_t *buf, size_t keysize) -{ - for (size_t i = 0; ibufsize && pos < sp.sbuf->pagesize; pos++){ - /* TODO: Remove direct memory access with mediated access */ const uint8_t *p2 = sp.sbuf->get_buf() + pos; if (scan_aes_128 && (sp.sbuf->bufsize-pos >= 
AES128_KEY_SCHEDULE_SIZE)){ - if (pos==0) preload(distinct128, p2, AES128_KEY_SIZE); + if (pos==0) distinct128.preload(p2, AES128_KEY_SIZE-1); + if (pos==0) hbc.preload(p2, AES128_KEY_SCHEDULE_SIZE-1); distinct128.add(p2[AES128_KEY_SIZE-1]); - if (distinct128.distinct_count > AES128_KEY_SIZE/4 && valid_aes128_schedule(p2)) { + hbc.add(p2[AES128_KEY_SCHEDULE_SIZE-1]); + + if (distinct128.distinct_count > AES128_KEY_SIZE/4 && hbc.highbit_count>0 && valid_aes128_schedule(p2)) { std::string key = key_to_string(p2, AES128_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES128")); } distinct128.remove(p2[0]); + hbc.remove(p2[0]); } if (scan_aes_192 && (sp.sbuf->bufsize-pos >= AES192_KEY_SCHEDULE_SIZE)) { distinct192.add(p2[AES192_KEY_SIZE-1]); - if (pos==0) preload(distinct192, p2, AES192_KEY_SIZE); - if (distinct192.distinct_count > AES192_KEY_SIZE/4 && valid_aes192_schedule(p2)) { + if (pos==0) distinct192.preload(p2, AES192_KEY_SIZE-1); + if (distinct192.distinct_count > AES192_KEY_SIZE/4 && hbc.highbit_count>0 && valid_aes192_schedule(p2)) { std::string key = key_to_string(p2, AES192_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES192")); } distinct192.remove(p2[0]); } if (scan_aes_256 && (sp.sbuf->bufsize-pos >= AES256_KEY_SCHEDULE_SIZE)) { - if (pos==0) preload(distinct256, p2, AES256_KEY_SIZE); + if (pos==0) distinct256.preload(p2, AES256_KEY_SIZE-1); distinct256.add(p2[AES256_KEY_SIZE-1]); - if (distinct256.distinct_count > AES256_KEY_SIZE/4 && valid_aes256_schedule(p2)) { + if (distinct256.distinct_count > AES256_KEY_SIZE/4 && hbc.highbit_count>0 && valid_aes256_schedule(p2)) { std::string key = key_to_string(p2, AES256_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES256")); } diff --git a/src/scan_evtx.cpp b/src/scan_evtx.cpp index fdc1c07d..7a97f82a 100644 --- a/src/scan_evtx.cpp +++ b/src/scan_evtx.cpp @@ -246,7 +246,7 @@ void scan_evtx(scanner_params &sp) int64_t result_record_size = 
check_evtxrecord_signature(offset+i, sbuf); if (result_record_size > 0) { sbuf_t data(sbuf,offset+i, result_record_size); - evtx_recorder.carve(data, "evtx_orphan_record"); + evtx_recorder.carve(data, ".evtx_orphan_record"); i += result_record_size; } else { i += 8; diff --git a/src/scan_ntfsindx.cpp b/src/scan_ntfsindx.cpp index 2350273d..b2bec891 100644 --- a/src/scan_ntfsindx.cpp +++ b/src/scan_ntfsindx.cpp @@ -121,17 +121,17 @@ void scan_ntfsindx(scanner_params &sp) else break; } - ntfsindx_recorder.carve(sbuf_t(sbuf,offset,total_record_size), "INDX"); + ntfsindx_recorder.carve(sbuf_t(sbuf,offset,total_record_size), ".INDX"); } else if(record_type == 2) { - ntfsindx_recorder.carve(sbuf_t(sbuf,offset,total_record_size),"INDX_ObjId-O"); + ntfsindx_recorder.carve(sbuf_t(sbuf,offset,total_record_size),".INDX_ObjId-O"); } else { // 0 - Other INDX record (Secure-SDH, Secure-SII, etc.) - ntfsindx_recorder.carve(sbuf_t(sbuf,offset,total_record_size),"INDX_Misc"); + ntfsindx_recorder.carve(sbuf_t(sbuf,offset,total_record_size),".INDX_Misc"); } } else if (result_type == 2) { - ntfsindx_recorder.carve(sbuf_t(sbuf,offset,total_record_size),"INDX_corrupted"); + ntfsindx_recorder.carve(sbuf_t(sbuf,offset,total_record_size),".INDX_corrupted"); } else { // result_type == 0 } diff --git a/src/scan_ntfslogfile.cpp b/src/scan_ntfslogfile.cpp index c5bd2957..a3d38466 100644 --- a/src/scan_ntfslogfile.cpp +++ b/src/scan_ntfslogfile.cpp @@ -114,16 +114,16 @@ void scan_ntfslogfile(scanner_params &sp) else break; } - ntfslogfile_recorder.carve( sbuf_t(sbuf,offset,total_record_size), "LogFile-RCRD"); + ntfslogfile_recorder.carve( sbuf_t(sbuf,offset,total_record_size), ".LogFile-RCRD"); } else if (result_type == 2) { - ntfslogfile_recorder.carve( sbuf_t(sbuf,offset,total_record_size), "LogFile-RCRD_corrupted"); + ntfslogfile_recorder.carve( sbuf_t(sbuf,offset,total_record_size), ".LogFile-RCRD_corrupted"); } else if (result_type == 3) { - ntfslogfile_recorder.carve( 
sbuf_t(sbuf,offset,total_record_size), "LogFile-RSTR"); + ntfslogfile_recorder.carve( sbuf_t(sbuf,offset,total_record_size), ".LogFile-RSTR"); } else if (result_type == 4) { - ntfslogfile_recorder.carve( sbuf_t(sbuf,offset,total_record_size),"LogFile-RSTR_corrupted"); + ntfslogfile_recorder.carve( sbuf_t(sbuf,offset,total_record_size),".LogFile-RSTR_corrupted"); } else { // result_type == 0 - not RCRD record } diff --git a/src/scan_ntfsmft.cpp b/src/scan_ntfsmft.cpp index 5e223b7f..39d8f37a 100644 --- a/src/scan_ntfsmft.cpp +++ b/src/scan_ntfsmft.cpp @@ -97,10 +97,10 @@ void scan_ntfsmft(scanner_params &sp) else break; } - ntfsmft_recorder.carve(sbuf_t(sbuf,offset,total_record_size),"MFT"); + ntfsmft_recorder.carve(sbuf_t(sbuf,offset,total_record_size),".mft"); } else if (result_type == 2) { - ntfsmft_recorder.carve(sbuf_t(sbuf,offset,total_record_size),"MFT_corrputed"); + ntfsmft_recorder.carve(sbuf_t(sbuf,offset,total_record_size),".mft_corrputed"); } else { // result_type == 0 - not MFT record } diff --git a/src/scan_ntfsusn.cpp b/src/scan_ntfsusn.cpp index 1d39aac0..edc45014 100644 --- a/src/scan_ntfsusn.cpp +++ b/src/scan_ntfsusn.cpp @@ -98,16 +98,16 @@ void scan_ntfsusn(scanner_params &sp) if (record_size % 8 != 0) { // illegal size uint8_t padding; padding = 8 - (record_size % 8); - ntfsusn_recorder.carve( sbuf_t(sbuf,offset,record_size+padding), "UsnJrnl-J_corrupted"); + ntfsusn_recorder.carve( sbuf_t(sbuf,offset,record_size+padding), ".UsnJrnl-J_corrupted"); offset += record_size+padding; continue; } total_record_size = record_size; if (offset+total_record_size > stop) { if(offset+total_record_size < sbuf.bufsize) - ntfsusn_recorder.carve( sbuf_t(sbuf,offset,total_record_size), "UsnJrnl-J"); + ntfsusn_recorder.carve( sbuf_t(sbuf,offset,total_record_size), ".UsnJrnl-J"); else - ntfsusn_recorder.carve( sbuf_t(sbuf,offset,total_record_size), "UsnJrnl-J_corrupted"); + ntfsusn_recorder.carve( sbuf_t(sbuf,offset,total_record_size), ".UsnJrnl-J_corrupted"); 
break; } // found one record then also checks following valid records and writes all at once @@ -142,7 +142,7 @@ void scan_ntfsusn(scanner_params &sp) } } } - ntfsusn_recorder.carve(sbuf_t(sbuf,offset,total_record_size),"UsnJrnl-J"); + ntfsusn_recorder.carve(sbuf_t(sbuf,offset,total_record_size),".UsnJrnl-J"); offset += total_record_size; } } From 6ebbbdbc36f6805f6dd9d40d5fcc8f5f332697b9 Mon Sep 17 00:00:00 2001 From: Simson Garfinkel Date: Sat, 11 Sep 2021 21:32:09 -0400 Subject: [PATCH 31/89] added support for scanner `carve` flag --- src/be13_api | 2 +- src/scan_aes.cpp | 20 +++++---- src/scan_base64.cpp | 18 -------- src/scan_evtx.cpp | 12 +++--- src/scan_exif.cpp | 4 +- src/scan_hiberfile.cpp | 1 + src/scan_kml.cpp | 4 +- src/scan_ntfsindx.cpp | 5 ++- src/scan_ntfslogfile.cpp | 6 ++- src/scan_ntfsmft.cpp | 5 ++- src/scan_ntfsusn.cpp | 6 ++- src/scan_rar.cpp | 1 + src/scan_sqlite.cpp | 5 ++- src/scan_utmp.cpp | 4 +- src/scan_vcard.cpp | 4 +- src/scan_windirs.cpp | 1 + src/scan_winlnk.cpp | 1 + src/scan_winpe.cpp | 4 +- src/scan_zip.cpp | 6 ++- src/test_be.cpp | 93 +++++++++++++++++++++++----------------- 20 files changed, 118 insertions(+), 84 deletions(-) diff --git a/src/be13_api b/src/be13_api index dee8002d..d6e02464 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit dee8002d84dc6fddaafaa68fadf56ebd96cbecd4 +Subproject commit d6e0246405646b1c6427cd4e632a995cec94380d diff --git a/src/scan_aes.cpp b/src/scan_aes.cpp index 79ebe268..b66fcf12 100644 --- a/src/scan_aes.cpp +++ b/src/scan_aes.cpp @@ -384,7 +384,8 @@ void scan_aes(struct scanner_params &sp) sp.info->set_name("aes"); sp.info->author = "Sam Trenholme, Jesse Kornblum and Simson Garfinkel"; sp.info->description = "Search for AES key schedules"; - sp.info->scanner_version = "1.1"; + sp.info->scanner_version = "1.2"; + sp.info->scanner_flags.scanner_wants_memory = true; sp.info->feature_defs.push_back( feature_recorder_def("aes_keys")); sp.info->min_sbuf_size = 
AES128_KEY_SCHEDULE_SIZE; sp.get_scanner_config("scan_aes_128", &scan_aes_128, "Scan for 128-bit AES keys; 0=No, 1=Yes"); @@ -401,6 +402,7 @@ void scan_aes(struct scanner_params &sp) } if(sp.phase==scanner_params::PHASE_SCAN){ + if (scan_aes_128==0 && scan_aes_192==0 && scan_aes_256==0) return; auto &aes_recorder = *aes_recorderp; distinct_character_counter distinct128,distinct192,distinct256; highbit_character_counter hbc; @@ -413,24 +415,25 @@ void scan_aes(struct scanner_params &sp) * This is less efficient than before, but the code is simpler, and now the code is correctly computing the histogram * for the 128, 192 and 256-byte cases. */ - for (size_t pos = 0 ; pos < sp.sbuf->bufsize && pos < sp.sbuf->pagesize; pos++){ - const uint8_t *p2 = sp.sbuf->get_buf() + pos; + assert(sp.sbuf->bufsize >= AES128_KEY_SCHEDULE_SIZE); + const uint8_t *buf = sp.sbuf->get_buf(); + for (size_t pos = 0 ; pos < sp.sbuf->bufsize && pos < sp.sbuf->bufsize - AES128_KEY_SCHEDULE_SIZE; pos++){ + const uint8_t *p2 = buf + pos; + if (pos==0) hbc.preload(p2, AES128_KEY_SCHEDULE_SIZE-1); + hbc.add(p2[AES128_KEY_SCHEDULE_SIZE-1]); + if (scan_aes_128 && (sp.sbuf->bufsize-pos >= AES128_KEY_SCHEDULE_SIZE)){ if (pos==0) distinct128.preload(p2, AES128_KEY_SIZE-1); - if (pos==0) hbc.preload(p2, AES128_KEY_SCHEDULE_SIZE-1); distinct128.add(p2[AES128_KEY_SIZE-1]); - hbc.add(p2[AES128_KEY_SCHEDULE_SIZE-1]); - if (distinct128.distinct_count > AES128_KEY_SIZE/4 && hbc.highbit_count>0 && valid_aes128_schedule(p2)) { std::string key = key_to_string(p2, AES128_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES128")); } distinct128.remove(p2[0]); - hbc.remove(p2[0]); } if (scan_aes_192 && (sp.sbuf->bufsize-pos >= AES192_KEY_SCHEDULE_SIZE)) { - distinct192.add(p2[AES192_KEY_SIZE-1]); if (pos==0) distinct192.preload(p2, AES192_KEY_SIZE-1); + distinct192.add(p2[AES192_KEY_SIZE-1]); if (distinct192.distinct_count > AES192_KEY_SIZE/4 && hbc.highbit_count>0 && valid_aes192_schedule(p2)) { 
std::string key = key_to_string(p2, AES192_KEY_SIZE); aes_recorder.write(sp.sbuf->pos0+pos,key,std::string("AES192")); @@ -446,6 +449,7 @@ void scan_aes(struct scanner_params &sp) } distinct256.remove(p2[0]); } + hbc.remove(p2[0]); } } } diff --git a/src/scan_base64.cpp b/src/scan_base64.cpp index 2e2ebd08..bd9c9c61 100644 --- a/src/scan_base64.cpp +++ b/src/scan_base64.cpp @@ -107,24 +107,6 @@ sbuf_t *decode_base64(const sbuf_t &sbuf, size_t start, size_t src_len) // could not decode delete sbufr; return nullptr; - -#if 0 - - // Make room for the destination. - size_t dst_len = src_len + 4; // it can only get smaller, but give some extra space - pos0_t pos0 = ; - uint8_t *dst = reinterpret_cast(malloc(dst_len)); - if (dst==nullptr) { - throw std::bad_alloc(); - } - // Perform the conversion - int conv_len = b64_pton_forensic(src, src_len, dst, dst_len); - if (conv_len>0){ - return sbuf_t::sbuf_new(pos0, dst, conv_len, conv_len); - } - free(dst); - return nullptr; -#endif } void process_base64(const scanner_params &sp, size_t start, size_t src_len) diff --git a/src/scan_evtx.cpp b/src/scan_evtx.cpp index 7a97f82a..eccded57 100644 --- a/src/scan_evtx.cpp +++ b/src/scan_evtx.cpp @@ -23,7 +23,8 @@ #define CLUSTER_SIZE 4096 #define ELFFILE_SIZE 4096 #define ELFCHNK_SIZE 65536 -#define FEATURE_FILE_NAME "evtx_carved" + +const std::string FEATURE_FILE_NAME {"evtx_carved"}; struct elffile { struct elffilepart { @@ -149,7 +150,10 @@ void scan_evtx(scanner_params &sp) sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for EVTX Chunks and generates valid EVTX file"; sp.info->scanner_version = "1.0"; - sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME)); + + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; + sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME, carve_flag)); return; } if(sp.phase==scanner_params::PHASE_SCAN){ @@ -224,6 +228,7 @@ void scan_evtx(scanner_params &sp) header.flags = 
0; uint32_t table[256]; crc32::generate_table(table); + // CRC32 of the first 120 bytes == header.part struct header.crc32 = crc32::update(table, 0, &header.part, 120); memset(header.unknown2,'\0', sizeof(header.unknown2)); @@ -233,12 +238,9 @@ void scan_evtx(scanner_params &sp) // generate evtx header based on elfchnk information // make an sbuf for the header that will free it automatically when we are finished sbuf_t *sbuf_header = sbuf_t::sbuf_new(pos0_t(), header_buf, sizeof(header), sizeof(header)); - //sbuf_t sbuf_header(pos0_t(), reinterpret_cast(&header), sizeof(header), sizeof(header), 0, false, false, false ); sbuf_t sbuf_records(sbuf, offset, total_size); evtx_recorder.carve(*sbuf_header, sbuf_records, filename); delete sbuf_header; - //evtx_recorder.write_data((unsigned char *)&header,sizeof(elffile),filename); - //evtx_recorder.carve(sbuf, offset, total_size, filename); offset += total_size; } else { // scans orphan record size_t i=0; diff --git a/src/scan_exif.cpp b/src/scan_exif.cpp index 3007a1be..9f411f15 100644 --- a/src/scan_exif.cpp +++ b/src/scan_exif.cpp @@ -514,9 +514,11 @@ void scan_exif (scanner_params &sp) sp.info->min_sbuf_size = jpeg_validator::MIN_JPEG_SIZE; struct feature_recorder_def::flags_t xml_flag; xml_flag.xml = true; + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; sp.info->feature_defs.push_back( feature_recorder_def("exif", xml_flag)); sp.info->feature_defs.push_back( feature_recorder_def("gps")); - sp.info->feature_defs.push_back( feature_recorder_def("jpeg_carved")); + sp.info->feature_defs.push_back( feature_recorder_def("jpeg_carved", carve_flag)); sp.get_scanner_config("exif_debug",&exif_debug,"debug exif decoder"); return; } diff --git a/src/scan_hiberfile.cpp b/src/scan_hiberfile.cpp index af088b73..d49cd6a0 100644 --- a/src/scan_hiberfile.cpp +++ b/src/scan_hiberfile.cpp @@ -43,6 +43,7 @@ void scan_hiberfile(scanner_params &sp) sp.info->description = "Scans for Microsoft-XPress compressed 
data"; sp.info->scanner_version= "1.0"; sp.info->scanner_flags.recurse = true; + sp.info->scanner_flags.scanner_produces_memory = true; return; /* no features */ } if (sp.phase==scanner_params::PHASE_SHUTDOWN) return; diff --git a/src/scan_kml.cpp b/src/scan_kml.cpp index 5f6f809d..61f4b08e 100644 --- a/src/scan_kml.cpp +++ b/src/scan_kml.cpp @@ -32,7 +32,9 @@ void scan_kml(scanner_params &sp) sp.info->author = "Simson Garfinkel "; sp.info->description = "Scans for KML files"; sp.info->scanner_version= "1.0"; - sp.info->feature_defs.push_back( feature_recorder_def("kml")); + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; + sp.info->feature_defs.push_back( feature_recorder_def("kml", carve_flag)); return; } if(sp.phase==scanner_params::PHASE_SCAN){ diff --git a/src/scan_ntfsindx.cpp b/src/scan_ntfsindx.cpp index b2bec891..92e35537 100644 --- a/src/scan_ntfsindx.cpp +++ b/src/scan_ntfsindx.cpp @@ -81,7 +81,10 @@ void scan_ntfsindx(scanner_params &sp) sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for NTFS $INDEX_ALLOCATION INDX record"; sp.info->scanner_version = "1.1"; - sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME)); + sp.info->scanner_flags.scanner_wants_filesystems = true; + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; + sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME, carve_flag)); return; } if(sp.phase==scanner_params::PHASE_SCAN){ diff --git a/src/scan_ntfslogfile.cpp b/src/scan_ntfslogfile.cpp index a3d38466..f0765359 100644 --- a/src/scan_ntfslogfile.cpp +++ b/src/scan_ntfslogfile.cpp @@ -82,8 +82,12 @@ void scan_ntfslogfile(scanner_params &sp) sp.info->set_name("ntfslogfile"); sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for NTFS $LogFile RCRD record"; + sp.info->scanner_flags.scanner_wants_filesystems = true; sp.info->scanner_version = "1.1"; - sp.info->feature_defs.push_back( 
feature_recorder_def(FEATURE_FILE_NAME)); + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; + + sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME, carve_flag)); return; } if(sp.phase==scanner_params::PHASE_SCAN){ diff --git a/src/scan_ntfsmft.cpp b/src/scan_ntfsmft.cpp index 39d8f37a..855a88ab 100644 --- a/src/scan_ntfsmft.cpp +++ b/src/scan_ntfsmft.cpp @@ -65,7 +65,10 @@ void scan_ntfsmft(scanner_params &sp) sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for NTFS MFT record"; sp.info->scanner_version = "1.0"; - sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME)); + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; + sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME, carve_flag)); + sp.info->scanner_flags.scanner_wants_filesystems = true; return; } if(sp.phase==scanner_params::PHASE_SCAN){ diff --git a/src/scan_ntfsusn.cpp b/src/scan_ntfsusn.cpp index edc45014..b9aea218 100644 --- a/src/scan_ntfsusn.cpp +++ b/src/scan_ntfsusn.cpp @@ -76,7 +76,11 @@ void scan_ntfsusn(scanner_params &sp) sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for USN_RECORD v2/v4 record"; sp.info->scanner_version = "1.1"; - sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME)); + sp.info->scanner_flags.scanner_wants_filesystems = true; + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; + + sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME, carve_flag)); return; } if(sp.phase==scanner_params::PHASE_SCAN){ diff --git a/src/scan_rar.cpp b/src/scan_rar.cpp index 2f7cb88d..3e0e1eff 100644 --- a/src/scan_rar.cpp +++ b/src/scan_rar.cpp @@ -595,6 +595,7 @@ void scan_rar(scanner_params &sp) sp.info->description = "RAR volume locator and component decompresser"; feature_recorder_def::flags_t flags; flags.xml = true; + flags.carve = true; auto rar_def = 
feature_recorder_def(RAR_RECORDER_NAME, flags); rar_def.default_carve_mode = feature_recorder_def::carve_mode_t::CARVE_ENCODED; diff --git a/src/scan_sqlite.cpp b/src/scan_sqlite.cpp index cd7d0515..64fb21d1 100644 --- a/src/scan_sqlite.cpp +++ b/src/scan_sqlite.cpp @@ -42,7 +42,10 @@ void scan_sqlite(scanner_params &sp) sp.info->author = "Simson Garfinkel"; sp.info->description = "Scans for SQLITE3 data"; sp.info->scanner_version = "1.1"; - sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME)); + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; + + sp.info->feature_defs.push_back( feature_recorder_def(FEATURE_FILE_NAME, carve_flag)); return; } if(sp.phase==scanner_params::PHASE_SCAN){ diff --git a/src/scan_utmp.cpp b/src/scan_utmp.cpp index bd7201b8..acbc71a1 100644 --- a/src/scan_utmp.cpp +++ b/src/scan_utmp.cpp @@ -85,7 +85,9 @@ void scan_utmp(scanner_params &sp) sp.info->author = "Teru Yamazaki"; sp.info->description = "Scans for utmp record"; sp.info->scanner_version = "1.1"; - sp.info->feature_defs.push_back(feature_recorder_def(FEATURE_FILE_NAME)); + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; + sp.info->feature_defs.push_back(feature_recorder_def(FEATURE_FILE_NAME, carve_flag)); return; } if(sp.phase==scanner_params::PHASE_SCAN){ diff --git a/src/scan_vcard.cpp b/src/scan_vcard.cpp index 00e3874b..20b17e2c 100644 --- a/src/scan_vcard.cpp +++ b/src/scan_vcard.cpp @@ -83,7 +83,9 @@ void scan_vcard(scanner_params &sp) sp.info->author = "Simson Garfinkel and Tony Melaragno"; sp.info->description = "Scans for VCARD data"; sp.info->scanner_version= "1.1"; - sp.info->feature_defs.push_back( feature_recorder_def("vcard")); + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; + sp.info->feature_defs.push_back( feature_recorder_def("vcard", carve_flag)); return; } if(sp.phase==scanner_params::PHASE_SCAN){ diff --git a/src/scan_windirs.cpp b/src/scan_windirs.cpp 
index 046a7b77..473db36e 100644 --- a/src/scan_windirs.cpp +++ b/src/scan_windirs.cpp @@ -490,6 +490,7 @@ void scan_windirs(scanner_params &sp) sp.info->set_name("windirs" ); sp.info->author = "Simson Garfinkel and Maxim Suhanov"; sp.info->description = "Scans Microsoft directory structures"; + sp.info->scanner_flags.scanner_wants_filesystems = true; // should we look for compressed windows disk images? Gosh, I don't know... sp.info->scanner_flags.depth0_only = true; // don't look for compressed windows disk images becuase diff --git a/src/scan_winlnk.cpp b/src/scan_winlnk.cpp index a236d395..25a4957d 100644 --- a/src/scan_winlnk.cpp +++ b/src/scan_winlnk.cpp @@ -305,6 +305,7 @@ void scan_winlnk(scanner_params &sp) sp.info->author = "Simson Garfinkel"; sp.info->description = "Search for Windows LNK files"; sp.info->feature_defs.push_back( feature_recorder_def("winlnk")); + sp.info->scanner_flags.scanner_wants_filesystems = true; sp.info->min_sbuf_size = SMALLEST_LNK_FILE; return; } diff --git a/src/scan_winpe.cpp b/src/scan_winpe.cpp index 9767a051..3775e1c9 100644 --- a/src/scan_winpe.cpp +++ b/src/scan_winpe.cpp @@ -1018,8 +1018,10 @@ void scan_winpe (scanner_params &sp) sp.info->set_name("winpe" ); sp.info->description = "Scan for Windows PE headers"; sp.info->scanner_version = "1.1.0"; + struct feature_recorder_def::flags_t carve_flag; + carve_flag.carve = true; sp.info->feature_defs.push_back( feature_recorder_def("winpe")); - sp.info->feature_defs.push_back( feature_recorder_def("winpe_carved")); + sp.info->feature_defs.push_back( feature_recorder_def("winpe_carved", carve_flag)); return; } diff --git a/src/scan_zip.cpp b/src/scan_zip.cpp index 6c88a6ca..b5cd1688 100644 --- a/src/scan_zip.cpp +++ b/src/scan_zip.cpp @@ -158,10 +158,12 @@ void scan_zip(scanner_params &sp) sp.check_version(); if (sp.phase==scanner_params::PHASE_INIT){ - feature_recorder_def::flags_t xml; xml.xml = true; + feature_recorder_def::flags_t flags; + flags.xml = true; + flags.carve = 
true; sp.info->set_name("zip" ); sp.info->scanner_flags.recurse = true; - sp.info->feature_defs.push_back( feature_recorder_def(ZIP_RECORDER_NAME, xml )); + sp.info->feature_defs.push_back( feature_recorder_def(ZIP_RECORDER_NAME, flags )); sp.get_scanner_config("zip_min_uncompr_size",&zip_min_uncompr_size,"Minimum size of a ZIP uncompressed object"); sp.get_scanner_config("zip_max_uncompr_size",&zip_max_uncompr_size,"Maximum size of a ZIP uncompressed object"); sp.get_scanner_config("zip_name_len_max",&zip_name_len_max,"Maximum name of a ZIP component filename"); diff --git a/src/test_be.cpp b/src/test_be.cpp index 4ca81135..3dc5cb34 100644 --- a/src/test_be.cpp +++ b/src/test_be.cpp @@ -127,11 +127,26 @@ std::filesystem::path test_scanners(const std::vector & scanners, s ss.apply_scanner_commands(); REQUIRE (ss.get_enabled_scanners().size() == scanners.size()); // the one scanner - std::cerr << "\n## output in " << sc.outdir << " for " << ss.get_enabled_scanners()[0] << "\n"; + if (ss.get_enabled_scanners().size()>0){ + std::cerr << "\n## output in " << sc.outdir << " for " << ss.get_enabled_scanners()[0] << std::endl; + } else { + std::cerr << "\n## output in " << sc.outdir << " but no enabled scanner! 
" << std::endl; + } REQUIRE(sbuf->children == 0); ss.phase_scan(); REQUIRE(sbuf->children == 0); - ss.schedule_sbuf(sbuf); + try { + ss.schedule_sbuf(sbuf); + } catch (sbuf_t::range_exception_t &e) { + std::cerr << "sbuf_t range exception: " << e.what() << std::endl; + throw std::runtime_error(e.what()); + } catch (scanner_set::NoSuchScanner &e) { + std::cerr << "no such scanner: " << e.what() << std::endl; + } catch (std::exception &e) { + std::cerr << "unknown exception: " << e.what() << std::endl; + throw e; + } + ss.shutdown(); return sc.outdir; } @@ -174,46 +189,44 @@ TEST_CASE("scan_base64_functions", "[support]" ){ } /* scan_email.flex checks */ -TEST_CASE("scan_email8", "[support]") { - { - REQUIRE( extra_validate_email("this@that.com")==true); - REQUIRE( extra_validate_email("this@that..com")==false); - auto s1 = sbuf_t("this@that.com"); - auto s2 = sbuf_t("this_that.com"); - REQUIRE( find_host_in_email(s1) == 5); - REQUIRE( find_host_in_email(s2) == -1); - - auto s3 = sbuf_t("https://domain.com/foobar"); - size_t domain_len = 0; - REQUIRE( find_host_in_url(s3, &domain_len)==8); - REQUIRE( domain_len == 10); - } - - { - /* This is text from a PDF, decompressed */ - auto *sbufp = new sbuf_t("q Q q 72 300 460 420 re W n /Gs1 gs /Cs1 cs 1 sc 72 300 460 420re f 0 sc./Gs2 gs q 1 0 0 -1 72720 cm BT 10 0 0 -10 5 10 Tm /F1.0 1 Tf (plain_text_pdf@textedit.com).Tj ET Q Q"); - auto outdir = test_scanner(scan_email, sbufp); - auto email_txt = getLines( outdir / "email.txt" ); - REQUIRE( requireFeature(email_txt,"135\tplain_text_pdf@textedit.com")); - } +TEST_CASE("scan_email1", "[support]") { + REQUIRE( extra_validate_email("this@that.com")==true); + REQUIRE( extra_validate_email("this@that..com")==false); + auto s1 = sbuf_t("this@that.com"); + auto s2 = sbuf_t("this_that.com"); + REQUIRE( find_host_in_email(s1) == 5); + REQUIRE( find_host_in_email(s2) == -1); + + auto s3 = sbuf_t("https://domain.com/foobar"); + size_t domain_len = 0; + REQUIRE( find_host_in_url(s3, 
&domain_len)==8); + REQUIRE( domain_len == 10); +} + +TEST_CASE("scan_email2", "[support]") { + /* This is text from a PDF, decompressed */ + auto *sbufp = new sbuf_t("q Q q 72 300 460 420 re W n /Gs1 gs /Cs1 cs 1 sc 72 300 460 420re f 0 sc./Gs2 gs q 1 0 0 -1 72720 cm BT 10 0 0 -10 5 10 Tm /F1.0 1 Tf (plain_text_pdf@textedit.com).Tj ET Q Q"); + auto outdir = test_scanner(scan_email, sbufp); + auto email_txt = getLines( outdir / "email.txt" ); + REQUIRE( requireFeature(email_txt,"135\tplain_text_pdf@textedit.com")); +} - { - auto *sbufp = new sbuf_t("plain_text_pdf@textedit.com"); - auto outdir = test_scanner(scan_email, sbufp); - auto email_txt = getLines( outdir / "email.txt" ); - REQUIRE( requireFeature(email_txt,"0\tplain_text_pdf@textedit.com")); - } +TEST_CASE("scan_email3", "[support]") { + auto *sbufp = new sbuf_t("plain_text_pdf@textedit.com"); + auto outdir = test_scanner(scan_email, sbufp); + auto email_txt = getLines( outdir / "email.txt" ); + REQUIRE( requireFeature(email_txt,"0\tplain_text_pdf@textedit.com")); +} - { - std::vectorscanners = {scan_email, scan_pdf }; - auto *sbufp = map_file("nps-2010-emails.100k.raw"); - auto outdir = test_scanners(scanners, sbufp); - auto email_txt = getLines( outdir / "email.txt" ); - REQUIRE( requireFeature(email_txt,"80896\tplain_text@textedit.com")); - REQUIRE( requireFeature(email_txt,"70727-PDF-0\tplain_text_pdf@textedit.com\t")); - REQUIRE( requireFeature(email_txt,"81991-PDF-0\trtf_text_pdf@textedit.com\t")); - REQUIRE( requireFeature(email_txt,"92231-PDF-0\tplain_utf16_pdf@textedit.com\t")); - } +TEST_CASE("scan_email4", "[support]") { + std::vectorscanners = {scan_email, scan_pdf }; + auto *sbufp = map_file("nps-2010-emails.100k.raw"); + auto outdir = test_scanners(scanners, sbufp); + auto email_txt = getLines( outdir / "email.txt" ); + REQUIRE( requireFeature(email_txt,"80896\tplain_text@textedit.com")); + REQUIRE( requireFeature(email_txt,"70727-PDF-0\tplain_text_pdf@textedit.com\t")); + REQUIRE( 
requireFeature(email_txt,"81991-PDF-0\trtf_text_pdf@textedit.com\t")); + REQUIRE( requireFeature(email_txt,"92231-PDF-0\tplain_utf16_pdf@textedit.com\t")); } TEST_CASE("sbuf_decompress_zlib_new", "[support]") { From 80609d8d0a18000ddf26a676e14196a692276229 Mon Sep 17 00:00:00 2001 From: Simson Garfinkel Date: Sun, 12 Sep 2021 10:44:09 -0400 Subject: [PATCH 32/89] added end-to-end test of bulk_extractor_main() --- src/be13_api | 2 +- src/bulk_extractor.cpp | 4 ++-- src/bulk_extractor.h | 2 +- src/main.cpp | 4 ++-- src/phase1.cpp | 4 +++- src/test_be.cpp | 41 +++++++++++++++++++++++++++++++++++++++-- 6 files changed, 48 insertions(+), 9 deletions(-) diff --git a/src/be13_api b/src/be13_api index d6e02464..57f2ab7f 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit d6e0246405646b1c6427cd4e632a995cec94380d +Subproject commit 57f2ab7fdd2dab6d6200f0ba28cd98e83f5c259f diff --git a/src/bulk_extractor.cpp b/src/bulk_extractor.cpp index 7cfeb18a..7acc6807 100644 --- a/src/bulk_extractor.cpp +++ b/src/bulk_extractor.cpp @@ -314,7 +314,7 @@ struct notify_opts { } } -int bulk_extractor_main(int argc,char **argv) +int bulk_extractor_main(int argc,char * const *argv) { mtrace(); @@ -730,5 +730,5 @@ int bulk_extractor_main(int argc,char **argv) } muntrace(); - exit(0); + return(0); } diff --git a/src/bulk_extractor.h b/src/bulk_extractor.h index 60d406ac..b0f3dbce 100644 --- a/src/bulk_extractor.h +++ b/src/bulk_extractor.h @@ -10,6 +10,6 @@ void debug_help(); void usage(const char *progname, scanner_set &ss); void validate_path(const std::filesystem::path fn); -int bulk_extractor_main(int argc,char **argv); +int bulk_extractor_main(int argc,char * const *argv); #endif diff --git a/src/main.cpp b/src/main.cpp index 3ee11d15..6f67ef1c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -8,8 +8,8 @@ #include "config.h" #include "bulk_extractor.h" -int main(int argc,char **argv) +int main(int argc,char * const *argv) { - bulk_extractor_main(argc,argv); + 
bulk_extractor_main(argc, argv); exit(0); } diff --git a/src/phase1.cpp b/src/phase1.cpp index c9592019..0d91a1b8 100644 --- a/src/phase1.cpp +++ b/src/phase1.cpp @@ -7,6 +7,7 @@ #include "phase1.h" #include "be13_api/utils.h" // needs config.h #include "be13_api/aftimer.h" // needs config.h +#include "be13_api/dfxml_cpp/src/dfxml_writer.h" /** @@ -90,8 +91,9 @@ sbuf_t *Phase1::get_sbuf(image_process::iterator &it) << " (retry_count=" << retry_count << " of " << config.max_bad_alloc_errors << ")\n"; + std::stringstream str; - str << "name='bad_alloc' " << "pos0='" << it.get_pos0() << "' " << "retry_count='" << retry_count << "' "; + str << "name='bad_alloc' " << "pos0='" << dfxml_writer::xmlescape(it.get_pos0().str()) << "' " << "retry_count='" << retry_count << "' "; xreport.xmlout("debug:exception", e.what(), str.str(), true); } if (retry_count < config.max_bad_alloc_errors+1){ diff --git a/src/test_be.cpp b/src/test_be.cpp index 3dc5cb34..ecd3ae8b 100644 --- a/src/test_be.cpp +++ b/src/test_be.cpp @@ -25,6 +25,7 @@ #include "be13_api/scanner_set.h" #include "be13_api/utils.h" // needs config.h +#include "bulk_extractor.h" #include "base64_forensic.h" #include "bulk_extractor_restarter.h" #include "bulk_extractor_scanners.h" @@ -869,8 +870,8 @@ TEST_CASE("path_printer", "[path_printer]") { /**************************************************************** - ** Test restarter - **/ + * Test restarter + */ TEST_CASE("restarter", "[restarter]") { scanner_config sc; // config for be13_api @@ -889,3 +890,39 @@ TEST_CASE("restarter", "[restarter]") { REQUIRE( cfg.seen_page_ids.find("369098752") != cfg.seen_page_ids.end() ); REQUIRE( cfg.seen_page_ids.find("369098752+") == cfg.seen_page_ids.end() ); } + + +/**************************************************************** + * end-to-end tests + */ + +TEST_CASE("e2ev1", "[end-to-end]") { + std::string inpath = test_dir() / "nps-2010-emails.100k.raw"; + std::string outdir = NamedTemporaryDirectory(); + const char *n_argv[] 
={"bulk_extractor", "-1", "-o", outdir.c_str(), inpath.c_str(), nullptr}; + char * const *argv = const_cast(n_argv); + + std::cout << "testing with command line:" << std::endl; + int argc=0; + while(argv[argc]){ + std::cout << argv[argc++] << " "; + } + std::cout << std::endl; + + /* SBUF accounting is off from above; don't worry about unaccounted for sbufs. + * of course, if this runs multi-threaded, it will still be off. + */ + //sbuf_t::sbuf_total = 0; + //sbuf_t::sbuf_count = 0; + std::cout << "starting bulk_extractor" << std::endl; + + int ret = bulk_extractor_main(argc, argv); + std::cout << "ending bulk_extractor" << std::endl; + REQUIRE( ret==0 ); + + /* Validate the output dfxml file */ + std::string validate = std::string("xmllint --noout ") + outdir + "/report.xml"; + REQUIRE( system( validate.c_str()) == 0); + + /* Look for output files */ +} From b2e33ced3556eeaaa3b1edcaa2a424c47c7f2ede Mon Sep 17 00:00:00 2001 From: Simson Garfinkel Date: Sun, 12 Sep 2021 10:53:44 -0400 Subject: [PATCH 33/89] add installation of xmllint --- .github/workflows/continuous-integration-pip.yml | 4 ++-- src/test_be.cpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/continuous-integration-pip.yml b/.github/workflows/continuous-integration-pip.yml index fe50be45..0fe5fb26 100644 --- a/.github/workflows/continuous-integration-pip.yml +++ b/.github/workflows/continuous-integration-pip.yml @@ -26,12 +26,12 @@ jobs: - name: Install C++ dependencies on MacOS if: startsWith(matrix.os, 'macos') run: | - brew install autoconf automake libtool + brew install autoconf automake libtool libxml2 - name: Install C++ dependencies on Ubuntu if: startsWith(matrix.os, 'ubuntu') run: | - sudo apt install -y libtool autoconf automake libssl-dev pkg-config + sudo apt install -y libtool autoconf automake libssl-dev pkg-config libxml2-utils - name: Update autoconf on Ubuntu if: startsWith(matrix.os, 'ubuntu') diff --git a/src/test_be.cpp b/src/test_be.cpp 
index ecd3ae8b..a944f638 100644 --- a/src/test_be.cpp +++ b/src/test_be.cpp @@ -922,7 +922,8 @@ TEST_CASE("e2ev1", "[end-to-end]") { /* Validate the output dfxml file */ std::string validate = std::string("xmllint --noout ") + outdir + "/report.xml"; - REQUIRE( system( validate.c_str()) == 0); + int code = system( validate.c_str()); + REQUIRE( code == 0); /* Look for output files */ } From 5a94d2edfbdc68cd69e9f405f97682b27761cba9 Mon Sep 17 00:00:00 2001 From: Simson Garfinkel Date: Sun, 12 Sep 2021 21:12:41 -0400 Subject: [PATCH 34/89] fixed handling of matcher --- src/be13_api | 2 +- src/test_be.cpp | 109 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 80 insertions(+), 31 deletions(-) diff --git a/src/be13_api b/src/be13_api index 57f2ab7f..c79b5589 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit 57f2ab7fdd2dab6d6200f0ba28cd98e83f5c259f +Subproject commit c79b55892948c9b5a91e4bb5effceccc33a367a8 diff --git a/src/test_be.cpp b/src/test_be.cpp index a944f638..1eb5b569 100644 --- a/src/test_be.cpp +++ b/src/test_be.cpp @@ -286,6 +286,8 @@ TEST_CASE("scan_json1", "[scanners]") { REQUIRE(true); } + + /**************************************************************** ** Network test cases */ @@ -458,9 +460,11 @@ TEST_CASE("scan_zip", "[scanners]") { struct Check { + Check(const Check &ck):fname(ck.fname),feature(ck.feature){}; + Check &operator=(const Check &ck) = delete; + Check(std::string fname_, Feature feature_): - fname(fname_), - feature(feature_) {}; + fname(fname_), feature(feature_) {}; std::string fname; Feature feature; // defined in be13_api/feature_recorder.h }; @@ -484,6 +488,52 @@ TEST_CASE("test_validate", "[phase1]" ) { } +bool feature_match(const Check &exp, const std::string &line) +{ + auto words = split(line, '\t'); + if (words.size() != 3) return false; + + std::cerr << "words[0] = " << words[0] << " size=" << words.size() << "\n"; + std::string pos = exp.feature.pos.str(); + if (pos.size() < 2 ){ + 
return false; + } + if (ends_with(pos,"-0")) { + pos.resize(pos.size()-2); + } + if (ends_with(pos,"|0")) { + pos.resize(pos.size()-2); + } + std::cerr << "words[0] = " << words[0] << " words[1] = " << words[1] << "\n"; + std::cerr << " exp.feature.pos=" << exp.feature.pos << "\n"; + std::cerr << " exp.feature.feature=" << exp.feature.feature << std::endl; + std::cerr << " exp.feature.context=" << exp.feature.context << std::endl; + + if (words[0] != exp.feature.pos){ + std::cerr << " pos does not match\n"; + return false; + } + std::cerr << " pos matches!\n"; + + if (words[1] != exp.feature.feature){ + std::cerr << " feature does not match\n"; + return true; + } + std::cerr << " feature matches!\n"; + + if (exp.feature.context=="") return true; + if (words[2] == exp.feature.context) return true; + + if (ends_with(exp.feature.context, "*")) { + std::string ctx = exp.feature.context; + ctx.resize(ctx.size(), -1); + if (starts_with(words[2], ctx )){ + return true; + } + } + return false; +} + /* * Run all of the built-in scanners on a specific image, look for the given features, and return the directory. 
@@ -491,13 +541,14 @@ TEST_CASE("test_validate", "[phase1]" ) { std::filesystem::path validate(std::string image_fname, std::vector &expected, bool recurse=true, size_t offset=0) { sbuf_t::debug_range_exception = true; - std::cerr << "================ validate " << image_fname << " ================\n"; scanner_config sc; sc.outdir = NamedTemporaryDirectory(); sc.scanner_commands = enable_all_scanners; sc.allow_recurse = recurse; + std::cerr << "================ validate " << image_fname << " (outdir: " << sc.outdir << ") ================\n"; + if (offset==0) { sc.input_fname = test_dir() / image_fname; } else { @@ -546,48 +597,33 @@ std::filesystem::path validate(std::string image_fname, std::vector &expe xreport->close(); delete xreport; - for (size_t i=0; i ex { - Check("elf.txt", Feature( "0", "9e218cee3b190e8f59ef323b27f4d339481516e9", "")) + Check("elf.txt", Feature( "0", "9e218cee3b190e8f59ef323b27f4d339481516e9", "*")) }; validate("hello_elf", ex); @@ -796,6 +832,19 @@ TEST_CASE("test_net80", "[phase1]") { validate("ntlm80.pcap", ex2); } +TEST_CASE("test_winpe", "[phase1]") { + std::vector ex2 { + Check("winpe.txt", Feature( "0", + "074b9b371de190a96fb0cb987326cd238142e9d1", + " Date: Sun, 12 Sep 2021 21:37:17 -0400 Subject: [PATCH 35/89] finally all tests pass --- src/test_be.cpp | 56 ++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/test_be.cpp b/src/test_be.cpp index 1eb5b569..a112fb9a 100644 --- a/src/test_be.cpp +++ b/src/test_be.cpp @@ -491,46 +491,48 @@ TEST_CASE("test_validate", "[phase1]" ) { bool feature_match(const Check &exp, const std::string &line) { auto words = split(line, '\t'); - if (words.size() != 3) return false; + if (words.size() <2 || words.size() > 3) return false; + + //std::cerr << "check line=" << line << "\n"; - std::cerr << "words[0] = " << words[0] << " size=" << words.size() << "\n"; std::string pos = exp.feature.pos.str(); - if (pos.size() < 2 ){ - return 
false; - } - if (ends_with(pos,"-0")) { - pos.resize(pos.size()-2); - } - if (ends_with(pos,"|0")) { - pos.resize(pos.size()-2); + if ( pos.size() > 2 ){ + if (ends_with(pos,"-0")) { + pos.resize(pos.size()-2); + } + if (ends_with(pos,"|0")) { + pos.resize(pos.size()-2); + } } - std::cerr << "words[0] = " << words[0] << " words[1] = " << words[1] << "\n"; - std::cerr << " exp.feature.pos=" << exp.feature.pos << "\n"; - std::cerr << " exp.feature.feature=" << exp.feature.feature << std::endl; - std::cerr << " exp.feature.context=" << exp.feature.context << std::endl; - if (words[0] != exp.feature.pos){ - std::cerr << " pos does not match\n"; + if ( words[0] != exp.feature.pos ){ + //std::cerr << " pos " << exp.feature.pos << " does not match\n"; return false; } - std::cerr << " pos matches!\n"; - if (words[1] != exp.feature.feature){ - std::cerr << " feature does not match\n"; - return true; + if ( words[1] != exp.feature.feature ){ + //std::cerr << " feature '" << exp.feature.feature << "' does not match feature '" << words[1] << "'\n"; + return false; } - std::cerr << " feature matches!\n"; - if (exp.feature.context=="") return true; - if (words[2] == exp.feature.context) return true; + std::string ctx = exp.feature.context; + if (words.size()==2) return ctx==""; + + if ( (ctx=="") || (ctx==words[2]) ) return true; + + //std::cerr << " context '" << ctx << "' (len=" << ctx.size() << ") " + //<< "does not match context '" << words[2] << "' (" << words[2].size() << ")\n"; - if (ends_with(exp.feature.context, "*")) { - std::string ctx = exp.feature.context; - ctx.resize(ctx.size(), -1); + if ( ends_with(ctx, "*") ) { + ctx.resize(ctx.size()-1 ); if (starts_with(words[2], ctx )){ return true; } + //std::cerr << " context did not start with '" << ctx << "'\n"; + } else { + //std::cerr << " context does not end with *\n"; } + return false; } @@ -610,8 +612,6 @@ std::filesystem::path validate(std::string image_fname, std::vector &expe throw 
std::runtime_error("validate_scanners:[phase1] Could not open "+fname.string()); } while (std::getline(inFile, line)) { - std::cerr << "pass=" << pass << "line=" << line << std::endl; - if (pass==1) { std::cerr << fname << ":" << line << "\n"; // print the file the second time through } From 445aaced1205982045a22c5afd70e711f0ec146c Mon Sep 17 00:00:00 2001 From: Simson Garfinkel Date: Mon, 13 Sep 2021 19:21:31 -0400 Subject: [PATCH 36/89] option processing improvements --- src/be13_api | 2 +- src/bulk_extractor.cpp | 176 +-- src/cxxopts.hpp | 2599 ++++++++++++++++++++++++++++++++++++++++ src/image_process.cpp | 6 +- src/image_process.h | 5 + src/main.cpp | 3 +- src/phase1.h | 1 + src/scan_accts.flex | 2 +- src/scan_net.cpp | 135 +-- src/scan_net.h | 119 +- src/test_be.cpp | 53 +- 11 files changed, 2893 insertions(+), 208 deletions(-) create mode 100644 src/cxxopts.hpp diff --git a/src/be13_api b/src/be13_api index c79b5589..90c83420 160000 --- a/src/be13_api +++ b/src/be13_api @@ -1 +1 @@ -Subproject commit c79b55892948c9b5a91e4bb5effceccc33a367a8 +Subproject commit 90c83420400920aeea7063cf2d778ce45b3960c9 diff --git a/src/bulk_extractor.cpp b/src/bulk_extractor.cpp index 7acc6807..21f509f5 100644 --- a/src/bulk_extractor.cpp +++ b/src/bulk_extractor.cpp @@ -64,6 +64,8 @@ int _CRT_fmode = _O_BINARY; #include "bulk_extractor_scanners.h" #include "bulk_extractor_restarter.h" +#include "ketopt.h" + /** * Output the #defines for our debug parameters. Used by the automake system. 
*/ @@ -111,6 +113,7 @@ void usage(const char *progname, scanner_set &ss) std::cout << " results go into find.txt\n"; std::cout << " -q - quiet - no status output (changed in v2.0).\n"; std::cout << " -s frac[:passes] - Set random sampling parameters\n"; + std::cout << " -0 - Do not run notification thread\n"; std::cout << " -1 - bulk_extractor v1.x legacy mode\n"; std::cout << "\nTuning parameters:\n"; // std::cout << " -C NN - specifies the size of the context window (default " << feature_recorder::context_window_default << ")\n"; @@ -314,20 +317,23 @@ struct notify_opts { } } +void launch_notify_thread(struct notify_opts *o) +{ + new std::thread(¬ify_thread, o); // launch the notify thread +} + int bulk_extractor_main(int argc,char * const *argv) { mtrace(); - - const char *progname = argv[0]; + const char *progname = oargv[0]; const auto original_argc = argc; - const auto original_argv = argv; + const auto original_argv = oargv; - word_and_context_list alert_list; /* shold be flagged */ - word_and_context_list stop_list; /* should be ignored */ - std::atomic fraction_done = 0; /* a callback of sorts */ + word_and_context_list alert_list; /* should be flagged */ + word_and_context_list stop_list; /* should be ignored */ + std::atomic fraction_done = 0; /* a shared memory space */ aftimer master_timer; - scanner_config sc; // config for be13_api Phase1::Config cfg; // config for the image_processing system cfg.fraction_done = &fraction_done; @@ -361,26 +367,41 @@ int bulk_extractor_main(int argc,char * const *argv) setmode(1,O_BINARY); // make stdout binary #endif - if (argc==1) opt_h=1; // generate help if no arguments provided - /* Process options */ + if (argc==1) opt_h=1; // generate help if no arguments provided const std::string ALL { "all" }; - int ch; - char *empty = strdup(""); - while ((ch = getopt(argc, argv, "A:B:b:C:d:E:e:F:f:G:g:HhiJj:M:m:o:P:p:qRr:S:s:VW:w:x:Y:z:Z1")) != -1) { - if (optarg==nullptr) optarg=empty; - std::string arg = optarg!=ALL ? 
optarg : scanner_config::scanner_command::ALL_SCANNERS; - switch (ch) { - case 'A': sc.offset_add = stoi64(optarg);break; - case 'b': sc.banner_file = optarg; break; - case 'C': sc.context_window_default = atoi(optarg);break; + + + /* 2021-09-13 - slg - option processing rewritten to use cxxopts */ + scanner_config sc; // config for be13_api + cxxopts::Options options("bulk_extractor", "A high-performance flexible digital forensics program."); + options.add_options() + ("A,offset_add", "Offset added to feature locations", cxxopts::value()->default_value("0")) + ("b,banner_file", "Path of file whose contents are prepended to top of all feature files", cxxoptions::value()) + ("C,context_window", "Size of context window reported in bytes", + cxxopts::value()->default_value(std::to_string(sc.context_window_default))) + + ; + + auto result = options.parse(argc, argv); + if (result.count("help")) { + std::cout << options.help() << std::endl; + usage(progname, ss); + return 1; + } + if (result.count("info_scanners")) { + ss.info_scanners(std::cout, true, true, 'e', 'x'); + return 2; + } + +#if 0 + +sc.context_window_default = atoi(optarg);break; case 'd': - { if (strcmp(optarg,"h")==0) debug_help(); cfg.debug = atoi(optarg); if (cfg.debug==0) cfg.debug=1; - } - break; + break; case 'E': /* Enable all scanners */ sc.push_scanner_command( scanner_config::scanner_command::ALL_SCANNERS, scanner_config::scanner_command::DISABLE); sc.push_scanner_command( arg, scanner_config::scanner_command::ENABLE); @@ -411,24 +432,23 @@ int bulk_extractor_main(int argc,char * const *argv) break; case 'R': cfg.opt_recurse = true; break; case 'S': - { - std::vector params = split(optarg,'='); - if (params.size()!=2){ - std::cerr << "Invalid paramter: " << optarg << "\n"; - exit(1); - } - sc.namevals[params[0]] = params[1]; - continue; - } - case 's': - opt_sampling_params = optarg; - break; - case 'V': std::cout << "bulk_extractor " << PACKAGE_VERSION << "\n"; exit (0); - case 'W': - 
fprintf(stderr,"-W has been deprecated. Specify with -S word_min=NN and -S word_max=NN\n"); - exit(1); + if (strchr(optarg,'=')==nullptr){ + std::cerr << "Invalid -S paramter: must be key=value format\n"; + return 2; + } else { + std::vector params = split(optarg,'='); + if (params.size()!=2){ + std::cerr << "Invalid paramter: " << optarg << "\n"; + return 2; + } + sc.namevals[params[0]] = params[1]; + } break; - case 'w': if (stop_list.readfile(optarg)){ + case 's': opt_sampling_params = optarg; break; + case 'V': std::cout << "bulk_extractor " << PACKAGE_VERSION << "\n"; return 0; + case 'W': fprintf(stderr,"-W has been deprecated. Specify with -S word_min=NN and -S word_max=NN\n"); return 1; + case 'w': + if (stop_list.readfile(optarg)){ throw_FileNotFoundError(optarg); } break; @@ -448,15 +468,23 @@ int bulk_extractor_main(int argc,char * const *argv) } case 'z': cfg.opt_page_start = stoi64(optarg);break; case 'Z': opt_zap=true;break; + case '0': cfg.opt_notification = false; + printf("SET cfg.opt_notification to false\n"); + printf("****** cfg=%p\n",&cfg); + break; case '1': cfg.opt_legacy = true; break; - case 'H': - opt_H++; - continue; - case 'h': - opt_h++; - continue; + case 'H': opt_H++; break; + case 'h': opt_h++; break; } } + std::string arg = optarg!=ALL ? optarg : scanner_config::scanner_command::ALL_SCANNERS; + argc -= optind; + argv += optind; + if (argc>0) { + sc.input_fname = argv[0]; + } + + printf("cfg.opt_notificaiton=%d cfg=%p\n",cfg.opt_notification, &cfg); /* Legacy mode if stdout is not a tty */ #ifdef HAVE_ISATTY @@ -465,9 +493,6 @@ int bulk_extractor_main(int argc,char * const *argv) } #endif - argc -= optind; - argv += optind; - /* Create a configuration that will be used to initialize the scanners */ /* Make individual configuration options appear on the command line interface. 
*/ sc.get_global_config("debug_histogram_malloc_fail_frequency", @@ -489,33 +514,32 @@ int bulk_extractor_main(int argc,char * const *argv) scanner_set ss(sc, f, nullptr); // make a scanner_set but with no XML writer. We will create it below ss.add_scanners(scanners_builtin); + /* Print usage if necessary. Requires scanner set, but not commands applied. + */ + if ( opt_h ) { + usage(progname, ss); + return 1; + } + if ( opt_H ) { + ss.info_scanners(std::cout, true, true, 'e', 'x'); + return 1; + } + /* Get image or directory */ - if (argc==0 || *argv == nullptr) { + if (sc.input_fname=="") { if (cfg.opt_recurse) { std::cerr << "filedir not provided\n"; } else { std::cerr << "imagefile not provided\n"; } usage(progname, ss); - exit(1); + return 1; } - sc.input_fname = *argv; - if (sc.outdir.empty()){ + if (sc.outdir.empty() || sc.outdir==scanner_config::NO_OUTDIR){ std::cerr << "error: -o outdir must be specified\n"; usage(progname, ss); - exit(1); - } - - /* Print usage if necessary. Requires scanner set, but not commands applied. - */ - if ( opt_h ) { - usage(progname, ss); - exit(1); - } - if ( opt_H ) { - ss.info_scanners(std::cout, true, true, 'e', 'x'); - exit(1); + return 1; } /* The zap option wipes the contents of a directory, useful for debugging */ @@ -536,7 +560,7 @@ int bulk_extractor_main(int argc,char * const *argv) } catch (const scanner_set::NoSuchScanner &e) { std::cerr << "no such scanner: " << e.what() << "\n"; - exit(1); + return 1; } /* Give an error if a find list was specified @@ -571,7 +595,7 @@ int bulk_extractor_main(int argc,char * const *argv) } else { pp.process_path(opt_path); } - exit(0); + return 0; } /* Open the image file (or the device) now. 
@@ -628,10 +652,20 @@ int bulk_extractor_main(int argc,char * const *argv) /*** PHASE 1 --- Run on the input image */ struct notify_opts o; o.ssp = &ss; - o.master_timer = &master_timer; + o.master_timer = &master_timer; o.fraction_done = &fraction_done; - o.opt_legacy = cfg.opt_legacy; - new std::thread(¬ify_thread, &o); // launch the notify thread + o.opt_legacy = cfg.opt_legacy; + + printf("cfg.opt_notificaiton=%d cfg=%p\n",cfg.opt_notification, &cfg); + + if (cfg.opt_notification) { + printf("****** OMG. IT IS STILL TRUE??? cfg=%p\n",&cfg); + for(int i=0;oargv[i];i++){ + printf("oargv[%d]=%s\n",i,oargv[i]); + } + exit(1); + launch_notify_thread(&o); + } ss.phase_scan(); #if 0 @@ -665,7 +699,7 @@ int bulk_extractor_main(int argc,char * const *argv) catch (const feature_recorder::DiskWriteError &e) { std::cerr << "Disk write error during Phase 1 (scanning). Disk is probably full." << std::endl << "Remove extra files and restart bulk_extractor with the exact same command line to continue." << std::endl; - exit(1); + return 1; } #if 0 @@ -687,7 +721,7 @@ int bulk_extractor_main(int argc,char * const *argv) catch (const feature_recorder::DiskWriteError &e) { std::cerr << "Disk write error during Phase 2 (histogram making). Disk is probably full." << std::endl << "Remove extra files and restart bulk_extractor with the exact same command line to continue." 
<< std::endl; - exit(1); + return 1; } xreport->add_timestamp("phase2 end"); @@ -731,4 +765,4 @@ int bulk_extractor_main(int argc,char * const *argv) muntrace(); return(0); -} + } diff --git a/src/cxxopts.hpp b/src/cxxopts.hpp new file mode 100644 index 00000000..af87540b --- /dev/null +++ b/src/cxxopts.hpp @@ -0,0 +1,2599 @@ +/* + +Copyright (c) 2014, 2015, 2016, 2017 Jarryd Beck + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +#ifndef CXXOPTS_HPP_INCLUDED +#define CXXOPTS_HPP_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__GNUC__) && !defined(__clang__) +# if (__GNUC__ * 10 + __GNUC_MINOR__) < 49 +# define CXXOPTS_NO_REGEX true +# endif +#endif + +#ifndef CXXOPTS_NO_REGEX +# include +#endif // CXXOPTS_NO_REGEX + +#ifdef __cpp_lib_optional +#include +#define CXXOPTS_HAS_OPTIONAL +#endif + +#if __cplusplus >= 201603L +#define CXXOPTS_NODISCARD [[nodiscard]] +#else +#define CXXOPTS_NODISCARD +#endif + +#ifndef CXXOPTS_VECTOR_DELIMITER +#define CXXOPTS_VECTOR_DELIMITER ',' +#endif + +#define CXXOPTS__VERSION_MAJOR 3 +#define CXXOPTS__VERSION_MINOR 0 +#define CXXOPTS__VERSION_PATCH 0 + +#if (__GNUC__ < 10 || (__GNUC__ == 10 && __GNUC_MINOR__ < 1)) && __GNUC__ >= 6 + #define CXXOPTS_NULL_DEREF_IGNORE +#endif + +namespace cxxopts +{ + static constexpr struct { + uint8_t major, minor, patch; + } version = { + CXXOPTS__VERSION_MAJOR, + CXXOPTS__VERSION_MINOR, + CXXOPTS__VERSION_PATCH + }; +} // namespace cxxopts + +//when we ask cxxopts to use Unicode, help strings are processed using ICU, +//which results in the correct lengths being computed for strings when they +//are formatted for the help output +//it is necessary to make sure that can be found by the +//compiler, and that icu-uc is linked in to the binary. 
+ +#ifdef CXXOPTS_USE_UNICODE +#include + +namespace cxxopts +{ + using String = icu::UnicodeString; + + inline + String + toLocalString(std::string s) + { + return icu::UnicodeString::fromUTF8(std::move(s)); + } + +#if defined(__GNUC__) +// GNU GCC with -Weffc++ will issue a warning regarding the upcoming class, we want to silence it: +// warning: base class 'class std::enable_shared_from_this' has accessible non-virtual destructor +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#pragma GCC diagnostic ignored "-Weffc++" +// This will be ignored under other compilers like LLVM clang. +#endif + class UnicodeStringIterator : public + std::iterator + { + public: + + UnicodeStringIterator(const icu::UnicodeString* string, int32_t pos) + : s(string) + , i(pos) + { + } + + value_type + operator*() const + { + return s->char32At(i); + } + + bool + operator==(const UnicodeStringIterator& rhs) const + { + return s == rhs.s && i == rhs.i; + } + + bool + operator!=(const UnicodeStringIterator& rhs) const + { + return !(*this == rhs); + } + + UnicodeStringIterator& + operator++() + { + ++i; + return *this; + } + + UnicodeStringIterator + operator+(int32_t v) + { + return UnicodeStringIterator(s, i + v); + } + + private: + const icu::UnicodeString* s; + int32_t i; + }; +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + + inline + String& + stringAppend(String&s, String a) + { + return s.append(std::move(a)); + } + + inline + String& + stringAppend(String& s, size_t n, UChar32 c) + { + for (size_t i = 0; i != n; ++i) + { + s.append(c); + } + + return s; + } + + template + String& + stringAppend(String& s, Iterator begin, Iterator end) + { + while (begin != end) + { + s.append(*begin); + ++begin; + } + + return s; + } + + inline + size_t + stringLength(const String& s) + { + return s.length(); + } + + inline + std::string + toUTF8String(const String& s) + { + std::string result; + s.toUTF8String(result); + + return result; + } + + 
inline + bool + empty(const String& s) + { + return s.isEmpty(); + } +} + +namespace std +{ + inline + cxxopts::UnicodeStringIterator + begin(const icu::UnicodeString& s) + { + return cxxopts::UnicodeStringIterator(&s, 0); + } + + inline + cxxopts::UnicodeStringIterator + end(const icu::UnicodeString& s) + { + return cxxopts::UnicodeStringIterator(&s, s.length()); + } +} + +//ifdef CXXOPTS_USE_UNICODE +#else + +namespace cxxopts +{ + using String = std::string; + + template + T + toLocalString(T&& t) + { + return std::forward(t); + } + + inline + size_t + stringLength(const String& s) + { + return s.length(); + } + + inline + String& + stringAppend(String&s, const String& a) + { + return s.append(a); + } + + inline + String& + stringAppend(String& s, size_t n, char c) + { + return s.append(n, c); + } + + template + String& + stringAppend(String& s, Iterator begin, Iterator end) + { + return s.append(begin, end); + } + + template + std::string + toUTF8String(T&& t) + { + return std::forward(t); + } + + inline + bool + empty(const std::string& s) + { + return s.empty(); + } +} // namespace cxxopts + +//ifdef CXXOPTS_USE_UNICODE +#endif + +namespace cxxopts +{ + namespace + { +#ifdef _WIN32 + const std::string LQUOTE("\'"); + const std::string RQUOTE("\'"); +#else + const std::string LQUOTE("‘"); + const std::string RQUOTE("’"); +#endif + } // namespace + +#if defined(__GNUC__) +// GNU GCC with -Weffc++ will issue a warning regarding the upcoming class, we want to silence it: +// warning: base class 'class std::enable_shared_from_this' has accessible non-virtual destructor +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#pragma GCC diagnostic ignored "-Weffc++" +// This will be ignored under other compilers like LLVM clang. 
+#endif + class Value : public std::enable_shared_from_this + { + public: + + virtual ~Value() = default; + + virtual + std::shared_ptr + clone() const = 0; + + virtual void + parse(const std::string& text) const = 0; + + virtual void + parse() const = 0; + + virtual bool + has_default() const = 0; + + virtual bool + is_container() const = 0; + + virtual bool + has_implicit() const = 0; + + virtual std::string + get_default_value() const = 0; + + virtual std::string + get_implicit_value() const = 0; + + virtual std::shared_ptr + default_value(const std::string& value) = 0; + + virtual std::shared_ptr + implicit_value(const std::string& value) = 0; + + virtual std::shared_ptr + no_implicit_value() = 0; + + virtual bool + is_boolean() const = 0; + }; +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + class OptionException : public std::exception + { + public: + explicit OptionException(std::string message) + : m_message(std::move(message)) + { + } + + CXXOPTS_NODISCARD + const char* + what() const noexcept override + { + return m_message.c_str(); + } + + private: + std::string m_message; + }; + + class OptionSpecException : public OptionException + { + public: + + explicit OptionSpecException(const std::string& message) + : OptionException(message) + { + } + }; + + class OptionParseException : public OptionException + { + public: + explicit OptionParseException(const std::string& message) + : OptionException(message) + { + } + }; + + class option_exists_error : public OptionSpecException + { + public: + explicit option_exists_error(const std::string& option) + : OptionSpecException("Option " + LQUOTE + option + RQUOTE + " already exists") + { + } + }; + + class invalid_option_format_error : public OptionSpecException + { + public: + explicit invalid_option_format_error(const std::string& format) + : OptionSpecException("Invalid option format " + LQUOTE + format + RQUOTE) + { + } + }; + + class option_syntax_exception : public OptionParseException { + 
public: + explicit option_syntax_exception(const std::string& text) + : OptionParseException("Argument " + LQUOTE + text + RQUOTE + + " starts with a - but has incorrect syntax") + { + } + }; + + class option_not_exists_exception : public OptionParseException + { + public: + explicit option_not_exists_exception(const std::string& option) + : OptionParseException("Option " + LQUOTE + option + RQUOTE + " does not exist") + { + } + }; + + class missing_argument_exception : public OptionParseException + { + public: + explicit missing_argument_exception(const std::string& option) + : OptionParseException( + "Option " + LQUOTE + option + RQUOTE + " is missing an argument" + ) + { + } + }; + + class option_requires_argument_exception : public OptionParseException + { + public: + explicit option_requires_argument_exception(const std::string& option) + : OptionParseException( + "Option " + LQUOTE + option + RQUOTE + " requires an argument" + ) + { + } + }; + + class option_not_has_argument_exception : public OptionParseException + { + public: + option_not_has_argument_exception + ( + const std::string& option, + const std::string& arg + ) + : OptionParseException( + "Option " + LQUOTE + option + RQUOTE + + " does not take an argument, but argument " + + LQUOTE + arg + RQUOTE + " given" + ) + { + } + }; + + class option_not_present_exception : public OptionParseException + { + public: + explicit option_not_present_exception(const std::string& option) + : OptionParseException("Option " + LQUOTE + option + RQUOTE + " not present") + { + } + }; + + class option_has_no_value_exception : public OptionException + { + public: + explicit option_has_no_value_exception(const std::string& option) + : OptionException( + !option.empty() ? 
+ ("Option " + LQUOTE + option + RQUOTE + " has no value") : + "Option has no value") + { + } + }; + + class argument_incorrect_type : public OptionParseException + { + public: + explicit argument_incorrect_type + ( + const std::string& arg + ) + : OptionParseException( + "Argument " + LQUOTE + arg + RQUOTE + " failed to parse" + ) + { + } + }; + + class option_required_exception : public OptionParseException + { + public: + explicit option_required_exception(const std::string& option) + : OptionParseException( + "Option " + LQUOTE + option + RQUOTE + " is required but not present" + ) + { + } + }; + + template + void throw_or_mimic(const std::string& text) + { + static_assert(std::is_base_of::value, + "throw_or_mimic only works on std::exception and " + "deriving classes"); + +#ifndef CXXOPTS_NO_EXCEPTIONS + // If CXXOPTS_NO_EXCEPTIONS is not defined, just throw + throw T{text}; +#else + // Otherwise manually instantiate the exception, print what() to stderr, + // and exit + T exception{text}; + std::cerr << exception.what() << std::endl; + std::exit(EXIT_FAILURE); +#endif + } + + namespace values + { + namespace parser_tool + { + struct IntegerDesc + { + std::string negative = ""; + std::string base = ""; + std::string value = ""; + }; + struct ArguDesc { + std::string arg_name = ""; + bool grouping = false; + bool set_value = false; + std::string value = ""; + }; +#ifdef CXXOPTS_NO_REGEX + inline IntegerDesc SplitInteger(const std::string &text) + { + if (text.empty()) + { + throw_or_mimic(text); + } + IntegerDesc desc; + const char *pdata = text.c_str(); + if (*pdata == '-') + { + pdata += 1; + desc.negative = "-"; + } + if (strncmp(pdata, "0x", 2) == 0) + { + pdata += 2; + desc.base = "0x"; + } + if (*pdata != '\0') + { + desc.value = std::string(pdata); + } + else + { + throw_or_mimic(text); + } + return desc; + } + + inline bool IsTrueText(const std::string &text) + { + const char *pdata = text.c_str(); + if (*pdata == 't' || *pdata == 'T') + { + pdata += 1; 
+ if (strncmp(pdata, "rue\0", 4) == 0) + { + return true; + } + } + else if (strncmp(pdata, "1\0", 2) == 0) + { + return true; + } + return false; + } + + inline bool IsFalseText(const std::string &text) + { + const char *pdata = text.c_str(); + if (*pdata == 'f' || *pdata == 'F') + { + pdata += 1; + if (strncmp(pdata, "alse\0", 5) == 0) + { + return true; + } + } + else if (strncmp(pdata, "0\0", 2) == 0) + { + return true; + } + return false; + } + + inline std::pair SplitSwitchDef(const std::string &text) + { + std::string short_sw, long_sw; + const char *pdata = text.c_str(); + if (isalnum(*pdata) && *(pdata + 1) == ',') { + short_sw = std::string(1, *pdata); + pdata += 2; + } + while (*pdata == ' ') { pdata += 1; } + if (isalnum(*pdata)) { + const char *store = pdata; + pdata += 1; + while (isalnum(*pdata) || *pdata == '-' || *pdata == '_') { + pdata += 1; + } + if (*pdata == '\0') { + long_sw = std::string(store, pdata - store); + } else { + throw_or_mimic(text); + } + } + return std::pair(short_sw, long_sw); + } + + inline ArguDesc ParseArgument(const char *arg, bool &matched) + { + ArguDesc argu_desc; + const char *pdata = arg; + matched = false; + if (strncmp(pdata, "--", 2) == 0) + { + pdata += 2; + if (isalnum(*pdata)) + { + argu_desc.arg_name.push_back(*pdata); + pdata += 1; + while (isalnum(*pdata) || *pdata == '-' || *pdata == '_') + { + argu_desc.arg_name.push_back(*pdata); + pdata += 1; + } + if (argu_desc.arg_name.length() > 1) + { + if (*pdata == '=') + { + argu_desc.set_value = true; + pdata += 1; + if (*pdata != '\0') + { + argu_desc.value = std::string(pdata); + } + matched = true; + } + else if (*pdata == '\0') + { + matched = true; + } + } + } + } + else if (strncmp(pdata, "-", 1) == 0) + { + pdata += 1; + argu_desc.grouping = true; + while (isalnum(*pdata)) + { + argu_desc.arg_name.push_back(*pdata); + pdata += 1; + } + matched = !argu_desc.arg_name.empty() && *pdata == '\0'; + } + return argu_desc; + } + +#else // CXXOPTS_NO_REGEX + + 
namespace + { + + std::basic_regex integer_pattern + ("(-)?(0x)?([0-9a-zA-Z]+)|((0x)?0)"); + std::basic_regex truthy_pattern + ("(t|T)(rue)?|1"); + std::basic_regex falsy_pattern + ("(f|F)(alse)?|0"); + + std::basic_regex option_matcher + ("--([[:alnum:]][-_[:alnum:]]+)(=(.*))?|-([[:alnum:]]+)"); + std::basic_regex option_specifier + ("(([[:alnum:]]),)?[ ]*([[:alnum:]][-_[:alnum:]]*)?"); + + } // namespace + + inline IntegerDesc SplitInteger(const std::string &text) + { + std::smatch match; + std::regex_match(text, match, integer_pattern); + + if (match.length() == 0) + { + throw_or_mimic(text); + } + + IntegerDesc desc; + desc.negative = match[1]; + desc.base = match[2]; + desc.value = match[3]; + + if (match.length(4) > 0) + { + desc.base = match[5]; + desc.value = "0"; + return desc; + } + + return desc; + } + + inline bool IsTrueText(const std::string &text) + { + std::smatch result; + std::regex_match(text, result, truthy_pattern); + return !result.empty(); + } + + inline bool IsFalseText(const std::string &text) + { + std::smatch result; + std::regex_match(text, result, falsy_pattern); + return !result.empty(); + } + + inline std::pair SplitSwitchDef(const std::string &text) + { + std::match_results result; + std::regex_match(text.c_str(), result, option_specifier); + if (result.empty()) + { + throw_or_mimic(text); + } + + const std::string& short_sw = result[2]; + const std::string& long_sw = result[3]; + + return std::pair(short_sw, long_sw); + } + + inline ArguDesc ParseArgument(const char *arg, bool &matched) + { + std::match_results result; + std::regex_match(arg, result, option_matcher); + matched = !result.empty(); + + ArguDesc argu_desc; + if (matched) { + argu_desc.arg_name = result[1].str(); + argu_desc.set_value = result[2].length() > 0; + argu_desc.value = result[3].str(); + if (result[4].length() > 0) + { + argu_desc.grouping = true; + argu_desc.arg_name = result[4].str(); + } + } + + return argu_desc; + } + +#endif // CXXOPTS_NO_REGEX +#undef 
CXXOPTS_NO_REGEX + } + + namespace detail + { + template + struct SignedCheck; + + template + struct SignedCheck + { + template + void + operator()(bool negative, U u, const std::string& text) + { + if (negative) + { + if (u > static_cast((std::numeric_limits::min)())) + { + throw_or_mimic(text); + } + } + else + { + if (u > static_cast((std::numeric_limits::max)())) + { + throw_or_mimic(text); + } + } + } + }; + + template + struct SignedCheck + { + template + void + operator()(bool, U, const std::string&) const {} + }; + + template + void + check_signed_range(bool negative, U value, const std::string& text) + { + SignedCheck::is_signed>()(negative, value, text); + } + } // namespace detail + + template + void + checked_negate(R& r, T&& t, const std::string&, std::true_type) + { + // if we got to here, then `t` is a positive number that fits into + // `R`. So to avoid MSVC C4146, we first cast it to `R`. + // See https://github.com/jarro2783/cxxopts/issues/62 for more details. + r = static_cast(-static_cast(t-1)-1); + } + + template + void + checked_negate(R&, T&&, const std::string& text, std::false_type) + { + throw_or_mimic(text); + } + + template + void + integer_parser(const std::string& text, T& value) + { + parser_tool::IntegerDesc int_desc = parser_tool::SplitInteger(text); + + using US = typename std::make_unsigned::type; + constexpr bool is_signed = std::numeric_limits::is_signed; + + const bool negative = int_desc.negative.length() > 0; + const uint8_t base = int_desc.base.length() > 0 ? 
16 : 10; + const std::string & value_match = int_desc.value; + + US result = 0; + + for (char ch : value_match) + { + US digit = 0; + + if (ch >= '0' && ch <= '9') + { + digit = static_cast(ch - '0'); + } + else if (base == 16 && ch >= 'a' && ch <= 'f') + { + digit = static_cast(ch - 'a' + 10); + } + else if (base == 16 && ch >= 'A' && ch <= 'F') + { + digit = static_cast(ch - 'A' + 10); + } + else + { + throw_or_mimic(text); + } + + const US next = static_cast(result * base + digit); + if (result > next) + { + throw_or_mimic(text); + } + + result = next; + } + + detail::check_signed_range(negative, result, text); + + if (negative) + { + checked_negate(value, result, text, std::integral_constant()); + } + else + { + value = static_cast(result); + } + } + + template + void stringstream_parser(const std::string& text, T& value) + { + std::stringstream in(text); + in >> value; + if (!in) { + throw_or_mimic(text); + } + } + + template ::value>::type* = nullptr + > + void parse_value(const std::string& text, T& value) + { + integer_parser(text, value); + } + + inline + void + parse_value(const std::string& text, bool& value) + { + if (parser_tool::IsTrueText(text)) + { + value = true; + return; + } + + if (parser_tool::IsFalseText(text)) + { + value = false; + return; + } + + throw_or_mimic(text); + } + + inline + void + parse_value(const std::string& text, std::string& value) + { + value = text; + } + + // The fallback parser. It uses the stringstream parser to parse all types + // that have not been overloaded explicitly. It has to be placed in the + // source code before all other more specialized templates. 
+ template ::value>::type* = nullptr + > + void + parse_value(const std::string& text, T& value) { + stringstream_parser(text, value); + } + + template + void + parse_value(const std::string& text, std::vector& value) + { + std::stringstream in(text); + std::string token; + while(!in.eof() && std::getline(in, token, CXXOPTS_VECTOR_DELIMITER)) { + T v; + parse_value(token, v); + value.emplace_back(std::move(v)); + } + } + +#ifdef CXXOPTS_HAS_OPTIONAL + template + void + parse_value(const std::string& text, std::optional& value) + { + T result; + parse_value(text, result); + value = std::move(result); + } +#endif + + inline + void parse_value(const std::string& text, char& c) + { + if (text.length() != 1) + { + throw_or_mimic(text); + } + + c = text[0]; + } + + template + struct type_is_container + { + static constexpr bool value = false; + }; + + template + struct type_is_container> + { + static constexpr bool value = true; + }; + + template + class abstract_value : public Value + { + using Self = abstract_value; + + public: + abstract_value() + : m_result(std::make_shared()) + , m_store(m_result.get()) + { + } + + explicit abstract_value(T* t) + : m_store(t) + { + } + + ~abstract_value() override = default; + + abstract_value& operator=(const abstract_value&) = default; + + abstract_value(const abstract_value& rhs) + { + if (rhs.m_result) + { + m_result = std::make_shared(); + m_store = m_result.get(); + } + else + { + m_store = rhs.m_store; + } + + m_default = rhs.m_default; + m_implicit = rhs.m_implicit; + m_default_value = rhs.m_default_value; + m_implicit_value = rhs.m_implicit_value; + } + + void + parse(const std::string& text) const override + { + parse_value(text, *m_store); + } + + bool + is_container() const override + { + return type_is_container::value; + } + + void + parse() const override + { + parse_value(m_default_value, *m_store); + } + + bool + has_default() const override + { + return m_default; + } + + bool + has_implicit() const override + { + 
return m_implicit; + } + + std::shared_ptr + default_value(const std::string& value) override + { + m_default = true; + m_default_value = value; + return shared_from_this(); + } + + std::shared_ptr + implicit_value(const std::string& value) override + { + m_implicit = true; + m_implicit_value = value; + return shared_from_this(); + } + + std::shared_ptr + no_implicit_value() override + { + m_implicit = false; + return shared_from_this(); + } + + std::string + get_default_value() const override + { + return m_default_value; + } + + std::string + get_implicit_value() const override + { + return m_implicit_value; + } + + bool + is_boolean() const override + { + return std::is_same::value; + } + + const T& + get() const + { + if (m_store == nullptr) + { + return *m_result; + } + return *m_store; + } + + protected: + std::shared_ptr m_result{}; + T* m_store{}; + + bool m_default = false; + bool m_implicit = false; + + std::string m_default_value{}; + std::string m_implicit_value{}; + }; + + template + class standard_value : public abstract_value + { + public: + using abstract_value::abstract_value; + + CXXOPTS_NODISCARD + std::shared_ptr + clone() const override + { + return std::make_shared>(*this); + } + }; + + template <> + class standard_value : public abstract_value + { + public: + ~standard_value() override = default; + + standard_value() + { + set_default_and_implicit(); + } + + explicit standard_value(bool* b) + : abstract_value(b) + { + set_default_and_implicit(); + } + + std::shared_ptr + clone() const override + { + return std::make_shared>(*this); + } + + private: + + void + set_default_and_implicit() + { + m_default = true; + m_default_value = "false"; + m_implicit = true; + m_implicit_value = "true"; + } + }; + } // namespace values + + template + std::shared_ptr + value() + { + return std::make_shared>(); + } + + template + std::shared_ptr + value(T& t) + { + return std::make_shared>(&t); + } + + class OptionAdder; + + class OptionDetails + { + public: + 
OptionDetails + ( + std::string short_, + std::string long_, + String desc, + std::shared_ptr val + ) + : m_short(std::move(short_)) + , m_long(std::move(long_)) + , m_desc(std::move(desc)) + , m_value(std::move(val)) + , m_count(0) + { + m_hash = std::hash{}(m_long + m_short); + } + + OptionDetails(const OptionDetails& rhs) + : m_desc(rhs.m_desc) + , m_value(rhs.m_value->clone()) + , m_count(rhs.m_count) + { + } + + OptionDetails(OptionDetails&& rhs) = default; + + CXXOPTS_NODISCARD + const String& + description() const + { + return m_desc; + } + + CXXOPTS_NODISCARD + const Value& + value() const { + return *m_value; + } + + CXXOPTS_NODISCARD + std::shared_ptr + make_storage() const + { + return m_value->clone(); + } + + CXXOPTS_NODISCARD + const std::string& + short_name() const + { + return m_short; + } + + CXXOPTS_NODISCARD + const std::string& + long_name() const + { + return m_long; + } + + size_t + hash() const + { + return m_hash; + } + + private: + std::string m_short{}; + std::string m_long{}; + String m_desc{}; + std::shared_ptr m_value{}; + int m_count; + + size_t m_hash{}; + }; + + struct HelpOptionDetails + { + std::string s; + std::string l; + String desc; + bool has_default; + std::string default_value; + bool has_implicit; + std::string implicit_value; + std::string arg_help; + bool is_container; + bool is_boolean; + }; + + struct HelpGroupDetails + { + std::string name{}; + std::string description{}; + std::vector options{}; + }; + + class OptionValue + { + public: + void + parse + ( + const std::shared_ptr& details, + const std::string& text + ) + { + ensure_value(details); + ++m_count; + m_value->parse(text); + m_long_name = &details->long_name(); + } + + void + parse_default(const std::shared_ptr& details) + { + ensure_value(details); + m_default = true; + m_long_name = &details->long_name(); + m_value->parse(); + } + + void + parse_no_value(const std::shared_ptr& details) + { + m_long_name = &details->long_name(); + } + +#if 
defined(CXXOPTS_NULL_DEREF_IGNORE) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnull-dereference" +#endif + + CXXOPTS_NODISCARD + size_t + count() const noexcept + { + return m_count; + } + +#if defined(CXXOPTS_NULL_DEREF_IGNORE) +#pragma GCC diagnostic pop +#endif + + // TODO: maybe default options should count towards the number of arguments + CXXOPTS_NODISCARD + bool + has_default() const noexcept + { + return m_default; + } + + template + const T& + as() const + { + if (m_value == nullptr) { + throw_or_mimic( + m_long_name == nullptr ? "" : *m_long_name); + } + +#ifdef CXXOPTS_NO_RTTI + return static_cast&>(*m_value).get(); +#else + return dynamic_cast&>(*m_value).get(); +#endif + } + + private: + void + ensure_value(const std::shared_ptr& details) + { + if (m_value == nullptr) + { + m_value = details->make_storage(); + } + } + + + const std::string* m_long_name = nullptr; + // Holding this pointer is safe, since OptionValue's only exist in key-value pairs, + // where the key has the string we point to. 
+ std::shared_ptr m_value{}; + size_t m_count = 0; + bool m_default = false; + }; + + class KeyValue + { + public: + KeyValue(std::string key_, std::string value_) + : m_key(std::move(key_)) + , m_value(std::move(value_)) + { + } + + CXXOPTS_NODISCARD + const std::string& + key() const + { + return m_key; + } + + CXXOPTS_NODISCARD + const std::string& + value() const + { + return m_value; + } + + template + T + as() const + { + T result; + values::parse_value(m_value, result); + return result; + } + + private: + std::string m_key; + std::string m_value; + }; + + using ParsedHashMap = std::unordered_map; + using NameHashMap = std::unordered_map; + + class ParseResult + { + public: + + ParseResult() = default; + ParseResult(const ParseResult&) = default; + + ParseResult(NameHashMap&& keys, ParsedHashMap&& values, std::vector sequential, std::vector&& unmatched_args) + : m_keys(std::move(keys)) + , m_values(std::move(values)) + , m_sequential(std::move(sequential)) + , m_unmatched(std::move(unmatched_args)) + { + } + + ParseResult& operator=(ParseResult&&) = default; + ParseResult& operator=(const ParseResult&) = default; + + size_t + count(const std::string& o) const + { + auto iter = m_keys.find(o); + if (iter == m_keys.end()) + { + return 0; + } + + auto viter = m_values.find(iter->second); + + if (viter == m_values.end()) + { + return 0; + } + + return viter->second.count(); + } + + const OptionValue& + operator[](const std::string& option) const + { + auto iter = m_keys.find(option); + + if (iter == m_keys.end()) + { + throw_or_mimic(option); + } + + auto viter = m_values.find(iter->second); + + if (viter == m_values.end()) + { + throw_or_mimic(option); + } + + return viter->second; + } + + const std::vector& + arguments() const + { + return m_sequential; + } + + const std::vector& + unmatched() const + { + return m_unmatched; + } + + private: + NameHashMap m_keys{}; + ParsedHashMap m_values{}; + std::vector m_sequential{}; + std::vector m_unmatched{}; + }; + + 
struct Option + { + Option + ( + std::string opts, + std::string desc, + std::shared_ptr value = ::cxxopts::value(), + std::string arg_help = "" + ) + : opts_(std::move(opts)) + , desc_(std::move(desc)) + , value_(std::move(value)) + , arg_help_(std::move(arg_help)) + { + } + + std::string opts_; + std::string desc_; + std::shared_ptr value_; + std::string arg_help_; + }; + + using OptionMap = std::unordered_map>; + using PositionalList = std::vector; + using PositionalListIterator = PositionalList::const_iterator; + + class OptionParser + { + public: + OptionParser(const OptionMap& options, const PositionalList& positional, bool allow_unrecognised) + : m_options(options) + , m_positional(positional) + , m_allow_unrecognised(allow_unrecognised) + { + } + + ParseResult + parse(int argc, const char* const* argv); + + bool + consume_positional(const std::string& a, PositionalListIterator& next); + + void + checked_parse_arg + ( + int argc, + const char* const* argv, + int& current, + const std::shared_ptr& value, + const std::string& name + ); + + void + add_to_option(OptionMap::const_iterator iter, const std::string& option, const std::string& arg); + + void + parse_option + ( + const std::shared_ptr& value, + const std::string& name, + const std::string& arg = "" + ); + + void + parse_default(const std::shared_ptr& details); + + void + parse_no_value(const std::shared_ptr& details); + + private: + + void finalise_aliases(); + + const OptionMap& m_options; + const PositionalList& m_positional; + + std::vector m_sequential{}; + bool m_allow_unrecognised; + + ParsedHashMap m_parsed{}; + NameHashMap m_keys{}; + }; + + class Options + { + public: + + explicit Options(std::string program, std::string help_string = "") + : m_program(std::move(program)) + , m_help_string(toLocalString(std::move(help_string))) + , m_custom_help("[OPTION...]") + , m_positional_help("positional parameters") + , m_show_positional(false) + , m_allow_unrecognised(false) + , m_width(76) + , 
m_tab_expansion(false) + , m_options(std::make_shared()) + { + } + + Options& + positional_help(std::string help_text) + { + m_positional_help = std::move(help_text); + return *this; + } + + Options& + custom_help(std::string help_text) + { + m_custom_help = std::move(help_text); + return *this; + } + + Options& + show_positional_help() + { + m_show_positional = true; + return *this; + } + + Options& + allow_unrecognised_options() + { + m_allow_unrecognised = true; + return *this; + } + + Options& + set_width(size_t width) + { + m_width = width; + return *this; + } + + Options& + set_tab_expansion(bool expansion=true) + { + m_tab_expansion = expansion; + return *this; + } + + ParseResult + parse(int argc, const char* const* argv); + + OptionAdder + add_options(std::string group = ""); + + void + add_options + ( + const std::string& group, + std::initializer_list