From e20b010d9758984810586cf61fd2e1cc25967eb6 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Mon, 23 Oct 2023 09:00:14 +0300 Subject: [PATCH 1/4] Update pypi.yml (#446) * Update pypi.yml * Update __init__.py --- .github/workflows/pypi.yml | 3 +++ credsweeper/__init__.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 0900f370d..fb0876260 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -39,6 +39,9 @@ jobs: contents: write runs-on: ubuntu-latest steps: + - name: Install hub tool + run: | + sudo apt-get update && sudo apt-get install -y hub - name: Upload Assets uses: samsung/supplychainassurance/.github/actions/upload-release-asset@v1.0.2 env: diff --git a/credsweeper/__init__.py b/credsweeper/__init__.py index d10a1457a..ad8b0799b 100644 --- a/credsweeper/__init__.py +++ b/credsweeper/__init__.py @@ -20,4 +20,4 @@ '__version__' ] -__version__ = "1.5.8" +__version__ = "1.5.9" From 34671700e8cbaf332ffed6c41dae5424e15a1838 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 1 Nov 2023 11:16:27 +0200 Subject: [PATCH 2/4] Update --doc rules only (#447) * ValuePatternCheck * ValueAllowlistCheck with asterisk improves * ip_id_password tripple - fix * style fix * url fix * slight fix for test * style --- credsweeper/filters/value_allowlist_check.py | 2 +- credsweeper/rules/config.yaml | 7 +- tests/__init__.py | 2 +- tests/data/depth_3.json | 6 +- tests/data/doc.json | 177 ++----------------- tests/data/ml_threshold_0.json | 6 +- tests/data/output.json | 6 +- tests/filters/test_value_allowlist_check.py | 4 +- tests/samples/doc_id_pair_passwd_pair | 2 +- tests/samples/doc_ip_id_password_triple | 5 + tests/test_main.py | 4 +- 11 files changed, 45 insertions(+), 176 deletions(-) diff --git a/credsweeper/filters/value_allowlist_check.py b/credsweeper/filters/value_allowlist_check.py index 0db5085a5..be30dedca 100644 --- a/credsweeper/filters/value_allowlist_check.py +++ b/credsweeper/filters/value_allowlist_check.py @@ -11,7 +11,7 @@ class ValueAllowlistCheck(Filter): """Check that patterns from the list is not present in the candidate value.""" ALLOWED = [ - r"ENC\(.*\)", r"ENC\[.*\]", r"\$\{.*\}", r"#\{.*\}", r"\{\{.+\}\}", r"([.a-z0-9]|->)+\(.*\)", r"\*\*\*\*\*" + r"ENC\(.*\)", r"ENC\[.*\]", r"\$\{.*\}", r"#\{.*\}", r"\{\{.+\}\}", r"([.a-z0-9]|->)+\(.*\)", r"\S{0,5}\*{5,}" ] ALLOWED_PATTERN = re.compile( # Util.get_regex_combine_or(ALLOWED), # diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 71b7c5b1c..342f0c664 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -5,6 +5,7 @@ - (?P[`'\"]?(?i:token|secret|key|키|암호|암호화|토큰)[`'\"]?)((\s)*[=:](\s)*)(?P[`'\"(])?(?P\S{4,})(?(quote)[)`'\"]) filter_type: - ValueAllowlistCheck + - ValuePatternCheck min_line_len: 10 required_substrings: - token @@ -26,6 +27,7 @@ - (?P[`'\"]?(?i:(?[`'\"(])?(?P\S{4,})(?(quote)[)`'\"]) filter_type: - ValueAllowlistCheck + - ValuePatternCheck min_line_len: 10 required_substrings: - pass @@ -43,9 +45,10 @@ severity: medium type: pattern values: - - (^|(?P(?i:\bip[\s/]+id[\s/]+pw[\s/:]*))|(?P://)|\s)(?P[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})((?P\s*\()?\s*|(?(variable)[\s,/]+|\s*(?(url)[,]|[,/])\s*))[\w.-]{3,}[\s,/]+(?P(?(lpar)[^)\s/]{4,}|(?(url)[^\s/]{4,}|[^\s]{4,}))) + - (^|(?P(?i:\bip[\s/]+id[\s/]+pw[\s/:]*))|(?P://)|\s)(?P[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})((?P\s*(\w+\s+)?\()?\s*|(?(variable)[\s,/]+|\s*(?(url)[,]|[,/]))\s*)[\w.-]{3,}[\s,/]+(?P(?(lpar)[^)\s]{4,}|[^\s/]{4,}))(?:\s|[^/]|$) filter_type: - ValueAllowlistCheck + - ValuePatternCheck min_line_len: 10 required_substrings: - "." @@ -59,6 +62,7 @@ - (?P--)?(?P(?i:user\s*)?(?i:id|login|account|root|admin|user|name|wifi|role|host|default|계정|아이디))\s*?(?(ddash)[ =]|[ :=])\s*?(?P\S+) filter_type: - ValueAllowlistCheck + - ValuePatternCheck min_line_len: 10 required_substrings: - pass @@ -79,6 +83,7 @@ filter_type: - ValueAllowlistCheck - ValueDictionaryKeywordCheck + - ValuePatternCheck min_line_len: 10 required_substrings: - pw diff --git a/tests/__init__.py b/tests/__init__.py index 4a0d5fd44..103aa501b 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -11,7 +11,7 @@ SAMPLES_POST_CRED_COUNT: int = 293 # with option --doc -SAMPLES_IN_DOC = 426 +SAMPLES_IN_DOC = 422 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 16 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 514f03140..22696a82f 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -2341,13 +2341,13 @@ "severity": "medium", "line_data_list": [ { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "tests/samples/doc_id_pair_passwd_pair|RAW", "value": "ihqSb1Gg", - "value_start": 16, - "value_end": 24, + "value_start": 19, + "value_end": 27, "variable": "pw", "entropy_validation": { "iterator": "BASE64_CHARS", diff --git a/tests/data/doc.json b/tests/data/doc.json index 5c28311c4..7e5727f9c 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -5155,13 +5155,13 @@ "severity": "medium", "line_data_list": [ { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "tests/samples/doc_id_pair_passwd_pair|RAW", "value": "ihqSb1Gg", - "value_start": 16, - "value_end": 24, + "value_start": 19, + "value_end": 27, "variable": "pw", "entropy_validation": { "iterator": "BASE64_CHARS", @@ -5170,17 +5170,17 @@ } }, { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "tests/samples/doc_id_pair_passwd_pair|RAW", - "value": "xxx", + "value": "master", "value_start": 9, - "value_end": 12, + "value_end": 15, "variable": "username", "entropy_validation": { - "iterator": "HEX_CHARS", - "entropy": 0.0, + "iterator": "BASE64_CHARS", + "entropy": 2.584962500721156, "valid": false } } @@ -5194,13 +5194,13 @@ "severity": "medium", "line_data_list": [ { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "tests/samples/doc_id_pair_passwd_pair|RAW", "value": "ihqSb1Gg", - "value_start": 16, - "value_end": 24, + "value_start": 19, + "value_end": 27, "variable": "pw", "entropy_validation": { "iterator": "BASE64_CHARS", @@ -8186,13 +8186,13 @@ "line_num": 14, "path": "tests/samples/doc_various", "info": "tests/samples/doc_various|RAW", - "value": "(master/IhqSb1Gg)", - "value_start": 17, - "value_end": 34, + "value": "IhqSb1Gg", + "value_start": 25, + "value_end": 33, "variable": null, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 3.606584859926771, + "entropy": 3.0, "valid": false } } @@ -8750,13 +8750,13 @@ "line_num": 48, "path": "tests/samples/doc_various", "info": "tests/samples/doc_various|RAW", - "value": "IhqSb1Gg,master", + "value": "IhqSb1Gg,master/IhqSb1Gg", "value_start": 19, - "value_end": 34, + "value_end": 43, "variable": null, "entropy_validation": { "iterator": "BASE64_CHARS", - "entropy": 3.64643122256795, + "entropy": 3.727255729857775, "valid": false } } @@ -8834,45 +8834,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "ID_PAIR_PASSWD_PAIR", - "severity": "medium", - "line_data_list": [ - { - "line": "ANY_user:xxxx ANY_pwd:IhqSb1Gg", - "line_num": 61, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "IhqSb1Gg", - "value_start": 22, - "value_end": 30, - "variable": "ANY_pwd", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.0, - "valid": false - } - }, - { - "line": "ANY_user:xxxx ANY_pwd:IhqSb1Gg", - "line_num": 61, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "xxxx", - "value_start": 9, - "value_end": 13, - "variable": "user", - "entropy_validation": { - "iterator": "HEX_CHARS", - "entropy": 0.0, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -8897,45 +8858,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "ID_PAIR_PASSWD_PAIR", - "severity": "medium", - "line_data_list": [ - { - "line": "Acount name:xxxx Initial Password:IhqSb1Gg", - "line_num": 62, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "IhqSb1Gg", - "value_start": 34, - "value_end": 42, - "variable": "Password", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.0, - "valid": false - } - }, - { - "line": "Acount name:xxxx Initial Password:IhqSb1Gg", - "line_num": 62, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "xxxx", - "value_start": 12, - "value_end": 16, - "variable": "name", - "entropy_validation": { - "iterator": "HEX_CHARS", - "entropy": 0.0, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -9581,45 +9503,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "ID_PAIR_PASSWD_PAIR", - "severity": "medium", - "line_data_list": [ - { - "line": "ID:gildong.hong@xxxx.net mailto:{1} pw:IhqSb1Gg", - "line_num": 106, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "IhqSb1Gg", - "value_start": 39, - "value_end": 47, - "variable": "pw", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.0, - "valid": false - } - }, - { - "line": "ID:gildong.hong@xxxx.net mailto:{1} pw:IhqSb1Gg", - "line_num": 106, - "path": "tests/samples/doc_various", - "info": "tests/samples/doc_various|RAW", - "value": "gildong.hong@xxxx.net", - "value_start": 3, - "value_end": 24, - "variable": "ID", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.8358066002709883, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -10793,30 +10676,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "PASSWD_PAIR", - "severity": "medium", - "line_data_list": [ - { - "line": "mypw: KrAcMe12345,", - "line_num": 2, - "path": "tests/samples/passwd.groovy", - "info": "tests/samples/passwd.groovy|RAW", - "value": "KrAcMe12345,", - "value_start": 6, - "value_end": 18, - "variable": "pw", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.2862156256610597, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", diff --git a/tests/data/ml_threshold_0.json b/tests/data/ml_threshold_0.json index 9a28ec61e..9d04198a4 100644 --- a/tests/data/ml_threshold_0.json +++ b/tests/data/ml_threshold_0.json @@ -2437,13 +2437,13 @@ "severity": "medium", "line_data_list": [ { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "", "value": "ihqSb1Gg", - "value_start": 16, - "value_end": 24, + "value_start": 19, + "value_end": 27, "variable": "pw", "entropy_validation": { "iterator": "BASE64_CHARS", diff --git a/tests/data/output.json b/tests/data/output.json index d2e2f7d19..a537ea66d 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -2317,13 +2317,13 @@ "severity": "medium", "line_data_list": [ { - "line": "username:xxx pw:ihqSb1Gg", + "line": "username:master pw:ihqSb1Gg", "line_num": 90, "path": "tests/samples/doc_id_pair_passwd_pair", "info": "", "value": "ihqSb1Gg", - "value_start": 16, - "value_end": 24, + "value_start": 19, + "value_end": 27, "variable": "pw", "entropy_validation": { "iterator": "BASE64_CHARS", diff --git a/tests/filters/test_value_allowlist_check.py b/tests/filters/test_value_allowlist_check.py index 9c1374c27..9e50eecf2 100644 --- a/tests/filters/test_value_allowlist_check.py +++ b/tests/filters/test_value_allowlist_check.py @@ -11,9 +11,7 @@ def test_value_allowlist_check_p(self, file_path: pytest.fixture, success_line: line_data = get_line_data(file_path, line=success_line, pattern=LINE_VALUE_PATTERN) assert ValueAllowlistCheck().run(line_data, DUMMY_ANALYSIS_TARGET) is False - @pytest.mark.parametrize("line", [ - "ENC(Crackle123)", - ]) + @pytest.mark.parametrize("line", ["test*****", "ENC(Crackle123)"]) def test_value_allowlist_check_n(self, file_path: pytest.fixture, line: str) -> None: line_data = get_line_data(file_path, line=line, pattern=LINE_VALUE_PATTERN) assert ValueAllowlistCheck().run(line_data, DUMMY_ANALYSIS_TARGET) is True diff --git a/tests/samples/doc_id_pair_passwd_pair b/tests/samples/doc_id_pair_passwd_pair index 10989cef6..2a2b441ad 100644 --- a/tests/samples/doc_id_pair_passwd_pair +++ b/tests/samples/doc_id_pair_passwd_pair @@ -87,7 +87,7 @@ id/pw id:master pw:ihqSb1Gg user:master,pwd:ihqSb1Gg ID master PWD ihqSb1Gg username:master/pw:master -username:xxx pw:ihqSb1Gg +username:master pw:ihqSb1Gg PW:ihqSb1Gg ID:master 아이디:master 비밀번호:ihqSb1Gg ID:master,PW ihqSb1Gg diff --git a/tests/samples/doc_ip_id_password_triple b/tests/samples/doc_ip_id_password_triple index 30ed935d7..08cffcf2f 100644 --- a/tests/samples/doc_ip_id_password_triple +++ b/tests/samples/doc_ip_id_password_triple @@ -4,3 +4,8 @@ IP ID PW 192.168.0.1 master IhqSb1Gg IP/ID/PW 192.168.0.1/master/IhqSb1Gg + +# FALSE +

sirius 192.168.0.1/mnt/user/USBx/\\[Root_Test\\]UseCase/12345678/76125-733_FFB_2038_judgement_day6_238811_6524.apk

+http://192.168.0.1/master/IhqSb1Gg/api +http://192.168.0.1/master/branch diff --git a/tests/test_main.py b/tests/test_main.py index 89a05eaa1..6619cc1a8 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -823,7 +823,9 @@ def test_param_p(self) -> None: # internal parametrized tests to keep items = [(" STP_PASSWORD=qbgomdtpqch \\", "qbgomdtpqch")] for i in items: - content_provider: FilesProvider = TextProvider(["test.template", io.BytesIO(i[0].encode())]) + content_provider: FilesProvider = TextProvider([ + ("test.template", io.BytesIO(i[0].encode())), + ]) cred_sweeper = CredSweeper(ml_threshold=0) cred_sweeper.run(content_provider=content_provider) creds = cred_sweeper.credential_manager.get_credentials() From f76d16ae8acd44640dd7f0cfea903d8d2101049f Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 1 Nov 2023 11:34:02 +0200 Subject: [PATCH 3/4] Removed ML from well known pattern (#448) * removed extra keys * removed ml for well-known prefixes patterns * tests fixed * benchmark scores fix * benchmark scores fix 2 --- cicd/benchmark.txt | 8 +- credsweeper/rules/config.yaml | 23 ---- tests/__init__.py | 2 +- tests/data/depth_3.json | 192 +++++++++++++++++++++++----------- tests/data/doc.json | 72 ++++++------- tests/data/output.json | 164 +++++++++++++++++++++-------- tests/test_app.py | 4 +- 7 files changed, 293 insertions(+), 172 deletions(-) diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index 3bd78ff61..394e5d888 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -10,16 +10,16 @@ Predefined Pattern 326 2 40 Private Key 1001 1 3 Seed, Salt, Nonce 40 4 4 TOTAL: 5307 63688 5644 -Detected Credentials: 5993 -credsweeper result_cnt : 5337, lost_cnt : 0, true_cnt : 4439, false_cnt : 898 +Detected Credentials: 5997 +credsweeper result_cnt : 5339, lost_cnt : 0, true_cnt : 4441, false_cnt : 898 Category TP FP TN FN FPR FNR ACC PRC RCL F1 -------------------------- ---- ---- -------- ---- --------- --------- -------- -------- -------- -------- Authentication Key & Token 54 4 28 16 0.125 0.228571 0.803922 0.931034 0.771429 0.84375 Generic Secret 973 3 215 83 0.0137615 0.0785985 0.932496 0.996926 0.921402 0.957677 -Generic Token 287 7 596 46 0.0116086 0.138138 0.943376 0.97619 0.861862 0.91547 +Generic Token 289 7 596 44 0.0116086 0.132132 0.945513 0.976351 0.867868 0.918919 Other 818 750 63395 258 0.0116923 0.239777 0.984545 0.521684 0.760223 0.618759 Password 995 130 4150 410 0.0303738 0.291815 0.905013 0.884444 0.708185 0.786561 Predefined Pattern 309 2 40 17 0.0476191 0.0521472 0.94837 0.993569 0.947853 0.970173 Private Key 967 0 4 34 0.033966 0.966169 1 0.966034 0.982724 Seed, Salt, Nonce 36 2 6 4 0.25 0.1 0.875 0.947368 0.9 0.923077 - 4439 898 19428253 868 4.622e-05 0.163558 0.999909 0.831741 0.836442 0.834085 + 4441 898 19428253 866 4.622e-05 0.163181 0.999909 0.831804 0.836819 0.834304 diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index 342f0c664..0aabb7fa1 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -199,7 +199,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?P(ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - A min_line_len: 20 @@ -212,7 +211,6 @@ - (^|[^.0-9A-Za-z_/+-])(?P(AKIA|ASIA)[0-9A-Z]{16,17})([^=0-9A-Za-z_/+-]|$) - (?P[0-9a-zA-Z/+]{40}) filter_type: GeneralPattern - use_ml: true required_substrings: - AKIA - ASIA @@ -224,7 +222,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Pamzn\.mws\.[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - amzn min_line_len: 30 @@ -247,7 +244,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Pdt0[a-zA-Z]{1}[0-9]{2}\.[A-Z0-9]{24}\.[A-Z0-9]{64})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - dt0 min_line_len: 90 @@ -258,7 +254,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?PEAAC[0-9A-Za-z]{27,}) filter_type: GeneralPattern - use_ml: true required_substrings: - EAAC min_line_len: 31 @@ -282,7 +277,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?PAIza[0-9A-Za-z_-]{35})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false validations: - GoogleApiKeyValidation required_substrings: @@ -296,7 +290,6 @@ - (?P[0-9]+\-[0-9A-Za-z_]{32}\.apps\.googleusercontent\.com) - (?[0-9a-zA-Z_-]{24})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false validations: - GoogleMultiValidation required_substrings: @@ -309,7 +302,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Pya29\.[0-9A-Za-z_-]{22,}) filter_type: GeneralPattern - use_ml: true required_substrings: - ya29. min_line_len: 27 @@ -320,7 +312,6 @@ values: - (?i)(?Pheroku(.{0,20})?[0-9a-f]{8}(-[0-9a-f]{4})+-[0-9a-f]{12})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - heroku min_line_len: 24 @@ -331,7 +322,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?PIGQVJ[\w]{100,}) filter_type: GeneralPattern - use_ml: true required_substrings: - IGQVJ min_line_len: 105 @@ -353,7 +343,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?P[0-9a-zA-Z]{32}-us[0-9]{1,2})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false validations: - MailChimpKeyValidation required_substrings: @@ -366,7 +355,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Pkey-[0-9a-zA-Z]{32})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - key- min_line_len: 36 @@ -390,7 +378,6 @@ values: - (?Paccess_token\$production\$[0-9a-z]{16}\$[0-9a-z]{32})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false required_substrings: - access_token$production$ min_line_len: 72 @@ -410,7 +397,6 @@ values: - (?Psk_live_[0-9a-z]{32})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false required_substrings: - sk_live_ min_line_len: 40 @@ -433,7 +419,6 @@ values: - (?PSG\.[\w_]{16,32}\.[\w_]{16,64}) filter_type: GeneralPattern - use_ml: false required_substrings: - SG. min_line_len: 34 @@ -454,7 +439,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Pxox[a|b|p|r|o|s]\-[-a-zA-Z0-9]{10,250}) filter_type: GeneralPattern - use_ml: true validations: - SlackTokenValidation required_substrings: @@ -467,7 +451,6 @@ values: - (?Phooks\.slack\.com/services/T\w{8}/B\w{8}/\w{24}) filter_type: GeneralPattern - use_ml: true required_substrings: - hooks.slack.com/services/T min_line_len: 61 @@ -478,7 +461,6 @@ values: - (?Psk_live_[0-9a-zA-Z]{24})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true validations: - StripeApiKeyValidation required_substrings: @@ -491,7 +473,6 @@ values: - (?Prk_live_[0-9a-zA-Z]{24})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - rk_live_ min_line_len: 32 @@ -502,7 +483,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?PEAAA[0-9A-Za-z_-]{60})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true validations: - SquareAccessTokenValidation required_substrings: @@ -515,7 +495,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?Psq0[a-z]{3}-[0-9A-Za-z_-]{22})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true validations: - SquareClientIdValidation required_substrings: @@ -528,7 +507,6 @@ values: - (?Psq0csp-[0-9A-Za-z_-]{43})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: false required_substrings: - sq0csp min_line_len: 50 @@ -551,7 +529,6 @@ values: - (^|[^.0-9A-Za-z_/+-])(?PSK[0-9a-fA-F]{32})([^=0-9A-Za-z_/+-]|$) filter_type: GeneralPattern - use_ml: true required_substrings: - SK min_line_len: 34 diff --git a/tests/__init__.py b/tests/__init__.py index 103aa501b..b62f5b743 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -8,7 +8,7 @@ SAMPLES_CRED_LINE_COUNT: int = 402 # credentials count after post-processing -SAMPLES_POST_CRED_COUNT: int = 293 +SAMPLES_POST_CRED_COUNT: int = 296 # with option --doc SAMPLES_IN_DOC = 422 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 22696a82f..a28445ab9 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -97,8 +97,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -121,8 +121,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -145,8 +145,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -184,8 +184,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -208,8 +208,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -247,8 +247,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91871, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS MWS Key", "severity": "high", "line_data_list": [ @@ -271,8 +271,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91871, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Key", "severity": "medium", "line_data_list": [ @@ -4831,8 +4831,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99108, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Dynatrace API Token", "severity": "high", "line_data_list": [ @@ -4903,8 +4903,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83427, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Facebook Access Token", "severity": "high", "line_data_list": [ @@ -4927,8 +4927,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83427, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Token", "severity": "medium", "line_data_list": [ @@ -5326,8 +5326,80 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99757, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Auth", + "severity": "medium", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "tests/samples/google_oauth_key|RAW", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": "google_oauth_key", + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Google OAuth Access Token", + "severity": "high", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "tests/samples/google_oauth_key|RAW", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Key", + "severity": "medium", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "tests/samples/google_oauth_key|RAW", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": "google_oauth_key", + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -5422,8 +5494,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95517, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Heroku API Key", "severity": "high", "line_data_list": [ @@ -5470,8 +5542,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.71488, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Instagram Access Token", "severity": "high", "line_data_list": [ @@ -5902,8 +5974,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99189, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "MailGun API Key", "severity": "high", "line_data_list": [ @@ -5926,8 +5998,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -5950,8 +6022,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -6013,8 +6085,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -6037,8 +6109,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -6148,8 +6220,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99994, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -6172,8 +6244,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99994, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Password", "severity": "medium", "line_data_list": [ @@ -8146,8 +8218,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99994, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -8290,8 +8362,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.89421, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Token", "severity": "high", "line_data_list": [ @@ -8314,8 +8386,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6364, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Webhook", "severity": "high", "line_data_list": [ @@ -8338,8 +8410,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7944, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Access Token", "severity": "high", "line_data_list": [ @@ -8362,8 +8434,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.75821, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Client ID", "severity": "medium", "line_data_list": [ @@ -8770,8 +8842,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85074, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -8818,8 +8890,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85074, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -8866,8 +8938,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.76194, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Facebook Access Token", "severity": "high", "line_data_list": [ @@ -8890,8 +8962,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.76194, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Facebook Access Token", "severity": "high", "line_data_list": [ @@ -8938,8 +9010,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6423, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Twilio API Key", "severity": "high", "line_data_list": [ diff --git a/tests/data/doc.json b/tests/data/doc.json index 7e5727f9c..f505ae0ad 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -184,8 +184,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -208,8 +208,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -9625,8 +9625,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99108, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Dynatrace API Token", "severity": "high", "line_data_list": [ @@ -10120,8 +10120,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99757, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -10216,8 +10216,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95517, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Heroku API Key", "severity": "high", "line_data_list": [ @@ -10240,8 +10240,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.71488, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Instagram Access Token", "severity": "high", "line_data_list": [ @@ -10408,8 +10408,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99189, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "MailGun API Key", "severity": "high", "line_data_list": [ @@ -10432,8 +10432,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -10456,8 +10456,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -10519,8 +10519,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -10543,8 +10543,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -11296,8 +11296,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.89421, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Token", "severity": "high", "line_data_list": [ @@ -11320,8 +11320,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6364, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Webhook", "severity": "high", "line_data_list": [ @@ -11344,8 +11344,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7944, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Access Token", "severity": "high", "line_data_list": [ @@ -11368,8 +11368,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.75821, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Client ID", "severity": "medium", "line_data_list": [ @@ -11632,8 +11632,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85074, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -11680,8 +11680,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.76194, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Facebook Access Token", "severity": "high", "line_data_list": [ @@ -11728,8 +11728,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6423, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Twilio API Key", "severity": "high", "line_data_list": [ diff --git a/tests/data/output.json b/tests/data/output.json index a537ea66d..565999a64 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -97,8 +97,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -121,8 +121,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -145,8 +145,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -184,8 +184,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -208,8 +208,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 1.0, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -247,8 +247,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91871, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS MWS Key", "severity": "high", "line_data_list": [ @@ -271,8 +271,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91871, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Key", "severity": "medium", "line_data_list": [ @@ -4807,8 +4807,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99108, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Dynatrace API Token", "severity": "high", "line_data_list": [ @@ -4831,8 +4831,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83427, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Facebook Access Token", "severity": "high", "line_data_list": [ @@ -4855,8 +4855,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83427, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Token", "severity": "medium", "line_data_list": [ @@ -5254,8 +5254,80 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99757, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Auth", + "severity": "medium", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": "google_oauth_key", + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Google OAuth Access Token", + "severity": "high", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Key", + "severity": "medium", + "line_data_list": [ + { + "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", + "line_num": 1, + "path": "tests/samples/google_oauth_key", + "info": "", + "value": "ya29.gi_reo_gi_crackle_ln22", + "value_start": 20, + "value_end": 47, + "variable": "google_oauth_key", + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 3.1797273164975133, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Google OAuth Access Token", "severity": "high", "line_data_list": [ @@ -5350,8 +5422,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95517, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Heroku API Key", "severity": "high", "line_data_list": [ @@ -5398,8 +5470,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.71488, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Instagram Access Token", "severity": "high", "line_data_list": [ @@ -5782,8 +5854,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99189, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "MailGun API Key", "severity": "high", "line_data_list": [ @@ -5806,8 +5878,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -5830,8 +5902,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -5893,8 +5965,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Client ID", "severity": "high", "line_data_list": [ @@ -5917,8 +5989,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99998, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "AWS Multi", "severity": "high", "line_data_list": [ @@ -6910,8 +6982,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.89421, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Token", "severity": "high", "line_data_list": [ @@ -6934,8 +7006,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6364, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Slack Webhook", "severity": "high", "line_data_list": [ @@ -6958,8 +7030,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7944, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Access Token", "severity": "high", "line_data_list": [ @@ -6982,8 +7054,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.75821, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Square Client ID", "severity": "medium", "line_data_list": [ @@ -7198,8 +7270,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6423, + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, "rule": "Twilio API Key", "severity": "high", "line_data_list": [ diff --git a/tests/test_app.py b/tests/test_app.py index b5dfdc044..03e58231a 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -129,7 +129,7 @@ def test_it_works_with_multiline_in_patch_p(self) -> None: / value: 'AKIAQWADE5R42RDZ4JEM' / entropy_validation: BASE64_CHARS 3.684184 False] / api_validation: NOT_AVAILABLE - / ml_validation: VALIDATED_KEY + / ml_validation: NOT_AVAILABLE rule: AWS Multi / severity: high / line_data_list: @@ -144,7 +144,7 @@ def test_it_works_with_multiline_in_patch_p(self) -> None: / value: 'V84C7sDU001tFFodKU95USNy97TkqXymnvsFmYhQ' / entropy_validation: BASE64_CHARS 4.784184 True] / api_validation: NOT_AVAILABLE - / ml_validation: VALIDATED_KEY + / ml_validation: NOT_AVAILABLE rule: Token / severity: medium / line_data_list: From 7a838cf6b0fc22e7bd98e8343aa8f98e898660a0 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 1 Nov 2023 12:36:44 +0200 Subject: [PATCH 4/4] Parse docx in --doc mode (#439) * parsing docx in --doc mode as text * Apply suggestions from code review * merge issue fix * udpdated tests for FilePathExtractor * style * documentation updated --- .mypy.ini | 3 + credsweeper/config/config.py | 1 + credsweeper/deep_scanner/deep_scanner.py | 3 + credsweeper/deep_scanner/docx_scanner.py | 43 ++++ .../file_handler/file_path_extractor.py | 3 + credsweeper/secret/config.json | 7 +- docs/source/overall_architecture.rst | 6 + requirements.txt | 2 + setup.py | 1 + tests/__init__.py | 6 +- tests/conftest.py | 3 + tests/data/depth_3.json | 168 ++++++++++++--- tests/data/doc.json | 120 +++++++++++ .../file_handler/test_file_path_extractor.py | 192 +++++++++++++----- tests/samples/password.docx | Bin 4306 -> 0 bytes tests/samples/sample.docx | Bin 0 -> 5624 bytes tests/samples/sample.docx.gz | Bin 0 -> 3358 bytes tests/samples/sample.pdf | Bin 19295 -> 23165 bytes tests/samples/sample_bad_empty.docx | Bin 0 -> 2550 bytes tests/samples/small.pdf | Bin 0 -> 628 bytes tests/test_main.py | 21 +- 21 files changed, 488 insertions(+), 91 deletions(-) create mode 100644 credsweeper/deep_scanner/docx_scanner.py delete mode 100644 tests/samples/password.docx create mode 100644 tests/samples/sample.docx create mode 100644 tests/samples/sample.docx.gz create mode 100644 tests/samples/sample_bad_empty.docx create mode 100644 tests/samples/small.pdf diff --git a/.mypy.ini b/.mypy.ini index 38c842392..cf4fd1082 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -45,3 +45,6 @@ ignore_missing_imports = True [mypy-password_strength.*] ignore_missing_imports = True + +[mypy-docx.*] +ignore_missing_imports = True diff --git a/credsweeper/config/config.py b/credsweeper/config/config.py index 275b49107..f53d9b794 100644 --- a/credsweeper/config/config.py +++ b/credsweeper/config/config.py @@ -19,6 +19,7 @@ def __init__(self, config: Dict[str, Any]) -> None: self.exclude_patterns: List[re.Pattern] = [re.compile(pattern) for pattern in config["exclude"]["pattern"]] self.exclude_paths: List[str] = config["exclude"]["path"] self.exclude_containers: List[str] = config["exclude"]["containers"] + self.exclude_documents: List[str] = config["exclude"]["documents"] self.exclude_extensions: List[str] = config["exclude"]["extension"] self.exclude_lines: Set[str] = set(config["exclude"].get("lines", [])) self.exclude_values: Set[str] = set(config["exclude"].get("values", [])) diff --git a/credsweeper/deep_scanner/deep_scanner.py b/credsweeper/deep_scanner/deep_scanner.py index e5db0bd76..16838cccb 100644 --- a/credsweeper/deep_scanner/deep_scanner.py +++ b/credsweeper/deep_scanner/deep_scanner.py @@ -16,6 +16,7 @@ from credsweeper.utils import Util from .byte_scanner import ByteScanner from .bzip2_scanner import Bzip2Scanner +from .docx_scanner import DocxScanner from .encoder_scanner import EncoderScanner from .gzip_scanner import GzipScanner from .html_scanner import HtmlScanner @@ -34,6 +35,7 @@ class DeepScanner( ByteScanner, # Bzip2Scanner, # + DocxScanner, # EncoderScanner, # GzipScanner, # HtmlScanner, # @@ -71,6 +73,7 @@ def get_deep_scanners(data: bytes) -> List[Any]: deep_scanners.append(ZipScanner) # probably, there might be a docx, xlxs and so on. # It might be scanned with text representation in third-party libraries. + deep_scanners.append(DocxScanner) elif Util.is_bzip2(data): deep_scanners.append(Bzip2Scanner) elif Util.is_tar(data): diff --git a/credsweeper/deep_scanner/docx_scanner.py b/credsweeper/deep_scanner/docx_scanner.py new file mode 100644 index 000000000..c40df37b7 --- /dev/null +++ b/credsweeper/deep_scanner/docx_scanner.py @@ -0,0 +1,43 @@ +import io +import logging +from abc import ABC +from typing import List + +import docx + +from credsweeper.credentials import Candidate +from credsweeper.deep_scanner.abstract_scanner import AbstractScanner +from credsweeper.file_handler.data_content_provider import DataContentProvider +from credsweeper.file_handler.string_content_provider import StringContentProvider + +logger = logging.getLogger(__name__) + + +class DocxScanner(AbstractScanner, ABC): + """Implements docx scanning""" + + def data_scan( + self, # + data_provider: DataContentProvider, # + depth: int, # + recursive_limit_size: int) -> List[Candidate]: + """Tries to scan DOCX text with splitting by lines""" + candidates: List[Candidate] = [] + + try: + docx_lines: List[str] = [] + + doc = docx.Document(io.BytesIO(data_provider.data)) + for paragraph in doc.paragraphs: + for line in paragraph.text.splitlines(): + if line: + docx_lines.append(line) + + string_data_provider = StringContentProvider(lines=docx_lines, + file_path=data_provider.file_path, + file_type=data_provider.file_type, + info=f"{data_provider.info}|DOCX") + candidates = self.scanner.scan(string_data_provider) + except Exception as docx_exc: + logger.debug(f"{data_provider.file_path}:{docx_exc}") + return candidates diff --git a/credsweeper/file_handler/file_path_extractor.py b/credsweeper/file_handler/file_path_extractor.py index 84fa1213c..ba8dc6f5b 100644 --- a/credsweeper/file_handler/file_path_extractor.py +++ b/credsweeper/file_handler/file_path_extractor.py @@ -143,6 +143,9 @@ def check_exclude_file(config: Config, path: str) -> bool: return True if not config.depth and file_extension in config.exclude_containers: return True + # --depth or --doc enables scan for all documents extensions + if not (config.depth or config.doc) and file_extension in config.exclude_documents: + return True return False @staticmethod diff --git a/credsweeper/secret/config.json b/credsweeper/secret/config.json index 6914ac849..fa50bb5f5 100644 --- a/credsweeper/secret/config.json +++ b/credsweeper/secret/config.json @@ -4,13 +4,15 @@ "containers": [ ".apk", ".bz2", - ".docx", ".gz", - ".pdf", ".tar", ".xlsx", ".zip" ], + "documents": [ + ".docx", + ".pdf" + ], "extension": [ ".7z", ".aac", @@ -71,6 +73,7 @@ "/__pycache__/", "/node_modules/", "/target/", + "/.venv/", "/venv/" ], "lines": [], diff --git a/docs/source/overall_architecture.rst b/docs/source/overall_architecture.rst index 3344454a1..47412c496 100644 --- a/docs/source/overall_architecture.rst +++ b/docs/source/overall_architecture.rst @@ -15,6 +15,7 @@ When paths to scan are entered, get the files in that paths and the files are ex - exclude - pattern: Regex patterns to exclude scan. - containers: Extensions in lower case of container files which might be scan with --depth option + - documents: Extensions in lower case of container files which might be scan with --doc and/or --depth option - extension: Extensions in lower case to exclude scan. - path: Paths to exclude scan. - source_ext: List of extensions for scanning categorized as source files. @@ -36,6 +37,11 @@ When paths to scan are entered, get the files in that paths and the files are ex ".zip", ... ], + "documents": [ + ".docx", + ".pdf", + ... + ], "extension": [ ".7z", ".jpg", diff --git a/requirements.txt b/requirements.txt index 0e903ccea..bee75005f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ openpyxl==3.1.2 pandas==2.0.3 # ^ the version supports by python 3.8 PyYAML==6.0.1 +python-docx==1.0.1 requests==2.31.0 schwifty==2023.9.0 typing_extensions==4.8.0 @@ -48,3 +49,4 @@ types-python-dateutil types-regex types-humanfriendly yapf + diff --git a/setup.py b/setup.py index 0353ce5e6..a5f591e8c 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ "password-strength", # "pdfminer.six", # "PyYAML", # + "python-docx", # "requests", # "scipy", # "schwifty", # diff --git a/tests/__init__.py b/tests/__init__.py index b62f5b743..7bdebc357 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,7 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT: int = 120 +SAMPLES_FILES_COUNT: int = 123 # credentials count after scan SAMPLES_CRED_COUNT: int = 383 @@ -11,10 +11,10 @@ SAMPLES_POST_CRED_COUNT: int = 296 # with option --doc -SAMPLES_IN_DOC = 422 +SAMPLES_IN_DOC = 427 # archived credentials that are not found without --depth -SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 16 +SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 21 SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 16 SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 3 diff --git a/tests/conftest.py b/tests/conftest.py index 49f685146..4f8da811b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,6 +38,9 @@ def config() -> Config: config_dict["validation"]["api_validation"] = False config_dict["use_filters"] = True config_dict["find_by_ext"] = False + config_dict["exclude"]["containers"] = [".gz", ".zip"] + config_dict["exclude"]["documents"] = [".docx", ".pdf"] + config_dict["exclude"]["extension"] = [".jpg", ".bmp"] config_dict["depth"] = 0 config_dict["doc"] = False config_dict["find_by_ext_list"] = [".txt", ".inf"] diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index a28445ab9..880f7a216 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -6338,30 +6338,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97709, - "rule": "Password", - "severity": "medium", - "line_data_list": [ - { - "line": "password = Xdj@jcN834b.", - "line_num": 2, - "path": "tests/samples/password.docx", - "info": "tests/samples/password.docx|ZIP|word/document.xml|HTML", - "value": "Xdj@jcN834b.", - "value_start": 11, - "value_end": 23, - "variable": "password", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.8208020839342964, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -8216,6 +8192,102 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.94412, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t : Password = WeR15tr0n6", + "line_num": 1, + "path": "tests/samples/sample.docx", + "info": "tests/samples/sample.docx|ZIP|word/document.xml|XML", + "value": "WeR15tr0n6", + "value_start": 77, + "value_end": 87, + "variable": "Password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.321928094887362, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Github Token", + "severity": "high", + "line_data_list": [ + { + "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "line_num": 2, + "path": "tests/samples/sample.docx", + "info": "tests/samples/sample.docx|DOCX", + "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.632263329852917, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.94412, + "rule": "Password", + "severity": "medium", + "line_data_list": [ + { + "line": "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t : Password = WeR15tr0n6", + "line_num": 1, + "path": "tests/samples/sample.docx.gz", + "info": "tests/samples/sample.docx.gz|GZIP|tests/samples/sample.docx|ZIP|word/document.xml|XML", + "value": "WeR15tr0n6", + "value_start": 77, + "value_end": 87, + "variable": "Password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.321928094887362, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Github Token", + "severity": "high", + "line_data_list": [ + { + "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "line_num": 2, + "path": "tests/samples/sample.docx.gz", + "info": "tests/samples/sample.docx.gz|GZIP|tests/samples/sample.docx|DOCX", + "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.632263329852917, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -8264,6 +8336,30 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Github Token", + "severity": "high", + "line_data_list": [ + { + "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd", + "line_num": 1, + "path": "tests/samples/sample.pdf", + "info": "tests/samples/sample.pdf|PDF:1|RAW", + "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.732263329852917, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", @@ -8408,6 +8504,30 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Azure Secret Value", + "severity": "high", + "line_data_list": [ + { + "line": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P", + "line_num": 1, + "path": "tests/samples/small.pdf", + "info": "tests/samples/small.pdf|PDF:1|RAW", + "value": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.620007704961091, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", diff --git a/tests/data/doc.json b/tests/data/doc.json index f505ae0ad..e88d39e40 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -11222,6 +11222,102 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "PASSWD_PAIR", + "severity": "medium", + "line_data_list": [ + { + "line": "Password = WeR15tr0n6", + "line_num": 1, + "path": "tests/samples/sample.docx", + "info": "tests/samples/sample.docx|DOCX", + "value": "WeR15tr0n6", + "value_start": 11, + "value_end": 21, + "variable": "Password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.321928094887362, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Github Token", + "severity": "high", + "line_data_list": [ + { + "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "line_num": 2, + "path": "tests/samples/sample.docx", + "info": "tests/samples/sample.docx|DOCX", + "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.632263329852917, + "valid": true + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "PASSWD_PAIR", + "severity": "medium", + "line_data_list": [ + { + "line": "password = Xdj@jcN834b", + "line_num": 1, + "path": "tests/samples/sample.pdf", + "info": "tests/samples/sample.pdf|PDF", + "value": "Xdj@jcN834b", + "value_start": 11, + "value_end": 22, + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.963119653306635, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Github Token", + "severity": "high", + "line_data_list": [ + { + "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd", + "line_num": 3, + "path": "tests/samples/sample.pdf", + "info": "tests/samples/sample.pdf|PDF", + "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.732263329852917, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -11342,6 +11438,30 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "Azure Secret Value", + "severity": "high", + "line_data_list": [ + { + "line": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P", + "line_num": 1, + "path": "tests/samples/small.pdf", + "info": "tests/samples/small.pdf|PDF", + "value": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P", + "value_start": 0, + "value_end": 40, + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 4.620007704961091, + "valid": true + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", diff --git a/tests/file_handler/test_file_path_extractor.py b/tests/file_handler/test_file_path_extractor.py index 16fb5236c..49bcfaf13 100644 --- a/tests/file_handler/test_file_path_extractor.py +++ b/tests/file_handler/test_file_path_extractor.py @@ -1,9 +1,11 @@ import os.path +import re import tempfile +import unittest +from typing import List from unittest import mock import git -import pytest from humanfriendly import parse_size from credsweeper.config import Config @@ -11,20 +13,62 @@ from tests import AZ_STRING -class TestFilePathExtractor: +class TestFilePathExtractor(unittest.TestCase): + + def setUp(self): + config_dict = { + "size_limit": None, + "find_by_ext": False, + "find_by_ext_list": [], + "doc": False, + "depth": 0, + "exclude": { + "path": [], + "pattern": [], + "containers": [], + "documents": [], + "extension": [] + }, + "source_ext": [], + "source_quote_ext": [], + "check_for_literals": [], + "validation": { + "api_validation": False + }, + "use_filters": False, + "line_data_output": [], + "candidate_output": [], + "min_keyword_value_length": 0, + "min_pattern_value_length": 0, + } + self.config = Config(config_dict) + + # excluded always not_allowed_path_pattern + self.paths_not = ["dummy.css", "tmp/dummy.css", "c:\\temp\\dummy.css"] + # pattern + self.paths_reg = ["tmp/Magic/dummy.Number", "/tmp/log/MagicNumber.txt"] + # "/.git/" + self.paths_git = ["C:\\.git\\dummy", "./.git/dummy.sample", "~/.git\\dummy.txt"] + # not excluded + self.paths_src = ["dummy.py", "/tmp/dummy.py", "tmp/dummy.py", "C:\\dummy.py", "temp\\dummy.py"] + # not excluded when --depth are set + self.paths_pak = ["dummy.gz", "/tmp/dummy.gz", "tmp/dummy.gz", "C:\\dummy.gz", "temp\\dummy.gz"] + # not excluded when --doc or --depth are set + self.paths_doc = ["dummy.pdf", "/tmp/dummy.pdf", "tmp/dummy.pdf", "C:\\dummy.pdf", "temp\\dummy.pdf"] + # extension to be excluded always + self.paths_ext = ["dummy.so", "dummy.so", "/tmp/dummy.so", "tmp/dummy.so", "C:\\dummy.so", "temp\\dummy.so"] + + def tearDown(self): + del self.config def test_apply_gitignore_p(self) -> None: """Evaluate that code files would be included after filtering with .gitignore""" - files = ["file.py", "src/file.py", "src/dir/file.py"] - filtered_files = FilePathExtractor.apply_gitignore(files) - - assert set(filtered_files) == set(files) + self.assertSetEqual(set(files), set(filtered_files)) def test_apply_gitignore_n(self) -> None: """Evaluate that .gitignore correctly filters out files from project""" - with tempfile.TemporaryDirectory() as tmp_dir: git.Repo.init(tmp_dir) with open(os.path.join(tmp_dir, ".gitignore"), "w") as f: @@ -40,53 +84,97 @@ def test_apply_gitignore_n(self) -> None: ] filtered_files = FilePathExtractor.apply_gitignore(files) - assert len(filtered_files) == 1 - assert filtered_files[0] == os.path.join(tmp_dir, "src", "dir", "file.cpp") - - @pytest.mark.parametrize("file_path", [ - "/tmp/test/dummy.p12", - "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\test\\dummy.p12", - "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\TarGet\\dummy.p12", - ]) - def test_check_exclude_file_p(self, config: Config, file_path: pytest.fixture) -> None: - config.find_by_ext = True - assert not FilePathExtractor.check_exclude_file(config, file_path), f"{file_path}" - - @pytest.mark.parametrize("file_path", [ - "dummy.JPG", - "/tmp/target/dummy.p12", - "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\target\\dummy.p12", - ]) - def test_check_exclude_file_n(self, config: Config, file_path: pytest.fixture) -> None: - config.find_by_ext = True - assert FilePathExtractor.check_exclude_file(config, file_path) - - @pytest.mark.parametrize("file_type", [".inf", ".txt"]) - def test_find_by_ext_file_p(self, config: Config, file_type: pytest.fixture) -> None: - config.find_by_ext = True - assert FilePathExtractor.is_find_by_ext_file(config, file_type) - - @pytest.mark.parametrize("file_type", [".bmp", ".doc"]) - def test_find_by_ext_file_n(self, config: Config, file_type: pytest.fixture) -> None: - assert not FilePathExtractor.is_find_by_ext_file(config, file_type) - config.find_by_ext = False - assert not FilePathExtractor.is_find_by_ext_file(config, file_type) + self.assertEqual(1, len(filtered_files)) + expected_path = os.path.join(tmp_dir, "src", "dir", "file.cpp") + self.assertEqual(expected_path, filtered_files[0]) + + def assert_true_check_exclude_file(self, paths: List[str]): + for i in paths: + self.assertTrue(FilePathExtractor.check_exclude_file(self.config, i), i) + + def assert_false_check_exclude_file(self, paths: List[str]): + for i in paths: + self.assertFalse(FilePathExtractor.check_exclude_file(self.config, i), i) + + def test_check_exclude_file_p(self) -> None: + # matched only not_allowed_path_pattern + self.config.exclude_containers = [".gz"] + self.config.exclude_documents = [".pdf"] + self.config.exclude_extensions = [".so"] + self.config.exclude_paths = ["/.git/"] + self.config.exclude_patterns = [re.compile(r".*magic.*number.*")] + self.config.depth = 1 + self.config.doc = False + self.assert_true_check_exclude_file(self.paths_not) + self.assert_true_check_exclude_file(self.paths_reg) + self.assert_true_check_exclude_file(self.paths_git) + self.assert_false_check_exclude_file(self.paths_src) + self.assert_false_check_exclude_file(self.paths_pak) + self.assert_false_check_exclude_file(self.paths_doc) + self.assert_true_check_exclude_file(self.paths_ext) + + # pdf should be not filtered + self.config.depth = 0 + self.config.doc = True + self.assert_true_check_exclude_file(self.paths_not) + self.assert_true_check_exclude_file(self.paths_reg) + self.assert_true_check_exclude_file(self.paths_git) + self.assert_false_check_exclude_file(self.paths_src) + self.assert_true_check_exclude_file(self.paths_pak) + self.assert_false_check_exclude_file(self.paths_doc) + self.assert_true_check_exclude_file(self.paths_ext) + + def test_check_exclude_file_n(self) -> None: + # none of extension are in config, only not_allowed_path_pattern matches + self.assert_true_check_exclude_file(self.paths_not) + self.assert_false_check_exclude_file(self.paths_reg) + self.assert_false_check_exclude_file(self.paths_git) + self.assert_false_check_exclude_file(self.paths_src) + self.assert_false_check_exclude_file(self.paths_pak) + self.assert_false_check_exclude_file(self.paths_doc) + self.assert_false_check_exclude_file(self.paths_ext) + + # matched only exclude_extensions + self.config.exclude_containers = [".gz"] + self.config.exclude_documents = [".pdf"] + self.config.exclude_extensions = [".so"] + self.assert_true_check_exclude_file(self.paths_not) + self.assert_false_check_exclude_file(self.paths_reg) + self.assert_false_check_exclude_file(self.paths_git) + self.assert_false_check_exclude_file(self.paths_src) + self.assert_true_check_exclude_file(self.paths_pak) + self.assert_true_check_exclude_file(self.paths_doc) + self.assert_true_check_exclude_file(self.paths_ext) + + def test_find_by_ext_file_p(self) -> None: + self.config.find_by_ext = True + self.config.find_by_ext_list = [".p12", ".jpg"] + self.assertTrue(FilePathExtractor.is_find_by_ext_file(self.config, ".p12")) + self.assertTrue(FilePathExtractor.is_find_by_ext_file(self.config, ".jpg")) + self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".bmp")) + + def test_find_by_ext_file_n(self) -> None: + self.config.find_by_ext = False + self.config.find_by_ext_list = [".p12", ".bmp"] + self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".p12")) + self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".bmp")) + self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".jpg")) @mock.patch("os.path.getsize") - def test_check_file_size_p(self, mock_getsize, config: Config) -> None: + def test_check_file_size_p(self, mock_getsize) -> None: mock_getsize.return_value = parse_size("11MiB") - config.size_limit = parse_size("10MiB") - assert FilePathExtractor.check_file_size(config, "") + self.config.size_limit = parse_size("10MiB") + self.assertTrue(FilePathExtractor.check_file_size(self.config, "")) @mock.patch("os.path.getsize") - def test_check_file_size_n(self, mock_getsize, config: Config) -> None: + def test_check_file_size_n(self, mock_getsize) -> None: mock_getsize.return_value = parse_size("11MiB") - config.size_limit = None - assert not FilePathExtractor.check_file_size(config, "") - config.size_limit = parse_size("11MiB") - assert not FilePathExtractor.check_file_size(config, "") + self.config.size_limit = None + self.assertFalse(FilePathExtractor.check_file_size(self.config, "")) + self.config.size_limit = parse_size("11MiB") + self.assertFalse(FilePathExtractor.check_file_size(self.config, "")) - def test_skip_symlink_n(self, config: Config) -> None: + def test_skip_symlink_n(self) -> None: with tempfile.TemporaryDirectory() as tmp_dir: sub_dir = os.path.join(tmp_dir, "sub_dir") os.mkdir(sub_dir) @@ -103,9 +191,9 @@ def test_skip_symlink_n(self, config: Config) -> None: for root, dirs, files in os.walk(tmp_dir): files_walked.update(files) dirs_walked.update(dirs) - assert dirs_walked == {"sub_dir", "s_dir_link"} - assert files_walked == {"target", "s_link"} + self.assertEqual({"sub_dir", "s_dir_link"}, dirs_walked) + self.assertEqual({"target", "s_link"}, files_walked) - paths = FilePathExtractor.get_file_paths(config, tmp_dir) - assert len(paths) == 1 - assert paths[0] == target_path + paths = FilePathExtractor.get_file_paths(self.config, tmp_dir) + self.assertEqual(1, len(paths)) + self.assertEqual(target_path, paths[0]) diff --git a/tests/samples/password.docx b/tests/samples/password.docx deleted file mode 100644 index 6d6db3a52ef375600d13248073e819f4949baed2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4306 zcmaJ^2{_dK7RHcWc4f#~COg@`u}pTtkbSH%Gzf6cu1U3G%tqUzLCBK9yKc0qAH9}+ax}OJb zt#b^yV~_ImAI8N&+Di}+Z}GuKahHGzMKLbBVX;OtrF$bfnO1@&(~*zt8MPCmJFl!3 zOG$WRvUr>fjJmmrp#6JUM)FgRb12D^S$O0yqQ5x8L?XI}R#4O9rzKpi8bJVSb) zce@SR%*ORSY99lB%Z2KaQ!EV-^Nr!}t4Kmb1pU7mVZfho^mKswczPkl96WvCq5Fn|wE zIV^AaR}p1bdO5@h7#OuVUm}yNa9S@$8g5%d`mk$5Bz*={lrY;RK5~ms!O};@d^x_@ z5`Yyq=7z|}rGQ!m$^m>|I>|+l=+rh&DXY|-`MhbE1O=+ex&~Nl^dao#2dDlm{}Cw0 z$%F=)#0lhiL*(}o;HlE+qqwCI6k>VBFsTc{-i5PQu;G{GPtaBf?O(6N#oM#JLtlT- zv@Sv3dv^f)p^7hzA6$VElnwnzhVhd3Jg-_J4_7bG+I?I}x_;*tuKwQ2U&-B*$C^5? z{Cs4LJO@l=s#y9y2o2RO5oJ|;S)60KzoNk&o|x^$;`>?YV2dlxlfaLr%)o1VVj`l6 zKk`F=k{^37F9JQ;hJzk$+{`yVm`U~7h=`23&~^&Lb{r*B{2FC%dAV9xa))aihhnl; zwgMVI<+RJQiCR3|6IHnBXH+VPO_oYf@fIlzT0i__(ro;6O4L_Rg8<092eW%ekdOxhLfM#lo!j9Ek4AU)%;=zR8!kLL)^Nux%64 zBzh^}SBwYssmosDq1agWBuzl84EQ5;y(V0uPgf~(!&^SPfR@=pW>L6Pj?FyUC!>gl zO+AM}>TctFg=bve7{7LV1663SW9QDy^?E;{9u3&XrTcj>NDj>DEDoaY4W*q4AJ_WO z4D7l^eD1V5fS2E*?%A6fWp5Po%Nu&%fCLH>Ji61JCgjP z`7tTudp1VGH`=p}1;)5@3qRl7H#o%%xG-Tta!~O)JWcjaOO?B|YZR~)tH{otG!x;$ zd0Q*B8N#@ZN$DY05>?(W3UQ}R+kF3VqP29puj8OnS>@6Fg->ib*+o&>Sn`joC_*}R z#l&BB;xEhe$8-=B<3~D9CZaG^FL8pKo}i?xxQ>kr)`g=A6RIO6cQlf!({^Gd>a-&u zfCSmY?WpfUQ)wkN!4ghNIf2gNp#dR=k_z56B)%HSyEoOWE`cH|Bsb?i_qyrvu3$tl zq8SED7kv)Iz)GwpzInXN$8h zAvkc_NO7xRTS^RO3syEO3ZHlvyT%%~f2N@TyHZ;#?w^-^WO`gMPycS(sFqBahGcGZ z4+0u{Yht{_kRt4HUCyhE2C2J1f(SO*G~k)Q$}@Zc&VwAj!oLkSLBV1c&u2MZT)wN7 zPHXPR4&0AGzZaPjj8z6QU2r;xS-ebzYL-%p(J@`Sx*gut?GpE&+a4X)vD@0;9_aosSwwR!1^rpy_B zF>B}cmoY`PpJsu!S&i}eqo>6(I-ulr`>*y(I5`6L4k^O1poc?zKA$)^ zR=;2GAxVwVX|PgT{dg4Q)kU(Rg62-`h%O(j?u|3})ceHGH9{sY1kv3ay@37feP*1$ zn`AX<2y`+1;??lSr52I8a3go)o1;d%mm?p4T$d6K*BwolR~i#EUhe>vN}^IRyv7z? zOl3NTdmB{>ERRH>{QLA5UxL$vRJNt$ls?^T@>9P}rY@NyS|BOTHX^cN&o<&(k}UKl zj~TmHx@-Ty6dv%N2Yl!#BR0nB~dR#sSc=82$ z#12)ixX84%4DRsO zG_;GpI*n5{yquZNey@Z}i8XC!e-C+UCZjJMB4Sf|nss=sx*r@P^lldCd-P~%TOY>7 zuw9lipwF=}cyGOALw=}4rLuc~De6srexF=W%L?}wy5X!I^e56acwl$W({p_NN1cgm zw6e4tb~BGXEr&qgG>ux%9|<2`E?;~KiT?cQ>ZsvC+O)upAg=Kdk!K#I`BbYb3)^S> zObv6_nyeI3U1-Mpdsrt}!*maFiapJ~io_@hQM0QyIV>Kgwm2-J3_z7(4o$dOdE?6B zE8ew*rtwFz<#b7TO`5r8rST$`fNi?%3>&iXlo#J=3CbALcah~GzOXTre=2Ooe;2ls zr-!ex{WZ5AW~v+t`P)oU^`e$*$Ri)2Sr}&{gYp)LVa~(m)$fwt@qfeB>kIMp<^mb;;^HCer}t)eTA7 z%$x|U+LXqr?3g&Ps#ny*g?X5 zeGwkcKXe0rPtl`YoCZI7+h%NfDF>AK{Oca+78Q`&IV!N)#oScu)-$k0Ys};WJJ*sD z4Vn(0am7&=uF6SOT@(U=%)eU6$*%i_goQ-$Qs1W2q#JpeCb>FsXUfW%(a=QHSG8w* z@+CK;t3x$skAtYZ5p)!}3EI2jdlYN0+N6E84*H)2%tz~(ep>}P3#5iq;4_a?fq$sJzX-?aNls>P+5-CVaW~~AzuI`sMQ9()I9My z`~SwC34a2niZ4XGRk1Y=^nxR;2}=7S^@p`r#~sG@z*6h0R4C4WzS+&#Na5?pbPfp<3@)vJhX|+Pf`JPAzr%25C1868mbYe&dFRQcJ_? zcp6}H8|iUA$&+sTyd~Uk#!Y=#tFAbt(Jgb-9EE!oc24^ulr3(*uILsk*mkyMD)ZPO zGV1J`-L0B^+g4Q|l()z*=WnDU=`0EskD~o1?{WDl)7m99#@qebPtH|#Lj2{v1D<2_ zMc5;#858&(ZQ~-)73aynnG2>m(%)j`PH=1i3ha5ISl=IUK~PKm1GZl{r@Y{F({n{J zz*WImZAFDUr?XfOJ{P^;OPF<4GhHn_Y^9c!X~hPC0Tjg2VM&+QCKJV9A#g3i%6JmU zDX8BvbQ|LdvH3Cjh)I}#aUtmQ&#p)25bKU|AZG6p$2>Vr-VvMch{2A zAW|ZN->&a_j_CUxXRqhEo}w%J)@(6i${e;NJxm4BC2hGbwP;H-}jtgw$1|l zn0L`olREBoF#fz(`u!v?_iBA5XhmblzzbDxzv!MF{1=+4c}A!?9blc#-6kgClhswW zoLPpmx1RZNj0AwfI{#3l#b$HHyz6mgL-6ey_&s<@uF~hV`^8;a9YB?sFMiK^Rn1nD z*#e!0oj)6OT_>fh@)*WlsI%S?JlpcKA2PHnLRMDszI02EIUkQmQ!>6rPs(Z34ylfn z=*l(8_gC_V?VGfp#J$uIo`(&5-DTPFvRaGjZPJU@;J*O`Xt<^2Sf)_Un&?46h7;dE zE9LhvtRE%kST9cRH+5!7NA)KwhsCB(dSmNo5D>079VxA$uRnl;g{AYa1|moQ1B$TJ zaY8sa3s@qYVEm8lY`t|&wVMU0ypIj|yirr6VC(9bmkVkXl!ElAhwhg~i)^Nv=TXyk zwiy(toj0ooGbQc&iiVH822>zI@)mN@1kVhTqCJpiW5nM<9_>O&Wm(UnGuY@vv=R4< zxk^NCZmRoNx9bzNE(%XJbwy)PHH9*rdN9l8I@YI8KE<^s&%>_WF$! z9R;|Echp|VafAh}!410!VFyS2Ae+qB^%l3zRV7RWlXhpC^`g(aU&WIYe2fDKyRApQ zC6AtEPW=#Xg1Y0;btXI1LH3kcYYeLL!Sco%h5~Uougyf*$K703>q1bi_4JmEjQ_NK z9(L^n&{!^Ur$v@o=nNm@DXKk}1siNEteHQ0it-mvxjMTb>@dDc?JEPKeHFMcn@>17 z4Izjbstl8mf*eQBRbx9NI&Oz~d$A#RB7>P_w1%b66kmfU$sfKE;N`};MhfD*hB(WW zP;2U(H)nd1v4YQ6?>2721_(4^XuA1+&hQy$wk{ zSW(6N-iawq9 zMC;2)Q@KDFdc?vm=Xi6_K&EVEH$#3Ei zddK>7)R@8LeY&E|8b-4BTwepKh#Cs@$-?6teXI^Tr?iNe786UWZb&>jU6)f%uo@1( zSsX88dXx@(+i%s`Q5uE#f^<*TI-WopA65!9#~C~;#a7w@K7EMiOfj_Zr86|!C!{j z(i4?+?qx4kAKH2r)>o+Rc-==~d;qET=u<&sOZt!4VnF-A9pMBO_)o4xCj?hJn7s?e z75~Yby4`4yfg4V#Eq29flK`99xYFSFuQQa83Qa3@V(qv`b|jWURLEwB+)p;l!Ys+-+!LikH46r#F;uLy$XS zlO3?eW1oAY3wXjFxjJ(w%5pjYomTLhrXec*_Hd8cPA?&%x#~_nNYuM*KCSrL*90xSo>UGu3sQDG{(%FW-kRh>e@EJ4#APN#%4@>~l6{Qv+J!_;ftuGD6g|)UR&h z%%V)guW|{|bW=EUSo@2OWbCg8SGbz-3;}uUmg#aj_5F>a->{NWPlV&KWrV-l3gL<= ze0Beco4^Q6&Y#}KmFa`&P|Ogd?v1+t`!UkYNX4$#h16DRPV~4d6E_<3ET#@@3uQ;| z5zQ@)1w%}ExkR}XA{YD- zBM^x1Ih?+?qDk?P%LY{tzr>j{H7&K}=`aSiG5WJiHNZi;6Bdm zqfLp>8u;CEnY1SZ?n49;MYbXG^ZvvfMUKN0;C3LiRD?~@U%vzJqJQWnSrCP^Y2ceo z*7l<`B}ahtRgH3yZk!IdX3DC-a#A|3*kMeq9xd5ybA|*{-1Iub<)v%kp=1@R6C92~Y2mgmtg0?H9v zxSK}F*B!R&cx@-#9BN30cZ?eG?y58qXd5pi7z-eU)mD2bP|oG?%Jd|}H86OM{$gsB zqi;rQeWB-9VobDO4zOX5L1THB;7=Ks>~B0fyLi~boH0?}cGm(iB?vxydyjlYij-wU zA6~hks}C!jsH~QKFG0JwFp;!PwN`?Xe99V~lkK8=>c8kdv$E6pAxXesGtR3&LnQY8 zB3a+pw*<(pkf&{}zMc)+reD&;l$4%mU{m_yPQ9!9b^sF{d(a~9^<3cKk?0T$fgkmd{B=&K5C=CrVnvUMx zqxnQ#+A*Wjhj(~%Ql7$EuOAGohQ4OElnz-0k7jh)ss}i7kvzo8*>aKrwF$z! zA57NVu{)_=pUWL8C<_doINi$W41k_;5x38ZV2$db!yo&*XMUl^f_17oY-R=j>BnG zZtD}98p$Y&Ez=5DQ}Pvfi7D0TNW`h6G0vXu)u1|k`D3kn=KxGNB|P8E zqrk<&(jfU$I8po^P7e_FE)a|Rwm&#@qmH~*3%Wk+1G|sMtjP$KZ}YIX5*^BQ<5u_O zl|f6`N~}9}9fwsE!0b z*(4&KWb1bIsMz>Cr3Z}}d%9m7eLLK)m}(QB@zYF=Dp8@g19(`R0kWz%ieC~VSs+!I zVCYuJK=+I`=@aFpd*dog0pleg?lff_qmXqRd(H0hh8#*$t$E$GRL73{0jZCUKya2d ze4M4H&UnThe6bHFaII`*wOZtu{l=7L!}rtP3c->#EU19H_}h<>UGkL^DkHmv7`arcGi6t9l8XUc6q)I zvnLWH?qY0AhaaHKjwUE5w4MK*B>q!^onbC6*7jCEk`_#--o8bUWagAQ=*TECSSDOT zJ~2a4isKARC65w2fh)vi=3ukCmDDKXa%cV0Uag65P^N^%1MM==uyP@Q%B|F>;IYSB zV#nHf=Kd2+a^%L&Dq@?XvrZ4`l;he0?L%fXizGvH+Kt?9k-X>N2oF7s!wsoEG3V?u z?OKjD%(WYl1Yv){&oe}Hp1Qm%3hOng>3=WN`oDv8MkT_)VMk6jQ0PM?iUPK zC|PxBCCrkvuU#g$LS{$H`4nzHuBTwbyDh4HHb-XGw>y$~2i|jjFK^!TYMz;T{9hYh zyPuM~W;AIJ+lI~$sYVxrvKpF3_q*x|!GD=ew9IUK=LgAdOuHNWAbKARX(WlzcFaOQ z|87U>zp?59huOgde!Q<^3czfr8%%%xGbX(A8X z7TYe-SC&L;v7}&F#Cqc~CFtgEx4-lKl3gEFf3xRe4G^M8`>S-(a5tE9Mtr4EqH-6z-I#tt$gVXSj*G`Bl-$_$X|~o zc_M+0`7+-#YF7z*kv+simXPmssmwCO5oU7ij~1CV#cZ6gpD;Ql`O+6-4=o*BduRRW zvXitd8CLENS4(3ypf$GE!dQncav9v8IIv=TzULP#@j&|fmo60OOJKIwd5wXO*!;Sa znt=Ndve`kVSvxz#^${60vT$iFqC(hPzFhY)W1Ce4Ax$v;JKda78Tk_1{3Ii9_Jx8G zDS|JHbz1rzX*>zot}~py^5zgnjW9-4el33Ga5(mq;AW^Hf|M8kT5p1M(~|I1fDnSW z;6#{XorC|4XMTR|RjsMO&#>5SKjN|)&ZeCR*&J?ww!8fh0aJaYvVJY=QDkB~*j4HK zj6X>f2Si+sxZLtiwp`IsuZgu~jd^G^62%Qsb?SE6 z_tD)QPC5_>hF!`&q7@z=C<4R|j=m#{E$acD?0nxsoj0kjA~f)skM?^$a$=&DP$gxQ z4NWk{3IBvLOsi_Fh|Y%S9^xLv!vW@OiosZ8d^aW~Dj%PPb{NOi!u-MNQ-CNAmpfw} z6R{&;S!jLknOD0Sgq12efkrH5G0Cg{<`vF#J4ov5CJ$>D`Ky!9*)!gd>ly$&%x>n4 zL&X?Z!ioUt=EX+a}^Ehl}3G+5JXJUhr8(A zmn3u7LsAy~Exxp4-Z{PljbyBcX$~`5K-G^ufw%Gm{ol4~!QDRkd{u4!^hS7W$%p}Z zO7Je2cCtUx_fjYDmvp#*7uT@G-u^#-YOcIU~C`9Ky`V zew7Oov)>gj&MGiT`KPd=xB8z+((mdQM*{FH7qs(z~fGv4@pfs6etW^(XTve5

!nRSAJ;yA4INB*Z=?k literal 0 HcmV?d00001 diff --git a/tests/samples/sample.docx.gz b/tests/samples/sample.docx.gz new file mode 100644 index 0000000000000000000000000000000000000000..7c4c560125a8fa2fd51fe9d351742122de65c3d9 GIT binary patch literal 3358 zcmV+(4dL=1iwFq+xGQA<19M?*aBO8RWN%}50PP!1ZzM;xlg%#4hL3~<6p27uqrHWh zo}LeTJhCU+*m1I~*K4h3;{bxCrn_dQ*Zq~M9*?~zQ2qcXKq3+nNI7uH1tAXHkvJeO zfeQjf3Q{BvC>JgWiB})pUHvik*dA|4#7J#-SG{`g)qAhrM_2XE%}bA6)!6Ut_U*mj z{_LF(wy$U!yPnaW*S@_T_!0J_d;Oyj)9=&{J@;<^3GGtndoSV-e)A%p96qT%^4N!$ zG>!jReF*^c15BtB_-hrjR1{WD#xY+tWVb}<0uNdhC!_{ z_7JTFA@-qX7!VId@JL2Rh^&1y!bYQBUotGgQ8i*XD!tVWJVX;W(qB1*WlqbA>PpTV-EN%n+sjI?|LdbatUan}?0QO-p7e$P)1oW!wHF|~KK4RzsTjnV-u8nC zxqZjOJ+lMt$%xhbYKRLYjMrY~XQcG6=?!g@HV=FRN z?E0W()t6Vyp=B-)td`j^(bDqJUa4Dcgs|OSUb0QAzG|qX7~R)}CVFPOR)-%4h}%|o zAU4pR38uhrR`+1J(_U-U5I3vqV?>Y@0eG*yvT7)gO4ptfjdvl?0D>h`1yVYI57&CnKqv-v=fC;ke>C7UQ8OVj$p%r!+{=7MMt?<+dtcN3HqX-$fq$jpV z+e%P|6XJ3Nwq;-!Gs~lfSu>3k7DVdN(gD0jf%Ke+d*%|*xHEx1q)q}jola}hCIUqXWFq6K5 z4XQZx8#dH)H*B+__f@Mu6YgV{Y#)&kh5%BmgN$;98uK$$7ILzDl}5D!OL46-gQMmm zIVwp}iXuu2w;3$87Rgc;BIT!8)bQLhW0}V+s>`q_IEwk!RDSrA@$l0w%me$E-j88X zvg`RQl0sSdDP5^2>s-`wsjiip@nNjXq>xKF$d-){V=2a!oFWCaYJ3<=kC$~R{X)rj zss)#<7=dQ~74!J!+wcAPaZO{_m$FDwUM?hSL|slmCRJ!2q1psZc_l8BMdbK#Qbrb{ zl#1^2DqhqQdY`MP7qj|`%raSn$N84cE5q8}i4ty1SdW?V)B z;6_EbA^%A@31IIxPQup^_=<2jd>jB-7EGl`+w7oby(wW8AaPs5Lvv{bW#JY4%>q1X zHgOJrGlg#z;5z`F!{18b+XZ-QWyMNlq5z58r4smtY2pk#2O7JMzh5eG9787{!@zy5 z6ujLwQD-2*Qz%0*A2HsKGRCCXk%#UFWD@|ma=}u4o#oj>s1esH*NKDNiXJ+VHQq#? z;~oKv72Se*k}{uSGSaAvkuqij-$$=c^j+lBlSf+gY$J^vF6kkDuY(Xn8GQxY123L6vYpZrfIW+N{gJRCxaI-@0h zgg$|!w0H5qMj2;=(dMVqe)AJEGRxK|1opTk_g?LH-~95*&mYk=c0CHw~ z2M@k+`vHP1fE9&8HA5b9b3}KKyg}fW(rPT4mPktGo)(=tl%A0kln$7PMYW5GGvri} z%QqBbJb8H=Wt^Fd7J|wd%a=+R6LNYu>`>sLN@t1aLOGE)c~|k3xAw37^D~;pt|Ts~ zZ<;R1Q^X|XDi78w6XJK}60ar>v*qMB-dT(|s#$aPcq_ZA9SV^&^FDL@me3xG`uPU-xOYQILqy4UtP_ zhUF-Kt-uXP4AZDL%!UksG(?t@ra+QC`XGTdrDNmV8Odx(!m*KX9`$w*rA&nSn!bnc znC*zv{Urt=e4@ngf5{akHyV?BHz&)^{>Jpp4f9ppm^Al>=G|M(2aSyUW{uyS3uiF{0^b{!>W)G=C_#?GL~lMmjMym$86!u#OcLP2O>>1g2~YssFLj!o7~mFS za%Ea%K+P6|AeKG=Q4*M{p26--2AMjvFi&-MVaixTvR>D2`oAqpVOr}_4RMu)OXjpg+Pk16^xxo-@pgRMM$c*s0#w&N;2D9K!q)Cp-y50t&yL zl>7-Fak(FRKO>96D2ScPNFWwe7@uNdJn}BKvnB%s1NW7LUX2@XZgNvF6fUQZ9ZcD= zN)ounoNSi{pw#aVFjyRxZU+se3z`w7lmPJY8dD~qvDY*oyNyxtsxhy9D?aCz(LSgO z6Sgr@Fo3j2O$sv8Wg9XbsiH|zjDt3l`t1as$}RFWh)gVTEpQ~;g()G5?c08txw6n1 zx4#ULpbyV84S65qa9bIW8f(!q8!go~<-5C6St8%}C^baclea@?p34jsWPqkG=I42pzIHFhe@S=9!hB!scgNsH_6k>D2r;$e0!3Cdr(GXuBEt4>AeNR{!a&enr==Q<@7UYSM;`s!rCFaiK9?LX z`qdBE-_j2rFN->*aL6b{T>L;KM*NiapX)ybQp)Elm&LiIW6%{pR#Ex7QPxS29BV{pYmgQ1g5e%^qo1S-kprEqlQE z^vqq(F!#dKT6{S4WZsGg-BeWHjZvL$ve{#z8GU3CmK+*A{p_b%B2-W9&1aq2&21I+zrU)LZFSFwHoJ|gqW$m%t#ng$KD^mI7ZvX_*R--7nEB9V oU&yFvZ+%@WzMh$d^zs#<=%=)u%bNCnhZ(|u0q^6uoku>$ld--t)d|=}0$?NROE`WaPLY-25qzr2wrpM+yjL@P*mN zt`w!>?K3{g$TnrDE^DQuIZhD{+_5#$sdi!#0A3umfnRncGQ8j%oGMYrCY`Bq5X~{o z!@)5#{q74ZGb86AEhr5-MMWfHt74X_C+$zC-Ii$#`}k{WNJGBRlz-mb6x?6ba-_;A zx8yR|&lWN+<9cgy0_5cf=TrX*4uW;8*$w!usH{kd9n0pz#001`Z3~&c_=>r~N`xJa zZ2NQHFJk2F^knwSj`GU$L42q*%J#Ie$c^o02 zQhceu9iCkXcUy^3)L_&V`arm^@R8O>EFeU+2kLcZ^?nQ^YSbKQ7!!4YDAGumO zprgZR!L+{*cp*IhkwKvU5{ig`RNyYQo_4el7~%wp4iMlK;1Lr;{}+aD&ZmHYK<(Mt zJ=5J0VOW+2^Vz_N1(byhzgvWP8%c&*Fq|4_xOTF-c5{H8BN$Kv zG@Y+H)-;_jIUb3|TpZak*i5PG(>Hm_7m~!E?uI9u;UhPv=RSM(jl5rqGYPln+BU$` z=z>AK{j&(&OM>t{b*F>YS2s`2Si~7VtVk+Y0P;q!e+;#Vb1+-q=GP|qQ9X_x4DhJXT9yrREBL z2^~pS;pktkKg2HKSU$qD@%0F_bjbs3g5OszdT?`a(%*>(H(ubP*-vmY>*5!$X#rw~ zPdAxy2%xuhn%URsdP%^KyB8Q?KmDUAnOO!zx1H++V@g>hrxd+u8E?+4PEn=O)RUd) z)e#}U)X`IS{BP_gnUoqf?t|*dA_|F!d{r_gh1NmUfnkqr=S=0`#5YsQVrVEWX*usr z#pM@>GWKH!Kh$^|I;n?sDpSMH5kT5Vl)j(-doWTYL&>03?W2IiEs>L~%x{G?Uad(h zp=$?Hc9*t)iOlTNep6&KCF#faZRMf7o3-?2&h>1UfQAa zbvj(TI>{VQ@;KVqkvN*(=vnO@&w5%pLT^(75i8{#ngOIuUg5a*SF_ntsubk&HAWQ1 z9y>%8dR4}gwe6kN-Ug$ZW&VSkvKXKmLoi^vvoa;2_-yjf;L#QA#;>K*NqF1<#zn?l zBv~`cJn%`<;qxwPOLYv+2pT~nn@$lg(H$PXDS zAEcvuHM)Df*}e7zQ0u=N?AM}8VU)=jzsY6od)UBs0e(Ha-dhCzVr-hs-4b&X!2P1Cr$GjtFcM!y|2<|2 zI1vl|jd(tng|=m9FRcc}rzTD4~H3 zha8CSY8kku1EMmMLsf@X2)}8tu|!D1^Ja9mQVn0$YN#-hOqRvckH`eQO6Feq;BJ^- z%BLb-qwuN^DGZqXiB#+X)Juov)F|Ngg%>|+mk0`}FSywYkj2j6F_}Q=XH+y8p~~RM zz0_;OzMSXhDxBF2qvGJX8od5kqk)?_*eLZ7_OKzLgAKmi3ep>L6KB*1A8>Q#WM3 z6ZubQ1O@utv#Krc7-h%DBan^>WVPr#*-=&i`Mv?;qClwtg|?U0$ywG#gtP{Qi&a~r zfx#gM-I%nn?t!rZf`-%6h0hwMx^Lbue*MG45)^$;(97ECDi%xxLxS?fw z)QHcOd&^lvr!OO8xx9Snb2_pu2dLp2<<_{BKDQpDNQu{u#8sd?^W~Y6dHNU#f8g#2 zpb_)m{e*M(b(udCMFT}|XpP&k)o)U0{l%6!=Zi>HXmyt60>-^wwB@J|n z26hjspD`qlkU7OTYeXL3(XD-_h3(qU9AEc-?F~B>^F4Z1cX!n&LP*HY@yG^)0q|qz z5IxoP&FNINQYBO?3XH6zyvw{{_)KN+LHTDfueU1dvo=AjjIH6#&{v#Mvy_+yF~M@+ z*SjkV&!1IcLDV+Gb|h&O#8`iX?4eZR{3^(onvu%v(SqrU&Ct8Kh>Rf&QJb)5=_i;k z-E+7ca%v50D^GE#L zj{A|lesTA}ZRc1t-{g80iQSh;S*O0=nqlIWQLqS3kW(LV!xe4ck61 zAzXo7xa_1;afbs+Hw%wdsMo=euy|Jr{+yn!w~0St@IDDWYNg(9R%2|;mS>mPb0G-n z^Hp73vd#}Ib-!+t5!Cv3L1v>Jm+9u>3|%t2!|`{f0LjujsyT(iS(mA$CN(rGumL$~ z!=h!Kk*c96xj>CA3NIc-!78N}>T>I$$@xmb(!L(Pob%u*i#ZyN(1t2Y;M$WUA&V~96DQBm?QnFK5zj(7 zqR?qIfsJtKRAz-bl`?{8f^xasudM`Xf)*1`C?Dl|_0$sJH^Rc>UjveJ0(9#S^I^^* zOjv4oULHc80S#H z#fz^9qLuNb)-fjSEgukti8)RV8_gPh#o0lIML@;TB+k=7hoO?EYF-U zgP>J!i1Gw2uN8y3qM+sG;q|rM_GT8r>2;C(2GVDP(GqlN5~r8DAd$k$M_B$babo)v zAIAqOHn>~Sd*d6(Aqst7L}xvgT;EN!R?=e$XMrt};mm#nEbnrdtbuvox;B8<6cF@b&4m{CBb zbVRG!T82*U3#z92f8?Kjahh33<6d+Gw*zt zuSOOhSC?APNa^Y5w%`{QK(tf$N)z7s-c=%hpT!Qq7K{BdveY$`e`h@XY0Y%yG~xcX z+(6{#90COT@mAceKP=A3`{dK7`pfB*Y9Bs{U5}7R6kyG6p}z5*_4|*XvZL}qsds^P za39|H0p6b%_AA&$9(9 zXyb0R*G5!Q6Zu z^>gl5eqSf^#V$9CGET<3SGL|fN?HSb>Nwl4=ov|=vu)Eq-q8#G=1u2oIyBaDw=mj9 z0*B#&4se~Xv~7k_u@f%Bd8e-imL@&A!#)MinZ-`t%99?^^x55!UD;nLF2uX%McnEN zD*GU&r--urJI0g*#*l>K-WPb(QcL3ZAj23R;K^{JMOawKOmKY;5ASx9VngofF|Yu% zC-6AsA|%6c7$jWHBM@7?{nFVKd%t3__S@=XJB!Ygi4XcXlMbE!LJhW0Vz}?g8Q(hWlid zb(p_tsLkgns>Ka84>vZ>DX`Ea4%P`cJxCTPLggeV~P$-crCw= z`apCI+86qr$bZojkq?%vj!_0!DYyhd*?O^e@=%EtPQX9oG4oJ7-jF%x@!If2MN>l! zI#%thpSm;?h)`~B(BcI6MxOx_W4&_J`+ltJ?+3#Yqs~OGL!$WoF4=I#&)hPja#JfI zvgZ`kYG=u-;}0ddaNwm9DQGhc8&5+;0u_X%dvvMUH6PogM&VJ9oq|WCQ_*6gG)?XM zi{BH7U_Nz9#t(GPa6MK&OCQb9==(kzY}uxm`*^~)LTNHAZ=9=WxxE~~w3$CJ_pR_6 z)Lf28N~DGp+hgTnPC>gCYT zMU=Nf_H}W1{0rt#h^RBaAhn^X`K~9UVf&jWp>47W>>|AuI|;$V0o#fuf9+02E2Mtx zKeNbhM*s0Cm4TmOYaS24#e3ZBc)h7Y4z-NvHGiYop#G$D2QW^(qj_hx8>*Qh%o%f%u!q2^M;9Rz!v#j{qqXJs!DEQX1Q_?Z9AF-r;(@p}2z1B&y`w&N`JOcYw9SdtUHC{c0G#+;rrUBf-)4*YIbkf>+4ehI{bAy(=qWHtFwcA(~HrB z!MQKBah@9UsLHX6q~ObgoFfN)*_ddx^`nnU+;}C;1l0PUW>XbJ^AhqaZN$uwI-sO9 zlND28OQ*Yym3GY?qSMfduN#NDMl4kG4R#>jaN`({Gt zM|6&Y0VecdjxjD2=xbo!aD+?OZirB7EqUwwch z49rRF(8i{rN1NJ&-uW&qqxRXGjkuo=bl3R%fWpa8P5Jlb!V$Yf<7K`R=1R$F)a=6X z{k$|5BHYPLn%u3A`493oX5odcs(XCkxQTb4!WZiDm|kV8AC1}vb9l`Zf82mqbHYb2 zWoLE2NWM;-9Rv-bSzz#VK*}oy%K$pgB;YzJ=~r+a7$`dy6>fa~FuS1GIQRlB0_jQ| z*wNd**ftvjUEy4*@V=>ktypclHuspcL=X1~b;idyWTXs}U4u;%2^%BM*WEQl?bL10 z?6HiiO(fSYO97o?UGqDLfBWr zWI`*_X+V}^ngeYaU8FJbgqQtuTin{|*#rk#ECRAFmIv-8Rzbr3S09ZWF z7wM|*eCeEtHWt;{wAIxKyFQC_n@vj+za5IZdbTEbcMJE}&lpAnRkXL)ZO4DacDY0`kkbS_tWrt31^nM6K9xcVOM&023f6xJHv)a}x+6aQ^1d9c!}>G% zcSenOe!pfKx>LJ85|xOMul4O5VvuCOvW||vDg+2u!-UCCM+#(cmZpw#Y>J|s&WH!RDXKX~utE#M`h$^dH; z8D0<^!^eE4+p%bk9jTINLk^&rch`xIeqjso@@sJy)5*gu*7mxz-T5*l;7NBeA{~*D z@k!^Ls>NoWyOxZkqd8iX;o|XF9eD(Yt|!R}Yg^np0moPYA)d|O(#E)v>3{h)@u54L8<^qTpGvxqRicNNH@t8N6=hs-|BYn{x6W|Hs2qW?6t6?LD*f;9?e+6 z5vd#1!$u*hJp=OuX6NQe2skqL70QsS**Ebbgv#@Pv*;(0D!6<|x+OXLlUninjKF?~ z2C*p3h4k^b;{uZvpuRR#NiJMXS9=+Yk@Fe1wN2`9PHudga(h`*7?SNJa^kLLx-7sL z#YO6_KiZy#6IqGfhQY(VGHVRpLuTF&t;GoCPBsP~hK6HA+Ua;A?ILf6K1G+D@*FBd zePi0qx|wG@JIPGJ7F=5J@^UkCNaPxD}*%Bd1G@Ch=Aft$h ztCV9~j_Gq5^lqaZrORNZbg!fQD3~Kgx#{!)YZhPBlKfe3GDV?FgvAfys$`c4tu4Ra{`x_T6$Q^pgv9ZqOqV~t- ze5I8SWhCWn5pUWDX9_l7@Rw+vsKPY+caT#t926OhuYi&y-68S)2(6@!JIO7eyyKs~c7DM=lef_9gM2F^ia zIbGMFrW^?q^}`(rTM{W92+Q&-7z|?%9NT>#<>&Ttib_S=!>>DjTapK-$M1PcZDxju zJCMy&*N9aq=%thIy`1v7Yt}5KpkIv%be+1*)DmRR`>h7m9+e;jYgb&KdFJM__1Wp* z)i}9&I(5XjOAV;>IheE8_KeGJ78gKalgp5K{>U7X}f?>=PgSu)u+ zwLOBOdmkcDiS3%0U7+msYb8F*t8J7o8*t|x%W8_%!QPb!T^+>wECFE4eAzKhUY+H7 zGUeDhMTzV2d^qH(Qk0CAceS;H*iE^u<>`L9bIFD9{&*?6uV*#=$%FVl4-RyR2H%Tj zKQv|K{H4p|iY}o?xu;=N9d2v9Uz?rRHww6{LTa|HCW61UtaM^i3%l=K_I|&8xe6Fn zF=wO*7dJIHUtMu$jP~zCVUu#`aW=M#^-gsfv>5gE*PMeJ?xYOah^>nNOS-PS(ddZ6 zpTVL<;miJWTtrB0p(*sF?4fRW=3YZC@^4mEQD<&IO~|utct$$x99iu{?2lPVGQfV% zVp3iWEPG|Xt&IVR`(AyCf4OOZmJS5|3JD3op-5J*<4Q%_4$Hz5Z`*ceEv!11SZ;keSKSUn0PY98~cRwtnpkizW=jgSCe3FCGER< z74j3zl6B%xy0>8q4TqA(jJiQK;M7-WbY3ez89;~Mq;jZxiTx2@55&IrWSas4M-c?3 z0t#%n9qd)37i`xKOLC&ELA;OpJISXLJwbOEM|-$8OiLd$2%@?_#L-#r_uETG|&M$n~`jW1=oIpA!lCb$Ex7U61FYK!L73(FX~+V5+T?hv?;YU!={IkP5y zwXJnw_7wJ3pr)~Pv!cc{jrSL@_@<@7VJy+Q!yCcybHFNrq-%hSdrap%IDT|~YxShN zqd?wpb6wy){e!(6Vqw=UE};bV?P!S}I!UlY{PD~vQH`m|A}^m>cmutR)_ke`E4K~p zt{3Te6&rd>79<^7vm8kEcXh(w6UvSg-FL^PSE=m)j>^T|-mm^d z@qx;8-m>V+FB!+a#pZ|Vq%1X&M%0~NhiM%$xf#qE9}^b$Vt&O)$N5F|?mwgO5V>;_j4YQ#nUPi(>JeR0JL#FnW`r3)6=C}R?-9TFXJFb}8Mvay^jLE2ADz`9U zHhp};rp=y$CW3TI&8&n5=rcWSu5xpr&3zH$t!X_}{=Ffa+Ar9#|Duz@>}qbILY*?A zW_|1Khb`Z?8UFrrF?ZAItar}K-zwJ3o708Dyk3ZCN;NBDba?oRAE#xtGxrt`Pdrju z9HCdo`ht;C@bnoruZn7}<9g$p)RWfZ{Z6CcGc$K9;6mq94R+V%(yh_?QOrrZ^U1NG$)|Z2 z$D>5=db95$BERalZa_G_{O4jY+jHrTsC?jf(@{FiC+H%;=g?Le&E?le{);s?0#(e% zTfvue>MblUtKx3e0)hQC>l^u_MYH1DRh#R055 z`CkYR%H4LZ<9Rlk=cr7opD zfIj%Yk{b6{q?Q*larm3eF~^;xqmWdZ)1<0%hXP%&z1#bq-?M?>;kmVL3h-w_w6FzG7wB7!h`h@ z+Vzpvf(YM*KJ*Ln9Z+Xty&lsr=@Z9=d+aQ7pnl{9;P2_kwK62LUHI%R8sJH(+3X=~eHW%OvMOXp^p$ z9<9ugowV0g={O~PV6aqn{T|sl=1leMnfEYdU8Sc>*}L|)K1O=k?<(JdmX=9ke9q5P zGt*T877l`W3lgjp-;{TSCB@cWyK{VLy2fjB^NXF;&dZ6_3TspP%0}vrM)~@jY9*Pb zYYTHETI_Hp4`n5mlxcfmD@&R93t7YER)(PYn2sZ@E8Jg8&0Zfjor4}99^&z*R)X-t zQhbfnjpM&J#wHM8%{RXFX>NRKu54r`68fP7_=dgxeTgb4w{C8X7~ZHq_~GHy>pHR3 z<_6Vvdiw$N3;{R0J0d<9%c{hhDcWZwBRZrBqi_Xuv-`b!sB=YOH{* z0wE|qZ~H!QEPKZs__r~Ls%Ml|7C0?EePe0s(^%ufvQsB8H#I*u7ZxUK$Y3PgCbYF+ zB>x&T6^bui=hgh7R7L~Wl4nZG_-#=Q|59X*X;a`2MmlJz5^Qd{i9lYrG+=4TFbyx| zDA(C!;W66r{!xcomU@6I4Rne8gjH>;9q4$=lBgQcr{Kt;Mp-<&Twi1~Nf0ZTXURP^ zH6QoRPLDCM!?RyWX!G0NPDppPNt+VA;%sAqjZ*DycUMoJYjiWykI9nnQ=gj1XV;2K zUC?H0pfNuFdvo2g-LdJQ0) zdRY?36J94YS{BYL(d#FfmqmPk!P*k?{b@ONc&s&_)b4s)eCmsq%V=vs%^&og7|U_i zWa+g-JHLmUddA>JgW)d=$&e1B*AL*!b{gR|3!Rg!P~qXyYM|&wmwfvXmXUD z>un0vfc!%`WkpP*COTD)V%*8BxI1Tn=8h_ufvb-h3?G;`4pagyo^~C)B?!PTd%ZR(uHl&v2biQC@s^S??KG?Shh zW6fC&`jtT)UmJg?*G}o<-j*SI8q_;d1^KG4u~09E_2!( zd>X(eUv;TI`_93rW5FA3HmWsG+c6PPb;o00TeWUu{h{=Y5%kG3$WsF&{>3n)7L(Rz zs0-gA>Py8NrWPBe7b0l4HOp*)vSkma=jGQPf<6sw?Qid&Wa)bePtCb15gbgSRlT{T zf7zJ-^Y!)zdr{U3o7V9hkPXCT){}xQK2RRCO#aG6SB<(hTwC3)Y!3M%-`e-sEcA!$W3%Ya z>8Q2Y%gXn=55&j=wPQ73KVBD3t|YPQcx=?3xF$jK$S^@;rvv{0>(Vq22%KkV$6oUq z{E1$fvq`hzFYo7@h1TOjrATu?%+>I=Aha0J9F!hlbgO>zH66n0Tcs*Ig2e*9`8MF zcJX%`cb|XyN`3JCS?{LX7r@Q)9dLR0b>Qn>?W?<;_ZeCDD+1OVI=2I2zEPQZ1YSJj z3w&59A1I%>UbG>f^}fIRXXZ&=x78s7PR1p*Y2H@A*ARow6f`*+^Hds9ar2D>vryjJ zmz;8BrhYk87orU_NP6pym6u1oEYHWu1O2}VKF9eiqHx^H@}Q#lk<%G)ll&~-&1@rx z|I^z$G3W1zx5B6==D#kmkKuWXehw@I7-QdloD=@^PdEGQkzwmPkmXFHv*|Map?p|8 zA7K7$8P;Pwa4EKcJDS6A@P@46gJ`y`{UpwtSozLj%x^CEM{k-$_ z;cX4@=2lyjV|M(>jPeh`mLD+kaI7)+epKp8ig)e@sHhr~JJ!J&8`ddGHdURNRY#rz_VZN^*aI>WcI!?K&KBZ24%A%#<157@7VybO{u zc4)7_haY6)Te#Zk6cS;)(A?`s-WC1K$oop4(^NrRLqB?l>QWWmhmhV)Sta7d6A`v} zNlX*%c)n1n2#G3`U=ntLhpa~rj9g>zc9bQF%M()P>a2Yl>wd~&EE?VNSGK(b{bRW@ z1v)z9-wT!@5GX(N|6a1pgYc9xFzh43MMNinnhUhAxhe!r;s-z%aay#HAJ%mWcjVW$C15W_BEU|iXYz=(_^ zdSnWeGD5myKLvyDuio4|Kt>RjK?dmJu7iSXS9JI3_ZITV5?Q2Mm#TcK^QZL08v|a> z)cNF=47>f{DnrTj&#)_M?abYb%|B`(L-1@LyJp0U3HwkI$1w(B*k%l_&3vk-!1m04 z4eDfAYeTgD(mw)v{Q;_ekPsqUJAI~Ue~)xugm=rs<4tOVO&d4m#k4cA<}LE7D+61v zBNzM8#3bCw@H9dAY5hRGAy@OV5Z(NZaD%QUcK><9H#+U82~G}As)fhn?*%>V->0`2 zhGN8&bu%@5dd?BEo7wUKEdF?Royo(cJE`6*RIlFb`Mf6Jfw8JED*sjY9IpnpK_u{# zK`*W=qKg>7K;yc7bX?dcKHfbEU=ahq9^A`pxo(+`D~iPXh;MgnPJpxm&?KXrVBKjT|qomZ!Uym8XV<`+b3W+}A4N zr5w2m^uH=l9i(F6=j!EokM3k)1$VyJ14%i#S~;SFq@-MZjd)?)g7+D^416K$_dxuVXIA=)V*mF3KXg^GaIs~D zyKw1fv7v)hE!-UyU2I(MN$~p+ZcTe%Qt*4z|79I2fKXC+#>gjz4wAm#{o`rx>LO#| z31^iNf`Xwi2pGZ(7KHNg@^OG+h&2UL8bNgMe_phFyu2`8S{vHGWYBwU1hFDBzz4pc z`tKL5%YQCGdG6``KQfr$eJuZ12D|6U|HycG{)xxK|4%$V!9QbxzZvHH-O>IBQ1Ex( z`MzsVpFL4o^*_IDZdUh^L^DEPni?(KuY5GbGEedGN5U>F$szlQJe1n$Z4 zH$5mA@|!9DBZK|JFci%9`+Mep^aO$54gY_IeE$H1@bmpc4+7!+hcl4-on8Nn2L=B# zolq#`FK2*1dG~a;uy=yH|L%!e_Wt)ZB?$UmXZK&Ge|5s&#H5A(sS9+FqRf50hzRgO zB_+XNL1`&TIZ0`VB%hpsBp3>jmXVZ|;^%|F#L)l0RqhK@0VLyUC2a?{a`f4xc=;ry1m&P|a{SVQy!=B delta 10700 zcma)?1yCGK*YAS_S=`-&%L=Z+3GM{TXSZg44-E)wsR(F{YSOZtU@pEyG)b6-vT7d)vt#%5wMa#e z%BKG-?P5>+S8Z5qF5=6kv^)v(P#6vKUKDP&`ZQBupeNOQkQc46aoI00od ztraAQ;`v3H$+JKRAxXCW5TRoug-*@x#Ti7@;mrQQ82P}zDAG~8k6eORHMHuhrJZxM z3u74RQOxWoA&jP*9slKS4U!qj&+J`libX8-!pYNpS2c$l7%yc&ipJd_}E6nE$VOpq7)iqpgLLC6rd+KO2BSe|z!2u7bgTjQ-&(j0YmX z^E85+yDQYp0pOke-Pg{WV6O3pF}Lq<9Z%g01LC>?v@G0(RELGP7BQ$BB=M&29Ly!U zlQ_wQYdPuAr$qFZ!`hWx3($Sa(3#WVu5LDoNXqi6nV--v68W$MmB(zUd5#?>a|4yh z`AbG`@DqM!iZWOrIdr*o-TjP%Wn?tY*?sJoU9|ft%91iq#3*PPQzQL@0((%dbD+k~33I!x0Zu{d3vTk$o=KRox$Y}d}aHRVZ zTPtZ&2vdk;>Y7{Y%T2*@MfaXXv_J}B?1{D|e zn%P(ot+Jo5k1d(4QMPJom((?uX)N2icJU13Fm(GdUeO0wo5Tl20w)OsjJ3@d^{Y_R zS?(~@dYoqHy*$5pRAlFwgdLcj@d!#BGdjT?AO)ewUuYLX(_7|nzEDIEeSEecnM=0? zlR8p&#oxm>i8v&YbN$RNPE9{J6u5>b?({y) zUkfd~^CERV9O*c$?Aj=Jy!`Bmhs0r~7;&kfRT@UhoyR@Hy*n=>E!S#QTK_4-gBxbm zmw_h}Z~m@M;&iHO4i>(D5M`Fk#7$%6I3%4+k*sRbP=k;5-k=$|txZUn&jGW+>67O} zYopzwzCmV|&fRPDSau4XX%o$6|AooLN(Cnwp9Sq`MMdHk65=)hNA^&<-S|NRd80yw zDH$!)pIma(wkc#+zxUY6>(l%87$+`L8#bVwXLHD;f4b_HB20bX(XNQaOJFqo+MTtt z*KNF@;cfOqnlk{u3^a_+6!=_^&U0gD>LYK<4ZH?w!QXqu*Anc1YXLA(G?QUIPQrSc{dJVdwNzcq*H@qkkpY*!3LsxxkrC>&VJ<_GN_roI~c3^~z^1y7wI5 zmYdTqT*|n+t@s^_UWlx=`7H)s4^O=iR#t2D=i!NA4XP)4mWzmsdc z>h^ZbVAjOSi}6@Cl`(&(JcbolRgeP!tD9HC=LcujY{@yF$1Dfs&1gi-sl z`pD=R@^9jde1mbp9~+zI6IgdDfMODgz1r9njV$JlIP1vuU&vERuZoqdSZBEsA*m%J zQQ^y^OE3?rl`UJDq0ERjGSN<8Hub^h!&V8*n5E>ghpM7^QOBKYA*Y11Gn<*rFSL0& zsfBR0Hy@tu#Iisf3uBkN6h6jN%}cOa?;I?b%T7-nI${q!%Ax|)5aQz;&tFB3FXRws z;^OwY_GdD)b+F)Jn{}gP*?^{uy+j;b?yQU0Ghs6#^ApN4mbDE~kIax;%Zi5Pj4}33 zACc)78MOPZPKSZH*H)!WZ@dRxXe#J>m=YJ7$m^>ceAni{|I!&o@^#wBF zl;UTVTYF>`3FvYdeTQNu`LtLhk4LCD3@6nn(Mbf~w6w++N}CWz$ba){D>}~id#j4^ z&e6rb(62lVYOR(xT)Uq=m$}8^PQWVb+61E$;$GQw8S!vzO=+^ua!5;;KeEh>$qNO(1 zf5D}GJ}sj<*dyon8HB^(H#+}vql@I*HlZf>mrlI_8%8-R-yq%9*f+rh(FwB;BJr?j zl+O@)9qA}6WBZ6qJ~Jt)8>(*@Au;NeLG(6lKVRTRON|H$6k17DG1}E1rvr! zK}or0sZdGC&0NwKXa%z+5iywAY=LwA*y6ZgNDfU%G`?07MOlg2&E- zc&>3K*Nr31Tt7-nX$jiq2=A*NtOVk%v20Q>wkz~kfn|D7Sj56=af&5I-o9V?EWmiv znsvqRO+h}!GH8~>fw z*8NHaP)5Lv)mZ;05=eDLI&-#sh6!>fbk@)rSpD`h8^3D)c3RiNNsiac>m%Tr>18kt zpJk`|O29#s@{M2ndxp~`Vr?s!@mG;yk;I!oD?n5}nNBdndDe*HNHh0i_KnVf@7mYI zBS6>K#>!~}qA!Txq`k-=a5anVs!OV&Pa<&c>Nc(Kp*mhrM@m>NKTND2nWTM*jG^lm zPhhBW-H$$)T{DP&q+;V9G=a{o;=P7$%iNzsEAURBfZ7B#8?=MGimnM$5!ecmXQnKq zr2AT{-0FF}oG~wk5%nI~_Cg(a10WCBxfen&{ZLuSG1N>v$2HZvxoBSHdnuFn3`^B| zmO6yS+R-OM?+jDcYhmhSB{)NPqcI$KKggXSg&^nfK6qgaEwr&QG{e|CsMCX_l{NSH zjI7RchvCx8`}hlk3WN#PU_j_=-ke2PO8+{S6?deb+znBJK^=&OJi{am0Nqdo$Hach z=Uqqad7i?xZuIs@o+L&+204U9OxXOiknEksmUA`_w~gm3vUh?#H_i>DE+0_p>AM;v zDA~i2o=1Hp5 zdrzzTt{JhmmyEsm{YfVgwl1Axs5o(#`O5%h^)m#3>eS8)hd5}<601A|szIFBrum6X zUc$he3C8CdDEZuHunO)yHT9GE*o!8}7SppSgL}-US!dGM zvol9$4phhPpMFf`eTgIJf#$)Zd^HQ=(BQSDbt8)VaTL`h*t!J8g{DR{R_bDYG6eJY z3`$pHPuJ4A4~73I3T`t3+jTdJ)8e{`{Yg63<$|SU85T0d^t@>z76Th3yRHYFg0fTu zK>^CM;@Zfy^;4d&4fORo@2_oVQ4Sg#w+Fm>&X{BWERy^@jR zBC2=RKEEyk>mTmBsXL!|z5dH5fW)n#Qe$s6NLUyn;LKmk@#84vn21i6?c(Qzsd2R6 z@6MuHoe01~*V_2JcowY9e&Mw-F^@oDVY*(@BHwB$9E+H|35#qMqjki^vy|s_P<~aT2S~ZYCmZv(KOFM4of$Q&_23&-SgjV>u_S zVW=i#Q>s9lNm_IrH+cnUJvr%D))Lf z_#8Vb05$^`{)C0z62Gs{T5|FOsW)!KV1`d8gkPuMu(%_8Y|vPeP(G%S z%zHz7e3MSEgqM}WN7{P!7jmpsJuwe1fQ|PM9fPu|TLR;-QG6K@55rwQe+ZG6Sl1FE z`uUjNIub^^?K{n6ZW%J?LhdQz^tx^`3?+S9hj&uM0;cc4m5iqE6fjPbKRQO?7dJ%~ zmcr^Fs4UH(aQ7Asc_W6Bu{_c9-IeSrYcVk0jObd*!77BqPxqHV#XjfodF|(pk&E3t z2RGh&ldk;m&Zq>dXZ97tx*Z1E3$?wizCiq$lFGgyA{om~kgb09A#|)# zB=3_g{|%d_5Ac+x`dTD+*O`Meb2hG7rw}5KELa;3zw?m<4^WEXkLX8 z``)~tG|zl#;QX){zojoO*$WAe)XQ&z=vXH%9@4gy8p;mu>|~C%T8>wnQ?8R@NxoA< zE=DVjT35phiF%f$kzP!syO=`c4L%n2c9$EU2&ByYp`JI$lM!dQHLM zTi#70B(^0_F_7tS*o?5+xJ+x`rOSF`0_;xqRlDAo@8cjc7lFTdkOkAcMUW=}PiD7G z5E((cQ&0o?CKIlbxRTmS`K53rT!_S5b9QCtZe$Clb!Cu45Gz6U(Alf&Yf2SZ?wgFX z!nEXYAA|Zsb5K^XMq*7RkPA85E@kfy=>Mbb51;Ktk=`#-~@AC3!oK9g|AUyE~=&EWCQkl z=Oaz$^;_z1)Xf~QO@xiyiT87%=?o;fROZUS&a?d`Y8L;3{2%Wk4X&zLOsRcdOzDYV z#piI-u)^JAYZ6N&@rLkl&~Kg-ixBJ94F(|^b^zW2)4e!8eJVHJpcIdS)eAp1!|vK1 zalL*k*NDN#l&k_1D;F}GM9ze zxy`&uNlaJLQ6u9aPf6i)M~h1c7TkWexsHb|z(l2qtsOj+=m}zyW2BdiK|f7C>*8v4=IsnKpa?KoV(hRx_+g;?qvB2dmXHlIQ}95hHPO{*%NwO@kt%>L z8nT{+=dIE5P7!l=Fo~>^;iMOnfE%CXMfQu^-6V%D-!b1+ER&P&ukKqXOZb+GfRedOu zS_OyitZ{duT1|Mgi;^HJ2THI-gF>99ZDXmLnh2|{o*Ju`s6^;Z2o?6yMn2WRVM3i$ zKQ=&EX=wvVxLy7<0&}5Y3R~Gzz#?4JGo5H5i%6o(Md>_a$xv^8xx!7iFT5G2^%-Z34k*AGxlD5p!) zu4>dLey|ej=`q8`cCASZCPaOpwBN}O4c5T6rz{r`80>*L33|hWXB^T$gV0r9KN;FC>*TQUD zY_ZWEIt47Xuwk8>-2Uu03vQumhOP78xcC=e$-2UhB8!T6#zj4uz8EWk4Hl%hJ4%#?mpo(p6$hN{NG zzH`FRaZ=WhF-aS?kW5KkEwofg0d=%+j15z|L(UK_Vkb7_ZL!yAnxWm2}#QU(kt}S+bo+S(&A?$CDlM+7%ZzqxrJK45IGT-{fR6VfN8(efxU_AO< z^sdO2%+C@VUlUjq4<;JlMiyT7s!}y-v}dS(KIf3fFsa)v*W#-hi=qDF@f{+xeE;Jt z`4hg`kA(-md8zmiZ)%=J(=h56@eAwn6t7;u_R6n+Qx>PWez*>y7O>#68KD-#QAb!s z5xqbY+O=$X0EAjMHGFIBQ|fXV8lnxH!2gD~`PBd;o*GJs<8-?dSL+E=Hy)n3`XYaF zM)h(*+uhM5a3eZo#|M2jjb_~K9O`FYX2!dHa`)4VsHQ zpUdh4ULlI4WRylR+45T?FQ&($)IJ;)^+U%xvt9Q(Q{f8Ao>!XMtW3H7mE8an1nSyr zsRx5jj3vq{qIiD6DB}l>8H4m-9KF@Iu-lP6{^?twqx-bN+92P0?vdViroz65SlVP) z1yS{+dqgb9QJyygNth7N<)w9E>&j=?MB?SoY6R`3p~5+vp_p>b!W=8dUry>Xn)qQi z!&j^RKetD{{5ccJm;Bh>t|cj?^9eqLI`6=&;@;5^TaH0Ha|APQkUo_x=HK$ZhrKf) z8fm$?v7!y5BtGvowZb?;IjtJgcwaD@|C-2IzvB~*1<$4Yed#=5qK(q3!wxqB19p1V*)gbJ?r zNj`oHv{S3uttT*6XZ-%4@9qzbQ-6+}A_&9&o({R1 z&2@RL#R1+Pfw=)S%@cf=3x;rb19i7iqiF~O)6e>{0@E*$AT*^Te!JX?+5I@X_a_K2s2Fdaplilh zYfE)_y^`CVj^O}ksYKkz`z45J@umqp(vOh2PkN=Q7M@Ji|nPD-**VAF+R0p;(U z_ov@u>Kh}cHvq>eOc(dc5dK}ad2?Pa!_Sjt&-S+F>j33OOfbe_lhlt*wY(+An}#(} zc?W4h-tEbqQSs_JG3R=J_hV$8)4S@NQPkJx zM*aAq!{ab(ZHSU)wKc%BhQELLa(ZfD=~H1xMbBWHY`C2l&#sZl-Qi+0ji~wbXA#ud z9GXf3;-{AoAKOwEetj2h{;71ha>rO}eAR~+8Z8>6S32O^@#=Gg0SqZN723%dSl5r# ztglwpN#tJ9Q;yP@VDUzi;G^}@*v%SZ{V#IsY;ShMU4w<)Z_SXFhldjz*vp3%atqjt z)5WIOelhI7B@HK?N&}bOcXk)n`bM(H8p?m_M0LHG%txOLb=}`o;%;%gwB~jm`es8} zDYX3SA;Nvr=mV`KrymTcSK65dn*U(EZ>7Q>DCJ8e@5(>fM9W39)+Q6^50v#6ojj;L z!n@I9J~eGawxyU>Nrr`0Yde;nNM=`iM^jMye@D}JR?hyX*A+} z->F(JabDLXb64m1@i->1zQ63oj{{OFYEO;;Rw_;cODdv>Z)`?H!;%F9%>#i z7Y-}D-`Az5Rt(V$u1y52>?lB3ZHX$^a=*IL?I#xIP&3a4-P`Lp50Nr(4EQrzf1`U_u6yAPT| zh?HqvokMcBGtV9$t*Ca{ z=!8jQ+MX6T0~#@z*-8U=wwtxCU z1o)XHpDc|Q??t*~7IOP4rhiEJg+vPq?l25qh1-U_HdCCWwJ{3U*^Z(MK>O}fN4|lq z5cv%>L&3mMjEF;0^;ppKCWLQDr$K!W5;B|bPxD4M-nHXZPE{H+6(M&7R3~{^!n1(N ztiPIGJ~@hBHN^F#nDQsAc#CF&Uz0^pu|l@3^5jE%;r7c(;fPG{{XN2xh*WIN11FfY zA?Ekrxi7u9qbGkbCgPX8N1I{WKbDNc{z{aeRLHE+(2VzW#wV0|sdg|h`Dan$i0h^L zin;8oObaxeFJc`C4>|e7(&dJp_VR}}XK03k{6gg(MGfEVSdHzOSx4iKibc4FtaGNL zNX=zT4rLYl15sHhOMl0x*@6dUEr*O@BSOD_UiJ;wnF(?+G{mbfD5#ezU3$F?V6yFE zm`7DTf)|Q(IpyB6Ds^zl_>xwzeq9Q`rn_&G&xQ8=AgcIi>_dH2TJI`OeF~ZxhGSi? zD%aD?Zt-cfCk$K{zAkRxOFK_4t8)pla3|DznL0He!4RpTD8r&;9SBswoakhPEuq+W zhS=H{xmIhsBkRFrwiBat23bq9d8XQNcm1?3_dIl&XHC?LwtHGySO+X47PGR8{6h38 zGN_8S9TM$c9m|uS;=Y&T9~&sk+sWFVK%3F|&F{ae8URo7i&c-Nzh;&({Q92s>v7>} z!VWxnv|?C}kgJP+ibfqLtj|3Va~v}UZs%;VCv(u>1;`9{vsTU%v1YT(>Uf-U zEi`i3?`b(_0NV|N7C#y1J}}hRJI-a_WW`FO&gTDWFz)|;Z<{DOXi_MV&6_qm!luu? z8!!2FK*GMPTvUF47$6iz^~!0GdX~TksAA_he_LW(`T);WjSJ}22}B%F6_*m@3!uCfxNy1OlA@_{ zABd+7zeo7Aro`#hvF6-#@EYr{zouHDL4RhALu_QN;_^^M<{g{CsD5*NTq%0EFRvWkXx>OEad z4Ie!x@Vxsod3)+st`XCtAYx3q{BXzAGme498-a8{g- z?kx##60u0@G*p4aB{+WS$Ydllx;%%vJR34~qQn~{F;RLfyh6(Si^~^yH1rb;=#i81 zdQ?QI)PyYazhTex8wa|cz!XA~KPbl^#zcUZ7yM611Pp;Es4&BEfc&idw1NWQ|1aFc z9mvG|hZXrZ+yu=1cmMxOk8txqp3o6mSkka#FbG%X%0D!1j~?+{fq||VIjFx7C*=|; zLbNzFsuol4#FL-QC$%zNt6LbP7~W9Q8O4R#Wbuuf9g&xU^7VGfx6#8NWUHg%v~}rJ%#3O!hYQ zadtCX;Y0BC;rnP;lGwtSv^a}Rx$S7$H9Oms92(V0i092WIE`(D$1eLHVMPvC|b?`+3} z0S@z(W_l#d5xm*;HtxKc`)%G#(a%1>e=;S+bUfSIbQ8XL7n-9%9=Mj2fvI!zlY(Hf zoZ(MQ;k#Uf@CvXzB7_fq0d`|D0sv*59NqtbL9VvW?oO_>V8~x_`WNg6%ni>1Qi5f{ zw1R)CfU*!;FwcJl^B+LcQ!xMZNDJnLU#N@Y|78f&fVw$(xLQEnXu*7NISpQTtOgkj z{0A`l8+4L#^7zvRC~NEL_H_M=Gy*D_{j>b9n9^Uh|JDu!)UtKAhl?2w2XjiX0CrIslS}k_!lUO z2VS5}i}>VrpSC_CSP)L5L-qWxNx^k>nAlW3-0f{0|IF)eT1v^x(V7M7$f>Q#`ma7i zK=3a*E-!eVypVcAq1E@6($9IcOdKQ6&K3d!oAjO#^{=`2W)m#Lvt9wE6zs zj-QwJXwbod6i}w3R_1*uM-665!_jOZ*oN%qJ-D6vuz-5X=wy!;Jlh_Ot~5 zrSbhqvH!3W;D3tLKQ-MyrKiP!JVo@M;$ND;Q}F*=`@gIfx8Q%<@q*wrdXz9QP{YaT zDINYwc>qw}(aMST&%B==VL*9lS_4`U9%)GlDQPed4-YRyQc94I_sQj_jN|8l@No0U z@<@mQ{@*6FPy0{W$wJBoYGLQ*;Xn)Ila-Z`;O6Is2txRt5>tSOn;#@0DZwKJeu^x= gpp5jNq;hjNb9MK2g<1i?0(|^n03)M}sx07t07@sKE&u=k diff --git a/tests/samples/sample_bad_empty.docx b/tests/samples/sample_bad_empty.docx new file mode 100644 index 0000000000000000000000000000000000000000..3b60e1683a722b3ef3ee0d8f51b67f11f5462802 GIT binary patch literal 2550 zcmb7`L2DC16vrn`HqhqK*pea=MX(@JNGX)wOF zHAMPF`~V(Al!ACJ(h9ye*?BYjW;07#vg9(q{NI0Gex&=XT3ZnK_dR^^QoYF@%c3n_ z?L@;=4%7YcbR?74os&U-FPsoGUe8*G?MT6#uq&0ZgLV3jv3EnE|&fG){<% zvvV6~d#fDmTFVB1v%%yrprXS(ab1Ji)Q1&odgC*|qR%{7ZlgN2fThNO)k*+Xq1Vg> zaaMy^*ytBxWi}`W@~sRAh~cgPm^lwnxS5_gUE}Hc5VbXzp{!xiR5JkElnsadkO$gd zNHcxOCvb_GIx+HB{R!HO&_SUS&HZp-D;IeMeh;WRT}PIqT|?@6u??xwd0zq+ed#IL zwWW>ytg+0Uz(sd@xUM;E?v3W!*QS($9`#`PzAokpep1T{ww(%D3pY2YgC|52{=0z? I=WtYr{}Suux&QzG literal 0 HcmV?d00001 diff --git a/tests/samples/small.pdf b/tests/samples/small.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7b6e0bf5fad1a09ee063e3a89b797b8de5215d2b GIT binary patch literal 628 zcmZ8e!A`31{K>-~}BX@cYG?Z&qfP?*LFd!;RkVfq96}@87NtzTe zZ(H&^YUpETfHz~dfn&9aow5$OLSAVBG*X2==1cT+tqjmWC8-?h6U-R`f-&eaR;p(P z=L+apUSnd%DidF$ESmWoM8#=X7WlhL$)(fd!j#XqA#k8WS*WV>sCJ*qq)XXISYMPN z=ehsy`S3@S3=}zS~=4pCX;L%8HJbINkh9PrPZtSuSX|={L{txczsU`pb literal 0 HcmV?d00001 diff --git a/tests/test_main.py b/tests/test_main.py index 6619cc1a8..22412c89a 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -463,9 +463,9 @@ def test_pdf_p(self) -> None: cred_sweeper = CredSweeper(depth=33) cred_sweeper.run(content_provider=content_provider) found_credentials = cred_sweeper.credential_manager.get_credentials() - self.assertEqual(2, len(found_credentials)) - self.assertSetEqual({"AWS Client ID", "Password"}, set(i.rule_name for i in found_credentials)) - self.assertSetEqual({"Xdj@jcN834b", "AKIAGIREOGIAWSKEY123"}, + self.assertEqual(3, len(found_credentials)) + self.assertSetEqual({"AWS Client ID", "Password", "Github Token"}, set(i.rule_name for i in found_credentials)) + self.assertSetEqual({"Xdj@jcN834b", "AKIAGIREOGIAWSKEY123", "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd"}, set(i.line_data_list[0].value for i in found_credentials)) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -556,19 +556,20 @@ def test_encoded_p(self) -> None: def test_docx_p(self) -> None: # test for finding credentials in docx - content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "password.docx"]) - cred_sweeper = CredSweeper(depth=5) + content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "sample.docx"]) + cred_sweeper = CredSweeper(doc=True) cred_sweeper.run(content_provider=content_provider) found_credentials = cred_sweeper.credential_manager.get_credentials() - self.assertEqual(1, len(found_credentials)) - self.assertEqual("Xdj@jcN834b.", found_credentials[0].line_data_list[0].value) + self.assertEqual(2, len(found_credentials)) + self.assertEqual("WeR15tr0n6", found_credentials[0].line_data_list[0].value) + self.assertEqual("ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", found_credentials[1].line_data_list[0].value) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # def test_docx_n(self) -> None: - # test docx - no credential should be found without 'depth' - content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "password.docx"]) - cred_sweeper = CredSweeper() + # test docx - no credential should be found without 'doc' + content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "sample.docx"]) + cred_sweeper = CredSweeper(doc=False) cred_sweeper.run(content_provider=content_provider) found_credentials = cred_sweeper.credential_manager.get_credentials() self.assertEqual(0, len(found_credentials))