diff --git a/credsweeper/__init__.py b/credsweeper/__init__.py index a701de63a..e557469fe 100644 --- a/credsweeper/__init__.py +++ b/credsweeper/__init__.py @@ -18,4 +18,4 @@ '__version__' ] -__version__ = "1.10.3" +__version__ = "1.10.4" diff --git a/credsweeper/app.py b/credsweeper/app.py index 12aece790..65792a7f3 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -324,7 +324,8 @@ def file_scan(self, content_provider: Union[DiffContentProvider, TextContentProv if FilePathExtractor.is_find_by_ext_file(self.config, content_provider.file_type): # Skip the file scanning and create fake candidate because the extension is suspicious dummy_candidate = Candidate.get_dummy_candidate(self.config, content_provider.file_path, - content_provider.file_type, content_provider.info) + content_provider.file_type, content_provider.info, + FilePathExtractor.FIND_BY_EXT_RULE) candidates.append(dummy_candidate) else: diff --git a/credsweeper/config/config.py b/credsweeper/config/config.py index 371fe7b1b..63845fa05 100644 --- a/credsweeper/config/config.py +++ b/credsweeper/config/config.py @@ -26,6 +26,7 @@ def __init__(self, config: Dict[str, Any]) -> None: self.source_extensions: List[str] = config["source_ext"] self.source_quote_ext: List[str] = config["source_quote_ext"] self.find_by_ext_list: List[str] = config["find_by_ext_list"] + self.bruteforce_list: List[str] = config["bruteforce_list"] self.check_for_literals: bool = config["check_for_literals"] self.not_allowed_path_pattern = re.compile(f"{Util.get_regex_combine_or(self.NOT_ALLOWED_PATH)}", flags=re.IGNORECASE) diff --git a/credsweeper/credentials/candidate.py b/credsweeper/credentials/candidate.py index 1056bc5f6..bd940dc52 100644 --- a/credsweeper/credentials/candidate.py +++ b/credsweeper/credentials/candidate.py @@ -23,6 +23,8 @@ class Candidate: use_ml: Whether the candidate should be validated with ML. If not - ml_probability is set None """ + DUMMY_PATTERN = re.compile(r"^") + def __init__(self, line_data_list: List[LineData], patterns: List[re.Pattern], @@ -128,12 +130,12 @@ def to_dict_list(self, hashed: bool, subtext: bool) -> List[dict]: return reported_output @classmethod - def get_dummy_candidate(cls, config: Config, file_path: str, file_type: str, info: str): + def get_dummy_candidate(cls, config: Config, file_path: str, file_type: str, info: str, rule_name: str): """Create dummy instance to use in searching file by extension""" return cls( # - line_data_list=[LineData(config, "dummy line", -1, 0, file_path, file_type, info, re.compile(r".*"))], - patterns=[re.compile(r".*")], # - rule_name="Dummy candidate", # + line_data_list=[LineData(config, '', -1, 0, file_path, file_type, info, cls.DUMMY_PATTERN)], + patterns=[cls.DUMMY_PATTERN], # + rule_name=rule_name, # severity=Severity.INFO, # config=config, # - confidence=Confidence.MODERATE) + confidence=Confidence.WEAK) diff --git a/credsweeper/deep_scanner/deep_scanner.py b/credsweeper/deep_scanner/deep_scanner.py index a74a09994..a5bf212d6 100644 --- a/credsweeper/deep_scanner/deep_scanner.py +++ b/credsweeper/deep_scanner/deep_scanner.py @@ -199,7 +199,8 @@ def recursive_scan( if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type): # Skip scanning file and makes fake candidate due the extension is suspicious dummy_candidate = Candidate.get_dummy_candidate(self.config, data_provider.file_path, - data_provider.file_type, data_provider.info) + data_provider.file_type, data_provider.info, + FilePathExtractor.FIND_BY_EXT_RULE) candidates.append(dummy_candidate) else: fallback = True diff --git a/credsweeper/deep_scanner/jks_scanner.py b/credsweeper/deep_scanner/jks_scanner.py index 2f95606cf..993cf3549 100644 --- a/credsweeper/deep_scanner/jks_scanner.py +++ b/credsweeper/deep_scanner/jks_scanner.py @@ -21,18 +21,25 @@ def data_scan( recursive_limit_size: int) -> Optional[List[Candidate]]: """Tries to scan JKS to open with standard password""" candidates = [] - for pw_probe in ["", "changeit", "changeme"]: + for pw_probe in self.config.bruteforce_list: try: keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True) - if keystore.private_keys or keystore.secret_keys: - candidate = Candidate.get_dummy_candidate(self.config, data_provider.file_path, - data_provider.file_type, - f"{data_provider.info}|JKS:'{pw_probe}' - has keys") - else: - candidate = Candidate.get_dummy_candidate( - self.config, data_provider.file_path, data_provider.file_type, - f"{data_provider.info}|JKS:'{pw_probe}' - default password") + # the password probe has passed, it will be the value + info = (f"{data_provider.info}|JKS:" + f"{'sensitive data' if keystore.private_keys or keystore.secret_keys else 'default password'}") + candidate = Candidate.get_dummy_candidate( + self.config, # + data_provider.file_path, # + data_provider.file_type, # + info, # + "Java Key Storage") + value = pw_probe or "" + candidate.line_data_list[0].line = f"'{value}' is the password" + candidate.line_data_list[0].value = pw_probe or "" + candidate.line_data_list[0].value_start = 1 + candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value) candidates.append(candidate) + break except Exception as jks_exc: logger.debug(f"{data_provider.file_path}:{pw_probe}:{jks_exc}") return candidates diff --git a/credsweeper/deep_scanner/pkcs12_scanner.py b/credsweeper/deep_scanner/pkcs12_scanner.py index 562cade8f..5f9653379 100644 --- a/credsweeper/deep_scanner/pkcs12_scanner.py +++ b/credsweeper/deep_scanner/pkcs12_scanner.py @@ -21,24 +21,27 @@ def data_scan( recursive_limit_size: int) -> Optional[List[Candidate]]: """Tries to scan PKCS12 to open with standard password""" candidates = [] - for pw_probe in [b"", b"changeit", b"changeme"]: + for pw_probe in self.config.bruteforce_list: try: (private_key, certificate, additional_certificates) \ = cryptography.hazmat.primitives.serialization.pkcs12.load_key_and_certificates(data_provider.data, - pw_probe) - if private_key: - candidate = Candidate.get_dummy_candidate( - self.config, # - data_provider.file_path, # - data_provider.file_type, # - f"{data_provider.info}|PKCS12:'{pw_probe.decode()}' - has keys PKCS12") - else: - candidate = Candidate.get_dummy_candidate( - self.config, # - data_provider.file_path, # - data_provider.file_type, # - f"{data_provider.info}|PKCS12:'{pw_probe.decode()}' - default password PKCS12") + pw_probe.encode()) + # the password probe has passed, it will be the value + value = pw_probe or "" + info = (f"{data_provider.info}|PKCS12:" + f"'{value}' {'sensitive data' if private_key else 'default password'}") + candidate = Candidate.get_dummy_candidate( + self.config, # + data_provider.file_path, # + data_provider.file_type, # + info, # + "PKCS12") + candidate.line_data_list[0].line = f"'{value}' is the password" + candidate.line_data_list[0].value = value + candidate.line_data_list[0].value_start = 1 + candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value) candidates.append(candidate) + break except Exception as pkcs_exc: - logger.debug(f"{data_provider.file_path}:{pw_probe.decode()}:{pkcs_exc}") + logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}") return candidates diff --git a/credsweeper/file_handler/file_path_extractor.py b/credsweeper/file_handler/file_path_extractor.py index ba8dc6f5b..f14b6ced4 100644 --- a/credsweeper/file_handler/file_path_extractor.py +++ b/credsweeper/file_handler/file_path_extractor.py @@ -6,6 +6,7 @@ from git import InvalidGitRepositoryError, NoSuchPathError, Repo +from credsweeper.common.constants import MIN_DATA_LEN from credsweeper.config import Config from credsweeper.utils import Util @@ -15,6 +16,7 @@ class FilePathExtractor: """Util class to browse files in directories""" + FIND_BY_EXT_RULE = "Suspicious File Extension" located_repos: Dict[Path, Repo] = {} @staticmethod @@ -56,11 +58,9 @@ def get_file_paths(config: Config, path: Union[str, Path]) -> List[str]: for dirpath, _, filenames in os.walk(path): for filename in filenames: file_path = os.path.join(f"{dirpath}", f"{filename}") - if FilePathExtractor.check_exclude_file(config, file_path) \ - or os.path.islink(file_path) \ - or FilePathExtractor.check_file_size(config, file_path): + if FilePathExtractor.check_exclude_file(config, file_path) or os.path.islink(file_path): continue - if os.path.isfile(file_path) and 0 < os.path.getsize(file_path): + if os.path.isfile(file_path) and not FilePathExtractor.check_file_size(config, file_path): file_paths.append(file_path) else: pass # symbolic links and so on @@ -152,18 +152,15 @@ def check_exclude_file(config: Config, path: str) -> bool: def check_file_size(config: Config, reference: Union[str, Path, io.BytesIO, Tuple[Union[str, Path], io.BytesIO]]) -> bool: """ - Checks whether the file is over the size limit from configuration + Checks whether the file is over the size limit from configuration or less MIN_DATA_LEN Args: config: Config reference: various types of a file reference Return: - True when the file is oversize + True when the file is oversize or less than MIN_DATA_LEN, or unsupported """ - if config.size_limit is None: - return False - file_size = None path = reference[1] if isinstance(reference, tuple) else reference if isinstance(path, str) or isinstance(path, Path): file_size = os.path.getsize(path) @@ -174,8 +171,12 @@ def check_file_size(config: Config, reference: Union[str, Path, io.BytesIO, Tupl path.seek(current_pos, io.SEEK_SET) else: logger.error(f"Unknown path type: {path}") + return True - if file_size and file_size > config.size_limit: + if MIN_DATA_LEN > file_size: + logger.debug(f"Size ({file_size}) of the file '{path}' is too small") + return True + elif isinstance(config.size_limit, int) and config.size_limit < file_size: logger.warning(f"Size ({file_size}) of the file '{path}' is over limit ({config.size_limit})") return True diff --git a/credsweeper/secret/config.json b/credsweeper/secret/config.json index f27af790c..4b4e8b83d 100644 --- a/credsweeper/secret/config.json +++ b/credsweeper/secret/config.json @@ -144,6 +144,11 @@ ".key", ".jks" ], + "bruteforce_list": [ + "", + "changeit", + "changeme" + ], "check_for_literals": true, "min_pattern_value_length": 12, "min_keyword_value_length": 4, diff --git a/tests/credentials/test_augment_candidates.py b/tests/credentials/test_augment_candidates.py index 876432dea..815d55173 100644 --- a/tests/credentials/test_augment_candidates.py +++ b/tests/credentials/test_augment_candidates.py @@ -11,7 +11,7 @@ class TestAugmentCandidates(unittest.TestCase): def test_augment_candidates_p(self): with patch.object(LineData, LineData.initialize.__name__): - candidate = Candidate.get_dummy_candidate(None, "file_path", "file_type", "info") + candidate = Candidate.get_dummy_candidate(None, "file_path", "file_type", "info", "rule_name") candidate.line_data_list[0].value = AZ_STRING candidates = [candidate] additional_candidates = copy.deepcopy(candidates) @@ -27,7 +27,7 @@ def test_augment_candidates_p(self): def test_augment_candidates_n(self): with patch.object(LineData, LineData.initialize.__name__): - candidate = Candidate.get_dummy_candidate(None, "file_path", "file_type", "info") + candidate = Candidate.get_dummy_candidate(None, "file_path", "file_type", "info", "rule_name") candidate.line_data_list[0].value = AZ_STRING candidates = [candidate] diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 9c5771e2a..00984a1ee 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -1378,76 +1378,76 @@ ] }, { - "rule": "Dummy candidate", + "rule": "Java Key Storage", "severity": "info", - "confidence": "moderate", + "confidence": "weak", "ml_probability": null, "line_data_list": [ { - "line": "dummy line", + "line": "'changeit' is the password", "line_num": 0, "path": "./tests/samples/changeit_crt.jks", - "info": "FILE|JKS:'changeit' - default password", - "value": null, - "value_start": -2, - "value_end": -2, + "info": "FILE|JKS:default password", + "value": "changeit", + "value_start": 1, + "value_end": 9, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { - "iterator": null, - "entropy": null, - "valid": null + "iterator": "BASE64STDPAD_CHARS", + "entropy": 3.0, + "valid": false } } ] }, { - "rule": "Dummy candidate", + "rule": "PKCS12", "severity": "info", - "confidence": "moderate", + "confidence": "weak", "ml_probability": null, "line_data_list": [ { - "line": "dummy line", + "line": "'changeit' is the password", "line_num": 0, "path": "./tests/samples/changeit_crt.pkcs12", - "info": "FILE|PKCS12:'changeit' - default password PKCS12", - "value": null, - "value_start": -2, - "value_end": -2, + "info": "FILE|PKCS12:'changeit' default password", + "value": "changeit", + "value_start": 1, + "value_end": 9, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { - "iterator": null, - "entropy": null, - "valid": null + "iterator": "BASE64STDPAD_CHARS", + "entropy": 3.0, + "valid": false } } ] }, { - "rule": "Dummy candidate", + "rule": "Java Key Storage", "severity": "info", - "confidence": "moderate", + "confidence": "weak", "ml_probability": null, "line_data_list": [ { - "line": "dummy line", + "line": "'changeme' is the password", "line_num": 0, "path": "./tests/samples/changeme_key.jks", - "info": "FILE|JKS:'changeme' - has keys", - "value": null, - "value_start": -2, - "value_end": -2, + "info": "FILE|JKS:sensitive data", + "value": "changeme", + "value_start": 1, + "value_end": 9, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { - "iterator": null, - "entropy": null, - "valid": null + "iterator": "BASE64STDPAD_CHARS", + "entropy": 2.75, + "valid": false } } ] diff --git a/tests/data/doc.json b/tests/data/doc.json index f2adaf3b5..b39b43888 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -1118,76 +1118,76 @@ ] }, { - "rule": "Dummy candidate", + "rule": "Java Key Storage", "severity": "info", - "confidence": "moderate", + "confidence": "weak", "ml_probability": null, "line_data_list": [ { - "line": "dummy line", + "line": "'changeit' is the password", "line_num": 0, "path": "./tests/samples/changeit_crt.jks", - "info": "FILE|JKS:'changeit' - default password", - "value": null, - "value_start": -2, - "value_end": -2, + "info": "FILE|JKS:default password", + "value": "changeit", + "value_start": 1, + "value_end": 9, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { - "iterator": null, - "entropy": null, - "valid": null + "iterator": "BASE64STDPAD_CHARS", + "entropy": 3.0, + "valid": false } } ] }, { - "rule": "Dummy candidate", + "rule": "PKCS12", "severity": "info", - "confidence": "moderate", + "confidence": "weak", "ml_probability": null, "line_data_list": [ { - "line": "dummy line", + "line": "'changeit' is the password", "line_num": 0, "path": "./tests/samples/changeit_crt.pkcs12", - "info": "FILE|PKCS12:'changeit' - default password PKCS12", - "value": null, - "value_start": -2, - "value_end": -2, + "info": "FILE|PKCS12:'changeit' default password", + "value": "changeit", + "value_start": 1, + "value_end": 9, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { - "iterator": null, - "entropy": null, - "valid": null + "iterator": "BASE64STDPAD_CHARS", + "entropy": 3.0, + "valid": false } } ] }, { - "rule": "Dummy candidate", + "rule": "Java Key Storage", "severity": "info", - "confidence": "moderate", + "confidence": "weak", "ml_probability": null, "line_data_list": [ { - "line": "dummy line", + "line": "'changeme' is the password", "line_num": 0, "path": "./tests/samples/changeme_key.jks", - "info": "FILE|JKS:'changeme' - has keys", - "value": null, - "value_start": -2, - "value_end": -2, + "info": "FILE|JKS:sensitive data", + "value": "changeme", + "value_start": 1, + "value_end": 9, "variable": null, "variable_start": -2, "variable_end": -2, "entropy_validation": { - "iterator": null, - "entropy": null, - "valid": null + "iterator": "BASE64STDPAD_CHARS", + "entropy": 2.75, + "valid": false } } ] diff --git a/tests/file_handler/test_file_path_extractor.py b/tests/file_handler/test_file_path_extractor.py index 0b3b09335..cc01e7d47 100644 --- a/tests/file_handler/test_file_path_extractor.py +++ b/tests/file_handler/test_file_path_extractor.py @@ -31,6 +31,7 @@ def setUp(self): }, "source_ext": [], "source_quote_ext": [], + "bruteforce_list": [], "check_for_literals": [], "use_filters": False, "line_data_output": [], diff --git a/tests/ml_model/test_ml_validator.py b/tests/ml_model/test_ml_validator.py index 5ee43a507..261f1af5c 100644 --- a/tests/ml_model/test_ml_validator.py +++ b/tests/ml_model/test_ml_validator.py @@ -39,8 +39,7 @@ def validate(self, _candidate: Candidate) -> Tuple[bool, float]: return is_cred_batch[0], probability_batch[0] def test_ml_validator_simple_n(self): - candidate = Candidate.get_dummy_candidate(self.config, "main.py", ".py", "info") - candidate.rule_name = "Password" + candidate = Candidate.get_dummy_candidate(self.config, "main.py", ".py", "info", "Password") candidate.line_data_list[0].line = 'password="Ahga%$FiQ@Ei8"' candidate.line_data_list[0].variable = "password" candidate.line_data_list[0].value_start = 16 @@ -66,8 +65,7 @@ def test_ml_validator_simple_n(self): self.assertAlmostEqual(0.9999191761016846, probability, delta=NEGLIGIBLE_ML_THRESHOLD) def test_ml_validator_auxiliary_p(self): - candidate = Candidate.get_dummy_candidate(self.config, "mycred", "", "") - candidate.rule_name = "Secret" + candidate = Candidate.get_dummy_candidate(self.config, "mycred", "", "", "Secret") candidate.line_data_list[0].line = "secret=238475614782" candidate.line_data_list[0].variable = "secret" candidate.line_data_list[0].value_start = 7 @@ -103,8 +101,7 @@ def test_ml_validator_auxiliary_p(self): self.assertAlmostEqual(0.9979498386383057, probability_batch[0], delta=NEGLIGIBLE_ML_THRESHOLD) def test_ml_validator_auxiliary_n(self): - candidate = Candidate.get_dummy_candidate(self.config, "secret", "", "") - candidate.rule_name = "Secret" + candidate = Candidate.get_dummy_candidate(self.config, "secret", "", "", "Secret") candidate.line_data_list[0].line = "secret=bace4d19-dead-beef-cafe-9129474bcd81" candidate.line_data_list[0].variable = "secret" candidate.line_data_list[0].value_start = 7 @@ -129,24 +126,22 @@ def test_ml_validator_auxiliary_n(self): self.assertAlmostEqual(0.9900616407394409, probability_batch[0], delta=NEGLIGIBLE_ML_THRESHOLD) def test_extract_features_n(self): - candidate1 = Candidate.get_dummy_candidate(self.config, "___.x3", ".x3", "") + candidate1 = Candidate.get_dummy_candidate(self.config, "___.x3", ".x3", "", "") candidate1.line_data_list[0].line = '' candidate1.line_data_list[0].variable = '' candidate1.line_data_list[0].value_start = 0 candidate1.line_data_list[0].value_end = 0 candidate1.line_data_list[0].value = '' - candidate1.rule_name = '' features1 = self.ml_validator.extract_features([candidate1]) self.assertEqual(0, np.count_nonzero(features1)) def test_extract_features_p(self): - candidate1 = Candidate.get_dummy_candidate(self.config, "???.py", ".py", "") + candidate1 = Candidate.get_dummy_candidate(self.config, "???.py", ".py", "", "???????") candidate1.line_data_list[0].line = '??????????????????????????' candidate1.line_data_list[0].variable = "???????" candidate1.line_data_list[0].value_start = 2 candidate1.line_data_list[0].value_end = 6 candidate1.line_data_list[0].value = "???????????????????" - candidate1.rule_name = "???????" features1_1 = self.ml_validator.extract_features([candidate1]) self.assertEqual(7, np.count_nonzero(features1_1)) @@ -172,13 +167,12 @@ def test_extract_features_p(self): self.assertEqual(15 + 1, np.count_nonzero(features3)) def testVariableNotAllowedPatternCheck_n(self): - candidate1 = Candidate.get_dummy_candidate(self.config, "???.py", ".py", "") + candidate1 = Candidate.get_dummy_candidate(self.config, "???.py", ".py", "", "##########") candidate1.line_data_list[0].line = '?????????????:!!!!!!!!!!!!!' candidate1.line_data_list[0].variable = "?????????????" candidate1.line_data_list[0].value_start = 14 candidate1.line_data_list[0].value_end = 27 candidate1.line_data_list[0].value = "!!!!!!!!!!!!!" - candidate1.rule_name = "##########" features1_1 = self.ml_validator.extract_features([candidate1]) self.assertEqual(6, np.count_nonzero(features1_1)) diff --git a/tests/test_main.py b/tests/test_main.py index e011702b0..b2cdbf55f 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -21,13 +21,13 @@ from credsweeper.__main__ import EXIT_FAILURE, EXIT_SUCCESS from credsweeper.app import APP_PATH from credsweeper.app import CredSweeper -from credsweeper.common.constants import ThresholdPreset, Severity +from credsweeper.common.constants import ThresholdPreset, Severity, MIN_DATA_LEN from credsweeper.file_handler.abstract_provider import AbstractProvider from credsweeper.file_handler.files_provider import FilesProvider from credsweeper.file_handler.text_content_provider import TextContentProvider from credsweeper.utils import Util from tests import SAMPLES_CRED_COUNT, SAMPLES_CRED_LINE_COUNT, SAMPLES_POST_CRED_COUNT, SAMPLES_PATH, TESTS_PATH, \ - SAMPLES_IN_DEEP_1, SAMPLES_IN_DEEP_3, SAMPLES_IN_DEEP_2, NEGLIGIBLE_ML_THRESHOLD + SAMPLES_IN_DEEP_1, SAMPLES_IN_DEEP_3, SAMPLES_IN_DEEP_2, NEGLIGIBLE_ML_THRESHOLD, AZ_DATA from tests.data import DATA_TEST_CFG @@ -334,21 +334,26 @@ def test_multi_jobs_p(self) -> None: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # - def test_find_by_ext_p(self) -> None: - # test for finding files by extension - content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH]) - cred_sweeper = CredSweeper(find_by_ext=True) - cred_sweeper.run(content_provider=content_provider) - self.assertEqual(SAMPLES_POST_CRED_COUNT + 3, len(cred_sweeper.credential_manager.get_credentials())) - - # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # - def test_find_by_ext_n(self) -> None: # test for finding files by extension - content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH]) - cred_sweeper = CredSweeper(find_by_ext=False) - cred_sweeper.run(content_provider=content_provider) - self.assertEqual(SAMPLES_POST_CRED_COUNT, len(cred_sweeper.credential_manager.get_credentials())) + with tempfile.TemporaryDirectory() as tmp_dir: + ext_list = [".pem", ".cer", ".csr", ".der", ".pfx", ".p12", ".key", ".jks"] + for ext in ext_list: + with open(os.path.join(tmp_dir, f"dummy{ext}"), "wb") as f: + f.write(b'\x00' * MIN_DATA_LEN) + with open(os.path.join(tmp_dir, f"short{ext}"), "wb") as f: + f.write(b'\x00' * (MIN_DATA_LEN - 1)) + with open(os.path.join(tmp_dir, f"dummy{ext}.bak"), "wb") as f: + f.write(AZ_DATA) + content_provider: AbstractProvider = FilesProvider([tmp_dir]) + cred_sweeper = CredSweeper(find_by_ext=True) + cred_sweeper.run(content_provider=content_provider) + credentials = cred_sweeper.credential_manager.get_credentials() + self.assertEqual(len(ext_list), len(credentials)) + self.assertTrue(all("Suspicious File Extension" == x.rule_name for x in credentials)) + # aux checks - only 1/3 of all files will be found by extension + test_files_number = len(os.listdir(tmp_dir)) + self.assertEqual(len(ext_list), test_files_number // 3) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #