Skip to content

Commit

Permalink
Dummy candidate refactoring (Samsung#665)
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek authored Jan 30, 2025
1 parent 4680507 commit 0c8772a
Show file tree
Hide file tree
Showing 15 changed files with 152 additions and 131 deletions.
2 changes: 1 addition & 1 deletion credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@
'__version__'
]

__version__ = "1.10.3"
__version__ = "1.10.4"
3 changes: 2 additions & 1 deletion credsweeper/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,8 @@ def file_scan(self, content_provider: Union[DiffContentProvider, TextContentProv
if FilePathExtractor.is_find_by_ext_file(self.config, content_provider.file_type):
# Skip scanning the file and create a fake candidate because the extension is suspicious
dummy_candidate = Candidate.get_dummy_candidate(self.config, content_provider.file_path,
content_provider.file_type, content_provider.info)
content_provider.file_type, content_provider.info,
FilePathExtractor.FIND_BY_EXT_RULE)
candidates.append(dummy_candidate)

else:
Expand Down
1 change: 1 addition & 0 deletions credsweeper/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self, config: Dict[str, Any]) -> None:
self.source_extensions: List[str] = config["source_ext"]
self.source_quote_ext: List[str] = config["source_quote_ext"]
self.find_by_ext_list: List[str] = config["find_by_ext_list"]
self.bruteforce_list: List[str] = config["bruteforce_list"]
self.check_for_literals: bool = config["check_for_literals"]
self.not_allowed_path_pattern = re.compile(f"{Util.get_regex_combine_or(self.NOT_ALLOWED_PATH)}",
flags=re.IGNORECASE)
Expand Down
12 changes: 7 additions & 5 deletions credsweeper/credentials/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ class Candidate:
use_ml: Whether the candidate should be validated with ML. If not - ml_probability is set None
"""

DUMMY_PATTERN = re.compile(r"^")

def __init__(self,
line_data_list: List[LineData],
patterns: List[re.Pattern],
Expand Down Expand Up @@ -128,12 +130,12 @@ def to_dict_list(self, hashed: bool, subtext: bool) -> List[dict]:
return reported_output

@classmethod
def get_dummy_candidate(cls, config: Config, file_path: str, file_type: str, info: str):
def get_dummy_candidate(cls, config: Config, file_path: str, file_type: str, info: str, rule_name: str):
"""Create dummy instance to use in searching file by extension"""
return cls( #
line_data_list=[LineData(config, "dummy line", -1, 0, file_path, file_type, info, re.compile(r".*"))],
patterns=[re.compile(r".*")], #
rule_name="Dummy candidate", #
line_data_list=[LineData(config, '', -1, 0, file_path, file_type, info, cls.DUMMY_PATTERN)],
patterns=[cls.DUMMY_PATTERN], #
rule_name=rule_name, #
severity=Severity.INFO, #
config=config, #
confidence=Confidence.MODERATE)
confidence=Confidence.WEAK)
3 changes: 2 additions & 1 deletion credsweeper/deep_scanner/deep_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,8 @@ def recursive_scan(
if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
# Skip scanning the file and make a fake candidate because the extension is suspicious
dummy_candidate = Candidate.get_dummy_candidate(self.config, data_provider.file_path,
data_provider.file_type, data_provider.info)
data_provider.file_type, data_provider.info,
FilePathExtractor.FIND_BY_EXT_RULE)
candidates.append(dummy_candidate)
else:
fallback = True
Expand Down
25 changes: 16 additions & 9 deletions credsweeper/deep_scanner/jks_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,25 @@ def data_scan(
recursive_limit_size: int) -> Optional[List[Candidate]]:
"""Tries to scan JKS to open with standard password"""
candidates = []
for pw_probe in ["", "changeit", "changeme"]:
for pw_probe in self.config.bruteforce_list:
try:
keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True)
if keystore.private_keys or keystore.secret_keys:
candidate = Candidate.get_dummy_candidate(self.config, data_provider.file_path,
data_provider.file_type,
f"{data_provider.info}|JKS:'{pw_probe}' - has keys")
else:
candidate = Candidate.get_dummy_candidate(
self.config, data_provider.file_path, data_provider.file_type,
f"{data_provider.info}|JKS:'{pw_probe}' - default password")
# the password probe has passed, it will be the value
info = (f"{data_provider.info}|JKS:"
f"{'sensitive data' if keystore.private_keys or keystore.secret_keys else 'default password'}")
candidate = Candidate.get_dummy_candidate(
self.config, #
data_provider.file_path, #
data_provider.file_type, #
info, #
"Java Key Storage")
value = pw_probe or "<EMPTY PASSWORD>"
candidate.line_data_list[0].line = f"'{value}' is the password"
candidate.line_data_list[0].value = pw_probe or "<EMPTY PASSWORD>"
candidate.line_data_list[0].value_start = 1
candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value)
candidates.append(candidate)
break
except Exception as jks_exc:
logger.debug(f"{data_provider.file_path}:{pw_probe}:{jks_exc}")
return candidates
33 changes: 18 additions & 15 deletions credsweeper/deep_scanner/pkcs12_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,27 @@ def data_scan(
recursive_limit_size: int) -> Optional[List[Candidate]]:
"""Tries to scan PKCS12 to open with standard password"""
candidates = []
for pw_probe in [b"", b"changeit", b"changeme"]:
for pw_probe in self.config.bruteforce_list:
try:
(private_key, certificate, additional_certificates) \
= cryptography.hazmat.primitives.serialization.pkcs12.load_key_and_certificates(data_provider.data,
pw_probe)
if private_key:
candidate = Candidate.get_dummy_candidate(
self.config, #
data_provider.file_path, #
data_provider.file_type, #
f"{data_provider.info}|PKCS12:'{pw_probe.decode()}' - has keys PKCS12")
else:
candidate = Candidate.get_dummy_candidate(
self.config, #
data_provider.file_path, #
data_provider.file_type, #
f"{data_provider.info}|PKCS12:'{pw_probe.decode()}' - default password PKCS12")
pw_probe.encode())
# the password probe has passed, it will be the value
value = pw_probe or "<EMPTY PASSWORD>"
info = (f"{data_provider.info}|PKCS12:"
f"'{value}' {'sensitive data' if private_key else 'default password'}")
candidate = Candidate.get_dummy_candidate(
self.config, #
data_provider.file_path, #
data_provider.file_type, #
info, #
"PKCS12")
candidate.line_data_list[0].line = f"'{value}' is the password"
candidate.line_data_list[0].value = value
candidate.line_data_list[0].value_start = 1
candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value)
candidates.append(candidate)
break
except Exception as pkcs_exc:
logger.debug(f"{data_provider.file_path}:{pw_probe.decode()}:{pkcs_exc}")
logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
return candidates
21 changes: 11 additions & 10 deletions credsweeper/file_handler/file_path_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from git import InvalidGitRepositoryError, NoSuchPathError, Repo

from credsweeper.common.constants import MIN_DATA_LEN
from credsweeper.config import Config
from credsweeper.utils import Util

Expand All @@ -15,6 +16,7 @@
class FilePathExtractor:
"""Util class to browse files in directories"""

FIND_BY_EXT_RULE = "Suspicious File Extension"
located_repos: Dict[Path, Repo] = {}

@staticmethod
Expand Down Expand Up @@ -56,11 +58,9 @@ def get_file_paths(config: Config, path: Union[str, Path]) -> List[str]:
for dirpath, _, filenames in os.walk(path):
for filename in filenames:
file_path = os.path.join(f"{dirpath}", f"{filename}")
if FilePathExtractor.check_exclude_file(config, file_path) \
or os.path.islink(file_path) \
or FilePathExtractor.check_file_size(config, file_path):
if FilePathExtractor.check_exclude_file(config, file_path) or os.path.islink(file_path):
continue
if os.path.isfile(file_path) and 0 < os.path.getsize(file_path):
if os.path.isfile(file_path) and not FilePathExtractor.check_file_size(config, file_path):
file_paths.append(file_path)
else:
pass # symbolic links and so on
Expand Down Expand Up @@ -152,18 +152,15 @@ def check_exclude_file(config: Config, path: str) -> bool:
def check_file_size(config: Config, reference: Union[str, Path, io.BytesIO, Tuple[Union[str, Path],
io.BytesIO]]) -> bool:
"""
Checks whether the file is over the size limit from configuration
Checks whether the file is over the size limit from configuration or smaller than MIN_DATA_LEN
Args:
config: Config
reference: various types of a file reference
Return:
True when the file is oversize
True when the file is oversized, smaller than MIN_DATA_LEN, or the reference type is unsupported
"""
if config.size_limit is None:
return False
file_size = None
path = reference[1] if isinstance(reference, tuple) else reference
if isinstance(path, str) or isinstance(path, Path):
file_size = os.path.getsize(path)
Expand All @@ -174,8 +171,12 @@ def check_file_size(config: Config, reference: Union[str, Path, io.BytesIO, Tupl
path.seek(current_pos, io.SEEK_SET)
else:
logger.error(f"Unknown path type: {path}")
return True

if file_size and file_size > config.size_limit:
if MIN_DATA_LEN > file_size:
logger.debug(f"Size ({file_size}) of the file '{path}' is too small")
return True
elif isinstance(config.size_limit, int) and config.size_limit < file_size:
logger.warning(f"Size ({file_size}) of the file '{path}' is over limit ({config.size_limit})")
return True

Expand Down
5 changes: 5 additions & 0 deletions credsweeper/secret/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@
".key",
".jks"
],
"bruteforce_list": [
"",
"changeit",
"changeme"
],
"check_for_literals": true,
"min_pattern_value_length": 12,
"min_keyword_value_length": 4,
Expand Down
4 changes: 2 additions & 2 deletions tests/credentials/test_augment_candidates.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class TestAugmentCandidates(unittest.TestCase):

def test_augment_candidates_p(self):
with patch.object(LineData, LineData.initialize.__name__):
candidate = Candidate.get_dummy_candidate(None, "file_path", "file_type", "info")
candidate = Candidate.get_dummy_candidate(None, "file_path", "file_type", "info", "rule_name")
candidate.line_data_list[0].value = AZ_STRING
candidates = [candidate]
additional_candidates = copy.deepcopy(candidates)
Expand All @@ -27,7 +27,7 @@ def test_augment_candidates_p(self):

def test_augment_candidates_n(self):
with patch.object(LineData, LineData.initialize.__name__):
candidate = Candidate.get_dummy_candidate(None, "file_path", "file_type", "info")
candidate = Candidate.get_dummy_candidate(None, "file_path", "file_type", "info", "rule_name")
candidate.line_data_list[0].value = AZ_STRING
candidates = [candidate]

Expand Down
60 changes: 30 additions & 30 deletions tests/data/depth_3.json
Original file line number Diff line number Diff line change
Expand Up @@ -1378,76 +1378,76 @@
]
},
{
"rule": "Dummy candidate",
"rule": "Java Key Storage",
"severity": "info",
"confidence": "moderate",
"confidence": "weak",
"ml_probability": null,
"line_data_list": [
{
"line": "dummy line",
"line": "'changeit' is the password",
"line_num": 0,
"path": "./tests/samples/changeit_crt.jks",
"info": "FILE|JKS:'changeit' - default password",
"value": null,
"value_start": -2,
"value_end": -2,
"info": "FILE|JKS:default password",
"value": "changeit",
"value_start": 1,
"value_end": 9,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": null,
"entropy": null,
"valid": null
"iterator": "BASE64STDPAD_CHARS",
"entropy": 3.0,
"valid": false
}
}
]
},
{
"rule": "Dummy candidate",
"rule": "PKCS12",
"severity": "info",
"confidence": "moderate",
"confidence": "weak",
"ml_probability": null,
"line_data_list": [
{
"line": "dummy line",
"line": "'changeit' is the password",
"line_num": 0,
"path": "./tests/samples/changeit_crt.pkcs12",
"info": "FILE|PKCS12:'changeit' - default password PKCS12",
"value": null,
"value_start": -2,
"value_end": -2,
"info": "FILE|PKCS12:'changeit' default password",
"value": "changeit",
"value_start": 1,
"value_end": 9,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": null,
"entropy": null,
"valid": null
"iterator": "BASE64STDPAD_CHARS",
"entropy": 3.0,
"valid": false
}
}
]
},
{
"rule": "Dummy candidate",
"rule": "Java Key Storage",
"severity": "info",
"confidence": "moderate",
"confidence": "weak",
"ml_probability": null,
"line_data_list": [
{
"line": "dummy line",
"line": "'changeme' is the password",
"line_num": 0,
"path": "./tests/samples/changeme_key.jks",
"info": "FILE|JKS:'changeme' - has keys",
"value": null,
"value_start": -2,
"value_end": -2,
"info": "FILE|JKS:sensitive data",
"value": "changeme",
"value_start": 1,
"value_end": 9,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": null,
"entropy": null,
"valid": null
"iterator": "BASE64STDPAD_CHARS",
"entropy": 2.75,
"valid": false
}
}
]
Expand Down
Loading

0 comments on commit 0c8772a

Please sign in to comment.