diff --git a/detect_secrets/core/baseline.py b/detect_secrets/core/baseline.py
index 41692f266..3fa0481da 100644
--- a/detect_secrets/core/baseline.py
+++ b/detect_secrets/core/baseline.py
@@ -75,7 +75,7 @@ def initialize(
         elif os.path.isfile(element):
             files_to_scan.append(element)
         else:
-            log.error('detect-secrets: %s: No such file or directory', element)
+            log.error('detect-secrets: "%s": No such file or directory', element)
 
     if not files_to_scan:
         return output
@@ -262,7 +262,7 @@ def trim_baseline_of_removed_secrets(results, baseline, filelist):
     return updated
 
 
-def merge_baseline(old_baseline, new_baseline):
+def merge_baseline(old_baseline, new_baseline, keep_old_results=False):
     """Updates baseline to be compatible with the latest version of
     detect-secrets.
 
@@ -283,13 +283,15 @@ def merge_baseline(old_baseline, new_baseline):
     new_baseline['results'] = merge_results(
         old_baseline['results'],
         new_baseline['results'],
+        keep_old_results,
     )
 
     return new_baseline
 
 
-def merge_results(old_results, new_results):
+def merge_results(old_results, new_results, keep_old_results=False):
     """Update results in new baseline with audit information from old baseline.
 
-    Secrets only appear in old baseline are ignored.
+    Secrets that only appear in the old baseline are ignored, unless
+    keep_old_results is True, in which case they are carried over unchanged.
 
@@ -302,16 +304,23 @@ def merge_results(old_results, new_results):
     :type new_results: dict
     :param new_results: results to replaced status quo
 
+    :type keep_old_results: bool
+    :param keep_old_results: if True, keep files that only appear in old_results
+
     :rtype: dict
     """
     for filename, old_secrets in old_results.items():
         if filename not in new_results:
+            if keep_old_results:
+                # Carry over every secret of a file the new scan did not
+                # cover; copy the list so the old baseline is not aliased.
+                new_results[filename] = list(old_secrets)
             continue
 
         old_secrets_mapping = {}
         for old_secret in old_secrets:
             old_secrets_mapping[old_secret['hashed_secret']] = old_secret
 
         for new_secret in new_results[filename]:
             if new_secret['hashed_secret'] not in old_secrets_mapping:
                 # We don't join the two secret sets, because if the newer
diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py
index 5d532f3d0..b1dc49cc6 100644
--- a/detect_secrets/core/usage.py
+++ b/detect_secrets/core/usage.py
@@ -645,6 +645,8 @@ def add_arguments(self):
         self._add_opt_in_options()
         self._add_keyword_exclude()
         self._add_ghe_instance()
+        self._add_keep_old_results()
+        self._add_path_file()
 
         return self
 
@@ -788,3 +790,20 @@ def _add_ghe_instance(self):
             type=str,
             help='Instance URL for GHE i.e. github.ibm.com',
         )
+
+    def _add_keep_old_results(self):
+        self.parser.add_argument(
+            '--keep-old-results',
+            action='store_true',
+            help='Keep files from old result that don\'t appear in the current scan',
+        )
+
+    def _add_path_file(self):
+        self.parser.add_argument(
+            '--path-file',
+            type=str,
+            help=(
+                'Read paths from this file. '
+                'Paths given on the command line are then ignored.'
+            ),
+        )
diff --git a/detect_secrets/main.py b/detect_secrets/main.py
index 7bbc8f8f6..b62578677 100644
--- a/detect_secrets/main.py
+++ b/detect_secrets/main.py
@@ -1,3 +1,4 @@
+import errno
 import json
 import sys
 
@@ -178,13 +179,23 @@ def _perform_scan(args, plugins, automaton, word_list_hash):
     if args.import_filename:
         _add_baseline_to_exclude_files(args)
 
+    paths = args.path
+    if args.path_file:
+        try:
+            with open(args.path_file) as file:
+                # Skip blank lines so they are not reported as missing paths.
+                paths = [line.rstrip() for line in file if line.strip()]
+        except FileNotFoundError:
+            print('Path File not found: {}'.format(args.path_file), file=sys.stderr)
+            sys.exit(errno.ENOENT)
+
     new_baseline = baseline.initialize(
         plugins=plugins,
         exclude_files_regex=args.exclude_files,
         exclude_lines_regex=args.exclude_lines,
         word_list_file=args.word_list_file,
         word_list_hash=word_list_hash,
-        path=args.path,
+        path=paths,
         should_scan_all_files=args.all_files,
         output_raw=args.output_raw,
         output_verified_false=args.output_verified_false,
@@ -195,6 +206,7 @@ def _perform_scan(args, plugins, automaton, word_list_hash):
         new_baseline = baseline.merge_baseline(
             old_baseline,
             new_baseline,
+            args.keep_old_results,
         )
 
     return new_baseline
diff --git a/requirements-dev.txt b/requirements-dev.txt
index d80acb49d..3b356c946 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -10,7 +10,7 @@ pytest
 pyyaml
 responses
 tox-pip-extensions
-tox>=3.8
+tox<4.0
 unidiff
 ibm_db
 boxsdk[jwt]
diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py
index efd7c7ea2..00bef79bf 100644
--- a/tests/core/baseline_test.py
+++ b/tests/core/baseline_test.py
@@ -613,6 +613,24 @@ def test_old_results_have_shifted_subset(self):
             ],
         }
 
+    def test_old_results_completely_kept(self):
+        secretA = self.get_secret()
+        secretB = self.get_secret()
+        secretC = self.get_secret()
+
+        assert merge_results(
+            {
+                'filenameA': [secretA, secretC],
+            },
+            {
+                'filenameB': [secretB],
+            },
+            True,
+        ) == {
+            'filenameA': [secretA, secretC],
+            'filenameB': [secretB],
+        }
+
     def test_old_results_completely_overriden(self):
         secretA = self.get_secret()
         secretB = self.get_secret()
diff --git a/tests/main_test.py b/tests/main_test.py
index 0b0368b24..1b944b532 100644
--- a/tests/main_test.py
+++ b/tests/main_test.py
@@ -226,6 +226,7 @@ def test_reads_from_stdin(self, mock_merge_baseline):
         mock_merge_baseline.assert_called_once_with(
             {'key': 'value'},
             Any(dict),
+            False,
         )
 
     def test_reads_old_baseline_from_file(self, mock_merge_baseline):
@@ -243,6 +244,7 @@ def test_reads_old_baseline_from_file(self, mock_merge_baseline):
         mock_merge_baseline.assert_called_once_with(
             {'key': 'value'},
             Any(dict),
+            False,
         )
 
     def test_reads_non_existed_baseline_from_file(