From 034f4846137a1eab6e9ca02d7d684ec24ae94b59 Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Fri, 8 Dec 2023 16:42:38 -0500 Subject: [PATCH 01/11] --diff-branch --- detect_secrets/core/baseline.py | 51 +++++++++++++++++++++++++++++++++ detect_secrets/core/usage.py | 14 ++++++++- detect_secrets/main.py | 3 +- docs/cheat-sheet.md | 3 ++ 4 files changed, 69 insertions(+), 2 deletions(-) diff --git a/detect_secrets/core/baseline.py b/detect_secrets/core/baseline.py index 41692f266..d2819e535 100644 --- a/detect_secrets/core/baseline.py +++ b/detect_secrets/core/baseline.py @@ -24,6 +24,7 @@ def initialize( output_raw=False, output_verified_false=False, suppress_unscannable_file_warnings=False, + diff_branch=None, ): """Scans the entire codebase for secrets, and returns a SecretsCollection object. @@ -49,6 +50,9 @@ def initialize( :type suppress_unscannable_file_warnings boolean :param suppress_unscannable_file_warnings: whether or not to suppress unscannable file warnings + :type diff_branch: str|None + :param diff_branch: optional name of branch to check for differences against in determining files to scan. + :rtype: SecretsCollection """ output = SecretsCollection( @@ -68,6 +72,10 @@ def initialize( files_to_scan.extend( _get_files_recursively(element), ) + elif diff_branch != None: + files_to_scan.extend( + _get_git_tracked_diff_files(element,diff_branch), + ) else: files_to_scan.extend( _get_git_tracked_files(element), @@ -380,6 +388,49 @@ def _get_git_tracked_files(rootdir='.'): return output +def _get_git_tracked_diff_files(rootdir='.',diff_branch=None): + """On incremental builds it is only necessary to scan the files that + have changed. This will allow a scan of files that have differences + from the named branch. The filter does not list filess that are + deleted because it is impossible to scan them now. + + :type rootdir: str + :param rootdir: root directory of where you want to list files from + + :type diff_branch: str + :param diff_branch: name of branch to check diferences from. + 'test' would find files with differences between the current branch + and the local test branch. + 'origin/main' would find files with differences between the current + branch and the remote main branch. + + :rtype: set|None + :returns: filepaths to files with differences frm the diff_branch + which git currently tracks (locally) + """ + output = [] + try: + with open(os.devnull, 'w') as fnull: + git_files = subprocess.check_output( + [ + 'git', + '-C', rootdir, + 'diff', + '--name-only', + '--diff-filter=ACMRTUX', + diff_branch, + ], + stderr=fnull, + ) + for filename in git_files.decode('utf-8').split(): + relative_path = util.get_relative_path_if_in_cwd(rootdir, filename) + if relative_path: + output.append(relative_path) + except subprocess.CalledProcessError: + pass + return output + + def _get_files_recursively(rootdir): """Sometimes, we want to use this tool with non-git repositories. This function allows us to do so. diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py index 5d532f3d0..cfea3a18e 100644 --- a/detect_secrets/core/usage.py +++ b/detect_secrets/core/usage.py @@ -205,7 +205,8 @@ def add_arguments(self): self._add_initialize_baseline_argument()\ ._add_adhoc_scanning_argument()\ ._add_output_raw_argument()\ - ._add_suppress_unscannable_file_warnings() + ._add_suppress_unscannable_file_warnings()\ + ._add_diff_branch()\ PluginOptions(self.parser).add_arguments() @@ -289,6 +290,17 @@ def _add_suppress_unscannable_file_warnings(self): add_suppress_unscannable_file_warnings(self.parser) return self + def _add_diff_branch(self): + self.parser.add_argument( + '--diff-branch', + type=str, + help=( + 'Scan only files that are tracked to git containing differences from the named branch.', + ), + dest='diff_branch', + ) + return self + class AuditOptions: def __init__(self, subparser): diff --git a/detect_secrets/main.py b/detect_secrets/main.py index 7bbc8f8f6..8dd32a37c 100644 --- a/detect_secrets/main.py +++ b/detect_secrets/main.py @@ -189,6 +189,7 @@ def _perform_scan(args, plugins, automaton, word_list_hash): output_raw=args.output_raw, output_verified_false=args.output_verified_false, suppress_unscannable_file_warnings=args.suppress_unscannable_file_warnings, + diff_branch=args.diff_branch, ).format_for_baseline_output() if old_baseline: @@ -206,7 +207,7 @@ def _get_existing_baseline(import_filename): try: return _read_from_file(import_filename[0]) except FileNotFoundError as fnf_error: - if fnf_error.errno == 2: # create new baseline if not existed + if fnf_error.errno == 2 or fnf_error.errno == 129: # create new baseline if not existed, 129 is for z/OS return None else: # throw exception for other cases print( diff --git a/docs/cheat-sheet.md b/docs/cheat-sheet.md index 178bac794..e46a9007c 100644 --- a/docs/cheat-sheet.md +++ b/docs/cheat-sheet.md @@ -76,6 +76,9 @@ detect-secrets scan file1 file2 # Scan all files except for .gitignore detect-secrets scan --all-files + +# Scan only files that are tracked to git containing differences from the named branch +detect-secrets scan --diff-branch diff_branch_name ``` ### Ad-hoc scan on a single string From f31cc21899090182de12f0ceec3e06ad9d4c2c6e Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Fri, 8 Dec 2023 17:45:44 -0500 Subject: [PATCH 02/11] tests --- tests/core/baseline_test.py | 10 ++++++++++ tests/main_test.py | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index 343560c3b..5142db39f 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -33,12 +33,14 @@ def get_results( path=['./test_data/files'], exclude_files_regex=None, scan_all_files=False, + diff_branch=None, ): return baseline.initialize( path, self.plugins, exclude_files_regex=exclude_files_regex, should_scan_all_files=scan_all_files, + diff_branch=diff_branch, ).json() @pytest.mark.parametrize( @@ -184,6 +186,14 @@ def test_scan_all_files_with_bad_symlinks(self): ) assert len(results.keys()) == 0 + def test_diff_branch(self): + results = self.get_results(diff_branch="staging") + + # No expected results, because differences + assert not results + + # more tests for diff-branch are difficult to concieve + # maybe a new branch which introduces secrets? class TestGetSecretsNotInBaseline: diff --git a/tests/main_test.py b/tests/main_test.py index 0b0368b24..fc2b5bd0c 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -96,6 +96,7 @@ def test_scan_basic(self, mock_baseline_initialize): word_list_file=None, word_list_hash=None, suppress_unscannable_file_warnings=False, + diff_branch=None, ) def test_scan_with_rootdir(self, mock_baseline_initialize): @@ -113,6 +114,7 @@ def test_scan_with_rootdir(self, mock_baseline_initialize): word_list_file=None, word_list_hash=None, suppress_unscannable_file_warnings=False, + diff_branch=None, ) def test_scan_with_exclude_args(self, mock_baseline_initialize): @@ -132,6 +134,7 @@ def test_scan_with_exclude_args(self, mock_baseline_initialize): word_list_file=None, word_list_hash=None, suppress_unscannable_file_warnings=False, + diff_branch=None, ) @pytest.mark.parametrize( @@ -217,6 +220,25 @@ def test_scan_with_all_files_flag(self, mock_baseline_initialize): word_list_file=None, word_list_hash=None, suppress_unscannable_file_warnings=False, + diff_branch=None, + ) + + def test_scan_with_diff_branch(self, mock_baseline_initialize): + with mock_stdin(): + assert main('scan --diff-branch staging'.split()) == 0 + + mock_baseline_initialize.assert_called_once_with( + plugins=Any(tuple), + exclude_files_regex=None, + exclude_lines_regex=None, + path='.', + should_scan_all_files=False, + output_raw=False, + output_verified_false=False, + word_list_file=None, + word_list_hash=None, + suppress_unscannable_file_warnings=False, + diff_branch='staging', ) def test_reads_from_stdin(self, mock_merge_baseline): @@ -274,6 +296,7 @@ def test_reads_non_existed_baseline_from_file( word_list_file=None, word_list_hash=None, suppress_unscannable_file_warnings=False, + diff_branch=None, ) mock_merge_baseline.assert_not_called() From 1854c2033a043caf0726ef4bba3bf4db98b35230 Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Mon, 11 Dec 2023 16:48:08 -0500 Subject: [PATCH 03/11] https://docs.pytest.org/en/stable/deprecations.html#support-for-tests-written-for-nose --- tests/__init__.py | 0 tests/core/__init__.py | 0 tests/core/baseline_test.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 tests/__init__.py create mode 100644 tests/core/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index 5142db39f..585f9cf50 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -22,7 +22,7 @@ class TestInitializeBaseline: - def setup(self): + def setup_method(self): self.plugins = ( Base64HighEntropyString(4.5), HexHighEntropyString(3), From 3c5fa4c0dde3d1f2123fb7cbeacb8206ff81f020 Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Mon, 11 Dec 2023 17:42:12 -0500 Subject: [PATCH 04/11] test --- tests/core/baseline_test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index 585f9cf50..2d2ef3402 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -187,14 +187,11 @@ def test_scan_all_files_with_bad_symlinks(self): assert len(results.keys()) == 0 def test_diff_branch(self): - results = self.get_results(diff_branch="staging") + results = self.get_results(diff_branch="more_errors") # No expected results, because differences assert not results - # more tests for diff-branch are difficult to concieve - # maybe a new branch which introduces secrets? - class TestGetSecretsNotInBaseline: def test_nothing_new(self): From 574ec1f27396c6486ff021c21cc9c309edb702f6 Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Mon, 11 Dec 2023 17:43:37 -0500 Subject: [PATCH 05/11] + --- tests/core/baseline_test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index 2d2ef3402..459b39ed8 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -186,12 +186,18 @@ def test_scan_all_files_with_bad_symlinks(self): ) assert len(results.keys()) == 0 - def test_diff_branch(self): - results = self.get_results(diff_branch="more_errors") + def test_diff_branch_nodiff(self): + results = self.get_results(diff_branch="staging") # No expected results, because differences assert not results + def test_diff_branch_diff(self): + results = self.get_results(diff_branch="more_errors") + + assert len(results['test_data/files/tmp/file_with_secrets.py']) == 3 + + class TestGetSecretsNotInBaseline: def test_nothing_new(self): From 2402eda9470dd285f48a75d3fedf6959a6825969 Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Tue, 12 Dec 2023 14:01:16 -0500 Subject: [PATCH 06/11] + --- tests/core/baseline_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index 459b39ed8..f710e681e 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -187,13 +187,13 @@ def test_scan_all_files_with_bad_symlinks(self): assert len(results.keys()) == 0 def test_diff_branch_nodiff(self): - results = self.get_results(diff_branch="staging") + results = self.get_results(path=['test_data/files'],diff_branch="staging") # No expected results, because differences assert not results def test_diff_branch_diff(self): - results = self.get_results(diff_branch="more_errors") + results = self.get_results(path=['test_data/files'],diff_branch="more_errors") assert len(results['test_data/files/tmp/file_with_secrets.py']) == 3 From 5dca35d55e29071c00237edb7ae12b804b32749a Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Tue, 12 Dec 2023 14:07:10 -0500 Subject: [PATCH 07/11] test --- tests/core/baseline_test.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index f710e681e..792c30482 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -193,8 +193,19 @@ def test_diff_branch_nodiff(self): assert not results def test_diff_branch_diff(self): - results = self.get_results(path=['test_data/files'],diff_branch="more_errors") + with mock_git_calls( + 'detect_secrets.core.baseline.subprocess.check_output', + ( + SubprocessMock( + expected_input='git checkout more_errors', + should_throw_exception=True, + mocked_output='', + ), + ), + ): + results = self.get_results(path=['test_data/files'],diff_branch="origin/master") + assert not results assert len(results['test_data/files/tmp/file_with_secrets.py']) == 3 From 491a6c0839ba4f7664864e2f02b70615d72ada06 Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Tue, 12 Dec 2023 14:08:49 -0500 Subject: [PATCH 08/11] ? --- tests/core/baseline_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index 792c30482..b9674ccd6 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -197,7 +197,7 @@ def test_diff_branch_diff(self): 'detect_secrets.core.baseline.subprocess.check_output', ( SubprocessMock( - expected_input='git checkout more_errors', + expected_input='git -C test_data/files diff --name-only --diff-filter=ACMRTUX origin/master', should_throw_exception=True, mocked_output='', ), From 567744756f57a6cbe1f2534a3bbf8791b8bafec6 Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Thu, 14 Dec 2023 17:57:55 -0500 Subject: [PATCH 09/11] fix git diff command syntax, tests --- detect_secrets/core/baseline.py | 6 ++---- tests/core/baseline_test.py | 28 +++++++++++++++++++++------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/detect_secrets/core/baseline.py b/detect_secrets/core/baseline.py index d2819e535..7e235d874 100644 --- a/detect_secrets/core/baseline.py +++ b/detect_secrets/core/baseline.py @@ -414,18 +414,16 @@ def _get_git_tracked_diff_files(rootdir='.',diff_branch=None): git_files = subprocess.check_output( [ 'git', - '-C', rootdir, 'diff', '--name-only', '--diff-filter=ACMRTUX', diff_branch, + '--', rootdir, ], stderr=fnull, ) for filename in git_files.decode('utf-8').split(): - relative_path = util.get_relative_path_if_in_cwd(rootdir, filename) - if relative_path: - output.append(relative_path) + output.append(filename) except subprocess.CalledProcessError: pass return output diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index b9674ccd6..4eb1fe94f 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -187,7 +187,7 @@ def test_scan_all_files_with_bad_symlinks(self): assert len(results.keys()) == 0 def test_diff_branch_nodiff(self): - results = self.get_results(path=['test_data/files'],diff_branch="staging") + results = self.get_results(path=['./test_data/files'],diff_branch="origin/master") # No expected results, because differences assert not results @@ -197,16 +197,30 @@ def test_diff_branch_diff(self): 'detect_secrets.core.baseline.subprocess.check_output', ( SubprocessMock( - expected_input='git -C test_data/files diff --name-only --diff-filter=ACMRTUX origin/master', - should_throw_exception=True, - mocked_output='', + expected_input='git diff --name-only --diff-filter=ACMRTUX origin/master -- ./test_data/files', + mocked_output=b'test_data/files/file_with_secrets.py\n', ), ), ): - results = self.get_results(path=['test_data/files'],diff_branch="origin/master") + results = self.get_results(path=['./test_data/files'],diff_branch="origin/master") + assert len(results.keys()) == 1 + assert len(results['test_data/files/file_with_secrets.py']) == 1 - assert not results - assert len(results['test_data/files/tmp/file_with_secrets.py']) == 3 + + def test_diff_branch_diff2(self): + with mock_git_calls( + 'detect_secrets.core.baseline.subprocess.check_output', + ( + SubprocessMock( + expected_input='git diff --name-only --diff-filter=ACMRTUX origin/master -- ./test_data/files', + mocked_output=b'test_data/files/file_with_secrets.py\ntest_data/files/tmp/file_with_secrets.py\n', + ), + ), + ): + results = self.get_results(path=['./test_data/files'],diff_branch="origin/master") + assert len(results.keys()) == 2 + assert len(results['test_data/files/file_with_secrets.py']) == 1 + assert len(results['test_data/files/tmp/file_with_secrets.py']) == 2 class TestGetSecretsNotInBaseline: From 87bf99546b3f39912fb2d0a0230f8e7b29c2dd4b Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Thu, 14 Dec 2023 18:06:28 -0500 Subject: [PATCH 10/11] spelling --- detect_secrets/core/baseline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect_secrets/core/baseline.py b/detect_secrets/core/baseline.py index 7e235d874..b1142b1f4 100644 --- a/detect_secrets/core/baseline.py +++ b/detect_secrets/core/baseline.py @@ -405,7 +405,7 @@ def _get_git_tracked_diff_files(rootdir='.',diff_branch=None): branch and the remote main branch. :rtype: set|None - :returns: filepaths to files with differences frm the diff_branch + :returns: filepaths to files with differences from the diff_branch which git currently tracks (locally) """ output = [] From f664ec8cc1decec0177c45b82887af4c2bf862c4 Mon Sep 17 00:00:00 2001 From: Nick Jones Date: Thu, 14 Dec 2023 18:08:45 -0500 Subject: [PATCH 11/11] test --- tests/main_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index fc2b5bd0c..d7edab6dd 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -225,7 +225,7 @@ def test_scan_with_all_files_flag(self, mock_baseline_initialize): def test_scan_with_diff_branch(self, mock_baseline_initialize): with mock_stdin(): - assert main('scan --diff-branch staging'.split()) == 0 + assert main('scan --diff-branch some_branch_here'.split()) == 0 mock_baseline_initialize.assert_called_once_with( plugins=Any(tuple), @@ -238,7 +238,7 @@ def test_scan_with_diff_branch(self, mock_baseline_initialize): word_list_file=None, word_list_hash=None, suppress_unscannable_file_warnings=False, - diff_branch='staging', + diff_branch='some_branch_here', ) def test_reads_from_stdin(self, mock_merge_baseline):