Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

--diff-branch option to allow for scans of changed files only #143

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"files": "test_data/.*|tests/.*|^.secrets.baseline$",
"lines": null
},
"generated_at": "2023-05-25T14:44:17Z",
"generated_at": "2023-12-18T21:34:54Z",
"plugins_used": [
{
"name": "AWSKeyDetector"
Expand Down Expand Up @@ -200,15 +200,15 @@
"hashed_secret": "f32a07369c6fd4eaacb1f5a8877824ef98204a1c",
"is_secret": false,
"is_verified": false,
"line_number": 102,
"line_number": 105,
"type": "Secret Keyword",
"verified_result": null
},
{
"hashed_secret": "1af17e73721dbe0c40011b82ed4bb1a7dbe3ce29",
"is_secret": false,
"is_verified": false,
"line_number": 105,
"line_number": 108,
"type": "Secret Keyword",
"verified_result": null
}
Expand Down
50 changes: 50 additions & 0 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def initialize(
output_raw=False,
output_verified_false=False,
suppress_unscannable_file_warnings=False,
diff_branch=None,
):
"""Scans the entire codebase for secrets, and returns a
SecretsCollection object.
Expand All @@ -49,6 +50,10 @@ def initialize(
:type suppress_unscannable_file_warnings boolean
:param suppress_unscannable_file_warnings: whether or not to suppress unscannable file warnings

:type diff_branch: str|None
:param diff_branch: optional name of branch to check for
differences against in determining files to scan.

:rtype: SecretsCollection
"""
output = SecretsCollection(
Expand All @@ -68,6 +73,10 @@ def initialize(
files_to_scan.extend(
_get_files_recursively(element),
)
elif diff_branch is not None:
files_to_scan.extend(
_get_git_tracked_diff_files(element, diff_branch),
)
else:
files_to_scan.extend(
_get_git_tracked_files(element),
Expand Down Expand Up @@ -380,6 +389,47 @@ def _get_git_tracked_files(rootdir='.'):
return output


def _get_git_tracked_diff_files(rootdir='.', diff_branch=None):
    """List the files under ``rootdir`` that ``git diff`` reports as
    differing from ``diff_branch``.

    On incremental builds it is only necessary to scan the files that
    have changed. Deleted files are not listed (the ``--diff-filter``
    below omits ``D``) because there is nothing left to scan.

    :type rootdir: str
    :param rootdir: root directory of where you want to list files from

    :type diff_branch: str
    :param diff_branch: name of branch to check differences from.
        'test' would find files with differences between the current branch
        and the local test branch.
        'origin/main' would find files with differences between the current
        branch and the remote main branch.

    :rtype: list
    :returns: filepaths to files with differences from the diff_branch
        which git currently tracks (locally). Empty when the git command
        exits with an error.
    """
    try:
        git_files = subprocess.check_output(
            [
                'git',
                'diff',
                '--name-only',
                '--diff-filter=ACMRTUX',
                diff_branch,
                '--', rootdir,
            ],
            # git's own error output is discarded; failure is signalled
            # to the caller through the empty return value instead.
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        # Best effort: a failing git command yields no files to scan.
        return []

    # One path per line. Split on newlines only, so a path that contains
    # spaces is kept as a single entry instead of being broken apart.
    # NOTE(review): git may quote paths with unusual characters unless
    # `-z` is passed; such paths are returned as printed — confirm whether
    # that matters for callers.
    return [
        filename
        for filename in git_files.decode('utf-8').split('\n')
        if filename
    ]


def _get_files_recursively(rootdir):
"""Sometimes, we want to use this tool with non-git repositories.
This function allows us to do so.
Expand Down
15 changes: 14 additions & 1 deletion detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,8 @@ def add_arguments(self):
self._add_initialize_baseline_argument()\
._add_adhoc_scanning_argument()\
._add_output_raw_argument()\
._add_suppress_unscannable_file_warnings()
._add_suppress_unscannable_file_warnings()\
._add_diff_branch()\

PluginOptions(self.parser).add_arguments()

Expand Down Expand Up @@ -289,6 +290,18 @@ def _add_suppress_unscannable_file_warnings(self):
add_suppress_unscannable_file_warnings(self.parser)
return self

def _add_diff_branch(self):
    """Register the ``--diff-branch`` option on ``self.parser``.

    The value given on the command line is stored under the
    ``diff_branch`` attribute of the parsed arguments.

    :returns: ``self``, so that calls can be chained.
    """
    help_text = (
        'Scan only files that are tracked to git containing '
        'differences from the named branch.'
    )
    self.parser.add_argument(
        '--diff-branch',
        dest='diff_branch',
        type=str,
        help=help_text,
    )
    return self


class AuditOptions:
def __init__(self, subparser):
Expand Down
4 changes: 3 additions & 1 deletion detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ def _perform_scan(args, plugins, automaton, word_list_hash):
output_raw=args.output_raw,
output_verified_false=args.output_verified_false,
suppress_unscannable_file_warnings=args.suppress_unscannable_file_warnings,
diff_branch=args.diff_branch,
).format_for_baseline_output()

if old_baseline:
Expand All @@ -206,7 +207,8 @@ def _get_existing_baseline(import_filename):
try:
return _read_from_file(import_filename[0])
except FileNotFoundError as fnf_error:
if fnf_error.errno == 2: # create new baseline if not existed
if fnf_error.errno == 2 or fnf_error.errno == 129:
# create new baseline if not existed, 129 is for z/OS
return None
else: # throw exception for other cases
print(
Expand Down
3 changes: 3 additions & 0 deletions docs/cheat-sheet.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ detect-secrets scan file1 file2

# Scan all files except for .gitignore
detect-secrets scan --all-files

# Scan only git-tracked files that differ from the named branch
detect-secrets scan --diff-branch diff_branch_name
```

### Ad-hoc scan on a single string
Expand Down
Empty file added tests/__init__.py
Empty file.
Empty file added tests/core/__init__.py
Empty file.
42 changes: 41 additions & 1 deletion tests/core/baseline_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

class TestInitializeBaseline:

def setup(self):
def setup_method(self):
self.plugins = (
Base64HighEntropyString(4.5),
HexHighEntropyString(3),
Expand All @@ -33,12 +33,14 @@ def get_results(
path=['./test_data/files'],
exclude_files_regex=None,
scan_all_files=False,
diff_branch=None,
):
return baseline.initialize(
path,
self.plugins,
exclude_files_regex=exclude_files_regex,
should_scan_all_files=scan_all_files,
diff_branch=diff_branch,
).json()

@pytest.mark.parametrize(
Expand Down Expand Up @@ -184,6 +186,44 @@ def test_scan_all_files_with_bad_symlinks(self):
)
assert len(results.keys()) == 0

def test_diff_branch_nodiff(self):
    # Stub the git call so the outcome does not depend on how the local
    # checkout happens to compare against origin/master.
    with mock_git_calls(
        'detect_secrets.core.baseline.subprocess.check_output',
        (
            SubprocessMock(
                expected_input='git diff --name-only --diff-filter=ACMRTUX '
                + 'origin/master -- ./test_data/files',
                mocked_output=b'',
            ),
        ),
    ):
        results = self.get_results(path=['./test_data/files'], diff_branch='origin/master')

    # git reported no differing files, so no results are expected
    assert not results

def test_diff_branch_diff(self):
    # git is stubbed to report exactly one differing file.
    changed_file = 'test_data/files/file_with_secrets.py'
    git_diff_call = SubprocessMock(
        expected_input=(
            'git diff --name-only --diff-filter=ACMRTUX '
            'origin/master -- ./test_data/files'
        ),
        mocked_output=b'test_data/files/file_with_secrets.py\n',
    )

    with mock_git_calls(
        'detect_secrets.core.baseline.subprocess.check_output',
        (git_diff_call,),
    ):
        results = self.get_results(
            path=['./test_data/files'],
            diff_branch='origin/master',
        )

    # Only the reported file shows up in the results.
    assert len(results.keys()) == 1
    assert len(results[changed_file]) == 1

def test_diff_branch_diff2(self):
    # git is stubbed to report two differing files, one per output line.
    first_file = 'test_data/files/file_with_secrets.py'
    second_file = 'test_data/files/tmp/file_with_secrets.py'
    git_diff_call = SubprocessMock(
        expected_input=(
            'git diff --name-only --diff-filter=ACMRTUX '
            'origin/master -- ./test_data/files'
        ),
        mocked_output=(
            b'test_data/files/file_with_secrets.py\n'
            b'test_data/files/tmp/file_with_secrets.py\n'
        ),
    )

    with mock_git_calls(
        'detect_secrets.core.baseline.subprocess.check_output',
        (git_diff_call,),
    ):
        results = self.get_results(
            path=['./test_data/files'],
            diff_branch='origin/master',
        )

    # Both reported files show up in the results, and nothing else.
    assert len(results.keys()) == 2
    assert len(results[first_file]) == 1
    assert len(results[second_file]) == 2


class TestGetSecretsNotInBaseline:

Expand Down
23 changes: 23 additions & 0 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def test_scan_basic(self, mock_baseline_initialize):
word_list_file=None,
word_list_hash=None,
suppress_unscannable_file_warnings=False,
diff_branch=None,
)

def test_scan_with_rootdir(self, mock_baseline_initialize):
Expand All @@ -113,6 +114,7 @@ def test_scan_with_rootdir(self, mock_baseline_initialize):
word_list_file=None,
word_list_hash=None,
suppress_unscannable_file_warnings=False,
diff_branch=None,
)

def test_scan_with_exclude_args(self, mock_baseline_initialize):
Expand All @@ -132,6 +134,7 @@ def test_scan_with_exclude_args(self, mock_baseline_initialize):
word_list_file=None,
word_list_hash=None,
suppress_unscannable_file_warnings=False,
diff_branch=None,
)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -217,6 +220,25 @@ def test_scan_with_all_files_flag(self, mock_baseline_initialize):
word_list_file=None,
word_list_hash=None,
suppress_unscannable_file_warnings=False,
diff_branch=None,
)

def test_scan_with_diff_branch(self, mock_baseline_initialize):
with mock_stdin():
assert main('scan --diff-branch some_branch_here'.split()) == 0

mock_baseline_initialize.assert_called_once_with(
plugins=Any(tuple),
exclude_files_regex=None,
exclude_lines_regex=None,
path='.',
should_scan_all_files=False,
output_raw=False,
output_verified_false=False,
word_list_file=None,
word_list_hash=None,
suppress_unscannable_file_warnings=False,
diff_branch='some_branch_here',
)

def test_reads_from_stdin(self, mock_merge_baseline):
Expand Down Expand Up @@ -274,6 +296,7 @@ def test_reads_non_existed_baseline_from_file(
word_list_file=None,
word_list_hash=None,
suppress_unscannable_file_warnings=False,
diff_branch=None,
)
mock_merge_baseline.assert_not_called()

Expand Down