Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calculate line number correctly in multi-line strings #676

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 40 additions & 19 deletions detect_secrets/core/scan.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import subprocess
from functools import lru_cache
from typing import Any
from typing import cast
from typing import Generator
Expand All @@ -24,6 +25,18 @@
from .log import log
from .plugins import Plugin
from .potential_secret import PotentialSecret
from detect_secrets.util.filetype import determine_file_type
from detect_secrets.util.filetype import FileType


@lru_cache(maxsize=1)
def read_raw_lines(file_name: str) -> List[str]:
    """Return the untransformed lines of *file_name*, or ``[]`` if unreadable.

    The result is memoized for the most recently requested file name only
    (``maxsize=1``), so repeated calls for the same file do not re-read it.

    NOTE: the cache is keyed on the file name alone. If the file changes on
    disk between calls with the same name, the previously read lines are
    returned. The cached list object is also shared between callers, so it
    must be treated as read-only.

    :param file_name: path of the file to read, opened in text mode.
    :returns: the file's lines including their line terminators, or an empty
        list when the file cannot be opened or decoded.
    """
    try:
        with open(file_name) as f:
            return f.readlines()
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError: without catching
        # it explicitly, a binary or mis-encoded file would raise here instead
        # of falling back to "no raw lines available".
        log.debug(f"Can't open file {file_name}")
        return []


def get_files_to_scan(
Expand Down Expand Up @@ -307,7 +320,7 @@ def _process_line_based_plugins(
filename: str,
) -> Generator[PotentialSecret, None, None]:
line_content = [line[1] for line in lines]

raw_code_snippet_lines = read_raw_lines(filename)
# NOTE: We iterate through lines *then* plugins, because we want to quit early if any of the
# filters return True.
for line_number, line in lines:
Expand All @@ -327,25 +340,33 @@ def _process_line_based_plugins(
):
continue

yield from (
secret
for plugin in get_plugins()
for plugin in get_plugins():
for secret in _scan_line(
plugin=plugin,
filename=filename,
line=line,
line_number=line_number,
context=code_snippet,
)
if not _is_filtered_out(
required_filter_parameters=['context'],
filename=secret.filename,
secret=secret.secret_value,
plugin=plugin,
line=line,
context=code_snippet,
)
)
plugin=plugin,
filename=filename,
line=line,
line_number=line_number,
context=code_snippet,
):
if not _is_filtered_out(
required_filter_parameters=['context'],
filename=secret.filename,
secret=secret.secret_value,
plugin=plugin,
line=line,
context=code_snippet,
):
if determine_file_type(filename) == FileType.YAML and secret.secret_value:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you looked at the YAML transformer? YAML files are transformed by this transformer, so any changes should be made in the transformer itself.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @jpdakran !
Yes, I have. It seems it merges the lines into one, which makes sense from a scanning perspective, but not from the perspective of locating the finding: the secret is reported on the first line, even though it is actually on the third line.
The YAML transformer doesn't retain a mapping from the merged line back to the original line numbers...

# YAML specifically has multi-line string parsing that groups the
# different lines as 1.
# Calculate actual line number in case of YAML multi-line string
actual_line_number = line_number
for i, l in enumerate(raw_code_snippet_lines[actual_line_number - 1:]):
if secret.secret_value in l:
actual_line_number += i
break
secret.line_number = actual_line_number
yield secret


def _scan_line(
Expand Down
13 changes: 13 additions & 0 deletions test_data/scan_test_multiline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
configuration:
datadogAgent:
enabled: true
name: name

image:
repository: gcr.io/some/scheduler
pullPolicy: Always

schedule: "* * * * *"

cmdList: "curl --retry-connrefused --retry 3 --retry-delay 5 -X POST http://someone:[email protected]/v1/?event=EXPIRE_SYNC&days=1|1440 \
||curl --retry-connrefused --retry 3 --retry-delay 5 -X POST http://anotherone:[email protected]/v1/?hours=1|60"
13 changes: 13 additions & 0 deletions tests/core/scan_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,19 @@ def test_handles_binary_files_gracefully():

assert not list(scan.scan_file(f.name))

@staticmethod
def test_multi_line_results_accuracy():
    """Each secret found in the multi-line YAML fixture has its own line number.

    The fixture holds a single YAML string spread over several physical
    lines; the scan must report at least one secret, and no two reported
    secrets may share a line number.
    """
    file_name = 'test_data/scan_test_multiline.yaml'
    results = list(scan.scan_file(file_name))
    assert len(results) > 0, f'Expected to find secrets in {file_name}'

    seen_line_numbers = set()
    for secret in results:
        # Fail on the first repeated line number, otherwise record it.
        assert secret.line_number not in seen_line_numbers,\
            'Found multiple secrets on the same line number'
        seen_line_numbers.add(secret.line_number)


@pytest.fixture(autouse=True)
def configure_plugins():
Expand Down