diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e6cb99bfc..23a5cc9ca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,6 +8,8 @@ on: branches: [ master ] pull_request: workflow_dispatch: + schedule: + - cron: '0 0 1 * *' jobs: main: @@ -15,7 +17,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python: ['3.6', '3.7', '3.8', '3.9'] + python: ['3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index f96741176..4b2291e39 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest] - python: ['3.6', '3.7', '3.8', '3.9'] + python: ['3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 diff --git a/.gitignore b/.gitignore index 487c3491d..cdb857794 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,19 @@ .*ignore !.gitignore .python-version -.vscode + +# Ignore editor specific configs +/.idea +/.vscode +.project +.classpath +.c9/ +*.launch +.settings/ +*.sublime-workspace +.generators +.rakeTasks + +# System Files +.DS_Store +Thumbs.db diff --git a/.secrets.baseline b/.secrets.baseline index 707710ecc..7f945030e 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1,5 +1,5 @@ { - "version": "1.4.0", + "version": "1.5.0", "plugins_used": [ { "name": "ArtifactoryDetector" diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f2f363a0..47935533e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,9 +25,64 @@ If you love `detect-secrets`, please star our project on GitHub to show your sup [@xxxx]: https://github.com/xxxx --> - +### v1.5.0 +##### May 6th, 2024 + +We apologise for the extreme delay in publishing a new release for our beloved `detect-secrets`. We at Yelp appreciate your continued support and your contributions to this valuable project! 
+ +#### :newspaper: News +- We're adding support for Python 3.10, 3.11 and 3.12 and we dropped support for Python 3.6 and 3.7! We hope this won't be too disruptive for you all. Be aware that in an upcoming release, we'll remove support for Python 3.8 too, as it'll reach EOL in October 2024. + +#### :mega: Release Highlights +- Added support for OS-agnostic baseline files ([#586]) + +#### :tada: New Features +- Added a detector for IP addresses ([#692]) +- Added a detector for GitLab tokens ([#782]) +- Added a detector for Telegram tokens ([#808]) +- Added a detector for PyPI and TestPyPI tokens ([#819]) +- Added a detector for OpenAI tokens ([#823]) + +#### :sparkles: Usability +- Added filenames in errors thrown when a plugin file specified in the `.secrets.baseline` is not found. ([#719]) +- Changed the wording of the audit prompt ([#738]) + +#### :telescope: Accuracy +- Improved DiscordBotTokenDetector to reduce false negatives ([#628]) +- Improved KeywordDetector to reduce false positives for Golang ([#675]) +- Improved AWSKeyDetector by adding more access key formats ([#796]) + +#### :bug: Bugfixes +- Fixed `NotImplementedError` in StatisticsAggregator ([#678]) +- Fixed bug in YAMLTransformer related to parsing YAML files with anchors and tags ([#679]) +- Fixed `IndexError` in `is_prefixed_with_dollar_sign` caused by passing empty strings ([#712]) + +#### :snake: Miscellaneous +- Dropped support for Python 3.6 ([#672]) +- Dropped support for Python 3.7 ([#724]) +- Added support for Python 3.10 ([#724]) +- Added support for Python 3.11 ([#730]) +- Added support for Python 3.12 ([#810]) +- Multiple dependency updates + +[#586]: https://github.com/Yelp/detect-secrets/pull/586 +[#628]: https://github.com/Yelp/detect-secrets/pull/628 +[#672]: https://github.com/Yelp/detect-secrets/pull/672 +[#675]: https://github.com/Yelp/detect-secrets/pull/675 +[#678]: https://github.com/Yelp/detect-secrets/pull/678 +[#679]: https://github.com/Yelp/detect-secrets/pull/679 +[#692]: 
https://github.com/Yelp/detect-secrets/pull/692 +[#712]: https://github.com/Yelp/detect-secrets/pull/712 +[#719]: https://github.com/Yelp/detect-secrets/pull/719 +[#724]: https://github.com/Yelp/detect-secrets/pull/724 +[#730]: https://github.com/Yelp/detect-secrets/pull/730 +[#738]: https://github.com/Yelp/detect-secrets/pull/738 +[#782]: https://github.com/Yelp/detect-secrets/pull/782 +[#796]: https://github.com/Yelp/detect-secrets/pull/796 +[#808]: https://github.com/Yelp/detect-secrets/pull/808 +[#810]: https://github.com/Yelp/detect-secrets/pull/810 +[#819]: https://github.com/Yelp/detect-secrets/pull/819 +[#823]: https://github.com/Yelp/detect-secrets/pull/823 ### v1.4.0 ##### October 4th, 2022 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index af1fc3601..dbd7f723b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -70,10 +70,10 @@ sys 0m2.486s ### Running the Entire Test Suite -You can run the test suite in the interpreter of your choice (in this example, `py36`) by doing: +You can run the test suite in the interpreter of your choice (in this example, `py39`) by doing: ```bash -tox -e py36 +tox -e py39 ``` This will also run the code through our series of coverage tests, `mypy` rules and other linting diff --git a/README.md b/README.md index 03e7e36fb..1a08f131c 100644 --- a/README.md +++ b/README.md @@ -98,20 +98,25 @@ BasicAuthDetector CloudantDetector DiscordBotTokenDetector GitHubTokenDetector +GitLabTokenDetector Base64HighEntropyString HexHighEntropyString IbmCloudIamDetector IbmCosHmacDetector +IPPublicDetector JwtTokenDetector KeywordDetector MailchimpDetector NpmDetector +OpenAIDetector PrivateKeyDetector +PypiTokenDetector SendGridDetector SlackDetector SoftlayerDetector SquareOAuthDetector StripeDetector +TelegramBotTokenDetector TwilioKeyDetector ``` @@ -392,7 +397,7 @@ We recommend setting this up as a pre-commit hook. 
One way to do this is by usin # .pre-commit-config.yaml repos: - repo: https://github.com/Yelp/detect-secrets - rev: v1.4.0 + rev: v1.5.0 hooks: - id: detect-secrets args: ['--baseline', '.secrets.baseline'] diff --git a/detect_secrets/__version__.py b/detect_secrets/__version__.py index b2e817771..84262d36c 100644 --- a/detect_secrets/__version__.py +++ b/detect_secrets/__version__.py @@ -1 +1 @@ -VERSION = '1.4.0' +VERSION = '1.5.0' diff --git a/detect_secrets/audit/analytics.py b/detect_secrets/audit/analytics.py index 29facdbbb..c46aa491d 100644 --- a/detect_secrets/audit/analytics.py +++ b/detect_secrets/audit/analytics.py @@ -58,7 +58,14 @@ def _get_plugin_counter(self, secret_type: str) -> 'StatisticsCounter': return cast(StatisticsCounter, self.data[secret_type]['stats']) def __str__(self) -> str: - raise NotImplementedError + output = '' + + for secret_type, framework in self.data.items(): + output += f'Plugin: {get_mapping_from_secret_type_to_class()[secret_type].__name__}\n' + for value in framework.values(): + output += f'Statistics: {value}\n\n' + + return output def json(self) -> Dict[str, Any]: output = {} @@ -77,19 +84,36 @@ def __init__(self) -> None: self.incorrect: int = 0 self.unknown: int = 0 - def __repr__(self) -> str: + def __str__(self) -> str: return ( - f'{self.__class__.__name__}(correct={self.correct}, ' - 'incorrect={self.incorrect}, unknown={self.unknown},)' + f'True Positives: {self.correct}, False Positives: {self.incorrect}, ' + f'Unknown: {self.unknown}, Precision: {self.calculate_precision()}, ' + f'Recall: {self.calculate_recall()}' ) def json(self) -> Dict[str, Any]: + return { + 'raw': { + 'true-positives': self.correct, + 'false-positives': self.incorrect, + 'unknown': self.unknown, + }, + 'score': { + 'precision': self.calculate_precision(), + 'recall': self.calculate_recall(), + }, + } + + def calculate_precision(self) -> float: precision = ( round(float(self.correct) / (self.correct + self.incorrect), 4) if (self.correct 
and self.incorrect) else 0.0 ) + return precision + + def calculate_recall(self) -> float: # NOTE(2020-11-08|domanchi): This isn't the formal definition of `recall`, however, # this is the definition that we're going to attribute to it. # @@ -124,14 +148,4 @@ def json(self) -> Dict[str, Any]: else 0.0 ) - return { - 'raw': { - 'true-positives': self.correct, - 'false-positives': self.incorrect, - 'unknown': self.unknown, - }, - 'score': { - 'precision': precision, - 'recall': recall, - }, - } + return recall diff --git a/detect_secrets/audit/audit.py b/detect_secrets/audit/audit.py index da8972d3e..35ff049b2 100644 --- a/detect_secrets/audit/audit.py +++ b/detect_secrets/audit/audit.py @@ -78,7 +78,7 @@ def _classify_secrets(iterator: BidirectionalIterator) -> bool: if decision == io.InputOptions.BACK: iterator.step_back_on_next_iteration() - # The question asked is: "Should this be committed to the repository?" + # The question asked is: "Should this string be committed to the repository?" elif decision == io.InputOptions.NO: secret.is_secret = True has_changes = True diff --git a/detect_secrets/audit/io.py b/detect_secrets/audit/io.py index 15ecc504e..ab5086b70 100644 --- a/detect_secrets/audit/io.py +++ b/detect_secrets/audit/io.py @@ -130,7 +130,7 @@ def __init__(self, allow_labelling: bool, allow_backstep: bool) -> None: def __str__(self) -> str: if 'Y' in self.valid_input: - output = 'Is this a secret that should be committed to this repository?' + output = 'Should this string be committed to the repository?' else: output = 'What would you like to do?' 
diff --git a/detect_secrets/core/plugins/initialize.py b/detect_secrets/core/plugins/initialize.py index 6d8eeaeb9..b74eab3c3 100644 --- a/detect_secrets/core/plugins/initialize.py +++ b/detect_secrets/core/plugins/initialize.py @@ -31,7 +31,17 @@ def from_plugin_classname(classname: str) -> Plugin: """ :raises: TypeError """ - for plugin_type in get_mapping_from_secret_type_to_class().values(): + try: + plugin_types = get_mapping_from_secret_type_to_class().values() + except FileNotFoundError as e: + log.error(f'Error: Failed to load `{classname}` plugin: {e}') + log.error( + 'This error can occur when using a baseline that references a ' + 'custom plugin with a path that does not exist.', + ) + raise + + for plugin_type in plugin_types: if plugin_type.__name__ == classname: break else: diff --git a/detect_secrets/core/potential_secret.py b/detect_secrets/core/potential_secret.py index eba034c79..840d3291c 100644 --- a/detect_secrets/core/potential_secret.py +++ b/detect_secrets/core/potential_secret.py @@ -6,6 +6,7 @@ from ..util.color import AnsiColor from ..util.color import colorize +from ..util.path import convert_local_os_path class PotentialSecret: @@ -75,7 +76,7 @@ def load_secret_from_dict(cls, data: Dict[str, Union[str, int, bool]]) -> 'Poten """Custom JSON decoder""" kwargs: Dict[str, Any] = { 'type': str(data['type']), - 'filename': str(data['filename']), + 'filename': convert_local_os_path(str(data['filename'])), 'secret': 'will be replaced', } diff --git a/detect_secrets/core/scan.py b/detect_secrets/core/scan.py index f84d53c3c..1d23e95f5 100644 --- a/detect_secrets/core/scan.py +++ b/detect_secrets/core/scan.py @@ -138,8 +138,12 @@ def scan_line(line: str) -> Generator[PotentialSecret, None, None]: def scan_file(filename: str) -> Generator[PotentialSecret, None, None]: - if not get_plugins(): # pragma: no cover - log.error('No plugins to scan with!') + try: + if not get_plugins(): # pragma: no cover + log.error('No plugins to scan with!') + return 
+ except FileNotFoundError: + log.error('Unable to load plugins!') return if _is_filtered_out(required_filter_parameters=['filename'], filename=filename): @@ -387,13 +391,19 @@ def _is_filtered_out(required_filter_parameters: Iterable[str], **kwargs: Any) - try: if call_function_with_arguments(filter_fn, **kwargs): if 'secret' in kwargs: - debug_msg = f'Skipping "{kwargs["secret"]}" due to `{filter_fn.path}`.' + debug_msg = 'Skipping "{0}" due to `{1}`.'.format( + kwargs['secret'], + filter_fn.path, + ) elif list(kwargs.keys()) == ['filename']: # We want to make sure this is only run if we're skipping files (as compared # to other filters that may include `filename` as a parameter). - debug_msg = f'Skipping "{kwargs["filename"]}" due to `{filter_fn.path}`' + debug_msg = 'Skipping "{0}" due to `{1}`'.format( + kwargs['filename'], + filter_fn.path, + ) else: - debug_msg = f'Skipping secret due to `{filter_fn.path}`.' + debug_msg = 'Skipping secret due to `{0}`.'.format(filter_fn.path) log.info(debug_msg) return True diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py index d3fc4dd56..094a274ed 100644 --- a/detect_secrets/core/secrets_collection.py +++ b/detect_secrets/core/secrets_collection.py @@ -10,6 +10,7 @@ from typing import Tuple from . 
import scan +from ..util.path import convert_local_os_path from .potential_secret import PotentialSecret from detect_secrets.settings import configure_settings_from_baseline from detect_secrets.settings import get_settings @@ -40,7 +41,7 @@ def load_from_baseline(cls, baseline: Dict[str, Any]) -> 'SecretsCollection': for filename in baseline['results']: for item in baseline['results'][filename]: secret = PotentialSecret.load_secret_from_dict({'filename': filename, **item}) - output[filename].add(secret) + output[convert_local_os_path(filename)].add(secret) return output @@ -72,8 +73,8 @@ def scan_files(self, *filenames: str, num_processors: Optional[int] = None) -> N self[os.path.relpath(secret.filename, self.root)].add(secret) def scan_file(self, filename: str) -> None: - for secret in scan.scan_file(os.path.join(self.root, filename)): - self[filename].add(secret) + for secret in scan.scan_file(os.path.join(self.root, convert_local_os_path(filename))): + self[convert_local_os_path(filename)].add(secret) def scan_diff(self, diff: str) -> None: """ diff --git a/detect_secrets/filters/heuristic.py b/detect_secrets/filters/heuristic.py index 0dbdb4949..7fb078181 100644 --- a/detect_secrets/filters/heuristic.py +++ b/detect_secrets/filters/heuristic.py @@ -164,7 +164,7 @@ def is_prefixed_with_dollar_sign(secret: str) -> bool: # false negatives than `is_templated_secret` (e.g. secrets that actually start with a $). # This is best used with files that actually use this as a means of referencing variables. # TODO: More intelligent filetype handling? 
- return secret[0] == '$' + return bool(secret) and secret[0] == '$' def is_indirect_reference(line: str) -> bool: diff --git a/detect_secrets/plugins/aws.py b/detect_secrets/plugins/aws.py index ee822b6f7..9d22d3e83 100644 --- a/detect_secrets/plugins/aws.py +++ b/detect_secrets/plugins/aws.py @@ -25,14 +25,14 @@ class AWSKeyDetector(RegexBasedDetector): secret_keyword = r'(?:key|pwd|pw|password|pass|token)' denylist = ( - re.compile(r'AKIA[0-9A-Z]{16}'), + re.compile(r'(?:A3T[A-Z0-9]|ABIA|ACCA|AKIA|ASIA)[0-9A-Z]{16}'), # This examines the variable name to identify AWS secret tokens. - # The order is important since we want to prefer finding `AKIA`-based + # The order is important since we want to prefer finding access # keys (since they can be verified), rather than the secret tokens. re.compile( - r'aws.{{0,20}}?{secret_keyword}.{{0,20}}?[\'\"]([0-9a-zA-Z/+]{{40}})[\'\"]'.format( + r'aws.{{0,20}}?{secret_keyword}.{{0,20}}?[\'\"]?([0-9a-zA-Z/+]{{40}})[\'\"]?'.format( secret_keyword=secret_keyword, ), flags=re.IGNORECASE, diff --git a/detect_secrets/plugins/discord.py b/detect_secrets/plugins/discord.py index a4401e041..67664fc22 100644 --- a/detect_secrets/plugins/discord.py +++ b/detect_secrets/plugins/discord.py @@ -11,7 +11,8 @@ class DiscordBotTokenDetector(RegexBasedDetector): secret_type = 'Discord Bot Token' denylist = [ - # Discord Bot Token ([M|N]XXXXXXXXXXXXXXXXXXXXXXX.XXXXXX.XXXXXXXXXXXXXXXXXXXXXXXXXXX) + # Discord Bot Token ([M|N|O]XXXXXXXXXXXXXXXXXXXXXXX[XX].XXXXXX.XXXXXXXXXXXXXXXXXXXXXXXXXXX) # Reference: https://discord.com/developers/docs/reference#authentication - re.compile(r'[MN][a-zA-Z\d_-]{23}\.[a-zA-Z\d_-]{6}\.[a-zA-Z\d_-]{27}'), + # Also see: https://github.com/Yelp/detect-secrets/issues/627 + re.compile(r'[MNO][a-zA-Z\d_-]{23,25}\.[a-zA-Z\d_-]{6}\.[a-zA-Z\d_-]{27}'), ] diff --git a/detect_secrets/plugins/github_token.py b/detect_secrets/plugins/github_token.py index b1397316b..f4a73bfe4 100644 --- a/detect_secrets/plugins/github_token.py 
+++ b/detect_secrets/plugins/github_token.py @@ -12,5 +12,5 @@ class GitHubTokenDetector(RegexBasedDetector): denylist = [ # ref. https://github.blog/2021-04-05-behind-githubs-new-authentication-token-formats/ - re.compile(r'(ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36}'), + re.compile(r'(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36}'), ] diff --git a/detect_secrets/plugins/gitlab_token.py b/detect_secrets/plugins/gitlab_token.py new file mode 100644 index 000000000..ed197fd7d --- /dev/null +++ b/detect_secrets/plugins/gitlab_token.py @@ -0,0 +1,59 @@ +""" +This plugin searches for GitLab tokens +""" +import re + +from detect_secrets.plugins.base import RegexBasedDetector + + +class GitLabTokenDetector(RegexBasedDetector): + """Scans for GitLab tokens.""" + + secret_type = 'GitLab Token' + + denylist = [ + # ref: + # - https://docs.gitlab.com/ee/security/token_overview.html#gitlab-tokens + # - https://gitlab.com/groups/gitlab-org/-/epics/8923 + # - https://github.com/gitlabhq/gitlabhq/blob/master/gems + # /gitlab-secret_detection/lib/gitleaks.toml#L6-L76 + + # `gl..-` prefix and a token of length >20 + # characters are typically alphanumeric, underscore, dash + # Most tokens are generated either with: + # - `Devise.friendly_token`, a string with a default length of 20, or + # - `SecureRandom.hex`, default data size of 16 bytes, encoded in different ways. + # String length may vary depending on the type of token, and probably + # even GL-settings in the future, so we expect between 20 and 50 chars. 
+ + # Personal Access Token - glpat + # Deploy Token - gldt + # Feed Token - glft + # OAuth Access Token - glsoat + # Runner Token - glrt + re.compile( + r'(?:glpat|gldt|glft|glsoat|glrt)-' + r'[A-Za-z0-9_\-]{20,50}(?!\w)', + ), + + # Runner Registration Token + re.compile(r'GR1348941[A-Za-z0-9_\-]{20,50}(?!\w)'), + + # CI/CD Token - `glcbt` or `glcbt-XY_` where XY is a 2-char hex 'partition_id' + re.compile(r'glcbt-(?:[0-9a-fA-F]{2}_)?[A-Za-z0-9_\-]{20,50}(?!\w)'), + + # Incoming Mail Token - generated by SecureRandom.hex, default length 16 bytes + # resulting token length is 26 when Base-36 encoded + re.compile(r'glimt-[A-Za-z0-9_\-]{25}(?!\w)'), + + # Trigger Token - generated by `SecureRandom.hex(20)` + re.compile(r'glptt-[A-Za-z0-9_\-]{40}(?!\w)'), + + # Agent Token - generated by `Devise.friendly_token(50)` + # tokens have a minimum length of 50 chars, up to 1024 chars + re.compile(r'glagent-[A-Za-z0-9_\-]{50,1024}(?!\w)'), + + # GitLab OAuth Application Secret - generated by `SecureRandom.hex(32)` + # -> becomes 64 base64-encoded characters + re.compile(r'gloas-[A-Za-z0-9_\-]{64}(?!\w)'), + ] diff --git a/detect_secrets/plugins/ibm_cloud_iam.py b/detect_secrets/plugins/ibm_cloud_iam.py index 037d971b5..6920849c6 100644 --- a/detect_secrets/plugins/ibm_cloud_iam.py +++ b/detect_secrets/plugins/ibm_cloud_iam.py @@ -34,7 +34,7 @@ def verify(self, secret: str) -> VerifiedResult: def verify_cloud_iam_api_key(apikey: Union[str, bytes]) -> requests.Response: # pragma: no cover - if type(apikey) == bytes: + if type(apikey) is bytes: apikey = apikey.decode('UTF-8') headers = { diff --git a/detect_secrets/plugins/ip_public.py b/detect_secrets/plugins/ip_public.py new file mode 100644 index 000000000..f9a435904 --- /dev/null +++ b/detect_secrets/plugins/ip_public.py @@ -0,0 +1,47 @@ +import re + +from .base import RegexBasedDetector + + +class IPPublicDetector(RegexBasedDetector): + """Scans for public ip address (ipv4) + + Some non-public ipv4 addresses are ignored, such 
as: + - 127. + - 10. + - 172.(16-31) + - 192.168. + - 169.254. - Link Local Address IPv4 + + Reference: + https://www.iana.org/assignments/ipv4-address-space/ipv4-address-space.xhtml + https://en.wikipedia.org/wiki/Private_network + """ + secret_type = 'Public IP (ipv4)' + + denylist_ipv4_address = r""" + (? VerifiedResult: # pragma: no cover + response = requests.get( + 'https://api.telegram.org/bot{}/getMe'.format( + secret, + ), + ) + return ( + VerifiedResult.VERIFIED_TRUE + if response.status_code == 200 + else VerifiedResult.VERIFIED_FALSE + ) diff --git a/detect_secrets/pre_commit_hook.py b/detect_secrets/pre_commit_hook.py index fb75832e3..92757496c 100644 --- a/detect_secrets/pre_commit_hook.py +++ b/detect_secrets/pre_commit_hook.py @@ -147,10 +147,10 @@ def pretty_print_diagnostics(secrets: SecretsCollection, width: int = 80) -> Non ) for suggestion in [ 'For information about putting your secrets in a safer place, ' - f'please ask {os.environ.get("DETECT_SECRETS_SECURITY_TEAM", "in #security")}', + 'please ask {0}'.format(os.environ.get('DETECT_SECRETS_SECURITY_TEAM', 'in #security')), 'Mark false positives with an inline ' - f'`{color.colorize("pragma: allowlist secret", color.AnsiColor.BOLD)}` comment', + '`{0}` comment'.format(color.colorize('pragma: allowlist secret', color.AnsiColor.BOLD)), ]: print(wrapper.fill(suggestion)) diff --git a/detect_secrets/transformers/yaml.py b/detect_secrets/transformers/yaml.py index a61e4af6e..a587703b6 100644 --- a/detect_secrets/transformers/yaml.py +++ b/detect_secrets/transformers/yaml.py @@ -37,8 +37,17 @@ def parse_file(self, file: NamedIO) -> List[str]: except yaml.YAMLError: raise ParsingError + seen = set() + lines: List[str] = [] for item in items: + # Filter out previous lines seen before. This removes duplicates when it comes + # to anchor & and alias * tags. 
+ if item in seen: + continue + else: + seen.add(item) + while len(lines) < item.line_number - 1: lines.append('') diff --git a/detect_secrets/types.py b/detect_secrets/types.py index 4f9d532e8..71c280f43 100644 --- a/detect_secrets/types.py +++ b/detect_secrets/types.py @@ -9,25 +9,6 @@ from .exceptions import SecretNotFoundOnSpecifiedLineError from .util.code_snippet import CodeSnippet -try: - from typing import NoReturn # noqa: F811 -except ImportError: # pragma: no cover - # NOTE: NoReturn was introduced in Python3.6.2. However, we need to support Python3.6.0. - # This section of code is inline imported from `typing-extensions`, so that we don't need - # to introduce an additional package for such an edge case. - from typing import _FinalTypingBase # type: ignore - - class _NoReturn(_FinalTypingBase): - __slots__ = () - - def __instancecheck__(self, obj: Any) -> None: - raise TypeError('NoReturn cannot be used with isinstance().') - - def __subclasscheck__(self, cls: Any) -> None: - raise TypeError('NoReturn cannot be used with issubclass().') - - NoReturn = _NoReturn(_root=True) - class SelfAwareCallable: """ diff --git a/detect_secrets/util/importlib.py b/detect_secrets/util/importlib.py index f5e90005c..dfa9307a6 100644 --- a/detect_secrets/util/importlib.py +++ b/detect_secrets/util/importlib.py @@ -1,3 +1,4 @@ +import errno import importlib.util import os import pkgutil @@ -85,7 +86,8 @@ def import_file_as_module(filename: str, name: Optional[str] = None) -> ModuleTy for you. 
""" if not os.path.exists(filename): - raise FileNotFoundError + # Source: https://stackoverflow.com/a/36077407 + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filename) if not name: # NOTE: After several trial and error attempts, I could not discern the importance diff --git a/detect_secrets/util/path.py b/detect_secrets/util/path.py index 36eb15c64..acb744b81 100644 --- a/detect_secrets/util/path.py +++ b/detect_secrets/util/path.py @@ -16,3 +16,13 @@ def get_relative_path_if_in_cwd(path: str) -> Optional[str]: return filepath return None + + +def convert_local_os_path(path: str) -> str: + # Linux filesystem, replace \\ with / + if os.sep == '/': + path = path.replace('\\', '/') + return path + else: + path = path.replace('/', '\\') + return path diff --git a/docs/audit.md b/docs/audit.md index 5446ba1f6..42ed5f7a7 100644 --- a/docs/audit.md +++ b/docs/audit.md @@ -29,7 +29,7 @@ Secret Type: Secret Keyword 68: } 69: ], ---------- -Is this a secret that should be committed to this repository? (y)es, (n)o, (s)kip, (q)uit: +Should this string be committed to the repository? 
(y)es, (n)o, (s)kip, (q)uit: ``` There are two common cases for manual labelling: diff --git a/requirements-dev-minimal.txt b/requirements-dev-minimal.txt index e20833aaf..1d548da90 100644 --- a/requirements-dev-minimal.txt +++ b/requirements-dev-minimal.txt @@ -1,7 +1,5 @@ -# coveragepy==5.0 fails with `Safety level may not be changed inside a transaction -# on python 3.6.0 (xenial) -coverage<5 -flake8==3.5.0 +coverage +flake8==7.0.0 gibberish-detector>=0.1.1 monotonic mypy diff --git a/requirements-dev.txt b/requirements-dev.txt index 581d9d24a..91387b26d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,44 +1,43 @@ -attrs==21.4.0 -backports.entry-points-selectable==1.1.1 -certifi==2021.10.8 -cfgv==3.2.0 -charset-normalizer==2.0.7 -coverage==4.5.4 -distlib==0.3.6 -filelock==3.0.12 -flake8==3.5.0 +attrs==23.2.0 +backports.entry-points-selectable==1.3.0 +certifi==2024.7.4 +cfgv==3.4.0 +charset-normalizer==3.3.2 +coverage==7.5.3 +distlib==0.3.8 +filelock==3.14.0 +flake8==7.0.0 gibberish-detector==0.1.1 -identify==2.3.0 -idna==3.3 -importlib-metadata==4.8.1 -iniconfig==1.1.1 -mccabe==0.6.1 +identify==2.5.36 +idna==3.7 +iniconfig==2.0.0 +mccabe==0.7.0 monotonic==1.6 mypy==0.971 -mypy-extensions==0.4.3 -nodeenv==1.6.0 -packaging==21.3 -platformdirs==2.0.2 -pluggy==0.13.1 -pre-commit==2.17.0 +mypy-extensions==1.0.0 +nodeenv==1.9.1 +packaging==24.1 +platformdirs==4.2.2 +pluggy==1.5.0 +pre-commit==4.0.1 py==1.11.0 -pyahocorasick==1.4.4 -pycodestyle==2.3.1 -pyflakes==1.6.0 -pyparsing==2.4.7 -pytest==6.2.2 -PyYAML==6.0 -requests==2.26.0 -responses==0.16.0 +pyahocorasick==2.1.0 +pycodestyle==2.11.1 +pyflakes==3.2.0 +pyparsing==3.1.2 +pytest==7.4.3 +PyYAML==6.0.1 +requests==2.32.3 +responses==0.25.3 six==1.16.0 toml==0.10.2 -tox==3.24.4 +tox==4.15.0 tox-pip-extensions==1.6.0 -typed-ast==1.5.4 -types-PyYAML==6.0.11 -types-requests==2.28.9 -typing-extensions==3.10.0.2 -unidiff==0.7.4 -urllib3==1.26.9 -virtualenv==20.6.0 -zipp==3.6.0 +typed-ast==1.5.5 
+types-PyYAML==6.0.12.12 +types-requests==2.31.0.20240106 +typing-extensions==4.12.2 +unidiff==0.7.5 +urllib3==2.2.2 +virtualenv==20.26.3 +zipp==3.19.2 diff --git a/setup.cfg b/setup.cfg index 30a1b76ff..1dd1a4b39 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.4.0 +current_version = 1.5.0 commit = True tag = True diff --git a/setup.py b/setup.py index 0ba463850..3613810f8 100644 --- a/setup.py +++ b/setup.py @@ -24,9 +24,8 @@ def get_version(): description='Tool for detecting secrets in the codebase', long_description=long_description, long_description_content_type='text/markdown', - license='Copyright Yelp, Inc. 2020', - author='Aaron Loo', - author_email='aaronloo@yelp.com', + author='Yelp, Inc.', + author_email='opensource@yelp.com', url='https://github.com/Yelp/detect-secrets', download_url='https://github.com/Yelp/detect-secrets/archive/{}.tar.gz'.format(VERSION), keywords=['secret-management', 'pre-commit', 'security', 'entropy-checks'], diff --git a/tests/audit/analytics_test.py b/tests/audit/analytics_test.py index 3938ce2fd..a5a099040 100644 --- a/tests/audit/analytics_test.py +++ b/tests/audit/analytics_test.py @@ -66,9 +66,14 @@ def test_no_divide_by_zero(secret): main(['audit', f.name, '--stats', '--json']) -@pytest.mark.skip(reason='TODO') -def test_basic_statistics_str(): - pass +def test_basic_statistics_str(printer): + with labelled_secrets() as filename: + main(['audit', filename, '--stats']) + + assert printer.message == ( + 'Plugin: BasicAuthDetector\nStatistics: True Positives: 1, ' + + 'False Positives: 2, Unknown: 1, Precision: 0.3333, Recall: 0.5\n\n\n' + ) @contextmanager diff --git a/tests/audit/audit_test.py b/tests/audit/audit_test.py index 46099c961..5de9d0636 100644 --- a/tests/audit/audit_test.py +++ b/tests/audit/audit_test.py @@ -1,5 +1,6 @@ import json import random +from pathlib import Path from typing import List from typing import Optional from unittest import mock @@ -135,7 +136,7 @@ 
def test_ensure_file_transformers_are_used(printer): run_logic(secrets, 'y') assert not m.called - line_number = list(secrets['test_data/config.env'])[0].line_number + line_number = list(secrets[str(Path('test_data/config.env'))])[0].line_number assert lines[line_number - 1] in printer.message diff --git a/tests/audit/io_test.py b/tests/audit/io_test.py index a75b829a0..775e18b2d 100644 --- a/tests/audit/io_test.py +++ b/tests/audit/io_test.py @@ -12,7 +12,7 @@ 'allow_backstep': True, }, ( - 'Is this a secret that should be committed to this repository? ' + 'Should this string be committed to the repository? ' '(y)es, (n)o, (s)kip, (b)ack, (q)uit: ' ), ), diff --git a/tests/audit/report_test.py b/tests/audit/report_test.py index a83e3585d..6414ab9d6 100644 --- a/tests/audit/report_test.py +++ b/tests/audit/report_test.py @@ -206,18 +206,24 @@ def create_file_with_content(content): @pytest.fixture def baseline_file(): # Create our own SecretsCollection manually, so that we have fine-tuned control. 
- first_content = textwrap.dedent(f""" + first_content = textwrap.dedent( + f""" url = {url_format.format(first_secret)} example = {url_format.format(random_secret)} link = {url_format.format(first_secret)} - """)[1:] - second_content = textwrap.dedent(f""" + """, + )[1:] + second_content = textwrap.dedent( + f""" url = {url_format.format(second_secret)} example = {url_format.format(random_secret)} - """)[1:] - third_content = textwrap.dedent(f""" + """, + )[1:] + third_content = textwrap.dedent( + f""" aws_access_key = {aws_secret} - """)[1:] + """, + )[1:] with create_file_with_content(first_content) as first_file, \ create_file_with_content(second_content) as second_file, \ diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index c8e24a8f3..f968fce78 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -1,4 +1,5 @@ import json +import os import subprocess import tempfile from pathlib import Path @@ -84,16 +85,21 @@ def test_scan_all_files(): def test_load_and_output(): with open('.secrets.baseline') as f: - data = json.loads(f.read()) + filedata = f.read() - secrets = baseline.load(data, filename='.secrets.baseline') + if os.sep == '\\': + # Replace Linux path seperators for Windows ones + filedata = filedata.replace('/', '\\\\') + + filedata_json = json.loads(filedata) + secrets = baseline.load(filedata_json, filename='.secrets.baseline') output = baseline.format_for_output(secrets) - for item in [data, output]: + for item in [filedata_json, output]: item.pop('generated_at') # We perform string matching because we want to ensure stable sorts. - assert json.dumps(output) == json.dumps(data) + assert json.dumps(output) == json.dumps(filedata_json) # We need to make sure that default values carry through, for future backwards compatibility. 
for plugin in output['plugins_used']: @@ -102,6 +108,25 @@ def test_load_and_output(): break +def test_plugin_not_found_in_baseline(): + # Test fix for the issue in #718 + data = { + 'version': '1.4.0', + 'plugins_used': [{ + 'name': 'FakeCustomPlugin', + 'path': 'file://./path/to/plugin/that/does/not/exist/plugin.py', + }], + 'results': {}, + } + secrets = baseline.load(data) + with pytest.raises(FileNotFoundError) as exc_info: + baseline.format_for_output(secrets) + + # Check that filename of file that was not found is in the error message + # (#718) + exc_info.match(r'\./path/to/plugin/that/does/not/exist/plugin\.py') + + def test_upgrade_does_nothing_if_newer_version(): current_baseline = {'version': '3.0.0'} assert baseline.upgrade(current_baseline) == current_baseline diff --git a/tests/core/secrets_collection_test.py b/tests/core/secrets_collection_test.py index 1f1fa3db8..8e7b9a6b7 100644 --- a/tests/core/secrets_collection_test.py +++ b/tests/core/secrets_collection_test.py @@ -1,3 +1,4 @@ +from pathlib import Path from unittest import mock import pytest @@ -43,7 +44,8 @@ def test_error_reading_file(mock_log_warning): ): SecretsCollection().scan_file('test_data/config.env') - assert 'Unable to open file: test_data/config.env' in mock_log_warning.warning_messages + file_warning = str(Path('test_data/config.env')) + assert 'Unable to open file: %s' % file_warning in mock_log_warning.warning_messages @staticmethod def test_line_based_success(): @@ -64,9 +66,9 @@ def test_line_based_success(): secrets = SecretsCollection() secrets.scan_file('test_data/each_secret.py') - secret = next(iter(secrets['test_data/each_secret.py'])) + secret = next(iter(secrets[str(Path('test_data/each_secret.py'))])) assert secret.secret_value.startswith('c2VjcmV0IG1lc') - assert len(secrets['test_data/each_secret.py']) == 1 + assert len(secrets[str(Path('test_data/each_secret.py'))]) == 1 @staticmethod def test_file_based_success_config(): @@ -80,11 +82,11 @@ def 
test_file_based_success_config(): secrets.scan_file('test_data/config.ini') assert [str(secret).splitlines()[1] for _, secret in secrets] == [ - 'Location: test_data/config.ini:2', - 'Location: test_data/config.ini:10', - 'Location: test_data/config.ini:21', - 'Location: test_data/config.ini:22', - 'Location: test_data/config.ini:32', + 'Location: %s:2' % str(Path('test_data/config.ini')), + 'Location: %s:10' % str(Path('test_data/config.ini')), + 'Location: %s:21' % str(Path('test_data/config.ini')), + 'Location: %s:22' % str(Path('test_data/config.ini')), + 'Location: %s:32' % str(Path('test_data/config.ini')), ] @staticmethod @@ -99,9 +101,9 @@ def test_file_based_success_yaml(): secrets.scan_file('test_data/config.yaml') assert [str(secret).splitlines()[1] for _, secret in secrets] == [ - 'Location: test_data/config.yaml:3', - 'Location: test_data/config.yaml:5', - 'Location: test_data/config.yaml:13', + 'Location: %s:3' % str(Path('test_data/config.yaml')), + 'Location: %s:5' % str(Path('test_data/config.yaml')), + 'Location: %s:13' % str(Path('test_data/config.yaml')), ] @staticmethod @@ -217,12 +219,12 @@ def test_deleted_secret(): secrets.scan_file('test_data/each_secret.py') results = SecretsCollection.load_from_baseline({'results': secrets.json()}) - results.data['test_data/each_secret.py'].pop() + results.data[str(Path('test_data/each_secret.py'))].pop() - original_size = len(secrets['test_data/each_secret.py']) + original_size = len(secrets[str(Path('test_data/each_secret.py'))]) secrets.trim(results) - assert len(secrets['test_data/each_secret.py']) < original_size + assert len(secrets[str(Path('test_data/each_secret.py'))]) < original_size @staticmethod def test_deleted_secret_file(): @@ -232,7 +234,7 @@ def test_deleted_secret_file(): secrets.trim(SecretsCollection()) assert secrets - secrets.trim(SecretsCollection(), filelist=['test_data/each_secret.py']) + secrets.trim(SecretsCollection(), filelist=[str(Path('test_data/each_secret.py'))]) assert 
not secrets @staticmethod @@ -288,7 +290,7 @@ def test_remove_non_existent_files(): secrets.scan_file('test_data/each_secret.py') assert bool(secrets) - secrets.data['does-not-exist'] = secrets.data.pop('test_data/each_secret.py') + secrets.data['does-not-exist'] = secrets.data.pop(str(Path('test_data/each_secret.py'))) secrets.trim() assert not bool(secrets) @@ -316,7 +318,7 @@ def test_bool(): secrets.scan_file('test_data/each_secret.py') assert secrets - secrets['test_data/each_secret.py'].clear() + secrets[str(Path('test_data/each_secret.py'))].clear() assert not secrets @@ -373,8 +375,8 @@ def test_basic(configure_plugins): assert secrets != baseline result = secrets - baseline - assert len(result['test_data/each_secret.py']) == 3 - assert len(secrets['test_data/each_secret.py']) == 5 + assert len(result[str(Path('test_data/each_secret.py'))]) == 3 + assert len(secrets[str(Path('test_data/each_secret.py'))]) == 5 @staticmethod def test_no_overlapping_files(configure_plugins): @@ -384,5 +386,5 @@ def test_no_overlapping_files(configure_plugins): secrets_a.scan_file('test_data/each_secret.py') secrets_b.scan_file('test_data/config.env') - assert (secrets_a - secrets_b).files == {'test_data/each_secret.py'} - assert (secrets_b - secrets_a).files == {'test_data/config.env'} + assert (secrets_a - secrets_b).files == {str(Path('test_data/each_secret.py'))} + assert (secrets_b - secrets_a).files == {str(Path('test_data/config.env'))} diff --git a/tests/core/upgrades/upgrade_to_v1_0_test.py b/tests/core/upgrades/upgrade_to_v1_0_test.py index c93ab17aa..1c54bdf44 100644 --- a/tests/core/upgrades/upgrade_to_v1_0_test.py +++ b/tests/core/upgrades/upgrade_to_v1_0_test.py @@ -26,7 +26,7 @@ def test_custom_plugins_does_not_pollute_settings(): assert new_baseline['plugins_used'] == [ { 'name': 'HippoDetector', - 'path': f'file://{os.path.abspath("testing/plugins.py")}', + 'path': 'file://{0}'.format(os.path.abspath('testing/plugins.py')), }, ] with pytest.raises(TypeError): 
diff --git a/tests/core/usage/plugins_usage_test.py b/tests/core/usage/plugins_usage_test.py index f7bc21b81..73b5a25ac 100644 --- a/tests/core/usage/plugins_usage_test.py +++ b/tests/core/usage/plugins_usage_test.py @@ -158,7 +158,7 @@ def test_success(parser): parser.parse_args(['--baseline', f.name]) assert get_settings().plugins['HippoDetector'] == { - 'path': f'file://{os.path.abspath("testing/plugins.py")}', + 'path': 'file://{0}'.format(os.path.abspath('testing/plugins.py')), } assert plugins.initialize.from_plugin_classname('HippoDetector') diff --git a/tests/filters/heuristic_filter_test.py b/tests/filters/heuristic_filter_test.py index a2f5dbb2b..90e1eb0de 100644 --- a/tests/filters/heuristic_filter_test.py +++ b/tests/filters/heuristic_filter_test.py @@ -121,9 +121,16 @@ def test_is_templated_secret(line, result): assert bool(list(scan_line(line))) is result -def test_is_prefixed_with_dollar_sign(): - assert filters.heuristic.is_prefixed_with_dollar_sign('$secret') - assert not filters.heuristic.is_prefixed_with_dollar_sign('secret') +@pytest.mark.parametrize( + 'secret, result', + ( + ('$secret', True), + ('secret', False), + ('', False), + ), +) +def test_is_prefixed_with_dollar_sign(secret, result): + assert filters.heuristic.is_prefixed_with_dollar_sign(secret) == result @pytest.mark.parametrize( diff --git a/tests/plugins/aws_key_test.py b/tests/plugins/aws_key_test.py index 6174a857c..166269aab 100644 --- a/tests/plugins/aws_key_test.py +++ b/tests/plugins/aws_key_test.py @@ -32,6 +32,22 @@ def setup(self): 'AKIAZZZ', False, ), + ( + 'A3T0ZZZZZZZZZZZZZZZZ', + True, + ), + ( + 'ABIAZZZZZZZZZZZZZZZZ', + True, + ), + ( + 'ACCAZZZZZZZZZZZZZZZZ', + True, + ), + ( + 'ASIAZZZZZZZZZZZZZZZZ', + True, + ), ( 'aws_access_key = "{}"'.format(EXAMPLE_SECRET), True, @@ -102,8 +118,8 @@ def counter(*args, **kwargs): self.example_key, get_code_snippet( [ - f'false_secret = {"TEST" * 10}', - f'real_secret = {EXAMPLE_SECRET}', + 'false_secret = {0}'.format('TEST' * 
10), + 'real_secret = {0}'.format(EXAMPLE_SECRET), ], 1, ), diff --git a/tests/plugins/discord_test.py b/tests/plugins/discord_test.py index 56f4186f3..dde4996c1 100644 --- a/tests/plugins/discord_test.py +++ b/tests/plugins/discord_test.py @@ -22,17 +22,67 @@ class TestDiscordBotTokenDetector: 'MZ1yGvKTjE0rY0cV8i47CjAa.uRHQPq.Xb1Mk2nEhe-4iUcrGOuegj57zMC', True, ), - # Random values to fail + # From https://github.com/Yelp/detect-secrets/issues/627 + ( + 'OTUyNED5MDk2MTMxNzc2MkEz.YjESug.UNf-1GhsIG8zWT409q2C7Bh_zWQ', + True, + ), + ( + 'OTUyNED5MDk2MTMxNzc2MkEz.GSroKE.g2MTwve8OnUAAByz8KV_ZTV1Ipzg4o_NmQWUMs', + True, + ), + ( + 'MTAyOTQ4MTN5OTU5MTDwMEcxNg.GSwJyi.sbaw8msOR3Wi6vPUzeIWy_P0vJbB0UuRVjH8l8', + True, + ), + # Pass - token starts on the 3rd character (first segment is 24 characters) + ( + 'ATMyOTQ4MTN5OTU5MTDwMEcxNg.GSwJyi.sbaw8msOR3Wi6vPUzeIWy_P0vJbB0UuRVjH8l8', + True, + ), + # Pass - token starts on the 2nd character (first segment is 25 characters) + ( + '=MTAyOTQ4MTN5OTU5MTDwMEcxN.GSwJyi.sbaw8msOR3Wi6vPUzeIWy_P0vJbB0UuRVjH8l8', + True, + ), + # Pass - token ends before the '!' 
(last segment is 27 characters) + ( + 'MTAyOTQ4MTN5OTU5MTDwMEcxNg.YjESug.UNf-1GhsIG8zWT409q2C7Bh_zWQ!4o_NmQWUMs', + True, + ), + # Fail - all segments too short (23.5.26) ( 'MZ1yGvKTj0rY0cV8i47CjAa.uHQPq.Xb1Mk2nEhe-4icrGOuegj57zMC', False, ), + # Fail - first segment too short (23.6.27) + ( + 'MZ1yGvKTj0rY0cV8i47CjAa.uRHQPq.Xb1Mk2nEhe-4iUcrGOuegj57zMC', + False, + ), + # Fail - middle segment too short (24.5.27) + ( + 'MZ1yGvKTjE0rY0cV8i47CjAa.uHQPq.Xb1Mk2nEhe-4iUcrGOuegj57zMC', + False, + ), + # Fail - last segment too short (24.6.26) + ( + 'MZ1yGvKTjE0rY0cV8i47CjAa.uRHQPq.Xb1Mk2nEhe-4iUcrGOuegj57zM', + False, + ), + # Fail - contains invalid character ',' + ( + 'MZ1yGvKTjE0rY0cV8i47CjAa.uRHQPq.Xb1Mk2nEhe,4iUcrGOuegj57zMC', + False, + ), + # Fail - invalid first character 'P' (must be one of M/N/O) ( - 'SZ1yGvKTj0rY0cV8i47CjAa.uHQPq.Xb1Mk2nEhe-4icrGOuegj57zMC', + 'PZ1yGvKTjE0rY0cV8i47CjAa.uRHQPq.Xb1Mk2nEhe-4iUcrGOuegj57zMC', False, ), + # Fail - first segment 1 character too long; causes invalid first character 'T' ( - 'MZ1yGvKTj0rY0cV8i47CjAa.uHQPq.Xb1Mk2nEhe-4icrGOuegj57zM', + 'MTAyOTQ4MTN5OTU5MTDwMEcxNg0.GSwJyi.sbaw8msOR3Wi6vPUzeIWy_P0vJbB0UuRVjH8l8', False, ), ], diff --git a/tests/plugins/gitlab_token_test.py b/tests/plugins/gitlab_token_test.py new file mode 100644 index 000000000..e75085a99 --- /dev/null +++ b/tests/plugins/gitlab_token_test.py @@ -0,0 +1,138 @@ +import pytest + +from detect_secrets.plugins.gitlab_token import GitLabTokenDetector + + +class TestGitLabTokenDetector: + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ( + # valid PAT prefix and token length + 'glpat-hellOworld380_testin', + True, + ), + ( + # spaces are not part of the token + 'glpat-hellOWorld380 testin', + False, + ), + ( + # invalid separator (underscore VS dash) + 'glpat_hellOworld380_testin', + False, + ), + ( + # valid different prefix and token length + 'gldt-HwllOuhfw-wu0rlD_yep', + True, + ), + ( + # token < 20 chars should be too short + 
'gldt-seems_too000Sshorty', + False, + ), + ( + # invalid prefix, but valid token length + 'foo-hello-world80_testin', + False, + ), + ( + # token length may vary depending on the impl., but <= 50 chars should be fine + 'glsoat-PREfix_helloworld380_testin_pretty_long_token_long', + True, + ), + ( + # token > 50 chars is too long + 'glsoat-PREfix_helloworld380_testin_pretty_long_token_long_', + False, + ), + ( + # GitLab is not GitHub + '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', + False, + ), + ], + ) + def test_base_token_format(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('GR1348941PREfix_helloworld380', True), + ('GR1348941PREfix_helloworld380_testin_pretty_long_token_long', True), + ('GR1348941PREfix_helloworld380_testin_pretty_long_token_long_', False), # too long + ('GR1348941helloWord0', False), # too short + ], + ) + def test_runner_registration_token(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('glcbt-helloworld380_testin', True), + ], + ) + def test_cicd_token(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('glimt-my-tokens_are-correctAB38', True), + ('glimt-my-tokens_are-correctAB', False), # too short + ('glimt-my-tokens_are-correctAB38_280', False), # too long + ], + ) + def test_incoming_mail_token(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + 
@pytest.mark.parametrize( + 'payload, should_flag', + [ + ('glptt-Need5_T00-be-exactly-40-chars--ELse_fail', True), + ('glptt-Need5_T00-be-exactly-40-chars--ELse_failing', False), # too long + ('glptt-hellOworld380_testin', False), # too short + ], + ) + def test_trigger_token(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('glagent-Need5_T00-bee-longer-than-50_chars-or-else-failING', True), + ('glagent-Need5_T00-bee-longer-than-50_chars-or-else-failING-still_OK', True), + (('glagent-' + 'X' * 1025), False), # 2 long + ('glagent-hellOworld380_testin', False), # len 20 is too short + ], + ) + def test_agent_token(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('gloas-checking_Length-Is-_exactly_64--checking_Length-Is-_exactly_64--', True), + ('gloas-checking_Length-Is-checking_Length-Is-', False), # too short + ('gloas-checking_Length-Is-_exactly_64--Xchecking_Length-Is-_longer_longer', False), + ], + ) + def test_oauth_application_secret(self, payload, should_flag): + logic = GitLabTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) diff --git a/tests/plugins/ip_public_test.py b/tests/plugins/ip_public_test.py new file mode 100644 index 000000000..77c9da95b --- /dev/null +++ b/tests/plugins/ip_public_test.py @@ -0,0 +1,55 @@ +import pytest + +from detect_secrets.plugins.ip_public import IPPublicDetector + + +class TestIPPublicDetector: + + class TestIPv4: + """ + Testing strategy + + Cover the cartesian product of these partitions: + + 1. Partition on ip address format: + a. Valid ipv4 address + + 2. 
Partition on ip address type: + a. Public + b. Non-public + + And cover this case: + 1. Partition on ip address format: + a. Invalid ipv4 address + """ + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + # Valid IPv4 addresses, Public + ('133.133.133.133', True), + ('This line has an IP address 133.133.133.133@something else', True), + ('133.133.133.133:8080', True), + ('This line has an IP address: 133.133.133.133:8080@something else', True), + ('1.1.1.1', True), + # Valid IPv4 addresses, Non-public + ('127.0.0.1', False), + ('10.0.0.1', False), + ('172.16.0.1', False), + ('192.168.0.1', False), + ('169.254.169.254', False), + # Invalid IPv4 addresses + ('256.256.256.256', False), + ('1.2.3', False), + ('1.2.3.4.5.6', False), + ('1.2.3.4.5.6.7.8', False), + ('1.2.3.04', False), + ('noreply@github.com', False), + ('github.com', False), + ], + ) + def test_analyze_line(self, payload, should_flag): + logic = IPPublicDetector() + + output = logic.analyze_line(filename='mock_filename', line=payload) + assert len(output) == int(should_flag) diff --git a/tests/plugins/keyword_test.py b/tests/plugins/keyword_test.py index ec5cf4ce2..003d8dd38 100644 --- a/tests/plugins/keyword_test.py +++ b/tests/plugins/keyword_test.py @@ -83,6 +83,8 @@ ('password := "somefakekey"', None), # 'fake' in the secret ('some_key = "real_secret"', None), # We cannot make 'key' a Keyword, too noisy) ('private_key "hopenobodyfindsthisone\';', None), # Double-quote does not match single-quote) + ('password: real_key', None), + ('password: "real_key"', None), (LONG_LINE, None), # Long line test ] diff --git a/tests/plugins/openai_test.py b/tests/plugins/openai_test.py new file mode 100644 index 000000000..7acf868c2 --- /dev/null +++ b/tests/plugins/openai_test.py @@ -0,0 +1,23 @@ +import pytest + +from detect_secrets.plugins.openai import OpenAIDetector + + +class TestOpenAIDetector: + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + # pragma: allowlist nextline secret + 
('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', True), + # pragma: allowlist nextline secret + ('sk-proj-Xi8tdMjHV6pmbBbwilTeT3BlbkFJ3UDnpdEwNNm6wVBpYM0o', True), + # pragma: allowlist nextline secret + ('sk-proj-Xi8tdMjHV6pmbBbwilTeT4BlbkFJ3UDnpdEwNNm6wVBpYM0o', False), + ], + ) + def test_analyze(self, payload, should_flag): + logic = OpenAIDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + + assert len(output) == int(should_flag) diff --git a/tests/plugins/pypi_token_test.py b/tests/plugins/pypi_token_test.py new file mode 100644 index 000000000..5854b651c --- /dev/null +++ b/tests/plugins/pypi_token_test.py @@ -0,0 +1,29 @@ +import pytest + +from detect_secrets.plugins.pypi_token import PypiTokenDetector + + +class TestPypiTokenDetector: + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ( + # pragma: allowlist nextline secret + 'pypi-AgEIcHlwaS5vcmcCJDU3OTM1MjliLWIyYTYtNDEwOC05NzRkLTM0MjNiNmEwNWIzYgACF1sxLFsitesttestbWluaW1hbC1wcm9qZWN0Il1dAAIsWzIsWyJjYWY4OTAwZi0xNDMwLTRiYQstYmFmMi1mMDE3OGIyNWZhNTkiXV0AAAYgh2UINPjWBDwT0r3tQ1o5oZyswcjN0-IluP6z34SX3KM', True, # noqa: E501 + ), + ( + # pragma: allowlist nextline secret + 'pypi-AgENdGVzdC5weXBpLm9yZwIkN2YxOWZhOWEtY2FjYS00MGZhLTj2MGEtODFjMnE2MjdmMzY0AAIqWzMsImJlM2FiOWI5LTRmYUTnNEg4ZS04Mjk0LWFlY2Y2NWYzNGYzNyJdAAAGIMb5Hb8nVvhcAizcVVzA-bKKnwN7Pe0RmgPRCvrPwyJf', True, # noqa: E501 + ), + ( + # pragma: allowlist nextline secret + 'pypi-AgEIcHlwaS5vcmcCJDU3OTM1MjliLWIyYTYtNDEwOC05NzRkLTM0MjNiNmEwNWIzYgACF1sxLFsibWluaW1h', False, # noqa: E501 + ), + ], + ) + def test_analyze(self, payload, should_flag): + logic = PypiTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + + assert len(output) == int(should_flag) diff --git a/tests/plugins/telegram_token_test.py b/tests/plugins/telegram_token_test.py new file mode 100644 index 000000000..a1df2d900 --- /dev/null +++ b/tests/plugins/telegram_token_test.py @@ -0,0 +1,23 @@ +import pytest + 
+from detect_secrets.plugins.telegram_token import TelegramBotTokenDetector + + +class TestTelegramTokenDetector: + + @pytest.mark.parametrize( + 'payload, should_flag', + [ + ('bot110201543:AAHdqTcvCH1vGWJxfSe1ofSAs0K5PALDsaw', False), + ('110201543:AAHdqTcvCH1vGWJxfSe1ofSAs0K5PALDsaw', True), + ('7213808860:AAH1bjqpKKW3maRSPAxzIU-0v6xNuq2-NjM', True), + ('foo:AAH1bjqpKKW3maRSPAxzIU-0v6xNuq2-NjM', False), + ('foo', False), + ('arn:aws:sns:aaa:111122223333:aaaaaaaaaaaaaaaaaaassssssddddddddddddd', False), + ], + ) + def test_analyze(self, payload, should_flag): + logic = TelegramBotTokenDetector() + output = logic.analyze_line(filename='mock_filename', line=payload) + + assert len(output) == int(should_flag) diff --git a/tests/pre_commit_hook_test.py b/tests/pre_commit_hook_test.py index d2f9ee98d..84da3e335 100644 --- a/tests/pre_commit_hook_test.py +++ b/tests/pre_commit_hook_test.py @@ -3,6 +3,7 @@ import sys from contextlib import contextmanager from functools import partial +from pathlib import Path from typing import List from unittest import mock @@ -68,7 +69,7 @@ def test_baseline_filters_out_known_secrets(): ]) # Remove one arbitrary secret, so that it won't be the full set. 
- secrets.data['test_data/each_secret.py'].pop() + secrets.data[str(Path('test_data/each_secret.py'))].pop() with mock_named_temporary_file() as f: baseline.save_to_file(secrets, f.name) @@ -135,7 +136,7 @@ def test_success(self): def test_maintains_labelled_data(self): def label_secret(secrets): - list(secrets[self.FILENAME])[0].is_secret = True + list(secrets[str(Path(self.FILENAME))])[0].is_secret = True return baseline.format_for_output(secrets) with self.get_baseline_file(formatter=label_secret) as f: @@ -148,7 +149,7 @@ def label_secret(secrets): f.seek(0) data = json.loads(f.read()) - assert data['results'][self.FILENAME][0]['is_secret'] + assert data['results'][str(Path(self.FILENAME))][0]['is_secret'] def test_maintains_slim_mode(self): with self.get_baseline_file( diff --git a/tests/transformers/yaml_transformer_test.py b/tests/transformers/yaml_transformer_test.py index 9aa736ca2..3ecbd9d42 100644 --- a/tests/transformers/yaml_transformer_test.py +++ b/tests/transformers/yaml_transformer_test.py @@ -53,11 +53,13 @@ def test_multiline_block_scalar_folded_style(block_chomping): # However, "folded" style may be used to keep a certain line limit with very long secrets, # so we should probably handle that. 
file = mock_file_object( - textwrap.dedent(f""" + textwrap.dedent( + f""" multiline: |{block_chomping} # example this is a basic multiline string - """)[1:-1], + """, + )[1:-1], ) assert YAMLTransformer().parse_file(file) == [ @@ -121,6 +123,43 @@ def test_multi_line_flow_mapping(): 'keyD: "valueD"', ] + @staticmethod + def test_single_anchor_tag(): + file = mock_file_object( + textwrap.dedent(""" + keyA: &test + keyB: string # with comments + """)[1:-1], + ) + + assert YAMLTransformer().parse_file(file) == [ + '', + 'keyB: "string" # with comments', + ] + + @staticmethod + def test_anchor_tag_alias_combination(): + file = mock_file_object( + textwrap.dedent(""" + groupA: &groupA + keyA: valueA + keyB: valueB + + groupB: &groupB + keyC: valueC + keyD: *groupA + """)[1:-1], + ) + + assert YAMLTransformer().parse_file(file) == [ + '', + 'keyA: "valueA"', + 'keyB: "valueB"', + '', + '', + 'keyC: "valueC"', + ] + class TestYAMLFileParser: @staticmethod @@ -158,12 +197,14 @@ def test_basic(): def test_multi_line(block_scalar_style, block_chomping): # NOTE: Referenced https://yaml-multiline.info/ for the many ways to do multi line strings file = mock_file_object( - textwrap.dedent(f""" + textwrap.dedent( + f""" key: {block_scalar_style}{block_chomping} # comment multi - #line + # line string - """)[1:-1], + """, + )[1:-1], ) assert [item.line for item in YAMLFileParser(file)] == [ @@ -428,3 +469,80 @@ def test_inline_mapping_single_line_multikey_line_numbers(): '__original_key__': 'd', }, } + + @staticmethod + def test_single_anchor_tag(): + file = mock_file_object( + textwrap.dedent(""" + keyA: &test + keyB: string # with comments + keyC: + keyD: string + """)[1:-1], + ) + + assert YAMLFileParser(file).json() == { + 'keyA': { + 'keyB': { + '__value__': 'string', + '__line__': 2, + '__original_key__': 'keyB', + }, + 'keyC': { + 'keyD': { + '__value__': 'string', + '__line__': 4, + '__original_key__': 'keyD', + }, + }, + }, + } + + @staticmethod + def 
test_anchor_tag_alias_combination(): + file = mock_file_object( + textwrap.dedent(""" + groupA: &groupA + keyA: valueA + keyB: valueB + + groupB: &groupB + keyC: valueC + keyD: *groupA + """)[1:-1], + ) + + temp = YAMLFileParser(file).json() + assert temp == { + 'groupA': { + 'keyA': { + '__value__': 'valueA', + '__line__': 2, + '__original_key__': 'keyA', + }, + 'keyB': { + '__value__': 'valueB', + '__line__': 3, + '__original_key__': 'keyB', + }, + }, + 'groupB': { + 'keyC': { + '__value__': 'valueC', + '__line__': 6, + '__original_key__': 'keyC', + }, + 'keyD': { + 'keyA': { + '__value__': 'valueA', + '__line__': 2, + '__original_key__': 'keyA', + }, + 'keyB': { + '__value__': 'valueB', + '__line__': 3, + '__original_key__': 'keyB', + }, + }, + }, + } diff --git a/tox.ini b/tox.ini index 7c0e4a9bb..350300941 100644 --- a/tox.ini +++ b/tox.ini @@ -1,14 +1,13 @@ [tox] project = detect_secrets # These should match the ci python env list -envlist = py{36,37,38,39},mypy +envlist = py{39,310,311,312},mypy skip_missing_interpreters = true -tox_pip_extensions_ext_venv_update = true [testenv] passenv = SSH_AUTH_SOCK # NO_PROXY is needed to call requests API within a forked process -# when using macOS and python version 3.6/3.7 +# when using macOS and python version 3.7 setenv = NO_PROXY = '*' deps = -rrequirements-dev.txt