diff --git a/.detect-secrets/.secrets.baseline b/.detect-secrets/.secrets.baseline new file mode 100644 index 0000000..bb81938 --- /dev/null +++ b/.detect-secrets/.secrets.baseline @@ -0,0 +1,112 @@ +{ + "version": "1.4.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": 
class AbsolutePathDetector(RegexBasedDetector):
    """Scans for absolute file paths.

    A line is reported when it starts with a POSIX root (``/``) or an
    upper-case Windows drive prefix (such as ``C:``), unless the text right
    after that prefix begins with one of the entries in ``skip_list``.
    Paths that appear in the middle of a line are not reported because the
    pattern is anchored with ``^``.
    """
    secret_type = 'Absolute File Path'

    # Path prefixes to ignore, written WITHOUT the leading ``/`` or drive
    # prefix: the skip check runs right after that prefix has been consumed.
    skip_list = [
        'usr/bin/python',
        # Add more paths to skip as needed
    ]

    # Alternation of the escaped skip entries. The groups are deliberately
    # non-capturing: detect-secrets collects matches with ``findall``, which
    # returns group contents instead of the whole match as soon as the
    # pattern has a capturing group. A capturing group inside a negative
    # lookahead never participates in a match, so every hit would have been
    # reported as an empty string.
    skip_pattern = '|'.join(f'(?:{re.escape(path)})' for path in skip_list)

    # With an empty skip list the lookahead would be ``(?!)``, which can never
    # succeed and would silently disable the detector, so leave it out.
    _skip_lookahead = f'(?!{skip_pattern})' if skip_pattern else ''

    denylist = [
        re.compile(rf'^(?:[A-Z]:|/){_skip_lookahead}[\S\s]+')
    ]
+ + Reference: https://docs.aws.amazon.com/accounts/latest/reference/manage-acct-identifiers.html#:~:text=each%20AWS%20account%3A-,AWS%20account%20ID,Amazon%20Resource%20Names%20(ARNs). + """ + self.denylist.append(re.compile(r'\d{12}')) + self.denylist.append(re.compile(r'aws_account_id')) + + def _add_deny_aws_arn(self): + """Add AWS ARN pattern to denylist. + For more details, check the reference. + + Pattern Strategy: + 1. Find the string that follows this pattern: `arn:partition:service:region:account-id` + - partition: aws | aws-cn | aws-us-gov + - service: skiped + - region: skiped + - account-id: 12 digits, for example, 123456789012. + + So, the pattern is like: after 1st `:`, it is the partition, and after the 4th `:`, it is the account-id. + + Reference: https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html + """ + arn_pattern = r'arn:(aws|aws-cn|aws-us-gov):[^:]*:[^:]*:\d{12}(?::[^:\s]+)*' + self.denylist.append(re.compile(arn_pattern)) + + def _add_deny_sg_id(self): + """Add AWS security group id pattern to denylist. + AWS security group id is a string starts with `sg-` and followed by 8 or 17 characters. + For example, sg-12345678 + + Pattern Strategy: + 1. Only find the string that starts with `sg-` and followed by more than 8 characters. + + Reference: https://docs.aws.amazon.com/managedservices/latest/userguide/find-SGs.html + """ + self.denylist.append(re.compile(r'sg-\w{8,}')) + + def _add_deny_vpc_id(self): + """Add AWS VPC id pattern to denylist. + AWS VPC id is a string starts with `vpc-` and followed by 8 or 17 characters. + For example, vpc-12345678 + + Pattern Strategy: + 1. Only find the string that starts with `vpc-` and followed by >= 8 characters. + + Reference: https://docs.aws.amazon.com/vpc/latest/userguide/create-vpc.html + """ + self.denylist.append(re.compile(r'vpc-\w{8,}')) + + def _add_deny_subnet_id(self): + """Add AWS subnet id pattern to denylist. 
+ AWS subnet id is a string starts with `subnet-` and followed by 8 or 17 characters. + For example, subnet-12345678 + + Pattern Strategy: + 1. Only find the string that starts with `subnet-` and followed by >= 8 characters. + + Reference: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ec2-subnet.html + """ + self.denylist.append(re.compile(r'subnet-\w{8,}')) + + def _add_deny_bucket_name(self): + """Add AWS bucket name pattern to denylist. + Note: this function is not implemented yet due to the complexity of the pattern. + Check the reference for more details. + + Reference: https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html + """ + pass + + def _add_deny_aws_hostname(self): + """Add AWS hostname pattern to denylist. + + Example IP name: + e.g. 1: ip-10-24-34-0.ec2.internal + e.g. 2: ip-10-24-34-0.us-west-2.compute.internal + + Example Resource name: + e.g. 1: i-0123456789abcdef.ec2.internal + e.g. 2: i-0123456789abcdef.us-west-2.compute.internal + + Pattern Strategy: + 1. For IP name, check the content of this format: `ip-ip_digit-ip_digit-ip_digit-ip_digit` + 2. For Resource name, check the content of this format: `i-16character` + + The denylist is a list of regular expressions that will be used to match and deny certain patterns. 
class EmailAddressDetector(RegexBasedDetector):
    """Scans for email addresses.

    Addresses whose text starts with one of the entries in ``skip_list``
    (for example ``git@github.com`` for the ``git@`` entry) are ignored.
    """
    secret_type = 'Email Address'

    # Address prefixes that must not be reported.
    skip_list = [
        'git@',
        # Add more address prefixes to skip as needed
    ]

    # Alternation of the escaped skip entries. The groups are non-capturing
    # because detect-secrets collects matches with ``findall``, which returns
    # group contents instead of the whole match when the pattern has a
    # capturing group.
    skip_pattern = '|'.join(f'(?:{re.escape(email)})' for email in skip_list)

    # With an empty skip list the lookahead would be ``(?!)``, which can never
    # succeed and would silently disable the detector, so leave it out.
    _skip_lookahead = f'(?!{skip_pattern})' if skip_pattern else ''

    denylist = [
        re.compile(
            # The lookahead sits at the START of the address so that entries
            # such as ``git@`` are compared against the local part; placed
            # after the ``@`` it could never apply.
            # The quantifier braces are doubled because this is an f-string:
            # a bare ``{2,}`` is interpolated as the tuple ``(2,)`` and the
            # resulting pattern no longer matches real addresses.
            # The TLD class is ``[A-Za-z]``; ``[A-Z|a-z]`` also admitted a
            # literal ``|``.
            rf'\b{_skip_lookahead}[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{{2,}}\b'
        ),
    ]
re.compile(r'\b(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\b') + # reference: https://ihateregex.io/expr/ipv6/ + ] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ec2f16d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,58 @@ +repos: +- repo: local + hooks: + - id: download-detect-secrets-customized-setup-folder + name: Download detect-secrets customized setup folder if not exist + # The reason for using Python is for compatibility, as some Windows devices may not support Bash. 
+ # This step does not work on pre-commit.ci because of its network restrictions, + # so when running in the cloud (GitHub pre-commit.ci) make sure the .detect-secrets folder already exists in the repository + language: python + entry: | + python -c ' + import os + import shutil + import subprocess + import sys + from pathlib import Path + + # Repository URL for the custom detect-secrets plugins + REPO_URL = "https://github.com/NASA-AMMOS/slim-config-detect-secrets.git" + + # Define folder paths + detect_secrets_folder = Path(".detect-secrets") + temp_repo_folder = Path("temp_repo") + + # Clone the repository if the .detect-secrets folder does not exist + if not detect_secrets_folder.exists(): + subprocess.run(["git", "clone", "--depth", "1", REPO_URL, str(temp_repo_folder)], check=True) + shutil.copytree(temp_repo_folder / ".detect-secrets", detect_secrets_folder) + shutil.rmtree(temp_repo_folder) + + sys.exit(0)' + # Match no files, so the hook will run only once + files: ^$ + # Ensure the hook runs even when no files are staged + always_run: true + +- repo: https://github.com/Yelp/detect-secrets + rev: v1.4.0 + hooks: + - id: detect-secrets + # Specify the arguments for the detect-secrets hook + args: + - '--baseline' + - '.detect-secrets/.secrets.baseline' + - '--exclude-files' + - '^\.git(/.*)?$' + - '--exclude-files' + - '^\.detect-secrets(/.*)?$' + - '-p' + - .detect-secrets/plugins/absolute_filepath.py + - '-p' + - .detect-secrets/plugins/aws_sensitive_info.py + - '-p' + - .detect-secrets/plugins/email_address.py + - '-p' + - .detect-secrets/plugins/ip_address.py + # Specify the files that the hook should run on + files: . diff --git a/README.md b/README.md index 21ce3ae..3e6f1bd 100644 --- a/README.md +++ b/README.md @@ -1 +1,55 @@ -# slim-config-detect-secrets +# Starter Kit: Additional configuration for detect-secrets +This repository contains +1. 
`.pre-commit-config.yaml` for [pre-commit](https://pre-commit.com/) to set up the pre-commit hook for the local environment and the [pre-commit CI](https://pre-commit.ci/) for the remote repository. +2. The default baseline file `.secrets.baseline` for the open-source tool [detect-secrets](https://github.com/Yelp/detect-secrets), used to compare against the current codebase to detect any new secrets. +3. Additional customized plugins to provide additional secret types for the tool to detect. These plugins meet the needs of the [SLIM](https://github.com/NASA-AMMOS/slim) community based on the issue ticket: [Automated checking for general sensitive information within Git](https://github.com/NASA-AMMOS/slim/issues/89). +Please see categories and links below for more details. + +```text +├── .pre-commit-config.yaml +├── .detect-secrets + ├── .secrets.baseline + ├── plugins + │ ├── absolute_filepath.py + │ ├── aws_sensitive_info.py + │ ├── email_address.py + │ └── ip_address.py +``` +> Note: We are in the process of contributing these general plugins back to the `detect-secrets` community. +> Once they are accepted, we will remove them from this repository and use the official plugins instead. +## Plugins +* [AWS Sensitive Information detection](#plugin-1---aws-sensitive-information-detection) +* [IP Address detection](#plugin-2---ip-address-detection) +* [Email Address detection](#plugin-3---email-address-detection) +* [Absolute Path detection](#plugin-4---absolute-path-detection) + +## Plugin 1 - AWS Sensitive Information detection + +This plugin is designed to detect AWS sensitive information mentioned in this [discussion](https://github.com/NASA-AMMOS/slim/issues/89#issuecomment-1433567397). Below is the list of secret types that this plugin can detect: + +### Types of Secret +1. 
[AWS account id](https://docs.aws.amazon.com/accounts/latest/reference/manage-acct-identifiers.html#:~:text=each%20AWS%20account%3A-,AWS%20account%20ID,Amazon%20Resource%20Names%20(ARNs).) + - A 12-digit number, such as 123456789012 +2. [AWS ARN](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html) +3. [AWS security group id](https://docs.aws.amazon.com/managedservices/latest/userguide/find-SGs.html) + - `sg-` followed by 8 or 17 hexadecimal characters + - For example, `sg-02ce123456e7893c7` +4. [AWS VPC id](https://docs.aws.amazon.com/vpc/latest/userguide/create-vpc.html) + - `vpc-` followed by [8](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ec2-subnet.html#:~:text=VPC%2C%20such%20as-,vpc%2D11ad4878,-.) or 17 hexadecimal characters + - For example, `vpc-1a2b3c4d5e6f1a2b3` +5. [AWS subnet id](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ec2-subnet.html) + - `subnet-` followed by 8 or 17 hexadecimal characters +6. [AWS bucket name](https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html) (detection is not implemented yet) +7. [AWS hostname](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-naming.html) + + +Check the implementation [here](.detect-secrets/plugins/aws_sensitive_info.py) to learn more about the regular expressions used in this plugin. + +## Plugin 2 - IP Address detection +This plugin is designed to detect the IP addresses mentioned in this [discussion](https://github.com/NASA-AMMOS/slim/issues/89#issuecomment-1433567397). + +## Plugin 3 - Email Address detection +This plugin is designed to detect email addresses, with a few exceptions for common addresses such as `git@`. + +## Plugin 4 - Absolute Path detection +This plugin is designed to detect absolute file paths.