diff --git a/detect_secrets/plugins/email_address.py b/detect_secrets/plugins/email_address.py index 92f2393d..e752546a 100644 --- a/detect_secrets/plugins/email_address.py +++ b/detect_secrets/plugins/email_address.py @@ -1,27 +1,34 @@ import re + from .base import RegexBasedDetector -class EmailAddressDetector(RegexBasedDetector): - """Email Address Detector. - This class is designed to efficiently and accurately detect email addresses within given text. It primarily - validates the general format of email addresses, and does not adhere strictly to email format standards such as RFC 5322. +class EmailAddressDetector(RegexBasedDetector): + """ + A detector for identifying email addresses within text. It uses regular expressions to + focus on general email structures, not strictly adhering to standards like RFC 5322. + Designed for efficient and broad detection, it also has some limitations. - Key Features: - - Ignores common, non-security-threatening email addresses to enhance precision. + Features: + - Detects a wide range of email formats efficiently. + - Ignores common, non-critical emails to minimize false positives. Limitations: - - Despite robust detection mechanisms, the class is not infallible and may not cover all edge cases. - - It does not support some examples from RFC 6530, e.g., email addresses with Greek alphabets. + - May miss edge cases or unconventional email formats. + - Not compliant with advanced formats, e.g., RFC 6530 non-Latin emails. - References: + Regular Expression: + Utilizes a regex pattern focusing on typical email components: local part, domain, TLD. + Excludes predefined whitelist emails to reduce false positives. + + References: - https://en.wikipedia.org/wiki/Email_address - https://stackoverflow.com/a/14321045 """ secret_type = 'Email Address' - whitelist = ['noreply@github.com', 'git@github.com'] # Excluses whitelist email addresses from detection to reduce false positives. + whitelist = ['noreply@github.com', 'git@github.com'] base_pattern = r""" [\w+-]+ # Local part before the @ symbol @@ -32,21 +39,23 @@ class EmailAddressDetector(RegexBasedDetector): (?:\.[a-zA-Z]{2,4}) # TLD part """ # Pattern Breakdown: - # 1. [\w+-]+: Matches one or more of a-z, A-Z, _, +, - + # 1. [\w+-]+: Matches one or more of a-z, A-Z, _, +, - # Represents the local part of the email address before the @ symbol. # 2. (?:\.[\w+-]+)*: Matches zero or more of a-z, A-Z, _, +, -, but must start with a . (dot) # Allows for dot-separated words in the local part of the email address. # 3. @: Matches the @ symbol. - # 4. [\w+-]+: Matches one or more of a-z, A-Z, _, +, - + # 4. [\w+-]+: Matches one or more of a-z, A-Z, _, +, - # Represents the domain part of the email address after the @ symbol. # 5. (?:\.[\w+-]+)*: Matches zero or more of a-z, A-Z, _, +, -, but must start with a . (dot) # Allows for dot-separated words in the domain part of the email address. # 6. (?:\.[a-zA-Z]{2,4}): Matches 2 to 4 instances of a-z, A-Z, starting with a . (dot) # Represents the TLD (top-level domain) part of the email address. - deny_pattern = r"(?!" + "|".join(re.escape(email) for email in whitelist) + r"$)" + base_pattern + deny_pattern = r'(?!' \ + + '|'.join(re.escape(email) for email in whitelist) \ + + r'$)' + base_pattern # Combines the base pattern with a negative lookahead to exclude whitelist email addresses. denylist = [ - re.compile(r"\b" + deny_pattern + r"\b", flags=re.VERBOSE) + re.compile(r'\b' + deny_pattern + r'\b', flags=re.VERBOSE), ] diff --git a/tests/plugins/email_address_test.py b/tests/plugins/email_address_test.py index cb3fccb6..d33b3b4f 100644 --- a/tests/plugins/email_address_test.py +++ b/tests/plugins/email_address_test.py @@ -1,4 +1,5 @@ import pytest + from detect_secrets.plugins.email_address import EmailAddressDetector