Skip to content

Commit

Permalink
Fix python code style
Browse files Browse the repository at this point in the history
  • Loading branch information
perryzjc committed Nov 17, 2023
1 parent aed5d21 commit b663791
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 14 deletions.
37 changes: 23 additions & 14 deletions detect_secrets/plugins/email_address.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,34 @@
import re

from .base import RegexBasedDetector

class EmailAddressDetector(RegexBasedDetector):
"""Email Address Detector.

This class is designed to efficiently and accurately detect email addresses within given text. It primarily
validates the general format of email addresses, and does not adhere strictly to email format standards such as RFC 5322.
class EmailAddressDetector(RegexBasedDetector):
"""
A detector for identifying email addresses within text. It uses regular expressions to
focus on general email structures, not strictly adhering to standards like RFC 5322.
Designed for efficient and broad detection, it also has some limitations.
Key Features:
- Ignores common, non-security-threatening email addresses to enhance precision.
Features:
- Detects a wide range of email formats efficiently.
- Ignores common, non-critical emails to minimize false positives.
Limitations:
- Despite robust detection mechanisms, the class is not infallible and may not cover all edge cases.
- It does not support some examples from RFC 6530, e.g., email addresses with Greek alphabets.
- May miss edge cases or unconventional email formats.
- Not compliant with advanced formats, e.g., RFC 6530 non-Latin emails.
References:
Regular Expression:
Utilizes a regex pattern focusing on typical email components: local part, domain, TLD.
Excludes predefined whitelist emails to reduce false positives.
References:
- https://en.wikipedia.org/wiki/Email_address
- https://stackoverflow.com/a/14321045
"""
secret_type = 'Email Address'

whitelist = ['[email protected]', '[email protected]']
# Excludes whitelist email addresses from detection to reduce false positives.
whitelist = ['[email protected]', '[email protected]']

base_pattern = r"""
[\w+-]+ # Local part before the @ symbol
Expand All @@ -32,21 +39,23 @@ class EmailAddressDetector(RegexBasedDetector):
(?:\.[a-zA-Z]{2,4}) # TLD part
"""
# Pattern Breakdown:
# 1. [\w+-]+: Matches one or more word characters (letters, digits, _), +, or -
# 1. [\w+-]+: Matches one or more word characters (letters, digits, _), +, or -
# Represents the local part of the email address before the @ symbol.
# 2. (?:\.[\w+-]+)*: Matches zero or more groups, each a . (dot) followed by one or more word characters (letters, digits, _), +, or -
# Allows for dot-separated words in the local part of the email address.
# 3. @: Matches the @ symbol.
# 4. [\w+-]+: Matches one or more word characters (letters, digits, _), +, or -
# 4. [\w+-]+: Matches one or more word characters (letters, digits, _), +, or -
# Represents the domain part of the email address after the @ symbol.
# 5. (?:\.[\w+-]+)*: Matches zero or more groups, each a . (dot) followed by one or more word characters (letters, digits, _), +, or -
# Allows for dot-separated words in the domain part of the email address.
# 6. (?:\.[a-zA-Z]{2,4}): Matches 2 to 4 instances of a-z, A-Z, starting with a . (dot)
# Represents the TLD (top-level domain) part of the email address.

deny_pattern = r"(?!" + "|".join(re.escape(email) for email in whitelist) + r"$)" + base_pattern
deny_pattern = r'(?!' \
+ '|'.join(re.escape(email) for email in whitelist) \
+ r'$)' + base_pattern
# Combines the base pattern with a negative lookahead to exclude whitelist email addresses.

denylist = [
re.compile(r"\b" + deny_pattern + r"\b", flags=re.VERBOSE)
re.compile(r'\b' + deny_pattern + r'\b', flags=re.VERBOSE),
]
1 change: 1 addition & 0 deletions tests/plugins/email_address_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

from detect_secrets.plugins.email_address import EmailAddressDetector


Expand Down

0 comments on commit b663791

Please sign in to comment.