Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into jks
Browse files Browse the repository at this point in the history
  • Loading branch information
babenek committed Nov 1, 2023
2 parents 56c680e + 7a838cf commit 197b8b6
Show file tree
Hide file tree
Showing 32 changed files with 827 additions and 438 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ jobs:
contents: write
runs-on: ubuntu-latest
steps:
- name: Install hub tool
run: |
sudo apt-get update && sudo apt-get install -y hub
- name: Upload Assets
uses: samsung/supplychainassurance/.github/actions/[email protected]
env:
Expand Down
3 changes: 3 additions & 0 deletions .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,6 @@ ignore_missing_imports = True

[mypy-password_strength.*]
ignore_missing_imports = True

[mypy-docx.*]
ignore_missing_imports = True
8 changes: 4 additions & 4 deletions cicd/benchmark.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ Predefined Pattern 326 2 40
Private Key 1001 1 3
Seed, Salt, Nonce 40 4 4
TOTAL: 5307 63688 5644
Detected Credentials: 5993
credsweeper result_cnt : 5337, lost_cnt : 0, true_cnt : 4439, false_cnt : 898
Detected Credentials: 5997
credsweeper result_cnt : 5339, lost_cnt : 0, true_cnt : 4441, false_cnt : 898
Category TP FP TN FN FPR FNR ACC PRC RCL F1
-------------------------- ---- ---- -------- ---- --------- --------- -------- -------- -------- --------
Authentication Key & Token 54 4 28 16 0.125 0.228571 0.803922 0.931034 0.771429 0.84375
Generic Secret 973 3 215 83 0.0137615 0.0785985 0.932496 0.996926 0.921402 0.957677
Generic Token 287 7 596 46 0.0116086 0.138138 0.943376 0.97619 0.861862 0.91547
Generic Token 289 7 596 44 0.0116086 0.132132 0.945513 0.976351 0.867868 0.918919
Other 818 750 63395 258 0.0116923 0.239777 0.984545 0.521684 0.760223 0.618759
Password 995 130 4150 410 0.0303738 0.291815 0.905013 0.884444 0.708185 0.786561
Predefined Pattern 309 2 40 17 0.0476191 0.0521472 0.94837 0.993569 0.947853 0.970173
Private Key 967 0 4 34 0.033966 0.966169 1 0.966034 0.982724
Seed, Salt, Nonce 36 2 6 4 0.25 0.1 0.875 0.947368 0.9 0.923077
4439 898 19428253 868 4.622e-05 0.163558 0.999909 0.831741 0.836442 0.834085
4441 898 19428253 866 4.622e-05 0.163181 0.999909 0.831804 0.836819 0.834304
2 changes: 1 addition & 1 deletion credsweeper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
'__version__'
]

__version__ = "1.5.8"
__version__ = "1.5.9"
1 change: 1 addition & 0 deletions credsweeper/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def __init__(self, config: Dict[str, Any]) -> None:
self.exclude_patterns: List[re.Pattern] = [re.compile(pattern) for pattern in config["exclude"]["pattern"]]
self.exclude_paths: List[str] = config["exclude"]["path"]
self.exclude_containers: List[str] = config["exclude"]["containers"]
self.exclude_documents: List[str] = config["exclude"]["documents"]
self.exclude_extensions: List[str] = config["exclude"]["extension"]
self.exclude_lines: Set[str] = set(config["exclude"].get("lines", []))
self.exclude_values: Set[str] = set(config["exclude"].get("values", []))
Expand Down
3 changes: 3 additions & 0 deletions credsweeper/deep_scanner/deep_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from credsweeper.utils import Util
from .byte_scanner import ByteScanner
from .bzip2_scanner import Bzip2Scanner
from .docx_scanner import DocxScanner
from .encoder_scanner import EncoderScanner
from .gzip_scanner import GzipScanner
from .html_scanner import HtmlScanner
Expand All @@ -36,6 +37,7 @@
class DeepScanner(
ByteScanner, #
Bzip2Scanner, #
DocxScanner, #
EncoderScanner, #
GzipScanner, #
HtmlScanner, #
Expand Down Expand Up @@ -75,6 +77,7 @@ def get_deep_scanners(data: bytes) -> List[Any]:
deep_scanners.append(ZipScanner)
# probably, there might be a docx, xlsx and so on.
# It might be scanned with text representation in third-party libraries.
deep_scanners.append(DocxScanner)
elif Util.is_bzip2(data):
deep_scanners.append(Bzip2Scanner)
elif Util.is_tar(data):
Expand Down
43 changes: 43 additions & 0 deletions credsweeper/deep_scanner/docx_scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import io
import logging
from abc import ABC
from typing import List

import docx

from credsweeper.credentials import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider

logger = logging.getLogger(__name__)


class DocxScanner(AbstractScanner, ABC):
    """Deep scanner that extracts paragraph text from DOCX data and scans it"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> List[Candidate]:
        """Parse the provider's bytes as a DOCX document and scan its text lines.

        Paragraph texts are split into lines, empty lines are dropped, and the
        remaining lines are passed to ``self.scanner`` through a
        ``StringContentProvider`` whose info is suffixed with ``|DOCX``.

        Args:
            data_provider: provider holding the raw bytes plus file path, type and info
            depth: not used by this scanner
            recursive_limit_size: not used by this scanner

        Return:
            Candidates found by the scanner, or an empty list when parsing or
            scanning raised an exception (the exception is logged at debug level).

        """
        try:
            document = docx.Document(io.BytesIO(data_provider.data))
            # flatten all paragraphs into individual non-empty text lines
            text_lines: List[str] = [
                text_line  #
                for paragraph in document.paragraphs  #
                for text_line in paragraph.text.splitlines()  #
                if text_line
            ]
            text_provider = StringContentProvider(lines=text_lines,
                                                  file_path=data_provider.file_path,
                                                  file_type=data_provider.file_type,
                                                  info=f"{data_provider.info}|DOCX")
            return self.scanner.scan(text_provider)
        except Exception as docx_exc:
            # best effort: data that is not a readable DOCX simply yields no candidates
            logger.debug(f"{data_provider.file_path}:{docx_exc}")
        return []
3 changes: 3 additions & 0 deletions credsweeper/file_handler/file_path_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ def check_exclude_file(config: Config, path: str) -> bool:
return True
if not config.depth and file_extension in config.exclude_containers:
return True
# --depth or --doc enables scanning of all document extensions
if not (config.depth or config.doc) and file_extension in config.exclude_documents:
return True
return False

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/filters/value_allowlist_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class ValueAllowlistCheck(Filter):
"""Check that patterns from the list are not present in the candidate value."""

ALLOWED = [
r"ENC\(.*\)", r"ENC\[.*\]", r"\$\{.*\}", r"#\{.*\}", r"\{\{.+\}\}", r"([.a-z0-9]|->)+\(.*\)", r"\*\*\*\*\*"
r"ENC\(.*\)", r"ENC\[.*\]", r"\$\{.*\}", r"#\{.*\}", r"\{\{.+\}\}", r"([.a-z0-9]|->)+\(.*\)", r"\S{0,5}\*{5,}"
]
ALLOWED_PATTERN = re.compile( #
Util.get_regex_combine_or(ALLOWED), #
Expand Down
30 changes: 6 additions & 24 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- (?P<variable>[`'\"]?(?i:token|secret|key|키|암호|암호화|토큰)[`'\"]?)((\s)*[=:](\s)*)(?P<quote>[`'\"(])?(?P<value>\S{4,})(?(quote)[)`'\"])
filter_type:
- ValueAllowlistCheck
- ValuePatternCheck
min_line_len: 10
required_substrings:
- token
Expand All @@ -26,6 +27,7 @@
- (?P<variable>[`'\"]?(?i:(?<!id[ :/])pa[as]swo?r?ds?|pwd?|p/w|비밀번호|비번|패스워드|암호)[`'\"]?)((\s)*[=:](\s)*)(?P<quote>[`'\"(])?(?P<value>\S{4,})(?(quote)[)`'\"])
filter_type:
- ValueAllowlistCheck
- ValuePatternCheck
min_line_len: 10
required_substrings:
- pass
Expand All @@ -43,9 +45,10 @@
severity: medium
type: pattern
values:
- (^|(?P<variable>(?i:\bip[\s/]+id[\s/]+pw[\s/:]*))|(?P<url>://)|\s)(?P<ip>[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})((?P<lpar>\s*\()?\s*|(?(variable)[\s,/]+|\s*(?(url)[,]|[,/])\s*))[\w.-]{3,}[\s,/]+(?P<value>(?(lpar)[^)\s/]{4,}|(?(url)[^\s/]{4,}|[^\s]{4,})))
- (^|(?P<variable>(?i:\bip[\s/]+id[\s/]+pw[\s/:]*))|(?P<url>://)|\s)(?P<ip>[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})((?P<lpar>\s*(\w+\s+)?\()?\s*|(?(variable)[\s,/]+|\s*(?(url)[,]|[,/]))\s*)[\w.-]{3,}[\s,/]+(?P<value>(?(lpar)[^)\s]{4,}|[^\s/]{4,}))(?:\s|[^/]|$)
filter_type:
- ValueAllowlistCheck
- ValuePatternCheck
min_line_len: 10
required_substrings:
- "."
Expand All @@ -59,6 +62,7 @@
- (?P<ddash>--)?(?P<variable>(?i:user\s*)?(?i:id|login|account|root|admin|user|name|wifi|role|host|default|계정|아이디))\s*?(?(ddash)[ =]|[ :=])\s*?(?P<value>\S+)
filter_type:
- ValueAllowlistCheck
- ValuePatternCheck
min_line_len: 10
required_substrings:
- pass
Expand All @@ -79,6 +83,7 @@
filter_type:
- ValueAllowlistCheck
- ValueDictionaryKeywordCheck
- ValuePatternCheck
min_line_len: 10
required_substrings:
- pw
Expand Down Expand Up @@ -194,7 +199,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>(ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: true
required_substrings:
- A
min_line_len: 20
Expand All @@ -207,7 +211,6 @@
- (^|[^.0-9A-Za-z_/+-])(?P<value>(AKIA|ASIA)[0-9A-Z]{16,17})([^=0-9A-Za-z_/+-]|$)
- (?P<value>[0-9a-zA-Z/+]{40})
filter_type: GeneralPattern
use_ml: true
required_substrings:
- AKIA
- ASIA
Expand All @@ -219,7 +222,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>amzn\.mws\.[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: true
required_substrings:
- amzn
min_line_len: 30
Expand All @@ -242,7 +244,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>dt0[a-zA-Z]{1}[0-9]{2}\.[A-Z0-9]{24}\.[A-Z0-9]{64})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: true
required_substrings:
- dt0
min_line_len: 90
Expand All @@ -253,7 +254,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>EAAC[0-9A-Za-z]{27,})
filter_type: GeneralPattern
use_ml: true
required_substrings:
- EAAC
min_line_len: 31
Expand All @@ -277,7 +277,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>AIza[0-9A-Za-z_-]{35})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: false
validations:
- GoogleApiKeyValidation
required_substrings:
Expand All @@ -291,7 +290,6 @@
- (?P<value>[0-9]+\-[0-9A-Za-z_]{32}\.apps\.googleusercontent\.com)
- (?<![0-9a-zA-Z_-])(?P<value>[0-9a-zA-Z_-]{24})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: false
validations:
- GoogleMultiValidation
required_substrings:
Expand All @@ -304,7 +302,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>ya29\.[0-9A-Za-z_-]{22,})
filter_type: GeneralPattern
use_ml: true
required_substrings:
- ya29.
min_line_len: 27
Expand All @@ -315,7 +312,6 @@
values:
- (?i)(?P<value>heroku(.{0,20})?[0-9a-f]{8}(-[0-9a-f]{4})+-[0-9a-f]{12})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: true
required_substrings:
- heroku
min_line_len: 24
Expand All @@ -326,7 +322,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>IGQVJ[\w]{100,})
filter_type: GeneralPattern
use_ml: true
required_substrings:
- IGQVJ
min_line_len: 105
Expand All @@ -348,7 +343,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>[0-9a-zA-Z]{32}-us[0-9]{1,2})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: false
validations:
- MailChimpKeyValidation
required_substrings:
Expand All @@ -361,7 +355,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>key-[0-9a-zA-Z]{32})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: true
required_substrings:
- key-
min_line_len: 36
Expand All @@ -385,7 +378,6 @@
values:
- (?P<value>access_token\$production\$[0-9a-z]{16}\$[0-9a-z]{32})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: false
required_substrings:
- access_token$production$
min_line_len: 72
Expand All @@ -405,7 +397,6 @@
values:
- (?P<value>sk_live_[0-9a-z]{32})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: false
required_substrings:
- sk_live_
min_line_len: 40
Expand All @@ -428,7 +419,6 @@
values:
- (?P<value>SG\.[\w_]{16,32}\.[\w_]{16,64})
filter_type: GeneralPattern
use_ml: false
required_substrings:
- SG.
min_line_len: 34
Expand All @@ -449,7 +439,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>xox[a|b|p|r|o|s]\-[-a-zA-Z0-9]{10,250})
filter_type: GeneralPattern
use_ml: true
validations:
- SlackTokenValidation
required_substrings:
Expand All @@ -462,7 +451,6 @@
values:
- (?P<value>hooks\.slack\.com/services/T\w{8}/B\w{8}/\w{24})
filter_type: GeneralPattern
use_ml: true
required_substrings:
- hooks.slack.com/services/T
min_line_len: 61
Expand All @@ -473,7 +461,6 @@
values:
- (?P<value>sk_live_[0-9a-zA-Z]{24})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: true
validations:
- StripeApiKeyValidation
required_substrings:
Expand All @@ -486,7 +473,6 @@
values:
- (?P<value>rk_live_[0-9a-zA-Z]{24})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: true
required_substrings:
- rk_live_
min_line_len: 32
Expand All @@ -497,7 +483,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>EAAA[0-9A-Za-z_-]{60})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: true
validations:
- SquareAccessTokenValidation
required_substrings:
Expand All @@ -510,7 +495,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>sq0[a-z]{3}-[0-9A-Za-z_-]{22})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: true
validations:
- SquareClientIdValidation
required_substrings:
Expand All @@ -523,7 +507,6 @@
values:
- (?P<value>sq0csp-[0-9A-Za-z_-]{43})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: false
required_substrings:
- sq0csp
min_line_len: 50
Expand All @@ -546,7 +529,6 @@
values:
- (^|[^.0-9A-Za-z_/+-])(?P<value>SK[0-9a-fA-F]{32})([^=0-9A-Za-z_/+-]|$)
filter_type: GeneralPattern
use_ml: true
required_substrings:
- SK
min_line_len: 34
Expand Down
7 changes: 5 additions & 2 deletions credsweeper/secret/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
"containers": [
".apk",
".bz2",
".docx",
".gz",
".pdf",
".tar",
".xlsx",
".zip"
],
"documents": [
".docx",
".pdf"
],
"extension": [
".7z",
".aac",
Expand Down Expand Up @@ -71,6 +73,7 @@
"/__pycache__/",
"/node_modules/",
"/target/",
"/.venv/",
"/venv/"
],
"lines": [],
Expand Down
Loading

0 comments on commit 197b8b6

Please sign in to comment.