Skip to content

Commit

Permalink
Salesforce Credentials (#647)
Browse files (browse the repository at this point in the history)
* salesforce

* benchmark

* skip json check for last symbol 0x0A

* style

* rollbackBM
  • Loading branch information
babenek authored Dec 27, 2024
1 parent d517536 commit f5b12b7
Show file tree
Hide file tree
Showing 16 changed files with 160 additions and 39 deletions.
17 changes: 9 additions & 8 deletions .ci/benchmark.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
META MD5 b33b22ce3adc2141bcf91e4cdd6f1cab
DATA MD5 9ac09dae7d8873d53e1fbf18da2d71c4
DATA: 16329853 interested lines. MARKUP: 59549 items
META MD5 984f912263c0c337a1672296aa759cbc
DATA MD5 6db3f0cb94aad9db85077fb00a1ae6bf
DATA: 16329853 interested lines. MARKUP: 59550 items
FileType FileNumber ValidLines Positives Negatives Templates
--------------- ------------ ------------ ----------- ----------- -----------
193 28288 69 415 90
Expand Down Expand Up @@ -86,7 +86,7 @@ FileType FileNumber ValidLines Positives Negatives Templat
.jenkinsfile 1 58 2 6
.jinja2 1 64 2
.js 653 532652 512 2450 331
.json 843 13045846 1074 10011 139
.json 843 13045846 1076 10012 139
.jsp 13 3202 1 37
.jsx 7 857 19
.jwt 1 1 2
Expand Down Expand Up @@ -222,8 +222,8 @@ FileType FileNumber ValidLines Positives Negatives Templat
.yml 418 36057 522 910 376
.zsh 6 872 12
.zsh-theme 1 97 1
TOTAL: 10003 16329853 11856 46611 5084
credsweeper result_cnt : 11623, lost_cnt : 0, true_cnt : 11391, false_cnt : 232
TOTAL: 10003 16329853 11858 46612 5084
credsweeper result_cnt : 11626, lost_cnt : 0, true_cnt : 11393, false_cnt : 233
Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1
------------------------------ ----------- ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- --------
API 130 3166 188 126 125 1 3353 5 0.000298 0.038462 0.998278 0.992063 0.961538 0.976562
Expand All @@ -236,7 +236,7 @@ Azure Access Token 19 0 0 1
BASE64 Private Key 12 4 0 12 12 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333
Bitbucket Client ID 19 53 0 75 19 53 0 0 1.000000 0.000000 0.263889 0.263889 1.000000 0.417582
Bitbucket Client Secret 28 66 1 98 28 67 0 0 1.000000 0.000000 0.294737 0.294737 1.000000 0.455285
Bitbucket Client Secret 28 67 1 99 28 68 0 0 1.000000 0.000000 0.291667 0.291667 1.000000 0.451613
CMD ConvertTo-SecureString 13 4 0 13 13 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
CMD Password 21 128 6 20 20 0 134 1 0.000000 0.047619 0.993548 1.000000 0.952381 0.975610
CMD Secret 1 1 0 1 1 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
Expand All @@ -262,6 +262,7 @@ Nonce 93 49 0 9
Other 9 7447 5 0 0 7452 9 0.000000 1.000000 0.998794 0.000000
PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041
Password 1869 7536 2680 1795 1782 13 10203 87 0.001273 0.046549 0.991725 0.992758 0.953451 0.972707
Salesforce Credentials 2 0 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
Salt 47 76 1 45 45 0 77 2 0.000000 0.042553 0.983871 1.000000 0.957447 0.978261
Secret 1297 1576 802 1292 1288 4 2374 9 0.001682 0.006939 0.996463 0.996904 0.993061 0.994979
Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000
Expand All @@ -272,4 +273,4 @@ Token 644 4170 454 61
Twilio Credentials 30 39 0 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
URL Credentials 210 157 215 209 208 1 371 2 0.002688 0.009524 0.994845 0.995215 0.990476 0.992840
UUID 1075 265 0 1074 1073 1 264 2 0.003774 0.001860 0.997761 0.999069 0.998140 0.998604
11856 46611 5084 11636 11391 232 46379 465 0.004977 0.039221 0.988079 0.980040 0.960779 0.970314
11858 46612 5084 11639 11393 233 46379 465 0.004999 0.039214 0.988062 0.979959 0.960786 0.970278
2 changes: 1 addition & 1 deletion .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
if: ${{ always() && steps.code_checkout.conclusion == 'success' }}
run: |
n=0
for f in $(find . -type f -not -wholename '*/.*' -a -not -wholename '*/tests/samples/*' -a -not -wholename '*/corpus/*'); do
for f in $(find . -type f -not -wholename '*/.*' -a -not -wholename '*/tests/samples/*' -a -not -wholename '*/corpus/*' -a -not -wholename '*.json'); do
n=$(( 1 + ${n} ))
filetype=$(file ${f})
if echo "${filetype}" | grep -q '.*text.*'; then
Expand Down
3 changes: 0 additions & 3 deletions credsweeper/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,6 @@ class DiffRowType(Enum):
# default value for config and ValuePatternCheck
DEFAULT_PATTERN_LEN = 4

# default value for config and ValuePemPatternCheck
DEFAULT_PEM_PATTERN_LEN = 5

# PEM x509 patterns
PEM_BEGIN_PATTERN = "-----BEGIN"
PEM_END_PATTERN = "-----END"
Expand Down
3 changes: 1 addition & 2 deletions credsweeper/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from humanfriendly import parse_size

from credsweeper.common.constants import Severity, DEFAULT_PATTERN_LEN, DEFAULT_PEM_PATTERN_LEN
from credsweeper.common.constants import Severity, DEFAULT_PATTERN_LEN
from credsweeper.utils import Util


Expand Down Expand Up @@ -46,4 +46,3 @@ def __init__(self, config: Dict[str, Any]) -> None:
self.exclude_values = set(line.strip() for line in self.exclude_values)

self.pattern_len = config.get("pattern_len", DEFAULT_PATTERN_LEN)
self.pem_pattern_len = config.get("pem_pattern_len", DEFAULT_PEM_PATTERN_LEN)
4 changes: 2 additions & 2 deletions credsweeper/filters/group/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@ def get_keyword_base_filters(config: Config) -> List[Filter]:
ValueTokenCheck(),
]
if not config.doc:
filters.extend([ValuePatternCheck(config), ValueNotAllowedPatternCheck()])
filters.extend([ValuePatternCheck(pattern_len=config.pattern_len), ValueNotAllowedPatternCheck()])
return filters

@staticmethod
def get_pattern_base_filters(config: Config) -> List[Filter]:
"""return base filters for pattern"""
return [ #
LineSpecificKeyCheck(), #
ValuePatternCheck(config), #
ValuePatternCheck(pattern_len=config.pattern_len), #
]
7 changes: 6 additions & 1 deletion credsweeper/filters/group/token_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,9 @@ class TokenPattern(Group):

def __init__(self, config: Config) -> None:
super().__init__(config, GroupType.DEFAULT)
self.filters = [ValueCoupleKeywordCheck(), ValueNumberCheck(), ValueCamelCaseCheck(), ValuePatternCheck(config)]
self.filters = [
ValueCoupleKeywordCheck(),
ValueNumberCheck(),
ValueCamelCaseCheck(),
ValuePatternCheck(pattern_len=config.pattern_len)
]
2 changes: 1 addition & 1 deletion credsweeper/filters/group/url_credentials_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ def __init__(self, config: Config) -> None:
ValueNotAllowedPatternCheck(),
ValueTokenCheck(),
ValueDictionaryValueLengthCheck(min_len=4, max_len=80),
ValuePatternCheck(config)
ValuePatternCheck(pattern_len=config.pattern_len)
]
8 changes: 3 additions & 5 deletions credsweeper/filters/value_pattern_check.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re

from credsweeper.common.constants import DEFAULT_PATTERN_LEN
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
Expand All @@ -21,17 +22,14 @@ class ValuePatternCheck(Filter):
Default pattern LEN is 4
"""

def __init__(self, config: Config):
def __init__(self, config: Config = None, pattern_len: int = DEFAULT_PATTERN_LEN):
"""Create ValuePatternCheck with a specific pattern_len to check.
Args:
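config: not read by the initializer lines shown here; the pattern length comes from pattern_len
pattern_len: pattern len to use during check. DEFAULT_PATTERN_LEN by default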
"""
if 'ValuePemPatternCheck' == self.__class__.__name__:
self.pattern_len = config.pem_pattern_len
else:
self.pattern_len = config.pattern_len
self.pattern_len = pattern_len
# use non whitespace symbol pattern
self.pattern = re.compile(fr"(\S)\1{{{str(self.pattern_len - 1)},}}")

Expand Down
17 changes: 17 additions & 0 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1386,3 +1386,20 @@
target:
- code
- doc

- name: Salesforce Credentials
severity: medium
confidence: weak
type: pattern
values:
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>00D[0-9A-Za-z]{9,15}(![.0-9A-Za-z_-]{24,200})?)(?![0-9A-Za-z_-])
min_line_len: 12
filter_type:
- ValuePatternCheck(9)
- ValueNumberCheck
- ValueBase64PartCheck
required_substrings:
- 00D
target:
- code
- doc
6 changes: 3 additions & 3 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from pathlib import Path

# total number of files in test samples
SAMPLES_FILES_COUNT = 142
SAMPLES_FILES_COUNT = 143

# the lowest value of ML threshold is used to display possible lowest values
NEGLIGIBLE_ML_THRESHOLD = 0.0001

# credentials count after scan with negligible ML threshold
SAMPLES_CRED_COUNT = 427
SAMPLES_CRED_COUNT = 428
SAMPLES_CRED_LINE_COUNT = SAMPLES_CRED_COUNT + 19

# Number of filtered credentials with ML
Expand All @@ -17,7 +17,7 @@
SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED

# with option --doc
SAMPLES_IN_DOC = 673
SAMPLES_IN_DOC = 674

# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 35
Expand Down
28 changes: 27 additions & 1 deletion tests/data/depth_3.json
Original file line number Diff line number Diff line change
Expand Up @@ -10817,6 +10817,32 @@
}
]
},
{
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Salesforce Credentials",
"severity": "medium",
"confidence": "weak",
"line_data_list": [
{
"line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
"line_num": 2,
"path": "./tests/samples/salesfoce",
"info": "./tests/samples/salesfoce|RAW",
"value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
"value_start": 8,
"value_end": 120,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 5.076545709916438,
"valid": true
}
}
]
},
{
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.999,
Expand Down Expand Up @@ -13859,4 +13885,4 @@
}
]
}
]
]
28 changes: 27 additions & 1 deletion tests/data/doc.json
Original file line number Diff line number Diff line change
Expand Up @@ -17658,6 +17658,32 @@
}
]
},
{
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Salesforce Credentials",
"severity": "medium",
"confidence": "weak",
"line_data_list": [
{
"line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
"line_num": 2,
"path": "./tests/samples/salesfoce",
"info": "./tests/samples/salesfoce|RAW",
"value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
"value_start": 8,
"value_end": 120,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 5.076545709916438,
"valid": true
}
}
]
},
{
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.79,
Expand Down Expand Up @@ -19486,4 +19512,4 @@
}
]
}
]
]
28 changes: 27 additions & 1 deletion tests/data/ml_threshold.json
Original file line number Diff line number Diff line change
Expand Up @@ -10124,6 +10124,32 @@
}
]
},
{
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Salesforce Credentials",
"severity": "medium",
"confidence": "weak",
"line_data_list": [
{
"line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
"line_num": 2,
"path": "./tests/samples/salesfoce",
"info": "",
"value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
"value_start": 8,
"value_end": 120,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 5.076545709916438,
"valid": true
}
}
]
},
{
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.999,
Expand Down Expand Up @@ -11424,4 +11450,4 @@
}
]
}
]
]
28 changes: 27 additions & 1 deletion tests/data/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -9370,6 +9370,32 @@
}
]
},
{
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Salesforce Credentials",
"severity": "medium",
"confidence": "weak",
"line_data_list": [
{
"line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
"line_num": 2,
"path": "./tests/samples/salesfoce",
"info": "",
"value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
"value_start": 8,
"value_end": 120,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64STDPAD_CHARS",
"entropy": 5.076545709916438,
"valid": true
}
}
]
},
{
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.999,
Expand Down Expand Up @@ -10592,4 +10618,4 @@
}
]
}
]
]
16 changes: 7 additions & 9 deletions tests/filters/test_value_pattern_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,35 +14,33 @@ class TestValuePatternCheck(unittest.TestCase):
def setUp(self) -> None:
self.config = MagicMock(spec=Config)
self.config.pattern_len = 4
self.config.pem_pattern_len = 5

def test_equal_pattern_check_n(self) -> None:
self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("Crackle123"))
self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("IEEE32441"))
self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("Pass..."))
self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("Pass:\\n Crackle123"))
self.assertFalse(ValuePatternCheck(pattern_len=4).equal_pattern_check("Pass:\\n Crackle123"))

def test_equal_pattern_check_p(self) -> None:
self.assertTrue(ValuePatternCheck(self.config).equal_pattern_check("AAAABCD"))
self.assertTrue(ValuePatternCheck(self.config).equal_pattern_check("-------BEGIN"))
self.config.pattern_len = 8
self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("-------BEGIN"))
self.assertTrue(ValuePatternCheck(pattern_len=4).equal_pattern_check("-------BEGIN"))
self.assertFalse(ValuePatternCheck(pattern_len=8).equal_pattern_check("-------BEGIN"))

def test_ascending_pattern_check_n(self) -> None:
self.assertFalse(ValuePatternCheck(self.config).ascending_pattern_check("Crackle123"))
self.assertFalse(ValuePatternCheck(self.config).ascending_pattern_check("Crackle987654321"))
self.assertFalse(ValuePatternCheck(pattern_len=4).ascending_pattern_check("Crackle987654321"))

def test_ascending_pattern_check_p(self) -> None:
self.assertTrue(ValuePatternCheck(self.config).ascending_pattern_check("Crackle1234"))
self.assertTrue(ValuePatternCheck(self.config).ascending_pattern_check("Cracklefgh"))
self.assertTrue(ValuePatternCheck(pattern_len=4).ascending_pattern_check("Cracklefgh"))

def test_descending_pattern_check_n(self) -> None:
self.assertFalse(ValuePatternCheck(self.config).descending_pattern_check("Crackle321"))
self.assertFalse(ValuePatternCheck(self.config).descending_pattern_check("Crackle123456789"))
self.assertFalse(ValuePatternCheck(pattern_len=4).descending_pattern_check("Crackle123456789"))

def test_descending_pattern_check_p(self) -> None:
self.assertTrue(ValuePatternCheck(self.config).descending_pattern_check("Crackle4321"))
self.assertTrue(ValuePatternCheck(self.config).descending_pattern_check("Crackledcba"))
self.assertTrue(ValuePatternCheck(pattern_len=4).descending_pattern_check("Crackledcba"))


class TestValuePatternCheckFixture:
Expand Down
2 changes: 2 additions & 0 deletions tests/samples/salesfoce
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
f_org_id 00Dee00000000000A5 00D0A53429897F6E
t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1

0 comments on commit f5b12b7

Please sign in to comment.