Samsung · babenek · Dec 27, 2024 · Dec 25, 2024 · Dec 25, 2024 · Dec 26, 2024
@@ -1,6 +1,6 @@
-META MD5 b33b22ce3adc2141bcf91e4cdd6f1cab
-DATA MD5 9ac09dae7d8873d53e1fbf18da2d71c4
-DATA: 16329853 interested lines. MARKUP: 59549 items
+META MD5 984f912263c0c337a1672296aa759cbc
+DATA MD5 6db3f0cb94aad9db85077fb00a1ae6bf
+DATA: 16329853 interested lines. MARKUP: 59550 items
 FileType           FileNumber    ValidLines    Positives    Negatives    Templates
 ---------------  ------------  ------------  -----------  -----------  -----------
                           193         28288           69          415           90
@@ -86,7 +86,7 @@ FileType           FileNumber    ValidLines    Positives    Negatives    Templat
 .jenkinsfile                1            58            2            6
 .jinja2                     1            64                         2
 .js                       653        532652          512         2450          331
-.json                     843      13045846         1074        10011          139
+.json                     843      13045846         1076        10012          139
 .jsp                       13          3202            1           37
 .jsx                        7           857                        19
 .jwt                        1             1            2
@@ -222,8 +222,8 @@ FileType           FileNumber    ValidLines    Positives    Negatives    Templat
 .yml                      418         36057          522          910          376
 .zsh                        6           872                        12
 .zsh-theme                  1            97                         1
-TOTAL:                  10003      16329853        11856        46611         5084
-credsweeper result_cnt : 11623, lost_cnt : 0, true_cnt : 11391, false_cnt : 232
+TOTAL:                  10003      16329853        11858        46612         5084
+credsweeper result_cnt : 11626, lost_cnt : 0, true_cnt : 11393, false_cnt : 233
 Rules                             Positives    Negatives    Templates    Reported     TP    FP     TN    FN       FPR       FNR       ACC       PRC       RCL        F1
 ------------------------------  -----------  -----------  -----------  ----------  -----  ----  -----  ----  --------  --------  --------  --------  --------  --------
 API                                     130         3166          188         126    125     1   3353     5  0.000298  0.038462  0.998278  0.992063  0.961538  0.976562
@@ -236,7 +236,7 @@ Azure Access Token                       19            0            0          1
 BASE64 Private Key                       12            4            0          12     12     0      4     0  0.000000  0.000000  1.000000  1.000000  1.000000  1.000000
 BASE64 encoded PEM Private Key            7            0            0           5      5     0      0     2            0.285714  0.714286  1.000000  0.714286  0.833333
 Bitbucket Client ID                      19           53            0          75     19    53      0     0  1.000000  0.000000  0.263889  0.263889  1.000000  0.417582
-Bitbucket Client Secret                  28           66            1          98     28    67      0     0  1.000000  0.000000  0.294737  0.294737  1.000000  0.455285
+Bitbucket Client Secret                  28           67            1          99     28    68      0     0  1.000000  0.000000  0.291667  0.291667  1.000000  0.451613
 CMD ConvertTo-SecureString               13            4            0          13     13     0      4     0  0.000000  0.000000  1.000000  1.000000  1.000000  1.000000
 CMD Password                             21          128            6          20     20     0    134     1  0.000000  0.047619  0.993548  1.000000  0.952381  0.975610
 CMD Secret                                1            1            0           1      1     0      1     0  0.000000  0.000000  1.000000  1.000000  1.000000  1.000000
@@ -262,6 +262,7 @@ Nonce                                    93           49            0          9
 Other                                     9         7447            5                  0     0   7452     9  0.000000  1.000000  0.998794            0.000000
 PEM Private Key                        1019         1483            0        1023   1019     4   1479     0  0.002697  0.000000  0.998401  0.996090  1.000000  0.998041
 Password                               1869         7536         2680        1795   1782    13  10203    87  0.001273  0.046549  0.991725  0.992758  0.953451  0.972707
+Salesforce Credentials                    2            0            0           2      2     0      0     0            0.000000  1.000000  1.000000  1.000000  1.000000
 Salt                                     47           76            1          45     45     0     77     2  0.000000  0.042553  0.983871  1.000000  0.957447  0.978261
 Secret                                 1297         1576          802        1292   1288     4   2374     9  0.001682  0.006939  0.996463  0.996904  0.993061  0.994979
 Seed                                      1            6            0                  0     0      6     1  0.000000  1.000000  0.857143            0.000000
@@ -272,4 +273,4 @@ Token                                   644         4170          454         61
 Twilio Credentials                       30           39            0          30     30     0     39     0  0.000000  0.000000  1.000000  1.000000  1.000000  1.000000
 URL Credentials                         210          157          215         209    208     1    371     2  0.002688  0.009524  0.994845  0.995215  0.990476  0.992840
 UUID                                   1075          265            0        1074   1073     1    264     2  0.003774  0.001860  0.997761  0.999069  0.998140  0.998604
-                                      11856        46611         5084       11636  11391   232  46379   465  0.004977  0.039221  0.988079  0.980040  0.960779  0.970314
+                                      11858        46612         5084       11639  11393   233  46379   465  0.004999  0.039214  0.988062  0.979959  0.960786  0.970278
@@ -49,7 +49,7 @@ jobs:
       if: ${{ always() && steps.code_checkout.conclusion == 'success' }}
       run: |
         n=0
-        for f in $(find . -type f -not -wholename '*/.*' -a -not -wholename '*/tests/samples/*' -a -not -wholename '*/corpus/*'); do
+        for f in $(find . -type f -not -wholename '*/.*' -a -not -wholename '*/tests/samples/*' -a -not -wholename '*/corpus/*' -a -not -wholename '*.json'); do
             n=$(( 1 + ${n} ))
             filetype=$(file ${f})
             if echo "${filetype}" | grep -q '.*text.*'; then

@@ -170,9 +170,6 @@ class DiffRowType(Enum):
 # default value for config and ValuePatternCheck
 DEFAULT_PATTERN_LEN = 4
 
-# default value for config and ValuePemPatternCheck
-DEFAULT_PEM_PATTERN_LEN = 5
-
 # PEM x509 patterns
 PEM_BEGIN_PATTERN = "-----BEGIN"
 PEM_END_PATTERN = "-----END"

@@ -3,7 +3,7 @@
 
 from humanfriendly import parse_size
 
-from credsweeper.common.constants import Severity, DEFAULT_PATTERN_LEN, DEFAULT_PEM_PATTERN_LEN
+from credsweeper.common.constants import Severity, DEFAULT_PATTERN_LEN
 from credsweeper.utils import Util
 
 
@@ -46,4 +46,3 @@ def __init__(self, config: Dict[str, Any]) -> None:
         self.exclude_values = set(line.strip() for line in self.exclude_values)
 
         self.pattern_len = config.get("pattern_len", DEFAULT_PATTERN_LEN)
-        self.pem_pattern_len = config.get("pem_pattern_len", DEFAULT_PEM_PATTERN_LEN)
@@ -48,13 +48,13 @@ def get_keyword_base_filters(config: Config) -> List[Filter]:
             ValueTokenCheck(),
         ]
         if not config.doc:
-            filters.extend([ValuePatternCheck(config), ValueNotAllowedPatternCheck()])
+            filters.extend([ValuePatternCheck(pattern_len=config.pattern_len), ValueNotAllowedPatternCheck()])
         return filters
 
     @staticmethod
     def get_pattern_base_filters(config: Config) -> List[Filter]:
         """return base filters for pattern"""
         return [  #
             LineSpecificKeyCheck(),  #
-            ValuePatternCheck(config),  #
+            ValuePatternCheck(pattern_len=config.pattern_len),  #
         ]
@@ -9,4 +9,9 @@ class TokenPattern(Group):
 
     def __init__(self, config: Config) -> None:
         super().__init__(config, GroupType.DEFAULT)
-        self.filters = [ValueCoupleKeywordCheck(), ValueNumberCheck(), ValueCamelCaseCheck(), ValuePatternCheck(config)]
+        self.filters = [
+            ValueCoupleKeywordCheck(),
+            ValueNumberCheck(),
+            ValueCamelCaseCheck(),
+            ValuePatternCheck(pattern_len=config.pattern_len)
+        ]
@@ -30,5 +30,5 @@ def __init__(self, config: Config) -> None:
             ValueNotAllowedPatternCheck(),
             ValueTokenCheck(),
             ValueDictionaryValueLengthCheck(min_len=4, max_len=80),
-            ValuePatternCheck(config)
+            ValuePatternCheck(pattern_len=config.pattern_len)
         ]
@@ -1,5 +1,6 @@
 import re
 
+from credsweeper.common.constants import DEFAULT_PATTERN_LEN
 from credsweeper.config import Config
 from credsweeper.credentials import LineData
 from credsweeper.file_handler.analysis_target import AnalysisTarget
@@ -21,17 +22,14 @@ class ValuePatternCheck(Filter):
     Default pattern LEN is 4
     """
 
-    def __init__(self, config: Config):
+    def __init__(self, config: Config = None, pattern_len: int = DEFAULT_PATTERN_LEN):
         """Create ValuePatternCheck with a specific pattern_len to check.
 
         Args:
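             config: not read by this constructor in the lines shown; presumably kept for call compatibility
             pattern_len: pattern len to use during check. DEFAULT_PATTERN_LEN by default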
 
         """
-        if 'ValuePemPatternCheck' == self.__class__.__name__:
-            self.pattern_len = config.pem_pattern_len
-        else:
-            self.pattern_len = config.pattern_len
+        self.pattern_len = pattern_len
         # use non whitespace symbol pattern
         self.pattern = re.compile(fr"(\S)\1{{{str(self.pattern_len - 1)},}}")
 

@@ -1386,3 +1386,20 @@
   target:
     - code
     - doc
+
+- name: Salesforce Credentials
+  severity: medium
+  confidence: weak
+  type: pattern
+  values:
+    - (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>00D[0-9A-Za-z]{9,15}(![.0-9A-Za-z_-]{24,200})?)(?![0-9A-Za-z_-])
+  min_line_len: 12
+  filter_type:
+    - ValuePatternCheck(9)
+    - ValueNumberCheck
+    - ValueBase64PartCheck
+  required_substrings:
+    - 00D
+  target:
+    - code
+    - doc
@@ -1,13 +1,13 @@
 from pathlib import Path
 
 # total number of files in test samples
-SAMPLES_FILES_COUNT = 142
+SAMPLES_FILES_COUNT = 143
 
 # the lowest value of ML threshold is used to display possible lowest values
 NEGLIGIBLE_ML_THRESHOLD = 0.0001
 
 # credentials count after scan with negligible ML threshold
-SAMPLES_CRED_COUNT = 427
+SAMPLES_CRED_COUNT = 428
 SAMPLES_CRED_LINE_COUNT = SAMPLES_CRED_COUNT + 19
 
 # Number of filtered credentials with ML
@@ -17,7 +17,7 @@
 SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED
 
 # with option --doc
-SAMPLES_IN_DOC = 673
+SAMPLES_IN_DOC = 674
 
 # archived credentials that are not found without --depth
 SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 35

@@ -10817,6 +10817,32 @@
             }
         ]
     },
+    {
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Salesforce Credentials",
+        "severity": "medium",
+        "confidence": "weak",
+        "line_data_list": [
+            {
+                "line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
+                "line_num": 2,
+                "path": "./tests/samples/salesfoce",
+                "info": "./tests/samples/salesfoce|RAW",
+                "value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
+                "value_start": 8,
+                "value_end": 120,
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 5.076545709916438,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "ml_validation": "VALIDATED_KEY",
         "ml_probability": 0.999,
@@ -13859,4 +13885,4 @@
             }
         ]
     }
-]
+]
@@ -17658,6 +17658,32 @@
             }
         ]
     },
+    {
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Salesforce Credentials",
+        "severity": "medium",
+        "confidence": "weak",
+        "line_data_list": [
+            {
+                "line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
+                "line_num": 2,
+                "path": "./tests/samples/salesfoce",
+                "info": "./tests/samples/salesfoce|RAW",
+                "value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
+                "value_start": 8,
+                "value_end": 120,
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 5.076545709916438,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "ml_validation": "VALIDATED_KEY",
         "ml_probability": 0.79,
@@ -19486,4 +19512,4 @@
             }
         ]
     }
-]
+]
@@ -10124,6 +10124,32 @@
             }
         ]
     },
+    {
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Salesforce Credentials",
+        "severity": "medium",
+        "confidence": "weak",
+        "line_data_list": [
+            {
+                "line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
+                "line_num": 2,
+                "path": "./tests/samples/salesfoce",
+                "info": "",
+                "value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
+                "value_start": 8,
+                "value_end": 120,
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 5.076545709916438,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "ml_validation": "VALIDATED_KEY",
         "ml_probability": 0.999,
@@ -11424,4 +11450,4 @@
             }
         ]
     }
-]
+]
@@ -9370,6 +9370,32 @@
             }
         ]
     },
+    {
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Salesforce Credentials",
+        "severity": "medium",
+        "confidence": "weak",
+        "line_data_list": [
+            {
+                "line": "t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
+                "line_num": 2,
+                "path": "./tests/samples/salesfoce",
+                "info": "",
+                "value": "00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1",
+                "value_start": 8,
+                "value_end": 120,
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "entropy_validation": {
+                    "iterator": "BASE64STDPAD_CHARS",
+                    "entropy": 5.076545709916438,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "ml_validation": "VALIDATED_KEY",
         "ml_probability": 0.999,
@@ -10592,4 +10618,4 @@
             }
         ]
     }
-]
+]
@@ -14,35 +14,33 @@ class TestValuePatternCheck(unittest.TestCase):
     def setUp(self) -> None:
         self.config = MagicMock(spec=Config)
         self.config.pattern_len = 4
-        self.config.pem_pattern_len = 5
 
     def test_equal_pattern_check_n(self) -> None:
         self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("Crackle123"))
         self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("IEEE32441"))
         self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("Pass..."))
-        self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("Pass:\\n        Crackle123"))
+        self.assertFalse(ValuePatternCheck(pattern_len=4).equal_pattern_check("Pass:\\n        Crackle123"))
 
     def test_equal_pattern_check_p(self) -> None:
         self.assertTrue(ValuePatternCheck(self.config).equal_pattern_check("AAAABCD"))
-        self.assertTrue(ValuePatternCheck(self.config).equal_pattern_check("-------BEGIN"))
-        self.config.pattern_len = 8
-        self.assertFalse(ValuePatternCheck(self.config).equal_pattern_check("-------BEGIN"))
+        self.assertTrue(ValuePatternCheck(pattern_len=4).equal_pattern_check("-------BEGIN"))
+        self.assertFalse(ValuePatternCheck(pattern_len=8).equal_pattern_check("-------BEGIN"))
 
     def test_ascending_pattern_check_n(self) -> None:
         self.assertFalse(ValuePatternCheck(self.config).ascending_pattern_check("Crackle123"))
-        self.assertFalse(ValuePatternCheck(self.config).ascending_pattern_check("Crackle987654321"))
+        self.assertFalse(ValuePatternCheck(pattern_len=4).ascending_pattern_check("Crackle987654321"))
 
     def test_ascending_pattern_check_p(self) -> None:
         self.assertTrue(ValuePatternCheck(self.config).ascending_pattern_check("Crackle1234"))
-        self.assertTrue(ValuePatternCheck(self.config).ascending_pattern_check("Cracklefgh"))
+        self.assertTrue(ValuePatternCheck(pattern_len=4).ascending_pattern_check("Cracklefgh"))
 
     def test_descending_pattern_check_n(self) -> None:
         self.assertFalse(ValuePatternCheck(self.config).descending_pattern_check("Crackle321"))
-        self.assertFalse(ValuePatternCheck(self.config).descending_pattern_check("Crackle123456789"))
+        self.assertFalse(ValuePatternCheck(pattern_len=4).descending_pattern_check("Crackle123456789"))
 
     def test_descending_pattern_check_p(self) -> None:
         self.assertTrue(ValuePatternCheck(self.config).descending_pattern_check("Crackle4321"))
-        self.assertTrue(ValuePatternCheck(self.config).descending_pattern_check("Crackledcba"))
+        self.assertTrue(ValuePatternCheck(pattern_len=4).descending_pattern_check("Crackledcba"))
 
 
 class TestValuePatternCheckFixture:

@@ -0,0 +1,2 @@
+f_org_id 00Dee00000000000A5 00D0A53429897F6E
+t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		f_org_id 00Dee00000000000A5 00D0A53429897F6E
		t_token 00DUI000005AecQ!AR8Amtqc2drcmpoZ3dlanJoMzQ5ODc1OTg0Mzc5NzQ1OCsrKysKLS0tLQp.cm9jLVR5cGU6IDQsRU5DUllwwMzQ1NzY3MzQ1