From 9b1a95dbb9aa4b7e5b5cb8bf6c50db0579efe9b7 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Thu, 26 Dec 2024 11:26:28 +0200 Subject: [PATCH] salesforce (#181) --- .ci/benchmark.txt | 15 ++++++++------- download_data.py | 2 ++ meta/eab0f8ed.csv | 5 ++++- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index a8bb7fcb3..706f80b46 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,6 +1,6 @@ -META MD5 b33b22ce3adc2141bcf91e4cdd6f1cab -DATA MD5 9ac09dae7d8873d53e1fbf18da2d71c4 -DATA: 16329853 interested lines. MARKUP: 59549 items +META MD5 984f912263c0c337a1672296aa759cbc +DATA MD5 6db3f0cb94aad9db85077fb00a1ae6bf +DATA: 16329853 interested lines. MARKUP: 59550 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- 193 28288 69 415 90 @@ -86,7 +86,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .jenkinsfile 1 58 2 6 .jinja2 1 64 2 .js 653 532652 512 2450 331 -.json 843 13045846 1074 10011 139 +.json 843 13045846 1076 10012 139 .jsp 13 3202 1 37 .jsx 7 857 19 .jwt 1 1 2 @@ -222,7 +222,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 418 36057 522 910 376 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10003 16329853 11856 46611 5084 +TOTAL: 10003 16329853 11858 46612 5084 credsweeper result_cnt : 0, lost_cnt : 0, true_cnt : 0, false_cnt : 0 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ----- -------- -------- -------- ----- -------- ---- @@ -236,7 +236,7 @@ Azure Access Token 19 0 0 BASE64 Private Key 12 4 0 0 0 4 12 0.000000 1.000000 0.250000 0.000000 BASE64 encoded PEM Private Key 7 0 0 0 0 0 7 1.000000 0.000000 0.000000 Bitbucket Client ID 19 53 0 0 0 53 19 0.000000 1.000000 0.736111 0.000000 -Bitbucket Client Secret 28 66 1 0 0 67 28 0.000000 1.000000 0.705263 0.000000 +Bitbucket Client Secret 28 67 1 0 0 68 28 0.000000 1.000000 0.708333 0.000000 CMD ConvertTo-SecureString 13 4 0 0 0 4 13 0.000000 1.000000 0.235294 0.000000 CMD Password 21 128 6 0 0 134 21 0.000000 1.000000 0.864516 0.000000 CMD Secret 1 1 0 0 0 1 1 0.000000 1.000000 0.500000 0.000000 @@ -262,6 +262,7 @@ Nonce 93 49 0 Other 9 7447 5 0 0 7452 9 0.000000 1.000000 0.998794 0.000000 PEM Private Key 1019 1483 0 0 0 1483 1019 0.000000 1.000000 0.592726 0.000000 Password 1869 7536 2680 0 0 10216 1869 0.000000 1.000000 0.845345 0.000000 +Salesforce Credentials 2 0 0 0 0 0 2 1.000000 0.000000 0.000000 Salt 47 76 1 0 0 77 47 0.000000 1.000000 0.620968 0.000000 Secret 1297 1576 802 0 0 2378 1297 0.000000 1.000000 0.647075 0.000000 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 @@ -272,4 +273,4 @@ Token 644 4170 454 Twilio Credentials 30 39 0 0 0 39 30 0.000000 1.000000 0.565217 0.000000 URL Credentials 210 157 215 0 0 372 210 0.000000 1.000000 0.639175 0.000000 UUID 1075 265 0 0 0 265 1075 0.000000 1.000000 0.197761 0.000000 - 11856 46611 5084 0 0 0 46611 11856 0.000000 1.000000 0.797219 0.000000 + 11858 46612 5084 0 0 0 46612 11858 0.000000 1.000000 0.797195 0.000000 diff --git a/download_data.py b/download_data.py index 4da2fea94..8d29ab1be 100644 --- a/download_data.py +++ b/download_data.py @@ -342,6 +342,8 @@ def get_obfuscated_value(value, meta_row: MetaRow): ["AC", "AD", "AL", "CA", "CF", "CL", "CN", "CR", "FW", "IP", "KS", "MM", "NO", "PK", "PN", "QU", "RE", "SC", "SD", "SK", "SM", "TR", "UT", "XE", "XR"]) and 34 == len(value))): obfuscated_value = value[:2] + generate_value(value[2:]) + elif value.startswith("00D") and (12 <= len(value) <= 18 or '!' in value): + obfuscated_value = value[:3] + generate_value(value[3:]) elif ".apps.googleusercontent.com" in value: pos = value.index(".apps.googleusercontent.com") obfuscated_value = generate_value(value[:pos]) + ".apps.googleusercontent.com" + generate_value( diff --git a/meta/eab0f8ed.csv b/meta/eab0f8ed.csv index 7a015a4b6..c613e5e2e 100644 --- a/meta/eab0f8ed.csv +++ b/meta/eab0f8ed.csv @@ -116,7 +116,7 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 29030,2bec3240,GitHub,eab0f8ed,data/eab0f8ed/test/2bec3240.json,12,12,Template,T,26,45,F,F,Any,,,Token,2.77,19,F,F,F,Token 29398,0f179256,GitHub,eab0f8ed,data/eab0f8ed/test/0f179256.cs,32,32,T,T,40,-1,F,F,,,,,0.0,0,F,F,F,BASE64 Private Key:Secret 29764,31fa3cd2,GitHub,eab0f8ed,data/eab0f8ed/test/31fa3cd2.json,10,10,Template,F,25,32,F,F,CharsOnly,,,Token,2.81,7,F,F,F,Token -29846,52aac773,GitHub,eab0f8ed,data/eab0f8ed/test/52aac773.json,10,10,T,F,25,137,F,F,Any,,,Token,5.13,112,F,F,F,Token +29846,52aac773,GitHub,eab0f8ed,data/eab0f8ed/test/52aac773.json,10,10,T,F,25,137,F,F,Any,,,Token,5.13,112,F,F,F,Token:Salesforce Credentials 29866,9892e3b1,GitHub,eab0f8ed,data/eab0f8ed/test/9892e3b1.json,9,9,Template,T,25,44,F,F,Any,,,Token,2.77,19,F,F,F,Token 31457,6d3b5a40,GitHub,eab0f8ed,data/eab0f8ed/test/6d3b5a40.cs,145,145,Template,T,32,48,F,F,Any,,,Secret,2.95,16,F,F,F,Secret 31513,d35008d1,GitHub,eab0f8ed,data/eab0f8ed/test/d35008d1.cs,21,21,Template,T,32,44,F,F,CharsOnly,,,Secret,3.02,12,F,F,F,Secret @@ -272,3 +272,6 @@ Id,FileID,Domain,RepoName,FilePath,LineStart,LineEnd,GroundTruth,WithWords,Value 1479532,791ba8fa,GitHub,eab0f8ed,data/eab0f8ed/src/791ba8fa.ps1,137,137,F,F,206,215,F,F,,,,,0.0,0,F,F,F,CMD Password 1479533,791ba8fa,GitHub,eab0f8ed,data/eab0f8ed/src/791ba8fa.ps1,138,138,F,F,226,235,F,F,,,,,0.0,0,F,F,F,CMD Password 1479569,15cd5bcf,GitHub,eab0f8ed,data/eab0f8ed/src/15cd5bcf.ks,9,9,T,F,51,56,F,F,,,,,0.0,0,F,F,F,CMD Password +1480654,52aac773,GitHub,eab0f8ed,data/eab0f8ed/test/52aac773.json,26,26,T,F,28,43,F,F,,,,,0.0,0,F,F,F,Salesforce Credentials +1480655,52aac773,GitHub,eab0f8ed,data/eab0f8ed/test/52aac773.json,10,10,F,F,73,-1,F,F,,,,,0.0,0,F,F,F,Bitbucket Client Secret +