Skip to content

Commit

Permalink
fix \r in xlsx
Browse files (browse the repository at this point in the history)
  • Loading branch information
babenek committed Dec 11, 2024
1 parent 84a5ed8 commit 51962bd
Show file tree
Hide file tree
Showing 10 changed files with 664 additions and 253 deletions.
7 changes: 3 additions & 4 deletions credsweeper/deep_scanner/xlsx_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@ def data_scan(
candidates = []
try:
book = pd.read_excel(io.BytesIO(data_provider.data), sheet_name=None, header=None)
sheet_lines = []
for sheet_name, sheet_data in book.items():
text = sheet_data.fillna('').astype(str)
for i in text.values:
sheet_lines.append('\t'.join(i))
# replace open xml carriage returns _x000D_ before line feed only
df = sheet_data.replace(to_replace="_x000D_\n", value='\n', regex=True).fillna('').astype(str)
sheet_lines = ['\t'.join(x) for x in df.values]
string_data_provider = StringContentProvider(lines=sheet_lines,
file_path=data_provider.file_path,
file_type=data_provider.file_type,
Expand Down
8 changes: 4 additions & 4 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
confidence: moderate
type: pattern
values:
- (^|\s|(?P<variable>(?i:\bip[\s/]{1,80}id[\s/]{1,80}pw[\s/:]{0,80}))|(?P<url>://))(?P<ip>[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})((\s*\()?|(?(variable)[\s,/]{1,80}|(?(url)[,]|[,/])))\s*\w[\w.-]{3,80}[\s,/]{1,80}(?P<value>(?(url)(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9_+=~!@#$%^&*;?-])){7,31}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)|(?-i:(?P<e>[A-Z])|(?P<f>[a-z])|(?P<g>[0-9/_+=~!@#$%^&*;?-])){7,31}(?(e)(?(f)(?(g)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)))(?:\s|[^/]|$)
- (^|\s|(?P<variable>(?i:\bip[\s/]{1,80}id[\s/]{1,80}pw[\s/:]{0,80}))|(?P<url>://))(?P<ip>(?<![0-9.])[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}(?![0-9.]))((\s*[(])?|(?(variable)[\s,/]{1,80}|(?(url)[,]|[,/])))\s*\w[\w.-]{3,80}[\s,/]{1,80}(?P<value>(?(url)(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9_+=~!@#$%^&*;?-])){7,31}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)|(?-i:(?P<e>[A-Z])|(?P<f>[a-z])|(?P<g>[0-9/_+=~!@#$%^&*;?-])){7,31}(?(e)(?(f)(?(g)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x)))(?:\s|[^/]|$)
filter_type:
- ValueAllowlistCheck
- ValuePatternCheck
Expand Down Expand Up @@ -1001,7 +1001,7 @@
confidence: strong
type: pattern
values:
- (?<![0-9A-Za-z_-])(?P<value>BBDC-[NMO][ADgjQTwz][0-9A-Za-z_-]{42})(?![0-9A-Za-z_-])
- (?<![0-9A-Za-z_-])(?P<value>BBDC-[MNO][ADQTgjwz][AEIMQUYcgk][012345wxyz][0-9A-Za-z_-]{40})(?![0-9A-Za-z_-])
filter_type:
- ValueAtlassianTokenCheck
min_line_len: 49
Expand Down Expand Up @@ -1042,7 +1042,7 @@
confidence: strong
type: pattern
values:
- (?<![0-9A-Za-z_-])(?P<value>[NMO][ADgjQTwz][0-9A-Za-z_-]{42})(?![0-9A-Za-z_-])
- (?<![0-9A-Za-z_-])(?P<value>[MNO][ADQTgjwz][AEIMQUYcgk][012345wxyz][0-9A-Za-z_-]{40})(?![0-9A-Za-z_-])
filter_type:
- ValueAtlassianTokenCheck
- ValueBase64PartCheck
Expand Down Expand Up @@ -1374,7 +1374,7 @@
confidence: strong
type: pattern
values:
- (?<![0-9A-Za-z_-])(?P<value>[NMO][ADgjQTwz][0-9A-Za-z_-]{22,26}\.[0-9A-Za-z_-]{6}\.[0-9A-Za-z_-]{30,40})(?![0-9A-Za-z_-])
- (?<![0-9A-Za-z_-])(?P<value>[MNO][ADQTgjwz][AEIMQUYcgk][012345wxyz][0-9A-Za-z_-]{20,24}\.[0-9A-Za-z_-]{6}\.[0-9A-Za-z_-]{30,40})(?![0-9A-Za-z_-])
min_line_len: 62
filter_type:
- ValueDiscordBotCheck
Expand Down
4 changes: 2 additions & 2 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED

# with option --doc
SAMPLES_IN_DOC = 451
SAMPLES_IN_DOC = 461

# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 29
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 33
SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 54
SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 1

Expand Down
432 changes: 270 additions & 162 deletions tests/data/depth_3.json

Large diffs are not rendered by default.

Loading

0 comments on commit 51962bd

Please sign in to comment.