Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: handle arbitrary newline terminators #26

Merged
merged 2 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 32 additions & 16 deletions src/picklescan/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ def merge(self, sr: "ScanResult"):


class GenOpsError(Exception):
def __init__(self, msg: str):
def __init__(self, msg: str, globals: Optional[Set[Tuple[str, str]]]):
self.msg = msg
self.globals = globals
super().__init__()

def __str__(self) -> str:
Expand Down Expand Up @@ -167,7 +168,6 @@ def _http_get(url) -> bytes:


def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]]:

globals = set()

memo = {}
Expand All @@ -178,7 +178,11 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]
try:
ops = list(pickletools.genops(data))
except Exception as e:
raise GenOpsError(str(e))
# XXX: a file may contain multiple pickles, so globals may already have been extracted from an earlier, valid pickle before this one failed to parse.
# Attach the globals found so far to the raised error and let the caller decide what to do with them.
globals_opt = globals if len(globals) > 0 else None
raise GenOpsError(str(e), globals_opt)

last_byte = data.read(1)
data.seek(-1, 1)

Expand Down Expand Up @@ -232,18 +236,12 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]
return globals


def scan_pickle_bytes(data: IO[bytes], file_id, multiple_pickles=True) -> ScanResult:
"""Disassemble a Pickle stream and report issues"""

def _build_scan_result_from_raw_globals(
raw_globals: Set[Tuple[str, str]],
file_id,
scan_err=False,
) -> ScanResult:
globals = []
try:
raw_globals = _list_globals(data, multiple_pickles)
except GenOpsError as e:
_log.error(f"ERROR: parsing pickle in {file_id}: {e}")
return ScanResult(globals, scan_err=True)

_log.debug("Global imports in %s: %s", file_id, raw_globals)

issues_count = 0
for rg in raw_globals:
g = Global(rg[0], rg[1], SafetyLevel.Dangerous)
Expand All @@ -269,7 +267,26 @@ def scan_pickle_bytes(data: IO[bytes], file_id, multiple_pickles=True) -> ScanRe
g.safety = SafetyLevel.Suspicious
globals.append(g)

return ScanResult(globals, 1, issues_count, 1 if issues_count > 0 else 0, False)
return ScanResult(globals, 1, issues_count, 1 if issues_count > 0 else 0, scan_err)


def scan_pickle_bytes(data: IO[bytes], file_id, multiple_pickles=True) -> ScanResult:
    """Scan a pickle stream for global imports and report the findings.

    The stream is handed to ``_list_globals``. If that raises
    ``GenOpsError``, the failure is logged and the returned result is
    flagged with ``scan_err=True``; any globals attached to the error are
    still classified and reported rather than discarded.

    Args:
        data: Binary stream containing the pickle data.
        file_id: Identifier of the scanned file, used in log messages.
        multiple_pickles: Forwarded unchanged to ``_list_globals``.

    Returns:
        The ``ScanResult`` built from the globals that were found.
    """
    try:
        found_globals = _list_globals(data, multiple_pickles)
    except GenOpsError as err:
        _log.error(f"ERROR: parsing pickle in {file_id}: {err}")
        partial_globals = err.globals
        # Nothing was recovered before the failure: report a bare scan error.
        if partial_globals is None:
            return ScanResult([], scan_err=True)
        # Some globals were collected before parsing failed; still report them.
        return _build_scan_result_from_raw_globals(
            partial_globals, file_id, scan_err=True
        )

    _log.debug("Global imports in %s: %s", file_id, found_globals)

    return _build_scan_result_from_raw_globals(found_globals, file_id)


def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:
Expand All @@ -288,7 +305,6 @@ def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:


def scan_numpy(data: IO[bytes], file_id) -> ScanResult:

# Delay import to avoid dependency on NumPy
import numpy as np

Expand Down
14 changes: 14 additions & 0 deletions tests/data/malicious-invalid-bytes.pkl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Vos
p2
0Vsystem
p3
0Vtorch
p0
0VLongStorage
p1
0g2
g3
“(Vcat flag.txt
tR.


49 changes: 45 additions & 4 deletions tests/test_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,35 @@ def initialize_pickle_files():
),
)

initialize_data_file(
f"{_root_path}/data/malicious-invalid-bytes.pkl",
b"".join(
[
pickle.UNICODE + b"os\n",
pickle.PUT + b"2\n",
pickle.POP,
pickle.UNICODE + b"system\n",
pickle.PUT + b"3\n",
pickle.POP,
pickle.UNICODE + b"torch\n",
pickle.PUT + b"0\n",
pickle.POP,
pickle.UNICODE + b"LongStorage\n",
pickle.PUT + b"1\n",
pickle.POP,
pickle.GET + b"2\n",
pickle.GET + b"3\n",
pickle.STACK_GLOBAL,
pickle.MARK,
pickle.UNICODE + b"cat flag.txt\n",
pickle.TUPLE,
pickle.REDUCE,
pickle.STOP,
b"\n\n\t\t",
]
),
)

# Code which created malicious12.pkl using pickleassem (see https://github.com/gousaiyang/pickleassem)
#
# p = PickleAssembler(proto=4)
Expand Down Expand Up @@ -351,7 +380,6 @@ def test_scan_pickle_bytes():


def test_scan_zip_bytes():

buffer = io.BytesIO()
with zipfile.ZipFile(buffer, "w") as zip:
zip.writestr("data.pkl", pickle.dumps(Malicious1()))
Expand Down Expand Up @@ -559,15 +587,17 @@ def test_scan_directory_path():
Global("torch", "_utils", SafetyLevel.Suspicious),
Global("__builtin__", "exec", SafetyLevel.Dangerous),
Global("os", "system", SafetyLevel.Dangerous),
Global("os", "system", SafetyLevel.Dangerous),
Global("operator", "attrgetter", SafetyLevel.Dangerous),
Global("builtins", "__import__", SafetyLevel.Suspicious),
Global("pickle", "loads", SafetyLevel.Dangerous),
Global("_pickle", "loads", SafetyLevel.Dangerous),
Global("_codecs", "encode", SafetyLevel.Suspicious),
],
scanned_files=26,
issues_count=24,
infected_files=21,
scanned_files=27,
issues_count=25,
infected_files=22,
scan_err=True,
)
compare_scan_results(scan_directory_path(f"{_root_path}/data/"), sr)

Expand Down Expand Up @@ -610,3 +640,14 @@ def test_pickle_files():
assert pickle.load(file) == 12345
with open(f"{_root_path}/data/malicious13b.pkl", "rb") as file:
assert pickle.load(file) == 12345


def test_invalid_bytes_err():
    """Check that a pickle followed by invalid bytes still reports its globals.

    The expected result holds the dangerous ``os.system`` global and has its
    final positional flag (the scan-error marker) set to ``True``.
    """
    expected = ScanResult(
        [Global("os", "system", SafetyLevel.Dangerous)], 1, 1, 1, True
    )
    fixture_path = f"{_root_path}/data/malicious-invalid-bytes.pkl"
    with open(fixture_path, "rb") as file:
        actual = scan_pickle_bytes(file, fixture_path)
        compare_scan_results(actual, expected)
Loading