Skip to content

Commit

Permalink
ROB: Soft failure for flate encode image mode 1 with wrong LUT size (#2900)
Browse files Browse the repository at this point in the history

Closes #2889.
  • Loading branch information
stefan6419846 authored Oct 18, 2024
1 parent 7044454 commit 80c3939
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 44 deletions.
18 changes: 11 additions & 7 deletions pypdf/_xobj_image_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,14 +209,18 @@ def _handle_flate(
if img.mode == "1":
# Two values ("high" and "low").
expected_count = 2 * nb
if len(lookup) != expected_count:
if len(lookup) < expected_count:
raise PdfReadError(
f"Not enough lookup values: Expected {expected_count}, got {len(lookup)}."
actual_count = len(lookup)
if actual_count != expected_count:
if actual_count < expected_count:
logger_warning(
f"Not enough lookup values: Expected {expected_count}, got {actual_count}.",
__name__
)
if not check_if_whitespace_only(lookup[expected_count:]):
raise PdfReadError(
f"Too many lookup values: Expected {expected_count}, got {len(lookup)}."
lookup += bytes([0] * (expected_count - actual_count))
elif not check_if_whitespace_only(lookup[expected_count:]):
logger_warning(
f"Too many lookup values: Expected {expected_count}, got {actual_count}.",
__name__
)
lookup = lookup[:expected_count]
colors_arr = [lookup[:nb], lookup[nb:]]
Expand Down
78 changes: 41 additions & 37 deletions tests/test_xobject_image_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_get_imagemode_recursion_depth():
reader.pages[0].images[0]


def test_handle_flate__image_mode_1():
def test_handle_flate__image_mode_1(caplog):
data = b"\x00\xe0\x00"
lookup = DecodedStreamObject()
expected_data = [
Expand Down Expand Up @@ -57,6 +57,7 @@ def test_handle_flate__image_mode_1():
obj_as_text="dummy",
)
assert expected_data == list(result[0].getdata())
assert not caplog.text

# Trailing whitespace.
lookup.set_data(b"\x42\x42\x42\x00\x13\x37 \x0a")
Expand All @@ -71,48 +72,51 @@ def test_handle_flate__image_mode_1():
obj_as_text="dummy",
)
assert expected_data == list(result[0].getdata())
assert not caplog.text

# Trailing non-whitespace character.
lookup.set_data(b"\x42\x42\x42\x00\x13\x37\x12")
with pytest.raises(
PdfReadError, match=r"^Too many lookup values: Expected 6, got 7\.$"
):
_handle_flate(
size=(3, 3),
data=data,
mode="1",
color_space=ArrayObject(
[
NameObject("/Indexed"),
NameObject("/DeviceRGB"),
NumberObject(1),
lookup,
]
),
colors=2,
obj_as_text="dummy",
)
result = _handle_flate(
size=(3, 3),
data=data,
mode="1",
color_space=ArrayObject(
[
NameObject("/Indexed"),
NameObject("/DeviceRGB"),
NumberObject(1),
lookup,
]
),
colors=2,
obj_as_text="dummy",
)
assert expected_data == list(result[0].getdata())
assert "Too many lookup values: Expected 6, got 7." in caplog.text

# Not enough lookup data.
# The `\xe0` byte of the original input (the middle part) does not use `0x37 = 55` for the
# lookup here, but receives a custom padding of `0` instead.
lookup.set_data(b"\x42\x42\x42\x00\x13")
with pytest.raises(
PdfReadError, match=r"^Not enough lookup values: Expected 6, got 5\.$"
):
_handle_flate(
size=(3, 3),
data=data,
mode="1",
color_space=ArrayObject(
[
NameObject("/Indexed"),
NameObject("/DeviceRGB"),
NumberObject(1),
lookup,
]
),
colors=2,
obj_as_text="dummy",
)
caplog.clear()
expected_short_data = [entry if entry[0] == 66 else (0, 19, 0) for entry in expected_data]
result = _handle_flate(
size=(3, 3),
data=data,
mode="1",
color_space=ArrayObject(
[
NameObject("/Indexed"),
NameObject("/DeviceRGB"),
NumberObject(1),
lookup,
]
),
colors=2,
obj_as_text="dummy",
)
assert expected_short_data == list(result[0].getdata())
assert "Not enough lookup values: Expected 6, got 5." in caplog.text


def test_extended_image_frombytes_zero_data():
Expand Down

0 comments on commit 80c3939

Please sign in to comment.