Skip to content

Commit e75f6e3

Browse files
committed
fix bug in counting PDF pages
1 parent fe87483 commit e75f6e3

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

mindee/inputs.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ def __init__(
4747
self.merge_pdf_pages(
4848
[0, count_pages - 2, count_pages - 1][:n_pdf_pages]
4949
)
50-
self.check_if_document_is_empty()
50+
if self.is_pdf_empty():
51+
raise AssertionError(f"PDF pages are empty in: {self.filename}")
5152

5253
def count_pdf_pages(self):
5354
"""
@@ -71,20 +72,30 @@ def merge_pdf_pages(self, pages_number):
7172
self.file_object = io.BytesIO()
7273
new_pdf.save(self.file_object)
7374

74-
def check_if_document_is_empty(self):
75+
def is_pdf_empty(self) -> bool:
7576
"""
7677
:return: (void) Check if the document contains only empty pages
7778
"""
7879
self.file_object.seek(0)
7980
with pikepdf.open(self.file_object) as pdf:
80-
for _, page in enumerate(pdf.pages):
81-
if (
82-
"/Font" in page["/Resources"].keys()
83-
or "/XObject" in page["/Resources"].keys()
84-
or page["/Contents"]["/Length"] > 1000
85-
):
86-
return
87-
raise Exception("PDF pages are empty")
81+
for page in pdf.pages:
82+
83+
# mypy incorrectly identifies the "/Length" key's value as
84+
# an object rather than an int.
85+
try:
86+
total_size = page["/Contents"]["/Length"]
87+
except ValueError:
88+
total_size = 0 # type: ignore
89+
for content in page["/Contents"]: # type: ignore
90+
total_size += content["/Length"]
91+
has_data = total_size > 1000 # type: ignore
92+
93+
has_font = "/Font" in page["/Resources"].keys()
94+
has_xobj = "/XObject" in page["/Resources"].keys()
95+
96+
if has_font or has_xobj or has_data:
97+
return False
98+
return True
8899

89100
def check_pdf_open(self):
90101
"""

0 commit comments

Comments
 (0)