@@ -47,7 +47,8 @@ def __init__(
4747 self .merge_pdf_pages (
4848 [0 , count_pages - 2 , count_pages - 1 ][:n_pdf_pages ]
4949 )
50- self .check_if_document_is_empty ()
50+ if self .is_pdf_empty ():
51+ raise AssertionError (f"PDF pages are empty in: { self .filename } " )
5152
5253 def count_pdf_pages (self ):
5354 """
@@ -71,20 +72,30 @@ def merge_pdf_pages(self, pages_number):
7172 self .file_object = io .BytesIO ()
7273 new_pdf .save (self .file_object )
7374
def is_pdf_empty(self) -> bool:
    """
    Check whether the document contains only empty pages.

    A page is treated as non-empty as soon as one of these holds:
      * its "/Resources" dictionary has a "/Font" entry,
      * its "/Resources" dictionary has an "/XObject" entry,
      * the "/Length" declared by its content stream(s) adds up to more
        than 1000.

    :return: (bool) True if no page satisfies any of the conditions above
        (including when the document has no pages), False otherwise
    """
    self.file_object.seek(0)
    with pikepdf.open(self.file_object) as pdf:
        for page in pdf.pages:
            # The page dictionary may not carry its own "/Resources"
            # entry; a missing key raises KeyError, which is treated here
            # as "no fonts / no XObjects declared on this page".
            try:
                resource_names = page["/Resources"].keys()
            except KeyError:
                resource_names = ()
            if "/Font" in resource_names or "/XObject" in resource_names:
                return False

            # "/Contents" is optional for a page; a page without it has
            # nothing to draw, so move on to the next page.
            try:
                contents = page["/Contents"]
            except KeyError:
                continue

            # mypy incorrectly identifies the "/Length" key's value as
            # an object rather than an int.
            try:
                # Single content stream: read its declared length.
                total_size = contents["/Length"]
            except ValueError:
                # "/Contents" is an array of streams (string indexing is
                # rejected with ValueError): add up every part.
                total_size = sum(  # type: ignore
                    stream["/Length"] for stream in contents  # type: ignore
                )
            if total_size > 1000:  # type: ignore
                return False
    return True
8899
89100 def check_pdf_open (self ):
90101 """
0 commit comments