From 1d9a52c306cefbda0749f6bd16b9bc4c8141e6dc Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Fri, 18 Oct 2024 15:07:07 -0700 Subject: [PATCH 1/9] BUG: Don't close stream passed to PdfWriter.write() --- pypdf/_writer.py | 10 ++++++---- tests/test_writer.py | 11 +++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 0ac1524bc..8b15d56e1 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -249,7 +249,6 @@ def _get_clone_from( # to prevent overwriting self.temp_fileobj = fileobj self.fileobj = "" - self.with_as_usage = False # The root of our page tree node. pages = DictionaryObject() pages.update( @@ -356,7 +355,6 @@ def __enter__(self) -> "PdfWriter": """Store that writer is initialized by 'with'.""" t = self.temp_fileobj self.__init__() # type: ignore - self.with_as_usage = True self.fileobj = t # type: ignore return self @@ -369,6 +367,9 @@ def __exit__( """Write data to the fileobj.""" if self.fileobj: self.write(self.fileobj) + close_attr = getattr(self.fileobj, "close", None) + if callable(close_attr): + self.fileobj.close() def _repr_mimebundle_( self, @@ -1388,13 +1389,14 @@ def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO[Any]]: if isinstance(stream, (str, Path)): stream = FileIO(stream, "wb") - self.with_as_usage = True # my_file = True self.write_stream(stream) - if self.with_as_usage: + if my_file: stream.close() + else: + stream.flush() return my_file, stream diff --git a/tests/test_writer.py b/tests/test_writer.py index 0cd2d03f8..cb11236d3 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2480,3 +2480,14 @@ def test_append_pdf_with_dest_without_page(caplog): writer.append(reader) assert "/__WKANCHOR_8" not in writer.named_destinations assert len(writer.named_destinations) == 3 + + +def test_stream_not_closed(): + """Tests for #2905""" + src = RESOURCE_ROOT / "pdflatex-outline.pdf" + with NamedTemporaryFile() as tmp: + with PdfReader(src) as reader, PdfWriter() as writer: + for i in range(4): + writer.add_page(reader.pages[i]) + writer.write(tmp) + assert not tmp.file.closed From 5568450134440babc58ffb68b37767c8b79676b2 Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Fri, 18 Oct 2024 15:12:34 -0700 Subject: [PATCH 2/9] mypy --- pypdf/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 8b15d56e1..607041703 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -369,7 +369,7 @@ def __exit__( self.write(self.fileobj) close_attr = getattr(self.fileobj, "close", None) if callable(close_attr): - self.fileobj.close() + self.fileobj.close() # type: ignore[attr-defined] def _repr_mimebundle_( self, From a905bacf7a015879f136621eb7eee8a7f8772554 Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Sun, 20 Oct 2024 18:46:29 -0700 Subject: [PATCH 3/9] Improvements based on code review comments. --- pypdf/_writer.py | 15 ++++++++------- tests/test_writer.py | 24 +++++++++++++++++++++--- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 607041703..ffb8df31a 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -235,9 +235,9 @@ def _get_clone_from( or Path(str(fileobj)).stat().st_size == 0 ): cloning = False - if isinstance(fileobj, (IO, BytesIO)): + if isinstance(fileobj, (IOBase, BytesIO)): t = fileobj.tell() - fileobj.seek(-1, 2) + fileobj.seek(0, 2) if fileobj.tell() == 0: cloning = False fileobj.seek(t, 0) @@ -249,6 +249,7 @@ def _get_clone_from( # to prevent overwriting self.temp_fileobj = fileobj self.fileobj = "" + self.cloned = False # The root of our page tree node. pages = DictionaryObject() pages.update( @@ -266,6 +267,7 @@ def _get_clone_from( if not isinstance(clone_from, PdfReader): clone_from = PdfReader(clone_from) self.clone_document_from_reader(clone_from) + self.cloned = True else: self._pages = self._add_object(pages) # root object @@ -352,9 +354,11 @@ def xmp_metadata(self, value: Optional[XmpInformation]) -> None: return self.root_object.xmp_metadata # type: ignore def __enter__(self) -> "PdfWriter": - """Store that writer is initialized by 'with'.""" + """Store how writer is initialized by 'with'.""" + c: bool = self.cloned t = self.temp_fileobj self.__init__() # type: ignore + self.cloned = c self.fileobj = t # type: ignore return self @@ -365,11 +369,8 @@ def __exit__( traceback: Optional[TracebackType], ) -> None: """Write data to the fileobj.""" - if self.fileobj: + if self.fileobj and not self.cloned: self.write(self.fileobj) - close_attr = getattr(self.fileobj, "close", None) - if callable(close_attr): - self.fileobj.close() # type: ignore[attr-defined] def _repr_mimebundle_( self, diff --git a/tests/test_writer.py b/tests/test_writer.py index cb11236d3..9e0f49bb8 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2485,9 +2485,27 @@ def test_append_pdf_with_dest_without_page(caplog): def test_stream_not_closed(): """Tests for #2905""" src = RESOURCE_ROOT / "pdflatex-outline.pdf" - with NamedTemporaryFile() as tmp: + with NamedTemporaryFile(suffix=".pdf") as tmp: with PdfReader(src) as reader, PdfWriter() as writer: - for i in range(4): - writer.add_page(reader.pages[i]) + writer.add_page(reader.pages[0]) writer.write(tmp) assert not tmp.file.closed + + with NamedTemporaryFile(suffix=".pdf") as target: + with PdfWriter(target.file) as writer: + writer.add_blank_page(100, 100) + assert not target.file.closed + + with open(src, "rb") as fileobj: + with PdfWriter(fileobj) as writer: + pass + assert not fileobj.closed + + +def test_auto_write(): + """Another test for #2905""" + with NamedTemporaryFile(suffix=".pdf", delete_on_close=False) as tmp: + tmp.close() + with PdfWriter(tmp.name) as writer: + writer.add_blank_page(100, 100) + assert Path(tmp.name).stat().st_size > 0 From dafbafc30787e32794a45dec660c5dc83ae09763 Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Sun, 20 Oct 2024 18:58:08 -0700 Subject: [PATCH 4/9] Make test work with Python 3.8. --- tests/test_writer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_writer.py b/tests/test_writer.py index 9e0f49bb8..f50b8f269 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2504,8 +2504,8 @@ def test_stream_not_closed(): def test_auto_write(): """Another test for #2905""" - with NamedTemporaryFile(suffix=".pdf", delete_on_close=False) as tmp: - tmp.close() - with PdfWriter(tmp.name) as writer: - writer.add_blank_page(100, 100) - assert Path(tmp.name).stat().st_size > 0 + target = Path(_get_write_target(str)) + with PdfWriter(target) as writer: + writer.add_blank_page(100, 100) + assert target.stat().st_size > 0 + target.unlink() From 3d6b7dc6edcf18fce5a340c6aabfb4f847bff4b2 Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Mon, 21 Oct 2024 01:26:24 -0700 Subject: [PATCH 5/9] Use tmp_path to avoid needing to unlink. --- tests/test_writer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_writer.py b/tests/test_writer.py index f50b8f269..d4e176834 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2502,10 +2502,9 @@ def test_stream_not_closed(): assert not fileobj.closed -def test_auto_write(): +def test_auto_write(tmp_path): """Another test for #2905""" - target = Path(_get_write_target(str)) + target = tmp_path / "out.pdf" with PdfWriter(target) as writer: writer.add_blank_page(100, 100) assert target.stat().st_size > 0 - target.unlink() From 4a1189b458782d910a0789b1d8487e79347feddc Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Thu, 24 Oct 2024 15:08:59 -0700 Subject: [PATCH 6/9] Restore some dead code; make cloned look private. --- pypdf/_writer.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index ffb8df31a..56265765d 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -237,8 +237,7 @@ def _get_clone_from( cloning = False if isinstance(fileobj, (IOBase, BytesIO)): t = fileobj.tell() - fileobj.seek(0, 2) - if fileobj.tell() == 0: + if fileobj.seek(0, 2) == 0: cloning = False fileobj.seek(t, 0) if cloning: @@ -249,7 +248,8 @@ def _get_clone_from( # to prevent overwriting self.temp_fileobj = fileobj self.fileobj = "" - self.cloned = False + self.with_as_usage = False + self._cloned = False # The root of our page tree node. pages = DictionaryObject() pages.update( @@ -267,7 +267,7 @@ def _get_clone_from( if not isinstance(clone_from, PdfReader): clone_from = PdfReader(clone_from) self.clone_document_from_reader(clone_from) - self.cloned = True + self._cloned = True else: self._pages = self._add_object(pages) # root object @@ -355,10 +355,11 @@ def xmp_metadata(self, value: Optional[XmpInformation]) -> None: def __enter__(self) -> "PdfWriter": """Store how writer is initialized by 'with'.""" - c: bool = self.cloned + c: bool = self._cloned t = self.temp_fileobj self.__init__() # type: ignore - self.cloned = c + self._cloned = c + self.with_as_usage = True self.fileobj = t # type: ignore return self @@ -369,7 +370,7 @@ def __exit__( traceback: Optional[TracebackType], ) -> None: """Write data to the fileobj.""" - if self.fileobj and not self.cloned: + if self.fileobj and not self._cloned: self.write(self.fileobj) def _repr_mimebundle_( From 86f11a8ba42bc0c131754c8a472cce17bcbcce0f Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Tue, 29 Oct 2024 11:31:51 -0700 Subject: [PATCH 7/9] Deprecate with_as_usage --- pypdf/_writer.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 80709d075..fbc4b29d1 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -63,6 +63,7 @@ StreamType, _get_max_pdf_version_header, deprecate, + deprecate_no_replacement, deprecation_with_replacement, logger_warning, ) @@ -249,7 +250,7 @@ def _get_clone_from( # to prevent overwriting self.temp_fileobj = fileobj self.fileobj = "" - self.with_as_usage = False + self._with_as_usage = False self._cloned = False # The root of our page tree node. pages = DictionaryObject() @@ -356,13 +357,23 @@ def xmp_metadata(self, value: Optional[XmpInformation]) -> None: return self.root_object.xmp_metadata # type: ignore + @property + def with_as_usage(self) -> bool: + deprecate_no_replacement("with_as_usage", "6.0") + return self._with_as_usage + + @with_as_usage.setter + def with_as_usage(self, value: bool) -> None: + deprecate_no_replacement("with_as_usage", "6.0") + self._with_as_usage = value + def __enter__(self) -> "PdfWriter": """Store how writer is initialized by 'with'.""" c: bool = self._cloned t = self.temp_fileobj self.__init__() # type: ignore self._cloned = c - self.with_as_usage = True + self._with_as_usage = True self.fileobj = t # type: ignore return self From a57277f8abcb9b0ca2517d3cbaf57f3eee321f25 Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Tue, 29 Oct 2024 11:55:17 -0700 Subject: [PATCH 8/9] test coverage --- tests/test_writer.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_writer.py b/tests/test_writer.py index 6c8f65a05..e3bed91bc 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2509,3 +2509,12 @@ def test_auto_write(tmp_path): with PdfWriter(target) as writer: writer.add_blank_page(100, 100) assert target.stat().st_size > 0 + + +def test_deprecate_with_as(): + """Yet another test for #2905""" + with PdfWriter() as writer: + with pytest.raises(DeprecationWarning): + assert writer.with_as_usage + with pytest.raises(DeprecationWarning): + writer.with_as_usage = True # old code allowed setting this, so... From b115223b48fcb27be06aef7c6c4c2d983d56028d Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Wed, 30 Oct 2024 10:56:16 -0700 Subject: [PATCH 9/9] Test deprecation warning messages. --- tests/test_writer.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/test_writer.py b/tests/test_writer.py index e3bed91bc..09f24244b 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2514,7 +2514,10 @@ def test_auto_write(tmp_path): def test_deprecate_with_as(): """Yet another test for #2905""" with PdfWriter() as writer: - with pytest.raises(DeprecationWarning): - assert writer.with_as_usage - with pytest.raises(DeprecationWarning): - writer.with_as_usage = True # old code allowed setting this, so... + with pytest.warns(DeprecationWarning) as w: + val = writer.with_as_usage + assert "with_as_usage is deprecated" in w[0].message.args[0] + assert val + with pytest.warns(DeprecationWarning) as w: + writer.with_as_usage = val # old code allowed setting this, so... + assert "with_as_usage is deprecated" in w[0].message.args[0]