From fb1ee445a5f1770cc69b5d2fb5f9e96adb2fa161 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 12:11:36 +0200 Subject: [PATCH 01/13] BUG: Improve PdfWriter handling of context manager closes #2912 --- pypdf/_writer.py | 78 +++++++++++++++++++++++++++----------------- tests/test_writer.py | 60 ++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 30 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 5852e13cf..cb4b5d3ac 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -152,11 +152,16 @@ class PdfWriter(PdfDocCommon): Typically data is added from a :class:`PdfReader`. Args: + * : 1st argument is assigned to fileobj or clone_from based on context: + assigned to clone_from if str/path to a non empty file or stream or PdfReader + else assigned to fileobj. + + fileobj: output file/stream. To be used with context manager only. + clone_from: identical to fileobj (for compatibility) incremental: If true, loads the document and set the PdfWriter in incremental mode. - When writing incrementally, the original document is written first and new/modified content is appended. To be used for signed document/forms to keep signature valid. 
@@ -166,6 +171,7 @@ class PdfWriter(PdfDocCommon): def __init__( self, + *args: Any, fileobj: Union[None, PdfReader, StrByteType, Path] = "", clone_from: Union[None, PdfReader, StrByteType, Path] = None, incremental: bool = False, @@ -202,39 +208,34 @@ def __init__( self._ID: Union[ArrayObject, None] = None self._info_obj: Optional[PdfObject] - if self.incremental: - if isinstance(fileobj, (str, Path)): - with open(fileobj, "rb") as f: - fileobj = BytesIO(f.read(-1)) - if isinstance(fileobj, BytesIO): - fileobj = PdfReader(fileobj) - if not isinstance(fileobj, PdfReader): - raise PyPdfError("Invalid type for incremental mode") - self._reader = fileobj # prev content is in _reader.stream - self._header = fileobj.pdf_header.encode() - self._readonly = True # !!!TODO: to be analysed - else: - self._header = b"%PDF-1.3" - self._info_obj = self._add_object( - DictionaryObject( - {NameObject("/Producer"): create_string_object("pypdf")} - ) - ) + manualset_fileobj = True + if len(args) > 0: + if fileobj == "": + fileobj = args[0] + manualset_fileobj = False + elif clone_from is None: + clone_from = args[0] def _get_clone_from( fileobj: Union[None, PdfReader, str, Path, IO[Any], BytesIO], clone_from: Union[None, PdfReader, str, Path, IO[Any], BytesIO], - ) -> Union[None, PdfReader, str, Path, IO[Any], BytesIO]: - if isinstance(fileobj, (str, Path, IO, BytesIO)) and ( - fileobj == "" or clone_from is not None + manualset_fileobj: bool, + ) -> Tuple[ + Union[None, PdfReader, str, Path, IO[Any], BytesIO], + Union[None, PdfReader, str, Path, IO[Any], BytesIO], + ]: + if manualset_fileobj or ( + isinstance(fileobj, (str, Path, IO, BytesIO)) + and (fileobj in ("", None) or clone_from is not None) ): - return clone_from + return clone_from, fileobj cloning = True if isinstance(fileobj, (str, Path)) and ( not Path(str(fileobj)).exists() or Path(str(fileobj)).stat().st_size == 0 ): cloning = False + if isinstance(fileobj, (IO, BytesIO)): t = fileobj.tell() fileobj.seek(-1, 2) @@ 
-242,10 +243,30 @@ def _get_clone_from( cloning = False fileobj.seek(t, 0) if cloning: - clone_from = fileobj - return clone_from + return fileobj, None + return clone_from, fileobj + + clone_from, fileobj = _get_clone_from(fileobj, clone_from, manualset_fileobj) + + if self.incremental: + if isinstance(clone_from, (str, Path)): + with open(clone_from, "rb") as f: + clone_from = BytesIO(f.read(-1)) + if isinstance(clone_from, (IO, BytesIO)): + clone_from = PdfReader(clone_from) + if not isinstance(clone_from, PdfReader): + raise PyPdfError("Invalid type for incremental mode") + self._reader = clone_from # prev content is in _reader.stream + self._header = clone_from.pdf_header.encode() + self._readonly = True # !!!TODO: to be analysed + else: + self._header = b"%PDF-1.3" + self._info_obj = self._add_object( + DictionaryObject( + {NameObject("/Producer"): create_string_object("pypdf")} + ) + ) - clone_from = _get_clone_from(fileobj, clone_from) # to prevent overwriting self.temp_fileobj = fileobj self.fileobj = "" @@ -354,10 +375,7 @@ def xmp_metadata(self, value: Optional[XmpInformation]) -> None: def __enter__(self) -> "PdfWriter": """Store that writer is initialized by 'with'.""" - t = self.temp_fileobj - self.__init__() # type: ignore self.with_as_usage = True - self.fileobj = t # type: ignore return self def __exit__( @@ -1393,7 +1411,7 @@ def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO[Any]]: self.write_stream(stream) - if self.with_as_usage: + if my_file: stream.close() return my_file, stream diff --git a/tests/test_writer.py b/tests/test_writer.py index d422cd69c..70cee68e6 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2480,3 +2480,63 @@ def test_append_pdf_with_dest_without_page(caplog): writer.append(reader) assert "/__WKANCHOR_8" not in writer.named_destinations assert len(writer.named_destinations) == 3 + + +def test_writer_contextmanager(): + """To test the writer with context manager, cf #2912""" + pdf_path = 
str(RESOURCE_ROOT / "crazyones.pdf") + with PdfWriter(pdf_path) as w: + assert len(w.pages) > 0 + assert not w.fileobj + with open(pdf_path, "rb") as f, PdfWriter(f) as w: + assert len(w.pages) > 0 + assert not w.fileobj + with open(pdf_path, "rb") as f, PdfWriter(BytesIO(f.read(-1))) as w: + assert len(w.pages) > 0 + assert not w.fileobj + + try: + with NamedTemporaryFile(suffix=".pdf", delete=False) as tmp: + tmp_file = Path(tmp.name) + with PdfWriter(tmp_file) as w: + assert len(w.pages) == 0 + + with open(tmp_file, "wb") as f1, open(pdf_path, "rb") as f: + f1.write(f.read(-1)) + with PdfWriter(tmp_file) as w: + assert len(w.pages) > 0 + assert tmp_file.stat().st_size > 0 + + with PdfWriter(tmp_file, incremental=True) as w: + assert w._reader + assert not w.fileobj + assert tmp_file.stat().st_size > 0 + + with PdfWriter(clone_from=tmp_file) as w: + assert len(w.pages) > 0 + assert not w.fileobj + assert tmp_file.stat().st_size > 0 + + with PdfWriter(fileobj=tmp_file) as w: + assert len(w.pages) == 0 + assert 8 <= tmp_file.stat().st_size <= 1024 + + b = BytesIO() + with PdfWriter(fileobj=b) as w: + assert len(w.pages) == 0 + assert not b.closed + assert 8 <= len(b.getbuffer()) <= 1024 + + with NamedTemporaryFile(mode="wb", suffix=".pdf", delete=True) as tmp: + with PdfWriter(pdf_path, fileobj=tmp, incremental=True) as w: + assert w._reader + assert not tmp.closed + assert Path(tmp.name).stat().st_size == Path(pdf_path).stat().st_size + + with PdfWriter(tmp_file) as w: + assert len(w.pages) == 0 + + except Exception as e: + raise e + finally: + tmp_file.unlink() From 02040f7e99ea7f69a5e9de6556546ab460dd50d6 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 14:25:30 +0200 Subject: [PATCH 02/13] Update pypdf/_writer.py Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com> --- pypdf/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_writer.py 
b/pypdf/_writer.py index cb4b5d3ac..c652e7982 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -152,7 +152,7 @@ class PdfWriter(PdfDocCommon): Typically data is added from a :class:`PdfReader`. Args: - * : 1st argument is assigned to fileobj or clone_from based on context: + *: 1st argument is assigned to fileobj or clone_from based on context: assigned to clone_from if str/path to a non empty file or stream or PdfReader else assigned to fileobj. From fc06d92246c94f3caf10e4a389ad706b0440a7de Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 14:25:41 +0200 Subject: [PATCH 03/13] Update pypdf/_writer.py Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com> --- pypdf/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index c652e7982..8a8eef8e7 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -156,7 +156,7 @@ class PdfWriter(PdfDocCommon): assigned to clone_from if str/path to a non empty file or stream or PdfReader else assigned to fileobj. - fileobj: output file/stream. To be used with context manager only. + fileobj: Output file/stream. To be used with context manager only. 
clone_from: identical to fileobj (for compatibility) From 9f43335567147439a9f227e8b74ea4ba35cdf477 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 14:25:51 +0200 Subject: [PATCH 04/13] Update pypdf/_writer.py Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com> --- pypdf/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 8a8eef8e7..6b93681a2 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -153,7 +153,7 @@ class PdfWriter(PdfDocCommon): Args: *: 1st argument is assigned to fileobj or clone_from based on context: - assigned to clone_from if str/path to a non empty file or stream or PdfReader + assigned to clone_from if str/path to a non empty file or stream or PdfReader, else assigned to fileobj. fileobj: Output file/stream. To be used with context manager only. From 708455ec0ec0d36375dc50318f91566ece5d4984 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 14:26:02 +0200 Subject: [PATCH 05/13] Update pypdf/_writer.py Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com> --- pypdf/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 6b93681a2..8bb8da272 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -154,7 +154,7 @@ class PdfWriter(PdfDocCommon): Args: *: 1st argument is assigned to fileobj or clone_from based on context: assigned to clone_from if str/path to a non empty file or stream or PdfReader, - else assigned to fileobj. + otherwise assigned to fileobj. fileobj: Output file/stream. To be used with context manager only. 
From fca18975b43bd82752544d4b93b42688d622af26 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 15:28:45 +0200 Subject: [PATCH 06/13] fix --- pypdf/_writer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index cb4b5d3ac..7e7cea443 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -172,7 +172,7 @@ class PdfWriter(PdfDocCommon): def __init__( self, *args: Any, - fileobj: Union[None, PdfReader, StrByteType, Path] = "", + fileobj: Union[None, StrByteType, Path] = "", clone_from: Union[None, PdfReader, StrByteType, Path] = None, incremental: bool = False, full: bool = False, @@ -222,12 +222,13 @@ def _get_clone_from( manualset_fileobj: bool, ) -> Tuple[ Union[None, PdfReader, str, Path, IO[Any], BytesIO], - Union[None, PdfReader, str, Path, IO[Any], BytesIO], + Union[None, str, Path, IO[Any], BytesIO], ]: if manualset_fileobj or ( isinstance(fileobj, (str, Path, IO, BytesIO)) and (fileobj in ("", None) or clone_from is not None) ): + assert not isinstance(fileobj, PdfReader), " for mypy" return clone_from, fileobj cloning = True if isinstance(fileobj, (str, Path)) and ( @@ -244,6 +245,7 @@ def _get_clone_from( fileobj.seek(t, 0) if cloning: return fileobj, None + assert not isinstance(fileobj, PdfReader), " for mypy" return clone_from, fileobj clone_from, fileobj = _get_clone_from(fileobj, clone_from, manualset_fileobj) @@ -269,7 +271,7 @@ def _get_clone_from( # to prevent overwriting self.temp_fileobj = fileobj - self.fileobj = "" + self.fileobj: Union[None, StrByteType, Path] = "" self.with_as_usage = False # The root of our page tree node. 
pages = DictionaryObject() @@ -375,6 +377,7 @@ def xmp_metadata(self, value: Optional[XmpInformation]) -> None: def __enter__(self) -> "PdfWriter": """Store that writer is initialized by 'with'.""" + self.fileobj = self.temp_fileobj self.with_as_usage = True return self From fb7df102659f25aa6d507bb55b8f2e9feda71e12 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 16:22:12 +0200 Subject: [PATCH 07/13] Update pypdf/_writer.py Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com> --- pypdf/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 50326a3d1..19c9de9e0 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -228,7 +228,7 @@ def _get_clone_from( isinstance(fileobj, (str, Path, IO, BytesIO)) and (fileobj in ("", None) or clone_from is not None) ): - assert not isinstance(fileobj, PdfReader), " for mypy" + assert not isinstance(fileobj, PdfReader), "for mypy" return clone_from, fileobj cloning = True if isinstance(fileobj, (str, Path)) and ( From 9458e40bd025ff445db31d1fc9f5254181c85d2d Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 16:22:24 +0200 Subject: [PATCH 08/13] Update pypdf/_writer.py Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com> --- pypdf/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 19c9de9e0..6982a4e04 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -245,7 +245,7 @@ def _get_clone_from( fileobj.seek(t, 0) if cloning: return fileobj, None - assert not isinstance(fileobj, PdfReader), " for mypy" + assert not isinstance(fileobj, PdfReader), "for mypy" return clone_from, fileobj clone_from, fileobj = _get_clone_from(fileobj, clone_from, manualset_fileobj) From e34f8683bd9f84f5ae21c1f6fee7f14b35d93585 Mon Sep 17 00:00:00 2001 From: pubpub-zz 
<4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 16:23:20 +0200 Subject: [PATCH 09/13] Update pypdf/_writer.py Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com> --- pypdf/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 6982a4e04..422ccb78f 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -208,7 +208,7 @@ def __init__( self._ID: Union[ArrayObject, None] = None self._info_obj: Optional[PdfObject] - manualset_fileobj = True + manual_set_fileobj = True if len(args) > 0: if fileobj == "": fileobj = args[0] From 990f6fae0fc3354f4cb56b8755fa351b8770d825 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 16:25:01 +0200 Subject: [PATCH 10/13] coverage --- pypdf/_writer.py | 5 +++++ tests/test_writer.py | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index a109fe996..ece07c6b3 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -215,6 +215,11 @@ def __init__( manualset_fileobj = False elif clone_from is None: clone_from = args[0] + else: + logger_warning( + "unnamed param ignored: fileobj and clone_from already defined", + __name__, + ) def _get_clone_from( fileobj: Union[None, PdfReader, str, Path, IO[Any], BytesIO], diff --git a/tests/test_writer.py b/tests/test_writer.py index 70cee68e6..1971c112d 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2482,7 +2482,7 @@ def test_append_pdf_with_dest_without_page(caplog): assert len(writer.named_destinations) == 3 -def test_writer_contextmanager(): +def test_writer_contextmanager(caplog): """To test the writer with context manager, cf #2912""" pdf_path = str(RESOURCE_ROOT / "crazyones.pdf") with PdfWriter(pdf_path) as w: @@ -2540,3 +2540,10 @@ def test_writer_contextmanager(): raise e finally: tmp_file.unlink() + caplog.clear() + b = BytesIO() + with PdfWriter("ignored", 
fileobj=b, clone_from=pdf_path) as w: + pass + assert ( + "unnamed param ignored: fileobj and clone_from already defined" in caplog.text + ) From cd76f93aeada9a1e0d3526f5eb3f9e471c319998 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 16:45:02 +0200 Subject: [PATCH 11/13] comments --- pypdf/_writer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index ece07c6b3..f4d9d2598 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -208,11 +208,11 @@ def __init__( self._ID: Union[ArrayObject, None] = None self._info_obj: Optional[PdfObject] - manualset_fileobj = True + manual_set_fileobj = True if len(args) > 0: if fileobj == "": fileobj = args[0] - manualset_fileobj = False + manual_set_fileobj = False elif clone_from is None: clone_from = args[0] else: @@ -224,12 +224,12 @@ def __init__( def _get_clone_from( fileobj: Union[None, PdfReader, str, Path, IO[Any], BytesIO], clone_from: Union[None, PdfReader, str, Path, IO[Any], BytesIO], - manualset_fileobj: bool, + manual_set_fileobj: bool, ) -> Tuple[ Union[None, PdfReader, str, Path, IO[Any], BytesIO], Union[None, str, Path, IO[Any], BytesIO], ]: - if manualset_fileobj or ( + if manual_set_fileobj or ( isinstance(fileobj, (str, Path, IO, BytesIO)) and (fileobj in ("", None) or clone_from is not None) ): @@ -253,7 +253,7 @@ def _get_clone_from( assert not isinstance(fileobj, PdfReader), " for mypy" return clone_from, fileobj - clone_from, fileobj = _get_clone_from(fileobj, clone_from, manualset_fileobj) + clone_from, fileobj = _get_clone_from(fileobj, clone_from, manual_set_fileobj) if self.incremental: if isinstance(clone_from, (str, Path)): From c82fd5a7892d1bfaa3c7ec1dacad45e7d71387d4 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 16:53:02 +0200 Subject: [PATCH 12/13] comment tmp_path --- tests/test_writer.py | 86 
+++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 46 deletions(-) diff --git a/tests/test_writer.py b/tests/test_writer.py index a1323c1c0..1cdbd3aef 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -982,7 +982,7 @@ def test_write_empty_stream(): with pytest.raises(ValueError) as exc: writer.write("") - assert exc.value.args[0] == "Output(stream='') is empty." + assert exc.value.args[0] == "Output(stream=) is empty." def test_startup_dest(): @@ -1187,21 +1187,21 @@ def test_set_page_label(pdf_file_path): writer = PdfWriter() writer.clone_document_from_reader(reader) with pytest.raises( - ValueError, match="At least one of style and prefix must be given" + ValueError, match="at least one between style and prefix must be given" ): writer.set_page_label(0, 5, start=2) with pytest.raises( - ValueError, match="page_index_from must be greater or equal than 0" + ValueError, match="page_index_from must be equal or greater then 0" ): writer.set_page_label(-1, 5, "/r") with pytest.raises( - ValueError, match="page_index_to must be greater or equal than page_index_from" + ValueError, match="page_index_to must be equal or greater then page_index_from" ): writer.set_page_label(5, 0, "/r") with pytest.raises(ValueError, match="page_index_to exceeds number of pages"): writer.set_page_label(0, 19, "/r") with pytest.raises( - ValueError, match="If given, start must be greater or equal than one" + ValueError, match="if given, start must be equal or greater than one" ): writer.set_page_label(0, 5, "/r", start=-1) @@ -2482,7 +2482,7 @@ def test_append_pdf_with_dest_without_page(caplog): assert len(writer.named_destinations) == 3 -def test_writer_contextmanager(caplog): +def test_writer_contextmanager(tmp_path, caplog): """To test the writer with context manager, cf #2912""" pdf_path = str(RESOURCE_ROOT / "crazyones.pdf") with PdfWriter(pdf_path) as w: @@ -2495,51 +2495,45 @@ def test_writer_contextmanager(caplog): assert len(w.pages) > 0 
assert not w.fileobj - try: - with NamedTemporaryFile(suffix=".pdf", delete=False) as tmp: - tmp_file = Path(tmp.name) - with PdfWriter(tmp_file) as w: - assert len(w.pages) == 0 + tmp_file = tmp_path / "out.pdf" + with PdfWriter(tmp_file) as w: + assert len(w.pages) == 0 - with open(tmp_file, "wb") as f1, open(pdf_path, "rb") as f: - f1.write(f.read(-1)) - with PdfWriter(tmp_file) as w: - assert len(w.pages) > 0 - assert tmp_file.stat().st_size > 0 + with open(tmp_file, "wb") as f1, open(pdf_path, "rb") as f: + f1.write(f.read(-1)) + with PdfWriter(tmp_file) as w: + assert len(w.pages) > 0 + assert tmp_file.stat().st_size > 0 - with PdfWriter(tmp_file, incremental=True) as w: - assert w._reader - assert not w.fileobj - assert tmp_file.stat().st_size > 0 + with PdfWriter(tmp_file, incremental=True) as w: + assert w._reader + assert not w.fileobj + assert tmp_file.stat().st_size > 0 - with PdfWriter(clone_from=tmp_file) as w: - assert len(w.pages) > 0 - assert not w.fileobj - assert tmp_file.stat().st_size > 0 + with PdfWriter(clone_from=tmp_file) as w: + assert len(w.pages) > 0 + assert not w.fileobj + assert tmp_file.stat().st_size > 0 - with PdfWriter(fileobj=tmp_file) as w: - assert len(w.pages) == 0 - assert 8 <= tmp_file.stat().st_size <= 1024 + with PdfWriter(fileobj=tmp_file) as w: + assert len(w.pages) == 0 + assert 8 <= tmp_file.stat().st_size <= 1024 + + b = BytesIO() + with PdfWriter(fileobj=b) as w: + assert len(w.pages) == 0 + assert not b.closed + assert 8 <= len(b.getbuffer()) <= 1024 + + with NamedTemporaryFile(mode="wb", suffix=".pdf", delete=True) as tmp: + with PdfWriter(pdf_path, fileobj=tmp, incremental=True) as w: + assert w._reader + assert not tmp.closed + assert Path(tmp.name).stat().st_size == Path(pdf_path).stat().st_size + + with PdfWriter(tmp_file) as w: + assert len(w.pages) == 0 - b = BytesIO() - with PdfWriter(fileobj=b) as w: - assert len(w.pages) == 0 - assert not b.closed - assert 8 <= len(b.getbuffer()) <= 1024 - - with 
NamedTemporaryFile(mode="wb", suffix=".pdf", delete=True) as tmp: - with PdfWriter(pdf_path, fileobj=tmp, incremental=True) as w: - assert w._reader - assert not tmp.closed - assert Path(tmp.name).stat().st_size == Path(pdf_path).stat().st_size - - with PdfWriter(tmp_file) as w: - assert len(w.pages) == 0 - - except Exception as e: - raise e - finally: - tmp_file.unlink() caplog.clear() b = BytesIO() with PdfWriter("ignored", fileobj=b, clone_from=pdf_path) as w: From 2f98e6ce0aea304117f24142b2e3e71426cd9f2a Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 20 Oct 2024 18:23:07 +0200 Subject: [PATCH 13/13] missed merge --- tests/test_writer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_writer.py b/tests/test_writer.py index 1cdbd3aef..382b1c26e 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -982,7 +982,7 @@ def test_write_empty_stream(): with pytest.raises(ValueError) as exc: writer.write("") - assert exc.value.args[0] == "Output(stream=) is empty." + assert exc.value.args[0] == "Output(stream='') is empty." 
def test_startup_dest(): @@ -1187,21 +1187,21 @@ def test_set_page_label(pdf_file_path): writer = PdfWriter() writer.clone_document_from_reader(reader) with pytest.raises( - ValueError, match="at least one between style and prefix must be given" + ValueError, match="At least one of style and prefix must be given" ): writer.set_page_label(0, 5, start=2) with pytest.raises( - ValueError, match="page_index_from must be equal or greater then 0" + ValueError, match="page_index_from must be greater or equal than 0" ): writer.set_page_label(-1, 5, "/r") with pytest.raises( - ValueError, match="page_index_to must be equal or greater then page_index_from" + ValueError, match="page_index_to must be greater or equal than page_index_from" ): writer.set_page_label(5, 0, "/r") with pytest.raises(ValueError, match="page_index_to exceeds number of pages"): writer.set_page_label(0, 19, "/r") with pytest.raises( - ValueError, match="if given, start must be equal or greater than one" + ValueError, match="If given, start must be greater or equal than one" ): writer.set_page_label(0, 5, "/r", start=-1)