Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add capability to replace image #1849

Merged
merged 45 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from 42 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
ca44aec
BUG : fix RGB FlateEncode Images(PNG) and transparency
pubpub-zz May 6, 2023
c4c7378
add test
pubpub-zz May 6, 2023
54b228f
update req for pillow
pubpub-zz May 6, 2023
8861d5d
revert req
pubpub-zz May 6, 2023
56c076f
fix text
pubpub-zz May 6, 2023
330adcb
add image property to images[]
pubpub-zz May 6, 2023
84bd081
Process TIFF predictor 2
pubpub-zz May 6, 2023
7d34466
implement images as a Sequence
pubpub-zz May 6, 2023
a06a4a2
Lut
pubpub-zz May 6, 2023
0dcc07c
Lut2
pubpub-zz May 7, 2023
6e173b8
mypy
pubpub-zz May 7, 2023
f6a264c
ref image updated
pubpub-zz May 7, 2023
6703e9a
disable test temporarily
pubpub-zz May 7, 2023
a446cc4
erratum
pubpub-zz May 7, 2023
726eda0
improve test coverage
pubpub-zz May 7, 2023
2704454
get tuple
pubpub-zz May 7, 2023
4f19824
improve test coverage
pubpub-zz May 7, 2023
ae8e00c
factorisation and fixes
pubpub-zz May 14, 2023
9979039
mypy
pubpub-zz May 14, 2023
ca94859
mypy2
pubpub-zz May 14, 2023
d6405b2
mypy 3.7
pubpub-zz May 15, 2023
ef14cd9
add Test for CMYK
pubpub-zz May 15, 2023
baebd9f
BUG: get_contents does not return ContentStream
pubpub-zz May 18, 2023
7bdf874
Merge branch 'get_content' into rgb_png&transparency
pubpub-zz May 18, 2023
2009a07
extract Inline Images
pubpub-zz May 18, 2023
814b70f
mypy
pubpub-zz May 18, 2023
e8600f8
improve coverage
pubpub-zz May 19, 2023
04aa288
Merge branch 'main' into rgb_png&transparency
pubpub-zz May 19, 2023
61a0e10
from review
pubpub-zz May 19, 2023
7e4115c
test
pubpub-zz May 19, 2023
000659d
mypy
pubpub-zz May 19, 2023
d9c481c
New Image interface
pubpub-zz May 20, 2023
c68f806
clean up and remove inline_images
pubpub-zz May 20, 2023
4880f73
late fix
pubpub-zz May 20, 2023
7a1a714
mypy
pubpub-zz May 20, 2023
2d531d0
add image replace
pubpub-zz May 20, 2023
8a04c8c
adjust threshold
pubpub-zz May 20, 2023
a73e24a
improve coverage
pubpub-zz May 20, 2023
0313323
Merge branch 'main' into replace_images
pubpub-zz Jun 11, 2023
a688ec6
rename FileImage to ImageFile
pubpub-zz Jun 13, 2023
8ada65b
Merge branch 'main' into replace_images
MartinThoma Jun 13, 2023
ae7bd18
Merge branch 'main' into replace_images
MartinThoma Jun 13, 2023
d5ce8e7
DOC: replace
MartinThoma Jun 13, 2023
7921953
Update pypdf/_utils.py
MartinThoma Jun 13, 2023
2e79ce9
Update pypdf/_utils.py
MartinThoma Jun 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,13 @@ def images(self) -> List[ImageFile]:
`.image` : PIL Image Object
`.indirect_reference` : object reference

and the following methods:
`.replace(new_image: PIL.Image.Image, **kwargs)` :
replace the image in the pdf with the new image
applying the saving parameters indicated (such as quality)
e.g. :
`writer.pages[0].images[0].replace(Image.open("new_image.jpg"), quality=20)`

"""
return _VirtualListImages(self._get_ids_image, self._get_image) # type: ignore

Expand Down
42 changes: 41 additions & 1 deletion pypdf/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import warnings
from codecs import getencoder
from dataclasses import dataclass
from io import DEFAULT_BUFFER_SIZE
from io import DEFAULT_BUFFER_SIZE, BytesIO
from os import SEEK_CUR
from typing import (
IO,
Expand All @@ -45,6 +45,7 @@
Pattern,
Tuple,
Union,
cast,
overload,
)

Expand Down Expand Up @@ -508,3 +509,42 @@ class ImageFile(File):

image: Optional[Any] = None # optional ; direct PIL image access
indirect_reference: Optional[IndirectObject] = None # optional ; link to PdfObject

def replace(self, new_image: Any, **kwargs: Any) -> None:
    """
    Replace the image with a new PIL image.

    The new image is saved as PDF into an in-memory buffer, that buffer is
    read back with a PdfReader, and the first image of its first page is
    stored in the owning document in place of the current image object.
    ``name``, ``data`` and ``image`` of this object are then refreshed
    from the stored object.

    This is not allowed for an inline image or an image in a PdfReader.

    Args:
        new_image: the PIL image (``PIL.Image.Image``) to store.
        **kwargs: parameters passed to ``Image.Image.save()``,
            such as ``quality``.

    Raises:
        TypeError: if this image has no indirect reference (inline image),
            if the owning document has no ``_id_translated`` attribute
            (i.e. it is not a PdfWriter), or if ``new_image`` is not a
            PIL image.
    """
    from PIL import Image

    from ._reader import PdfReader

    # to prevent circular import
    from .filters import _xobj_to_image
    from .generic import DictionaryObject, PdfObject

    if self.indirect_reference is None:
        raise TypeError("Can not update an inline image")
    if not hasattr(self.indirect_reference.pdf, "_id_translated"):
        raise TypeError("Can not update an image not belonging to a PdfWriter")
    if not isinstance(new_image, Image.Image):
        raise TypeError("new_image shall be a PIL Image")
    # let PIL encode the image as a PDF, then pick the image object out of it
    b = BytesIO()
    new_image.save(b, "PDF", **kwargs)
    reader = PdfReader(b)
    assert reader.pages[0].images[0].indirect_reference is not None
    # overwrite the object slot of the current image (idnum is 1-based)
    self.indirect_reference.pdf._objects[self.indirect_reference.idnum - 1] = (
        reader.pages[0].images[0].indirect_reference.get_object()
    )
    # make the stored object point back to its reference in the owning document
    cast(
        PdfObject, self.indirect_reference.get_object()
    ).indirect_reference = self.indirect_reference
    # change the object attributes
    extension, byte_stream, img = _xobj_to_image(
        cast(DictionaryObject, self.indirect_reference.get_object())
    )
    assert extension is not None
    # swap the extension; a name without any "." is kept whole instead of
    # losing its last character (rfind() would return -1 in that case)
    stem, dot, _ = self.name.rpartition(".")
    self.name = (stem if dot else self.name) + extension
    self.data = byte_stream
    self.image = img
31 changes: 31 additions & 0 deletions tests/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from re import findall

import pytest
from PIL import ImageChops

from pypdf import PdfMerger, PdfReader, PdfWriter
from pypdf.constants import PageAttributes as PG
Expand Down Expand Up @@ -947,3 +948,33 @@ def test_fields_returning_stream():
data = BytesIO(get_pdf_from_url(url, name=name))
reader = PdfReader(data, strict=False)
assert "BtchIssQATit_time" in reader.get_form_text_fields()["TimeStampData"]


def test_replace_image(tmp_path):
    """
    Exercise ``replace`` on an image of a PdfWriter.

    Checks that the replaced image stays close to the source image, that a
    lower ``quality`` increases the distance, and that the three TypeError
    cases (reader image, non PIL input, inline image) are reported.
    """

    def mean_grey_difference(first, second):
        # very simple image distance evaluation:
        # average grey level of the pixel-wise difference
        delta = ImageChops.difference(first, second)
        width, height = delta.size
        return sum(delta.convert("L").getdata()) / (width * height)

    writer = PdfWriter(clone_from=RESOURCE_ROOT / "labeled-edges-center-image.pdf")
    reader = PdfReader(RESOURCE_ROOT / "jpeg.pdf")
    img = reader.pages[0].images[0].image

    # replace with default saving parameters, then read the written file back
    writer.pages[0].images[0].replace(img)
    output = BytesIO()
    writer.write(output)
    reloaded = PdfReader(output)
    default_distance = mean_grey_difference(reloaded.pages[0].images[0].image, img)
    assert default_distance < 1.5

    # a lower quality must move the image further away from the source
    writer.pages[0].images[0].replace(img, quality=20)
    low_quality_distance = mean_grey_difference(writer.pages[0].images[0].image, img)
    assert low_quality_distance > default_distance

    # extra tests for coverage
    with pytest.raises(TypeError) as excinfo:
        reader.pages[0].images[0].replace(img)
    assert (
        excinfo.value.args[0]
        == "Can not update an image not belonging to a PdfWriter"
    )

    target = writer.pages[0].images[0]
    with pytest.raises(TypeError) as excinfo:
        target.replace(reader.pages[0].images[0])  # missing .image
    assert excinfo.value.args[0] == "new_image shall be a PIL Image"

    target.indirect_reference = None  # to behave like an inline image
    with pytest.raises(TypeError) as excinfo:
        target.replace(reader.pages[0].images[0].image)
    assert excinfo.value.args[0] == "Can not update an inline image"