From 3752e432b88d7b7d6868279bca6c5d47134adedc Mon Sep 17 00:00:00 2001 From: Gregory Goodson Date: Fri, 7 Jun 2024 16:20:56 +0100 Subject: [PATCH] CMYK JPEGs appearing inverted in PDF output Addresses issue [#1128](https://github.com/Kozea/WeasyPrint/issues/1128). According to [libjpeg](https://github.com/libjpeg-turbo/libjpeg-turbo/blob/3c17063ef1ab43f5877f19d670dc39497c5cd036/libjpeg.txt#L1569-L1582), "it appears that Adobe Photoshop writes inverted data in CMYK JPEG files". An Adobe JPEG can be identified by the presence of the [APP14](https://exiftool.org/TagNames/JPEG.html#Adobe) segment. The code now checks for the `APP14` segment in `RasterImage` and adds a `Decode` array to the image XObject when rendering a CMYK JPEG. The `Decode` array `[1 0 1 0 1 0 1 0]` is the reverse of the DeviceCMYK default (`[0 1 0 1 0 1 0 1]`) given in the PDF specification, so the inverted sample data is inverted once more and the image renders normally. --- weasyprint/images.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/weasyprint/images.py b/weasyprint/images.py index 64b44536a..417a1c11b 100644 --- a/weasyprint/images.py +++ b/weasyprint/images.py @@ -66,6 +66,7 @@ def __init__(self, pillow_image, image_id, image_data, filename=None, self.height = pillow_image.height self.ratio = (self.width / self.height) if self.height != 0 else inf self.optimize = optimize = options['optimize_images'] + self.app14 = getattr(original_pillow_image, 'app', {}).get('APP14') if pillow_image.format in ('JPEG', 'MPO'): self.format = 'JPEG' @@ -150,6 +151,11 @@ def get_x_object(self, interpolate, dpi_ratio): }) if self.format == 'JPEG': + if self.mode == 'CMYK' and self.app14 is not None: + # The presence of the APP14 segment indicates an Adobe image + # with inverted CMYK data. Specify a Decode Array to invert + # it again back to normal. + extra['Decode'] = '[1 0 1 0 1 0 1 0]' extra['Filter'] = '/DCTDecode' return pydyf.Stream([self.image_data], extra)