From b06010425fd5afe094eb3d6f45c4e7e60d4fdcd6 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Mon, 11 Dec 2023 11:59:42 +0100 Subject: [PATCH 1/6] TST: Increase test coverage for flate handling of image mode 1 --- tests/test_xobject_image_helpers.py | 61 +++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index e6d9e8e0f..cbc282edc 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -5,6 +5,8 @@ from pypdf import PdfReader from pypdf.errors import PdfReadError +from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject +from pypdf._xobj_image_helpers import _handle_flate from . import get_data_from_url @@ -25,3 +27,62 @@ def test_get_imagemode_recursion_depth(): match="Color spaces nested too deep. If required, consider increasing MAX_IMAGE_MODE_NESTING_DEPTH.", ): reader.pages[0].images[0] + + +def test_handle_flate__image_mode_1(): + data = b"\x00\xe0\x00" + lookup = DecodedStreamObject() + + # No trailing data. + lookup.set_data(b"\x42\x42\x42\x00\x13\x37") + result = _handle_flate( + size=(3, 3), + data=data, + mode="1", + color_space=ArrayObject([NameObject("/Indexed"), NameObject("/DeviceRGB"), NumberObject(1), lookup]), + colors=2, + obj_as_text="dummy" + ) + assert ( + [(66, 66, 66), (66, 66, 66), (66, 66, 66), (0, 19, 55), (0, 19, 55), (0, 19, 55), (66, 66, 66), (66, 66, 66), (66, 66, 66)] == + list(result[0].getdata()) + ) + + # Trailing whitespace. + lookup.set_data(b"\x42\x42\x42\x00\x13\x37 \x0a") + result = _handle_flate( + size=(3, 3), + data=data, + mode="1", + color_space=ArrayObject([NameObject("/Indexed"), NameObject("/DeviceRGB"), NumberObject(1), lookup]), + colors=2, + obj_as_text="dummy" + ) + assert ( + [(66, 66, 66), (66, 66, 66), (66, 66, 66), (0, 19, 55), (0, 19, 55), (0, 19, 55), (66, 66, 66), (66, 66, 66), (66, 66, 66)] == + list(result[0].getdata()) + ) + + # Trailing non-whitespace character. + lookup.set_data(b"\x42\x42\x42\x00\x13\x37\x12") + with pytest.raises(PdfReadError, match=r"^Too many lookup values: Expected 6, got 7\.$"): + _handle_flate( + size=(3, 3), + data=data, + mode="1", + color_space=ArrayObject([NameObject("/Indexed"), NameObject("/DeviceRGB"), NumberObject(1), lookup]), + colors=2, + obj_as_text="dummy" + ) + + # Not enough lookup data. + lookup.set_data(b"\x42\x42\x42\x00\x13") + with pytest.raises(PdfReadError, match=r"^Not enough lookup values: Expected 6, got 5\.$"): + _handle_flate( + size=(3, 3), + data=data, + mode="1", + color_space=ArrayObject([NameObject("/Indexed"), NameObject("/DeviceRGB"), NumberObject(1), lookup]), + colors=2, + obj_as_text="dummy" + ) From d1822e2eb5a9ebcf46218c9396d1905411bc1d82 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Mon, 11 Dec 2023 12:00:35 +0100 Subject: [PATCH 2/6] add utility function --- pypdf/_utils.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pypdf/_utils.py b/pypdf/_utils.py index 0e3e7ebab..9788ba418 100644 --- a/pypdf/_utils.py +++ b/pypdf/_utils.py @@ -190,6 +190,23 @@ def skip_over_whitespace(stream: StreamType) -> bool: return cnt > 1 +def check_if_whitespace_only(value: bytes) -> bool: + """ + Check if the given value consists of whitespace characters only. + + Args: + value: The bytes to check. + + Returns: + True if the value only has whitespace characters, otherwise return False. + """ + for index in range(len(value)): + current = value[index:index + 1] + if current not in WHITESPACES: + return False + return True + + def skip_over_comment(stream: StreamType) -> None: tok = stream.read(1) stream.seek(-1, 1) From 737fed259b810b19c6dc79629560c7ac5c393a0d Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Mon, 11 Dec 2023 12:01:29 +0100 Subject: [PATCH 3/6] fix check --- pypdf/_xobj_image_helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index a390357dd..1c41c453b 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -4,7 +4,7 @@ from io import BytesIO from typing import Any, List, Tuple, Union, cast -from ._utils import WHITESPACES, logger_warning +from ._utils import check_if_whitespace_only, logger_warning from .constants import ColorSpaces from .errors import PdfReadError from .generic import ( @@ -199,9 +199,9 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes: if len(lookup) != expected_count: if len(lookup) < expected_count: raise PdfReadError(f"Not enough lookup values: Expected {expected_count}, got {len(lookup)}.") - lookup = lookup[:expected_count] - if not all(_value in WHITESPACES for _value in lookup[expected_count:]): + if not check_if_whitespace_only(lookup[expected_count:]): raise PdfReadError(f"Too many lookup values: Expected {expected_count}, got {len(lookup)}.") + lookup = lookup[:expected_count] colors_arr = [lookup[:nb], lookup[nb:]] arr = b"".join( [ From c2af3a66c77e1493304d51bad9c77b6912e016cf Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Mon, 11 Dec 2023 12:07:23 +0100 Subject: [PATCH 4/6] fix code style --- tests/test_xobject_image_helpers.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index cbc282edc..f967b3813 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -4,9 +4,9 @@ import pytest from pypdf import PdfReader +from pypdf._xobj_image_helpers import _handle_flate from pypdf.errors import PdfReadError from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject -from pypdf._xobj_image_helpers import _handle_flate from . import get_data_from_url @@ -32,6 +32,9 @@ def test_get_imagemode_recursion_depth(): def test_handle_flate__image_mode_1(): data = b"\x00\xe0\x00" lookup = DecodedStreamObject() + expected_data = [ + (66, 66, 66), (66, 66, 66), (66, 66, 66), (0, 19, 55), (0, 19, 55), (0, 19, 55), (66, 66, 66), (66, 66, 66), (66, 66, 66) + ] # No trailing data. lookup.set_data(b"\x42\x42\x42\x00\x13\x37") @@ -43,10 +46,7 @@ def test_handle_flate__image_mode_1(): colors=2, obj_as_text="dummy" ) - assert ( - [(66, 66, 66), (66, 66, 66), (66, 66, 66), (0, 19, 55), (0, 19, 55), (0, 19, 55), (66, 66, 66), (66, 66, 66), (66, 66, 66)] == - list(result[0].getdata()) - ) + assert expected_data == list(result[0].getdata()) # Trailing whitespace. lookup.set_data(b"\x42\x42\x42\x00\x13\x37 \x0a") @@ -58,10 +58,7 @@ def test_handle_flate__image_mode_1(): colors=2, obj_as_text="dummy" ) - assert ( - [(66, 66, 66), (66, 66, 66), (66, 66, 66), (0, 19, 55), (0, 19, 55), (0, 19, 55), (66, 66, 66), (66, 66, 66), (66, 66, 66)] == - list(result[0].getdata()) - ) + assert expected_data == list(result[0].getdata()) # Trailing non-whitespace character. lookup.set_data(b"\x42\x42\x42\x00\x13\x37\x12") From f7d4a96e8303dbb340063554a0d7a1ecef4cf3b3 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Mon, 11 Dec 2023 12:08:36 +0100 Subject: [PATCH 5/6] fix line length --- tests/test_xobject_image_helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index f967b3813..0e515cae5 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -33,7 +33,9 @@ def test_handle_flate__image_mode_1(): data = b"\x00\xe0\x00" lookup = DecodedStreamObject() expected_data = [ - (66, 66, 66), (66, 66, 66), (66, 66, 66), (0, 19, 55), (0, 19, 55), (0, 19, 55), (66, 66, 66), (66, 66, 66), (66, 66, 66) + (66, 66, 66), (66, 66, 66), (66, 66, 66), + (0, 19, 55), (0, 19, 55), (0, 19, 55), + (66, 66, 66), (66, 66, 66), (66, 66, 66) ] # No trailing data. From 8bea3d4217afc1693b6c9e55f0c99a61d90484f3 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Mon, 11 Dec 2023 19:32:27 +0100 Subject: [PATCH 6/6] add test for utility function --- tests/test_utils.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index f00be5d62..8803feea8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -10,6 +10,7 @@ Version, _get_max_pdf_version_header, _human_readable_bytes, + check_if_whitespace_only, deprecate_with_replacement, deprecation_bookmark, deprecation_no_replacement, @@ -48,6 +49,23 @@ def test_skip_over_whitespace(stream, expected): assert skip_over_whitespace(stream) == expected +@pytest.mark.parametrize( + ("value", "expected"), + [ + (b"foo", False), + (b" a", False), + (b" a\n b", False), + (b"", True), + (b" ", True), + (b" ", True), + (b" \n", True), + (b" \n", True), + ], +) +def test_check_if_whitespace_only(value, expected): + assert check_if_whitespace_only(value) is expected + + def test_read_until_whitespace(): assert read_until_whitespace(io.BytesIO(b"foo"), maxchars=1) == b"f"