py-pdf · MartinThoma · Dec 18, 2023 · Dec 11, 2023 · Dec 11, 2023 · Dec 11, 2023
diff --git a/pypdf/_utils.py b/pypdf/_utils.py
@@ -190,6 +190,23 @@ def skip_over_whitespace(stream: StreamType) -> bool:
     return cnt > 1
 
 
+def check_if_whitespace_only(value: bytes) -> bool:
+    """
+    Tell whether the given bytes are made up of whitespace characters only.
+
+    Args:
+        value: The bytes to check. An empty value passes the check.
+
+    Returns:
+        True if every single byte of the value is in WHITESPACES, else False.
+    """
+    # Slice instead of indexing: ``value[i]`` would be an int, whereas
+    # ``value[i:i + 1]`` is a bytes object of length one.
+    return all(
+        value[index:index + 1] in WHITESPACES for index in range(len(value))
+    )
+
+
 def skip_over_comment(stream: StreamType) -> None:
     tok = stream.read(1)
     stream.seek(-1, 1)

diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py
@@ -4,7 +4,7 @@
 from io import BytesIO
 from typing import Any, List, Tuple, Union, cast
 
-from ._utils import WHITESPACES, logger_warning
+from ._utils import check_if_whitespace_only, logger_warning
 from .constants import ColorSpaces
 from .errors import PdfReadError
 from .generic import (
@@ -199,9 +199,9 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
                 if len(lookup) != expected_count:
                     if len(lookup) < expected_count:
                         raise PdfReadError(f"Not enough lookup values: Expected {expected_count}, got {len(lookup)}.")
-                    lookup = lookup[:expected_count]
-                    if not all(_value in WHITESPACES for _value in lookup[expected_count:]):
+                    if not check_if_whitespace_only(lookup[expected_count:]):
                         raise PdfReadError(f"Too many lookup values: Expected {expected_count}, got {len(lookup)}.")
+                    lookup = lookup[:expected_count]
                 colors_arr = [lookup[:nb], lookup[nb:]]
                 arr = b"".join(
                     [

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -10,6 +10,7 @@
     Version,
     _get_max_pdf_version_header,
     _human_readable_bytes,
+    check_if_whitespace_only,
     deprecate_with_replacement,
     deprecation_bookmark,
     deprecation_no_replacement,
@@ -48,6 +49,23 @@ def test_skip_over_whitespace(stream, expected):
     assert skip_over_whitespace(stream) == expected
 
 
+@pytest.mark.parametrize(
+    ("value", "expected"),
+    [
+        # At least one non-whitespace byte.
+        (b"foo", False),
+        (b" a", False), (b" a\n b", False),
+        # Empty input, or whitespace bytes only.
+        (b"", True), (b" ", True), (b"  ", True),
+        (b"  \n", True), (b"    \n", True),
+    ],
+)
+def test_check_if_whitespace_only(value, expected):
+    """The result must be the bool singleton itself, hence the identity check."""
+    outcome = check_if_whitespace_only(value)
+    assert outcome is expected
+
+
 def test_read_until_whitespace():
     assert read_until_whitespace(io.BytesIO(b"foo"), maxchars=1) == b"f"
 

diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py
@@ -4,7 +4,9 @@
 import pytest
 
 from pypdf import PdfReader
+from pypdf._xobj_image_helpers import _handle_flate
 from pypdf.errors import PdfReadError
+from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject
 
 from . import get_data_from_url
 
@@ -25,3 +27,61 @@ def test_get_imagemode_recursion_depth():
         match="Color spaces nested too deep. If required, consider increasing MAX_IMAGE_MODE_NESTING_DEPTH.",
     ):
         reader.pages[0].images[0]
+
+
+def test_handle_flate__image_mode_1():
+    """
+    Check the lookup table length validation of ``_handle_flate`` for mode "1".
+
+    The same 3x3 image data is decoded against four lookup streams:
+
+    * exactly six bytes: decoded normally,
+    * six bytes followed by whitespace: decoded normally as well,
+    * six bytes followed by a non-whitespace byte: rejected as too long,
+    * five bytes: rejected as too short.
+    """
+    data = b"\x00\xe0\x00"
+    lookup = DecodedStreamObject()
+    # The two RGB triples stored in the six valid lookup bytes.
+    first_entry, second_entry = (66, 66, 66), (0, 19, 55)
+    expected_data = [
+        first_entry, first_entry, first_entry,
+        second_entry, second_entry, second_entry,
+        first_entry, first_entry, first_entry,
+    ]
+
+    def decode():
+        """Run ``_handle_flate`` with whatever ``lookup`` currently holds."""
+        color_space = ArrayObject(
+            [NameObject("/Indexed"), NameObject("/DeviceRGB"), NumberObject(1), lookup]
+        )
+        return _handle_flate(
+            size=(3, 3),
+            data=data,
+            mode="1",
+            color_space=color_space,
+            colors=2,
+            obj_as_text="dummy",
+        )
+
+    # No trailing data.
+    lookup.set_data(b"\x42\x42\x42\x00\x13\x37")
+    result = decode()
+    assert expected_data == list(result[0].getdata())
+
+    # Trailing whitespace (two spaces and a line feed) is tolerated and the
+    # decoded pixels are unchanged.
+    lookup.set_data(b"\x42\x42\x42\x00\x13\x37  \x0a")
+    result = decode()
+    assert expected_data == list(result[0].getdata())
+
+    # Trailing non-whitespace character: the surplus byte is reported rather
+    # than silently cut off.
+    lookup.set_data(b"\x42\x42\x42\x00\x13\x37\x12")
+    with pytest.raises(PdfReadError, match=r"^Too many lookup values: Expected 6, got 7\.$"):
+        decode()
+
+    # Not enough lookup data.
+    lookup.set_data(b"\x42\x42\x42\x00\x13")
+    with pytest.raises(PdfReadError, match=r"^Not enough lookup values: Expected 6, got 5\.$"):
+        decode()