py-pdf · MartinThoma · Aug 2, 2023 · Jul 30, 2023 · Aug 2, 2023
diff --git a/docs/user/file-size.md b/docs/user/file-size.md
@@ -1,4 +1,4 @@
-# Reduce PDF Size
+# Reduce PDF File Size
 
 There are multiple ways to reduce the size of a given PDF file. The easiest
 one is to remove content (e.g. images) or pages.
@@ -96,6 +96,10 @@ with open("out.pdf", "wb") as f:
     writer.write(f)
 ```
 
+`page.compress_content_streams` uses [`zlib.compress`](https://docs.python.org/3/library/zlib.html#zlib.compress) and supports the
+`level` parameter: `level=0` is no compression, `level=9` is the
+highest compression. The default, `level=-1`, uses zlib's default trade-off between speed and size.
+
 Using this method, we have seen a reduction by 70% (from 11.8 MB to 3.5 MB)
 with a real PDF.
 

diff --git a/pypdf/_page.py b/pypdf/_page.py
@@ -1763,7 +1763,7 @@ def scaleTo(self, width: float, height: float) -> None:  # deprecated
         deprecation_with_replacement("scaleTo", "scale_to", "3.0.0")
         self.scale_to(width, height)
 
-    def compress_content_streams(self) -> None:
+    def compress_content_streams(self, level: int = -1) -> None:
         """
         Compress the size of this page by joining all content streams and
         applying a FlateDecode filter.
@@ -1773,7 +1773,7 @@ def compress_content_streams(self) -> None:
         """
         content = self.get_contents()
         if content is not None:
-            content_obj = content.flate_encode()
+            content_obj = content.flate_encode(level)
             try:
                 content.indirect_reference.pdf._objects[  # type: ignore
                     content.indirect_reference.idnum - 1  # type: ignore

diff --git a/pypdf/filters.py b/pypdf/filters.py
@@ -225,17 +225,18 @@ def _decode_png_prediction(data: str, columns: int, rowlength: int) -> bytes:
         return output.getvalue()
 
     @staticmethod
-    def encode(data: bytes) -> bytes:
+    def encode(data: bytes, level: int = -1) -> bytes:
         """
         Compress the input data using zlib.
 
         Args:
             data: The data to be compressed.
+            level: See https://docs.python.org/3/library/zlib.html#zlib.compress
 
         Returns:
             The compressed data.
         """
-        return zlib.compress(data)
+        return zlib.compress(data, level)
 
 
 class ASCIIHexDecode:

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
@@ -880,7 +880,7 @@ def flateEncode(self) -> "EncodedStreamObject":  # deprecated
         deprecation_with_replacement("flateEncode", "flate_encode", "3.0.0")
         return self.flate_encode()
 
-    def flate_encode(self) -> "EncodedStreamObject":
+    def flate_encode(self, level: int = -1) -> "EncodedStreamObject":
         from ..filters import FlateDecode
 
         if SA.FILTER in self:
@@ -909,7 +909,7 @@ def flate_encode(self) -> "EncodedStreamObject":
         retval[NameObject(SA.FILTER)] = f
         if parms is not None:
             retval[NameObject(SA.DECODE_PARMS)] = parms
-        retval._data = FlateDecode.encode(self._data)
+        retval._data = FlateDecode.encode(self._data, level)
         return retval