pdfminer · pietermarsman · Feb 22, 2022 · Feb 12, 2022 · Feb 12, 2022 · Feb 12, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - Export type annotations from pypi package per PEP561 ([#679](https://github.com/pdfminer/pdfminer.six/pull/679))
 - Support for identity cmap's ([#626](https://github.com/pdfminer/pdfminer.six/pull/626))
 - Add support for PDF page labels ([#680](https://github.com/pdfminer/pdfminer.six/pull/680))
+- Installation of Pillow as an optional extra dependency ([#714](https://github.com/pdfminer/pdfminer.six/pull/714))
 
 ### Fixed
 - Hande decompression error due to CRC checksum error ([#637](https://github.com/pdfminer/pdfminer.six/pull/637))

diff --git a/README.md b/README.md
@@ -43,6 +43,10 @@ How to use
 
   `pip install pdfminer.six`
 
+* (Optionally) install extra dependencies for extracting images.
+
+  `pip install 'pdfminer.six[image]'`
+
 * Use command-line interface to extract text from pdf:
 
   `python pdf2txt.py samples/simple1.pdf`

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -66,6 +66,13 @@ Before using it, you must install it using Python 3.6 or newer.
     $ pip install pdfminer.six
 
 
+Optionally install extra dependencies that are needed to extract jpg images.
+
+::
+
+    $ pip install 'pdfminer.six[image]'
+
+
 Contributing
 ============
 

diff --git a/pdfminer/image.py b/pdfminer/image.py
@@ -11,6 +11,12 @@
 from .pdfcolor import LITERAL_DEVICE_RGB
 from .pdftypes import LITERALS_DCT_DECODE, LITERALS_JBIG2_DECODE, LITERALS_JPX_DECODE
 
+PIL_ERROR_MESSAGE = (
+    "Could not import Pillow. This dependency of pdfminer.six is not "
+    "installed by default. You need it to save jpg images to a file. Install it "
+    "with `pip install 'pdfminer.six[image]'`"
+)
+
 
 def align32(x: int) -> int:
     return ((x + 3) // 4) * 4
@@ -93,8 +99,10 @@ def export_image(self, image: LTImage) -> str:
             raw_data = image.stream.get_rawdata()
             assert raw_data is not None
             if LITERAL_DEVICE_CMYK in image.colorspace:
-                from PIL import Image  # type: ignore[import]
-                from PIL import ImageChops
+                try:
+                    from PIL import Image, ImageChops  # type: ignore[import]
+                except ImportError:
+                    raise ImportError(PIL_ERROR_MESSAGE)
 
                 ifp = BytesIO(raw_data)
                 i = Image.open(ifp)
@@ -104,12 +112,15 @@ def export_image(self, image: LTImage) -> str:
             else:
                 fp.write(raw_data)
         elif ext == ".jp2":
+            try:
+                from PIL import Image
+            except ImportError:
+                raise ImportError(PIL_ERROR_MESSAGE)
+
             # if we just write the raw data, most image programs
             # that I have tried cannot open the file. However,
             # open and saving with PIL produces a file that
             # seems to be easily opened by other programs
-            from PIL import Image
-
             raw_data = image.stream.get_rawdata()
             assert raw_data is not None
             ifp = BytesIO(raw_data)

diff --git a/setup.py b/setup.py
@@ -23,6 +23,7 @@
     extras_require={
         "dev": ["pytest", "nox", "black", "mypy == 0.931"],
         "docs": ["sphinx", "sphinx-argparse"],
+        "image": ["Pillow"],
     },
     description="PDF parser and analyzer",
     long_description=readme,