Skip to content

Commit

Permalink
Add first and last page parameters to pdfinfo
Browse files: browse the repository at this point in the history
  • Loading branch information
magnurud authored Jan 7, 2024
1 parent 579a57c commit 9cde8dd
Showing 1 changed file with 20 additions and 0 deletions.
20 changes: 20 additions & 0 deletions pdf2image/pdf2image.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,8 @@ def pdfinfo_from_path(
poppler_path: str = None,
rawdates: bool = False,
timeout: int = None,
first_page: int = None,
last_page: int = None,
) -> Dict:
"""Function wrapping poppler's pdfinfo utility and returns the result as a dictionary.
Expand All @@ -543,6 +545,10 @@ def pdfinfo_from_path(
:type rawdates: bool, optional
:param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None
:type timeout: int, optional
:param first_page: First page to process, defaults to None
:type first_page: int, optional
:param last_page: Last page to process before stopping, defaults to None
:type last_page: int, optional
:raises PDFPopplerTimeoutError: Raised after the timeout for the image processing is exceeded
:raises PDFInfoNotInstalledError: Raised if pdfinfo is not installed
:raises PDFPageCountError: Raised if the output could not be parsed
Expand All @@ -561,6 +567,12 @@ def pdfinfo_from_path(
if rawdates:
command.extend(["-rawdates"])

if first_page:
command.extend(["-f", str(first_page)])

if last_page:
command.extend(["-l", str(last_page)])

# Add poppler path to LD_LIBRARY_PATH
env = os.environ.copy()
if poppler_path is not None:
Expand Down Expand Up @@ -607,6 +619,8 @@ def pdfinfo_from_bytes(
poppler_path: str = None,
rawdates: bool = False,
timeout: int = None,
first_page: int = None,
last_page: int = None,
) -> Dict:
"""Function wrapping poppler's pdfinfo utility and returns the result as a dictionary.
Expand All @@ -622,6 +636,10 @@ def pdfinfo_from_bytes(
:type rawdates: bool, optional
:param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None
:type timeout: int, optional
:param first_page: First page to process, defaults to None
:type first_page: int, optional
:param last_page: Last page to process before stopping, defaults to None
:type last_page: int, optional
:return: Dictionary containing various information on the PDF
:rtype: Dict
"""
Expand All @@ -637,6 +655,8 @@ def pdfinfo_from_bytes(
poppler_path=poppler_path,
rawdates=rawdates,
timeout=timeout,
first_page=first_page,
last_page=last_page,
)
finally:
os.close(fh)
Expand Down

0 comments on commit 9cde8dd

Please sign in to comment.