Skip to content

Commit

Permalink
Merge pull request #21 from wolfgangwalther/default-identifier
Browse files Browse the repository at this point in the history
Add standard-compliant default identifier
  • Loading branch information
liZe authored Feb 26, 2024
2 parents 8772b18 + 9380986 commit 30cad31
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 9 deletions.
24 changes: 17 additions & 7 deletions pydyf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,11 +450,14 @@ def data(self):

class PDF:
"""PDF document."""
def __init__(self, version=b'1.7', identifier=None):
def __init__(self, version=b'1.7', identifier=False):
"""Create a PDF document.
:param bytes version: PDF version.
:param bytes identifier: PDF file identifier.
:param identifier: PDF file identifier. Default is :obj:`False`
to include no identifier, can be set to :obj:`True` to generate an
automatic identifier.
:type identifier: :obj:`bytes` or :obj:`bool`
"""
#: PDF version, as :obj:`bytes`.
Expand Down Expand Up @@ -528,18 +531,21 @@ def write_line(self, content, output):
self.current_position += len(content) + 1
output.write(content + b'\n')

def write(self, output, version=None, identifier=None, compress=False):
def write(self, output, version=None, identifier=False, compress=False):
"""Write PDF to output.
:param output: Output stream.
:type output: binary :term:`file object`
:param bytes version: PDF version.
:param bytes identifier: PDF file identifier.
:param identifier: PDF file identifier. Default is :obj:`False`
to include no identifier, can be set to :obj:`True` to generate an
automatic identifier.
:type identifier: :obj:`bytes` or :obj:`bool`
:param bool compress: whether the PDF uses a compressed object stream.
"""
version = self.version if version is None else _to_bytes(version)
identifier = self.identifier if identifier is None else identifier
identifier = self.identifier or identifier

# Write header
self.write_line(b'%PDF-' + version, output)
Expand Down Expand Up @@ -607,10 +613,12 @@ def write(self, output, version=None, identifier=None, compress=False):
'Root': self.catalog.reference,
'Info': self.info.reference,
}
if identifier is not None:
if identifier:
data = b''.join(
obj.data for obj in self.objects if obj.free != 'f')
data_hash = md5(data).hexdigest().encode()
if identifier is True:
identifier = data_hash
extra['ID'] = Array((
String(identifier).data, String(data_hash).data))
dict_stream = Stream([xref_stream], extra, compress)
Expand Down Expand Up @@ -640,10 +648,12 @@ def write(self, output, version=None, identifier=None, compress=False):
self.write_line(f'/Size {len(self.objects)}'.encode(), output)
self.write_line(b'/Root ' + self.catalog.reference, output)
self.write_line(b'/Info ' + self.info.reference, output)
if identifier is not None:
if identifier:
data = b''.join(
obj.data for obj in self.objects if obj.free != 'f')
data_hash = md5(data).hexdigest().encode()
if identifier is True:
identifier = data_hash
self.write_line(
b'/ID [' + String(identifier).data + b' ' +
String(data_hash).data + b']', output)
Expand Down
28 changes: 26 additions & 2 deletions tests/test_pydyf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
import re

import pydyf

Expand Down Expand Up @@ -704,11 +705,34 @@ def test_text():
''')


def test_identifier():
def test_no_identifier():
    """Check that no file identifier is written when ``identifier=False``."""
    document = pydyf.PDF()
    pdf = io.BytesIO()
    document.write(pdf, identifier=False)
    # Look for the /ID key itself rather than one specific ID layout: a
    # pattern matching only two identical 32-digit hashes would let any
    # other identifier shape slip through unnoticed.
    assert b'/ID' not in pdf.getvalue()


def test_default_identifier():
    """Check that ``identifier=True`` writes an automatic file identifier.

    Both parts of the ``/ID`` array must be the same 32-digit lowercase
    hexadecimal string.
    """
    output = io.BytesIO()
    pydyf.PDF().write(output, identifier=True)
    match = re.search(
        b'/ID \\[\\((?P<hash>[0-9a-f]{32})\\) \\((?P=hash)\\)\\]',
        output.getvalue())
    assert match is not None


def test_custom_identifier():
    """Check that a custom identifier is written as the first ``/ID`` part.

    The second part of the array must be a 32-digit lowercase hexadecimal
    string.
    """
    output = io.BytesIO()
    pydyf.PDF().write(output, identifier=b'abc')
    content = output.getvalue()
    assert b'abc' in content
    id_match = re.search(
        b'/ID \\[\\(abc\\) \\(([0-9a-f]{32})\\)\\]', content)
    assert id_match is not None


def test_version():
Expand Down

0 comments on commit 30cad31

Please sign in to comment.