From 85f98e0c5aa5aada38a908c728f18cfd5b092a0a Mon Sep 17 00:00:00 2001 From: Wolfgang Walther Date: Thu, 15 Feb 2024 17:02:21 +0100 Subject: [PATCH 1/2] Add standard compliant default identifier --- pydyf/__init__.py | 24 +++++++++++------------- tests/test_pydyf.py | 18 ++++++++++++++++-- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/pydyf/__init__.py b/pydyf/__init__.py index 2ee8385..7524956 100755 --- a/pydyf/__init__.py +++ b/pydyf/__init__.py @@ -568,12 +568,11 @@ def write(self, output, version=None, identifier=None, compress=False): 'Root': self.catalog.reference, 'Info': self.info.reference, } - if identifier is not None: - data = b''.join( - obj.data for obj in self.objects if obj.free != 'f') - data_hash = md5(data).hexdigest().encode() - extra['ID'] = Array(( - String(identifier).data, String(data_hash).data)) + data = b''.join( + obj.data for obj in self.objects if obj.free != 'f') + data_hash = md5(data).hexdigest().encode() + extra['ID'] = Array(( + String(identifier or data_hash).data, String(data_hash).data)) dict_stream = Stream([xref_stream], extra, compress) self.xref_position = dict_stream.offset = self.current_position self.add_object(dict_stream) @@ -601,13 +600,12 @@ def write(self, output, version=None, identifier=None, compress=False): self.write_line(f'/Size {len(self.objects)}'.encode(), output) self.write_line(b'/Root ' + self.catalog.reference, output) self.write_line(b'/Info ' + self.info.reference, output) - if identifier is not None: - data = b''.join( - obj.data for obj in self.objects if obj.free != 'f') - data_hash = md5(data).hexdigest().encode() - self.write_line( - b'/ID [' + String(identifier).data + b' ' + - String(data_hash).data + b']', output) + data = b''.join( + obj.data for obj in self.objects if obj.free != 'f') + data_hash = md5(data).hexdigest().encode() + self.write_line( + b'/ID [' + String(identifier or data_hash).data + b' ' + + String(data_hash).data + b']', output) self.write_line(b'>>', 
output) self.write_line(b'startxref', output) diff --git a/tests/test_pydyf.py b/tests/test_pydyf.py index 83c260c..e5ccef8 100644 --- a/tests/test_pydyf.py +++ b/tests/test_pydyf.py @@ -1,4 +1,5 @@ import io +import re import pydyf @@ -704,11 +705,24 @@ def test_text(): ''') -def test_identifier(): +def test_default_identifier(): + document = pydyf.PDF() + pdf = io.BytesIO() + document.write(pdf, identifier=None) + assert re.search( + b'/ID \\[\\((?P<hash>[0-9a-f]{32})\\) \\((?P=hash)\\)\\]', + pdf.getvalue() + ) is not None + + +def test_custom_identifier(): document = pydyf.PDF() pdf = io.BytesIO() document.write(pdf, identifier=b'abc') - assert b'abc' in pdf.getvalue() + assert re.search( + b'/ID \\[\\(abc\\) \\(([0-9a-f]{32})\\)\\]', + pdf.getvalue() + ) is not None def test_version(): From 938098607a693e5ba6da6ebb9fe5b5a2a7c37216 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Mon, 26 Feb 2024 15:22:09 +0100 Subject: [PATCH 2/2] =?UTF-8?q?Don=E2=80=99t=20add=20identifier=20by=20def?= =?UTF-8?q?ault?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pydyf/__init__.py | 44 ++++++++++++++++++++++++++++---------------- tests/test_pydyf.py | 12 +++++++++++- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/pydyf/__init__.py b/pydyf/__init__.py index 7524956..1bb32ca 100755 --- a/pydyf/__init__.py +++ b/pydyf/__init__.py @@ -411,11 +411,14 @@ def data(self): class PDF: """PDF document.""" - def __init__(self, version=b'1.7', identifier=None): + def __init__(self, version=b'1.7', identifier=False): """Create a PDF document. :param bytes version: PDF version. - :param bytes identifier: PDF file identifier. + :param identifier: PDF file identifier. Default is :obj:`False` + to include no identifier, can be set to :obj:`True` to generate an + automatic identifier. + :type identifier: :obj:`bytes` or :obj:`bool` """ #: PDF version, as :obj:`bytes`. 
@@ -489,18 +492,21 @@ def write_line(self, content, output): self.current_position += len(content) + 1 output.write(content + b'\n') - def write(self, output, version=None, identifier=None, compress=False): + def write(self, output, version=None, identifier=False, compress=False): """Write PDF to output. :param output: Output stream. :type output: binary :term:`file object` :param bytes version: PDF version. - :param bytes identifier: PDF file identifier. + :param identifier: PDF file identifier. Default is :obj:`False` + to include no identifier, can be set to :obj:`True` to generate an + automatic identifier. + :type identifier: :obj:`bytes` or :obj:`bool` :param bool compress: whether the PDF uses a compressed object stream. """ version = self.version if version is None else _to_bytes(version) - identifier = self.identifier if identifier is None else identifier + identifier = self.identifier or identifier # Write header self.write_line(b'%PDF-' + version, output) @@ -568,11 +574,14 @@ def write(self, output, version=None, identifier=None, compress=False): 'Root': self.catalog.reference, 'Info': self.info.reference, } - data = b''.join( - obj.data for obj in self.objects if obj.free != 'f') - data_hash = md5(data).hexdigest().encode() - extra['ID'] = Array(( - String(identifier or data_hash).data, String(data_hash).data)) + if identifier: + data = b''.join( + obj.data for obj in self.objects if obj.free != 'f') + data_hash = md5(data).hexdigest().encode() + if identifier is True: + identifier = data_hash + extra['ID'] = Array(( + String(identifier).data, String(data_hash).data)) dict_stream = Stream([xref_stream], extra, compress) self.xref_position = dict_stream.offset = self.current_position self.add_object(dict_stream) @@ -600,12 +609,15 @@ def write(self, output, version=None, identifier=None, compress=False): self.write_line(f'/Size {len(self.objects)}'.encode(), output) self.write_line(b'/Root ' + self.catalog.reference, output) self.write_line(b'/Info ' + 
self.info.reference, output) - data = b''.join( - obj.data for obj in self.objects if obj.free != 'f') - data_hash = md5(data).hexdigest().encode() - self.write_line( - b'/ID [' + String(identifier or data_hash).data + b' ' + - String(data_hash).data + b']', output) + if identifier: + data = b''.join( + obj.data for obj in self.objects if obj.free != 'f') + data_hash = md5(data).hexdigest().encode() + if identifier is True: + identifier = data_hash + self.write_line( + b'/ID [' + String(identifier).data + b' ' + + String(data_hash).data + b']', output) self.write_line(b'>>', output) self.write_line(b'startxref', output) diff --git a/tests/test_pydyf.py b/tests/test_pydyf.py index e5ccef8..ff63be2 100644 --- a/tests/test_pydyf.py +++ b/tests/test_pydyf.py @@ -705,10 +705,20 @@ def test_text(): ''') +def test_no_identifier(): + document = pydyf.PDF() + pdf = io.BytesIO() + document.write(pdf, identifier=False) + assert re.search( + b'/ID \\[\\((?P<hash>[0-9a-f]{32})\\) \\((?P=hash)\\)\\]', + pdf.getvalue() + ) is None + + def test_default_identifier(): document = pydyf.PDF() pdf = io.BytesIO() - document.write(pdf, identifier=None) + document.write(pdf, identifier=True) assert re.search( b'/ID \\[\\((?P<hash>[0-9a-f]{32})\\) \\((?P=hash)\\)\\]', pdf.getvalue()