Skip to content

Commit

Permalink
Don't use pdfrw anymore
Browse files Browse the repository at this point in the history
pdfrw is a great piece of software, but we don't know the PDF format well
enough to debug the problems we've run into. It's safer to use the new cairo
API and go back to editing the PDF manually for attachments and bleed boxes.

We only have two regressions for now:
- some internal links are broken,
- PDF producer is not overwritten.

A mail has been sent to cairo's mailing-list about that:
https://lists.cairographics.org/archives/cairo/2018-August/028694.html

Fix #639, #615, fix #596, fix #565.
  • Loading branch information
liZe committed Aug 6, 2018
1 parent 9199e49 commit 0834615
Show file tree
Hide file tree
Showing 7 changed files with 919 additions and 519 deletions.
9 changes: 4 additions & 5 deletions docs/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,16 @@ Installing

WeasyPrint |version| depends on:

* CPython_ ≥ 3.4
* CPython_ ≥ 3.4.0
* cairo_ ≥ 1.15.4 [#]_
* Pango_ ≥ 1.38.0 [#]_
* CFFI_ ≥ 0.6
* html5lib_ ≥ 0.999999999
* cairocffi_ ≥ 0.5
* cairocffi_ ≥ 0.9.0
* tinycss2_ ≥ 0.5
* cssselect2_ ≥ 0.1
* CairoSVG_ ≥ 1.0.20
* Pyphen_ ≥ 0.8
* pdfrw_ ≥ 0.4
* GDK-PixBuf_ ≥ 2.25.0 [#]_

.. _CPython: http://www.python.org/
Expand All @@ -26,7 +25,6 @@ WeasyPrint |version| depends on:
.. _cssselect2: https://cssselect2.readthedocs.io/
.. _CairoSVG: http://cairosvg.org/
.. _Pyphen: http://pyphen.org/
.. _pdfrw: https://github.com/pmaupin/pdfrw/
.. _GDK-PixBuf: https://live.gnome.org/GdkPixbuf


Expand Down Expand Up @@ -81,7 +79,8 @@ WeasyPrint! Otherwise, please copy the full error message and
you get incomplete SVG renderings, please read `#339
<https://github.com/Kozea/WeasyPrint/issues/339>`_. If you get invalid
PDF files, please read `#565
<https://github.com/Kozea/WeasyPrint/issues/565>`.
<https://github.com/Kozea/WeasyPrint/issues/565>`_. Some PDF metadata,
including document information, hyperlinks and bookmarks, requires cairo ≥ 1.15.4.
.. [#] pango ≥ 1.29.3 is required, but 1.38.0 is needed to handle `@font-face`
CSS rules.
Expand Down
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,13 @@

REQUIREMENTS = [
# XXX: Keep this in sync with docs/install.rst
'cffi>=0.6',
'html5lib>=0.999999999',
'cairocffi>=0.9.0',
'tinycss2>=0.5',
'cssselect2>=0.1',
'cffi>=0.6',
'cairocffi>=0.5',
'Pyphen>=0.8',
'pdfrw>=0.4',
'CairoSVG>=1.0.20',
'Pyphen>=0.8',
# C dependencies: Gdk-Pixbuf (optional), Pango, cairo.
]

Expand Down
129 changes: 120 additions & 9 deletions weasyprint/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,20 @@
from .fonts import FontConfiguration
from .formatting_structure import boxes
from .formatting_structure.build import build_formatting_structure
from .html import W3C_DATE_RE
from .images import get_image_from_uri as original_get_image_from_uri
from .layout import layout_document
from .layout.backgrounds import percentage
from .logger import LOGGER
from .pdf import write_pdf_metadata
from .pdf import write_pdf_attachments

if cairo.cairo_version() < 11504:
warnings.warn(
'There are known rendering problems with cairo < 1.15.4. '
'WeasyPrint may work with older versions, but please read the note '
'about the needed cairo version on the "Install" page of the '
'documentation before reporting bugs.')
'There are known rendering problems and missing features with '
'cairo < 1.15.4. WeasyPrint may work with older versions, but please '
'read the note about the needed cairo version on the "Install" page '
'of the documentation before reporting bugs. '
'http://weasyprint.readthedocs.io/en/latest/install.html')


def _get_matrix(box):
Expand Down Expand Up @@ -142,6 +144,31 @@ def _gather_links_and_bookmarks(box, bookmarks, links, anchors, matrix):
_gather_links_and_bookmarks(child, bookmarks, links, anchors, matrix)


def _w3c_date_to_iso(string, attr_name):
"""Tranform W3C date to ISO-8601 format."""
if string is None:
return None
match = W3C_DATE_RE.match(string)
if match is None:
LOGGER.warning('Invalid %s date: %r', attr_name, string)
return None
groups = match.groupdict()
iso_date = '%04i-%02i-%02iT%02i:%02i:%02i' % (
int(groups['year']),
int(groups['month'] or 1),
int(groups['day'] or 1),
int(groups['hour'] or 0),
int(groups['minute'] or 0),
int(groups['second'] or 0))
if groups['hour']:
assert groups['minute']
assert groups['tz_hour'].startswith(('+', '-'))
assert groups['tz_minute']
iso_date += '%+03i:%02i' % (
int(groups['tz_hour']), int(groups['tz_minute']))
return iso_date


class Page(object):
"""Represents a single rendered page.
Expand Down Expand Up @@ -467,6 +494,31 @@ def make_bookmark_tree(self):
last_by_depth.append(children)
return root

def add_hyperlinks(self, links, context, scale):
    """Include hyperlinks in current page.

    Does nothing with cairo < 1.15.4.

    :param links: iterable of ``(link_type, link_target, rectangle)``
        tuples. ``link_type`` is ``'external'`` (``link_target`` is the
        URI), ``'internal'`` (``link_target`` is a ``(page, x, y)`` tuple)
        or ``'attachment'``. ``rectangle`` holds the four values written
        in the link's ``rect`` attribute.
    :param context: cairo context of the page being drawn.
    :param scale: factor applied to the rectangle and position values.

    """
    if cairo.cairo_version() < 11504:
        return

    # TODO: Instead of using rects, we could use the drawing rectangles
    # defined by cairo when drawing targets. This would give a feeling
    # similar to what browsers do with links that span multiple lines.
    for link_type, link_target, rectangle in links:
        rect = 'rect=[{} {} {} {}]'.format(*[i * scale for i in rectangle])
        if link_type == 'external':
            # The URI is written inside a single-quoted attribute value:
            # percent-encode the characters that would end or escape it.
            uri = str(link_target).replace('\\', '%5C').replace("'", '%27')
            attributes = "{} uri='{}'".format(rect, uri)
        elif link_type == 'internal':
            page, x, y = link_target
            # Page index is shifted by one for the "page" attribute.
            attributes = '{} page={} pos=[{} {}]'.format(
                rect, page + 1, x * scale, y * scale)
        elif link_type == 'attachment':
            # Attachments are handled in write_pdf_attachments
            continue
        else:
            # Never emit a tag with missing or stale attributes.
            LOGGER.warning('Unknown link type: %r', link_type)
            continue
        # A link tag must be closed, or it stays open for the rest of the
        # drawing operations.
        context.tag_begin(cairo.TAG_LINK, attributes)
        context.tag_end(cairo.TAG_LINK)

def write_pdf(self, target=None, zoom=1, attachments=None):
"""Paint the pages in a PDF file, with meta-data.
Expand Down Expand Up @@ -497,8 +549,11 @@ def write_pdf(self, target=None, zoom=1, attachments=None):
# (1, 1) is overridden by .set_size() below.
surface = cairo.PDFSurface(file_obj, 1, 1)
context = cairo.Context(surface)

LOGGER.info('Step 6 - Drawing')
for page in self.pages:

paged_links = list(self.resolve_links())
for page, links in zip(self.pages, paged_links):
surface.set_size(
math.floor(scale * (
page.width + page.bleed['left'] + page.bleed['right'])),
Expand All @@ -508,12 +563,68 @@ def write_pdf(self, target=None, zoom=1, attachments=None):
context.translate(
page.bleed['left'] * scale, page.bleed['top'] * scale)
page.paint(context, scale=scale)
self.add_hyperlinks(links, context, scale)
surface.show_page()
surface.finish()

LOGGER.info('Step 7 - Adding PDF metadata')
write_pdf_metadata(self, file_obj, scale, self.metadata, attachments,
self.url_fetcher)

# TODO: overwrite producer when possible in cairo
if cairo.cairo_version() >= 11504:
# Set document information
for attr, key in (
('title', cairo.PDF_METADATA_TITLE),
('description', cairo.PDF_METADATA_SUBJECT),
('generator', cairo.PDF_METADATA_CREATOR)):
value = getattr(self.metadata, attr)
if value is not None:
surface.set_metadata(key, value)
for attr, key in (
('authors', cairo.PDF_METADATA_AUTHOR),
('keywords', cairo.PDF_METADATA_KEYWORDS)):
value = getattr(self.metadata, attr)
if value is not None:
surface.set_metadata(key, ', '.join(value))
for attr, key in (
('created', cairo.PDF_METADATA_CREATE_DATE),
('modified', cairo.PDF_METADATA_MOD_DATE)):
value = getattr(self.metadata, attr)
if value is not None:
surface.set_metadata(key, _w3c_date_to_iso(value, attr))

# Set bookmarks
bookmarks = self.make_bookmark_tree()
levels = [cairo.PDF_OUTLINE_ROOT] * len(bookmarks)
while bookmarks:
title, destination, children = bookmarks.pop(0)
page, x, y = destination
link_attribs = 'page={} pos=[{} {}]'.format(
page + 1, x * scale, y * scale)
outline = surface.add_outline(
levels.pop(), title, link_attribs, 0)
levels.extend([outline] * len(children))
bookmarks = children + bookmarks

surface.finish()

# Add extra PDF metadata: attachments, embedded files
attachment_links = [
[link for link in page_links if link[0] == 'attachment']
for page_links in paged_links]
# Write extra PDF metadata only when there is at least one of:
# - attachments in metadata
# - attachments as function parameters
# - attachments as PDF links
# - bleed boxes
condition = (
self.metadata.attachments or
attachments or
any(attachment_links) or
any(any(page.bleed.values()) for page in self.pages))
if condition:
write_pdf_attachments(
file_obj, scale, self.url_fetcher,
self.metadata.attachments + (attachments or []),
attachment_links, self.pages)

if target is None:
return file_obj.getvalue()
Expand Down
Loading

0 comments on commit 0834615

Please sign in to comment.