Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix .paint_path bug noted in issue #473 #512

Merged
merged 7 commits into from
Oct 12, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Fixed
- Pass caching parameter to PDFResourceManager in `high_level` functions ([#475](https://github.com/pdfminer/pdfminer.six/pull/475))
- Fix `.paint_path` logic for handling non-rect quadrilaterals and decomposing complex paths ([#512](https://github.com/pdfminer/pdfminer.six/pull/512))

### Removed
- Remove unused rijndael encryption implementation ([#465](https://github.com/pdfminer/pdfminer.six/pull/465))
Expand Down
44 changes: 26 additions & 18 deletions pdfminer/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@

class PDFLayoutAnalyzer(PDFTextDevice):

RECTS = re.compile('^(mlllh)+$')

def __init__(self, rsrcmgr, pageno=1, laparams=None):
PDFTextDevice.__init__(self, rsrcmgr)
self.pageno = pageno
Expand Down Expand Up @@ -77,8 +75,18 @@ def render_image(self, name, stream):
def paint_path(self, gstate, stroke, fill, evenodd, path):
"""Paint paths described in section 4.4 of the PDF reference manual"""
shape = ''.join(x[0] for x in path)

# if path contains multiple subpaths, split them up and reprocess
if shape.count("m") > 1:
m_indices = [i for i, x in enumerate(shape) if x == "m"]
m_indices_zipped = zip(m_indices, m_indices[1:] + [None])
subpaths = [path[a:b] for a, b in m_indices_zipped]
for sp in subpaths:
self.paint_path(gstate, stroke, fill, evenodd, sp)
pietermarsman marked this conversation as resolved.
Show resolved Hide resolved
return

if shape == 'ml':
# horizontal/vertical line
# single line segment
(_, x0, y0) = path[0]
(_, x1, y1) = path[1]
(x0, y0) = apply_matrix_pt(self.ctm, (x0, y0))
Expand All @@ -87,9 +95,10 @@ def paint_path(self, gstate, stroke, fill, evenodd, path):
line = LTLine(gstate.linewidth, (x0, y0), (x1, y1), stroke,
fill, evenodd, gstate.scolor, gstate.ncolor)
self.cur_item.add(line)
return
pietermarsman marked this conversation as resolved.
Show resolved Hide resolved

elif shape == 'mlllh':
# rectangle
if shape == 'mlllh':
# possibly a rectangle
(_, x0, y0) = path[0]
(_, x1, y1) = path[1]
(_, x2, y2) = path[2]
Expand All @@ -98,24 +107,23 @@ def paint_path(self, gstate, stroke, fill, evenodd, path):
(x1, y1) = apply_matrix_pt(self.ctm, (x1, y1))
(x2, y2) = apply_matrix_pt(self.ctm, (x2, y2))
(x3, y3) = apply_matrix_pt(self.ctm, (x3, y3))

# confirmed to be a rectangle
if (x0 == x1 and y1 == y2 and x2 == x3 and y3 == y0) or \
(y0 == y1 and x1 == x2 and y2 == y3 and x3 == x0):
rect = LTRect(gstate.linewidth, (x0, y0, x2, y2), stroke,
fill, evenodd, gstate.scolor, gstate.ncolor)
self.cur_item.add(rect)

elif self.RECTS.match(shape):
for paths in zip(*(iter(path),) * 5):
self.paint_path(gstate, stroke, fill, evenodd, list(paths))

else:
pts = []
for p in path:
for i in range(1, len(p), 2):
pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1])))
curve = LTCurve(gstate.linewidth, pts, stroke, fill, evenodd,
gstate.scolor, gstate.ncolor)
self.cur_item.add(curve)
return

# if not a rectangle or a line, treat as a curve
pts = []
for p in path:
for i in range(1, len(p), 2):
pts.append(apply_matrix_pt(self.ctm, (p[i], p[i+1])))
curve = LTCurve(gstate.linewidth, pts, stroke, fill, evenodd,
gstate.scolor, gstate.ncolor)
self.cur_item.add(curve)

def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
graphicstate):
Expand Down
65 changes: 64 additions & 1 deletion tests/test_converter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from nose.tools import assert_equal

from pdfminer.converter import PDFLayoutAnalyzer
from pdfminer.layout import LTContainer
from pdfminer.layout import LTContainer, LTRect, LTCurve
from pdfminer.pdfinterp import PDFGraphicState


Expand Down Expand Up @@ -32,6 +32,69 @@ def test_paint_path_multiple_mlllh(self):
analyzer.paint_path(PDFGraphicState(), False, False, False, path)
assert_equal(len(analyzer.cur_item._objs), 3)

def test_paint_path_quadrilaterals(self):
"""via https://github.com/pdfminer/pdfminer.six/issues/473"""

def parse(path):
analyzer = self._get_analyzer()
analyzer.cur_item = LTContainer([0, 1000, 0, 1000])
analyzer.paint_path(PDFGraphicState(), False, False, False, path)
return analyzer.cur_item._objs

def get_types(path):
return list(map(type, parse(path)))

assert_equal(get_types([
("m", 10, 90),
("l", 90, 90),
("l", 90, 10),
("l", 10, 10),
("h",),
]), [LTRect])

assert_equal(get_types([
("m", 110, 90),
("l", 190, 10),
("l", 190, 90),
("l", 110, 10),
("h",),
]), [LTCurve])
jsvine marked this conversation as resolved.
Show resolved Hide resolved

assert_equal(get_types([
("m", 210, 90),
("l", 290, 60),
("l", 290, 10),
("l", 210, 10),
("h",),
]), [LTCurve])

assert_equal(get_types([
("m", 310, 90),
("l", 350, 90),
("l", 350, 10),
("l", 310, 10),
("h",),
("m", 350, 90),
("l", 390, 90),
("l", 390, 10),
("l", 350, 10),
("h",),
]), [LTRect, LTRect])

assert_equal(get_types([
("m", 410, 90),
("l", 445, 90),
("l", 445, 10),
("l", 410, 10),
("h",),
("m", 455, 70),
("l", 475, 90),
("l", 490, 70),
("l", 490, 10),
("l", 455, 10),
("h",),
]), [LTRect, LTCurve])

def _get_analyzer(self):
analyzer = PDFLayoutAnalyzer(None)
analyzer.set_ctm([1, 0, 0, 1, 0, 0])
Expand Down