diff --git a/src/core/document.js b/src/core/document.js index 396aa4070da75c..764acf5c25b8dd 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -857,6 +857,10 @@ class PDFDocument { return shadow(this, "xfaFaxtory", null); } + get isPureXfa() { + return this.xfaFactory && this.xfaFactory.isValid(); + } + get htmlForXfa() { if (this.xfaFactory) { return this.xfaFactory.getPages(); @@ -898,8 +902,14 @@ class PDFDocument { options, }); const operatorList = new OperatorList(); + const pdfFonts = []; const initialState = { - font: null, + get font() { + return pdfFonts[pdfFonts.length - 1]; + }, + set font(font) { + pdfFonts.push(font); + }, clone() { return this; }, @@ -947,6 +957,7 @@ class PDFDocument { ); } await Promise.all(promises); + this.xfaFactory.setFonts(pdfFonts); } get formInfo() { diff --git a/src/core/fonts.js b/src/core/fonts.js index 8b8ec434d3377e..a7340c67d8d2ec 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -844,6 +844,7 @@ class Font { this.capHeight = properties.capHeight / PDF_GLYPH_SPACE_UNITS; this.ascent = properties.ascent / PDF_GLYPH_SPACE_UNITS; this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS; + this.lineHeight = this.ascent - this.descent; this.fontMatrix = properties.fontMatrix; this.bbox = properties.bbox; this.defaultEncoding = properties.defaultEncoding; @@ -2466,13 +2467,16 @@ class Font { unitsPerEm: int16(tables.head.data[18], tables.head.data[19]), yMax: int16(tables.head.data[42], tables.head.data[43]), yMin: signedInt16(tables.head.data[38], tables.head.data[39]), - ascent: int16(tables.hhea.data[4], tables.hhea.data[5]), + ascent: signedInt16(tables.hhea.data[4], tables.hhea.data[5]), descent: signedInt16(tables.hhea.data[6], tables.hhea.data[7]), + lineGap: signedInt16(tables.hhea.data[8], tables.hhea.data[9]), }; // PDF FontDescriptor metrics lie -- using data from actual font. 
this.ascent = metricsOverride.ascent / metricsOverride.unitsPerEm; this.descent = metricsOverride.descent / metricsOverride.unitsPerEm; + this.lineGap = metricsOverride.lineGap / metricsOverride.unitsPerEm; + this.lineHeight = this.ascent - this.descent + this.lineGap; // The 'post' table has glyphs names. if (tables.post) { diff --git a/src/core/worker.js b/src/core/worker.js index 2ce3ef206dcb3a..a58f412e6ee5bc 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -187,13 +187,8 @@ class WorkerMessageHandler { await pdfManager.ensureDoc("checkFirstPage"); } - const [numPages, fingerprint, htmlForXfa] = await Promise.all([ - pdfManager.ensureDoc("numPages"), - pdfManager.ensureDoc("fingerprint"), - pdfManager.ensureDoc("htmlForXfa"), - ]); - - if (htmlForXfa) { + const isPureXfa = await pdfManager.ensureDoc("isPureXfa"); + if (isPureXfa) { const task = new WorkerTask("loadXfaFonts"); startWorkerTask(task); await pdfManager @@ -203,6 +198,17 @@ class WorkerMessageHandler { }) .then(() => finishWorkerTask(task)); } + + const [numPages, fingerprint] = await Promise.all([ + pdfManager.ensureDoc("numPages"), + pdfManager.ensureDoc("fingerprint"), + ]); + + // Get htmlForXfa after numPages to avoid creating the HTML twice. + const htmlForXfa = isPureXfa + ? await pdfManager.ensureDoc("htmlForXfa") + : null; + return { numPages, fingerprint, htmlForXfa }; } diff --git a/src/core/xfa/factory.js b/src/core/xfa/factory.js index cea1a911096aa2..286ec844ef011e 100644 --- a/src/core/xfa/factory.js +++ b/src/core/xfa/factory.js @@ -13,8 +13,9 @@ * limitations under the License.
*/ -import { $toHTML } from "./xfa_object.js"; +import { $fonts, $toHTML } from "./xfa_object.js"; import { Binder } from "./bind.js"; +import { warn } from "../../shared/util.js"; import { XFAParser } from "./parser.js"; class XFAFactory { @@ -22,18 +23,25 @@ class XFAFactory { try { this.root = new XFAParser().parse(XFAFactory._createDocument(data)); this.form = new Binder(this.root).bind(); - this._createPages(); } catch (e) { - console.log(e); + warn(`XFA - an error occurred during parsing and binding: ${e}`); } } + isValid() { + return this.root && this.form; + } + _createPages() { - this.pages = this.form[$toHTML](); - this.dims = this.pages.children.map(c => { - const { width, height } = c.attributes.style; - return [0, 0, parseInt(width), parseInt(height)]; - }); + try { + this.pages = this.form[$toHTML](); + this.dims = this.pages.children.map(c => { + const { width, height } = c.attributes.style; + return [0, 0, parseInt(width, 10), parseInt(height, 10)]; + }); + } catch (e) { + warn(`XFA - an error occurred during layout: ${e}`); + } } getBoundingBox(pageIndex) { @@ -41,9 +49,35 @@ class XFAFactory { } get numberPages() { + if (!this.pages) { + this._createPages(); + } return this.dims.length; } + setFonts(fonts) { + this.form[$fonts] = Object.create(null); + for (const font of fonts) { + const cssFontInfo = font.cssFontInfo; + const name = cssFontInfo.fontFamily; + if (!this.form[$fonts][name]) { + this.form[$fonts][name] = Object.create(null); + } + let property = "regular"; + if (cssFontInfo.italicAngle !== "0") { + if (parseFloat(cssFontInfo.fontWeight) >= 700) { + property = "bolditalic"; + } else { + property = "italic"; + } + } else if (parseFloat(cssFontInfo.fontWeight) >= 700) { + property = "bold"; + } + + this.form[$fonts][name][property] = font; + } + } + getPages() { if (!this.pages) { this._createPages(); diff --git a/src/core/xfa/html_utils.js b/src/core/xfa/html_utils.js index 9d4270631fc379..7bf723928f8c1a 100644 --- a/src/core/xfa/html_utils.js
+++ b/src/core/xfa/html_utils.js @@ -18,18 +18,14 @@ import { $getParent, $getSubformParent, $nodeName, + $pushGlyphs, $toStyle, XFAObject, } from "./xfa_object.js"; import { getMeasurement } from "./utils.js"; +import { TextMeasure } from "./text.js"; import { warn } from "../../shared/util.js"; -const wordNonWordRegex = new RegExp( - "([\\p{N}\\p{L}\\p{M}]+)|([^\\p{N}\\p{L}\\p{M}]+)", - "gu" -); -const wordFirstRegex = new RegExp("^[\\p{N}\\p{L}\\p{M}]", "u"); - function measureToString(m) { if (typeof m === "string") { return "0px"; @@ -192,65 +188,15 @@ const converters = { }, }; -function layoutText(text, fontSize, space) { - // Try to guess width and height for the given text in taking into - // account the space where the text should fit. - // The computed dimensions are just an overestimation. - // TODO: base this estimation on real metrics. - let width = 0; - let height = 0; - let totalWidth = 0; - const lineHeight = fontSize * 1.5; - const averageCharSize = fontSize * 0.4; - const maxCharOnLine = Math.floor(space.width / averageCharSize); - const chunks = text.match(wordNonWordRegex); - let treatedChars = 0; - - let i = 0; - let chunk = chunks[0]; - while (chunk) { - const w = chunk.length * averageCharSize; - if (width + w <= space.width) { - width += w; - treatedChars += chunk.length; - chunk = chunks[i++]; - continue; - } - - if (!wordFirstRegex.test(chunk) || chunk.length > maxCharOnLine) { - const numOfCharOnLine = Math.floor( - (space.width - width) / averageCharSize - ); - chunk = chunk.slice(numOfCharOnLine); - treatedChars += numOfCharOnLine; - if (height + lineHeight > space.height) { - return { width: 0, height: 0, splitPos: treatedChars }; - } - totalWidth = Math.max(width, totalWidth); - width = 0; - height += lineHeight; - continue; - } - - if (height + lineHeight > space.height) { - return { width: 0, height: 0, splitPos: treatedChars }; - } - - totalWidth = Math.max(width, totalWidth); - width = w; - height += lineHeight; - chunk = 
chunks[i++]; - } - - if (totalWidth === 0) { - totalWidth = width; - } - - if (totalWidth !== 0) { - height += lineHeight; +function layoutText(text, xfaFont, fonts, width) { + const measure = new TextMeasure(xfaFont, fonts); + if (typeof text === "string") { + measure.addString(text); + } else { + text[$pushGlyphs](measure); } - return { width: totalWidth, height, splitPos: -1 }; + return measure.compute(width); } function computeBbox(node, html, availableSpace) { diff --git a/src/core/xfa/template.js b/src/core/xfa/template.js index 04428554e02495..c5b7bfb8f1f07e 100644 --- a/src/core/xfa/template.js +++ b/src/core/xfa/template.js @@ -22,6 +22,7 @@ import { $extra, $finalize, $flushHTML, + $fonts, $getAvailableSpace, $getChildren, $getContainedChildren, @@ -1521,14 +1522,51 @@ class Draw extends XFAObject { fixDimensions(this); - if (this.w !== "" && this.h === "" && this.value) { - const text = this.value[$text](); - if (text) { - const { height } = layoutText(text, this.font.size, { - width: this.w, - height: Infinity, - }); - this.h = height || ""; + if ((this.w === "" || this.h === "") && this.value) { + const maxWidth = this.w === "" ? 
availableSpace.width : this.w; + const fonts = getRoot(this)[$fonts]; + let font = this.font; + if (!font) { + let parent = this[$getParent](); + while (!(parent instanceof Template)) { + if (parent.font) { + font = parent.font; + break; + } + parent = parent[$getParent](); + } + } + + let height = null; + let width = null; + if ( + this.value.exData && + this.value.exData[$content] && + this.value.exData.contentType === "text/html" + ) { + const res = layoutText( + this.value.exData[$content], + font, + fonts, + maxWidth + ); + width = res.width; + height = res.height; + } else { + const text = this.value[$text](); + if (text) { + const res = layoutText(text, font, fonts, maxWidth); + width = res.width; + height = res.height; + } + } + + if (width !== null && this.w === "") { + this.w = width; + } + + if (height !== null && this.h === "") { + this.h = height; } } @@ -2622,7 +2660,7 @@ class Font extends XFAObject { ]); this.posture = getStringOption(attributes.posture, ["normal", "italic"]); this.size = getMeasurement(attributes.size, "10pt"); - this.typeface = attributes.typeface || ""; + this.typeface = attributes.typeface || "Myriad Pro"; this.underline = getInteger({ data: attributes.underline, defaultValue: 0, @@ -4483,7 +4521,6 @@ class Template extends XFAObject { children: [], }); } - this[$extra] = { overflowNode: null, pageNumber: 1, diff --git a/src/core/xfa/text.js b/src/core/xfa/text.js new file mode 100644 index 00000000000000..f4ca55be2e20b9 --- /dev/null +++ b/src/core/xfa/text.js @@ -0,0 +1,200 @@ +/* Copyright 2021 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+const WIDTH_FACTOR = 1.1; // Multiplier applied to the measured width.
+const HEIGHT_FACTOR = 1.2; // Multiplier applied to the measured height.
+
+class FontInfo {
+  constructor(xfaFont, fonts) {
+    if (!xfaFont) { // No XFA font given: use the first known family.
+      this.pdfFont = Object.values(fonts)[0].regular;
+      const info = this.pdfFont.cssFontInfo;
+      this.xfaFont = {
+        typeface: info.fontFamily,
+        posture: "normal",
+        weight: "normal",
+        size: 10,
+      };
+      return;
+    }
+
+    this.xfaFont = xfaFont;
+    let typeface = fonts[xfaFont.typeface];
+    if (!typeface) {
+      typeface = fonts[`${xfaFont.typeface}-PdfJS-XFA`];
+    }
+    this.pdfFont = null;
+    if (typeface) {
+      if (xfaFont.posture === "italic") {
+        if (xfaFont.weight === "bold") {
+          this.pdfFont = typeface.bolditalic;
+        } else {
+          this.pdfFont = typeface.italic;
+        }
+      } else if (xfaFont.weight === "bold") {
+        this.pdfFont = typeface.bold;
+      } else {
+        this.pdfFont = typeface.regular;
+      }
+      this.pdfFont = this.pdfFont || null; // Missing variant => null.
+    }
+  }
+}
+
+class FontSelector {
+  constructor(defaultXfaFont, fonts) {
+    this.fonts = fonts;
+    this.stack = [new FontInfo(defaultXfaFont, fonts)];
+  }
+
+  pushFont(xfaFont) {
+    const lastFont = this.stack[this.stack.length - 1];
+    for (const name of ["typeface", "posture", "weight", "size"]) {
+      if (!xfaFont[name]) { // Unset: inherit from the current top font.
+        xfaFont[name] = lastFont.xfaFont[name];
+      }
+    }
+
+    const fontInfo = new FontInfo(xfaFont, this.fonts);
+    if (!fontInfo.pdfFont) { // No matching PDF font: keep the current one.
+      fontInfo.pdfFont = lastFont.pdfFont;
+    }
+    this.stack.push(fontInfo);
+  }
+
+  popFont() {
+    this.stack.pop();
+  }
+
+  topFont() {
+    return this.stack[this.stack.length - 1];
+  }
+}
+
+/**
+ * Compute the dimensions of a text area based on font metrics.
+ */ +class TextMeasure { + constructor(defaultXfaFont, fonts) { + this.glyphs = []; + this.fontSelector = new FontSelector(defaultXfaFont, fonts); + } + + pushFont(xfaFont) { + return this.fontSelector.pushFont(xfaFont); + } + + popFont(xfaFont) { + return this.fontSelector.popFont(); + } + + addString(str) { + if (!str) { + return; + } + + const lastFont = this.fontSelector.topFont(); + const pdfFont = lastFont.pdfFont; + const fontSize = lastFont.xfaFont.size; + const lineHeight = Math.round(Math.max(1, pdfFont.lineHeight) * fontSize); + const scale = fontSize / 1000; + + for (const line of str.split(/[\u2029\n]/)) { + const encodedLine = pdfFont.encodeString(line).join(""); + const glyphs = pdfFont.charsToGlyphs(encodedLine); + + for (const glyph of glyphs) { + this.glyphs.push([ + glyph.width * scale, + lineHeight, + glyph.unicode === " ", + false, + ]); + } + + this.glyphs.push([0, 0, false, true]); + } + + this.glyphs.pop(); + } + + compute(maxWidth) { + let lastSpacePos = -1, + lastSpaceWidth = 0, + width = 0, + height = 0, + currentLineWidth = 0, + currentLineHeight = 0; + + for (let i = 0, ii = this.glyphs.length; i < ii; i++) { + const [glyphWidth, glyphHeight, isSpace, isEOL] = this.glyphs[i]; + if (isEOL) { + width = Math.max(width, currentLineWidth); + currentLineWidth = 0; + height += currentLineHeight; + currentLineHeight = glyphHeight; + lastSpacePos = -1; + lastSpaceWidth = 0; + continue; + } + + if (isSpace) { + if (currentLineWidth + glyphWidth > maxWidth) { + // We can break here but the space is not taken into account. 
+ width = Math.max(width, currentLineWidth); + currentLineWidth = 0; + height += currentLineHeight; + currentLineHeight = glyphHeight; + lastSpacePos = -1; + lastSpaceWidth = 0; + } else { + currentLineHeight = Math.max(glyphHeight, currentLineHeight); + lastSpaceWidth = currentLineWidth; + currentLineWidth += glyphWidth; + lastSpacePos = i; + } + continue; + } + + if (currentLineWidth + glyphWidth > maxWidth) { + // We must break to the last white position (if available) + height += currentLineHeight; + currentLineHeight = glyphHeight; + if (lastSpacePos !== -1) { + i = lastSpacePos; + width = Math.max(width, lastSpaceWidth); + currentLineWidth = 0; + lastSpacePos = -1; + lastSpaceWidth = 0; + } else { + // Just break in the middle of the word + width = Math.max(width, currentLineWidth); + currentLineWidth = glyphWidth; + } + continue; + } + + currentLineWidth += glyphWidth; + currentLineHeight = Math.max(glyphHeight, currentLineHeight); + } + + width = Math.max(width, currentLineWidth); + height += currentLineHeight; + + return { width: WIDTH_FACTOR * width, height: HEIGHT_FACTOR * height }; + } +} + +export { TextMeasure }; diff --git a/src/core/xfa/xfa_object.js b/src/core/xfa/xfa_object.js index a6cd94121dfbe4..1ed91cd1c16001 100644 --- a/src/core/xfa/xfa_object.js +++ b/src/core/xfa/xfa_object.js @@ -34,6 +34,7 @@ const $dump = Symbol(); const $extra = Symbol("extra"); const $finalize = Symbol(); const $flushHTML = Symbol(); +const $fonts = Symbol(); const $getAttributeIt = Symbol(); const $getAvailableSpace = Symbol(); const $getChildrenByClass = Symbol(); @@ -46,6 +47,7 @@ const $getContainedChildren = Symbol(); const $getNextPage = Symbol(); const $getSubformParent = Symbol(); const $getParent = Symbol(); +const $pushGlyphs = Symbol(); const $global = Symbol(); const $hasItem = Symbol(); const $hasSettableValue = Symbol(); @@ -970,6 +972,7 @@ export { $extra, $finalize, $flushHTML, + $fonts, $getAttributeIt, $getAvailableSpace, $getChildren, @@ -998,6 
+1001,7 @@ export { $onChild, $onChildCheck, $onText, + $pushGlyphs, $removeChild, $resolvePrototypes, $root, diff --git a/src/core/xfa/xhtml.js b/src/core/xfa/xhtml.js index 5b12bcdfe6c486..78bbf0429f2047 100644 --- a/src/core/xfa/xhtml.js +++ b/src/core/xfa/xhtml.js @@ -18,8 +18,10 @@ import { $childrenToHTML, $content, $extra, + $getChildren, $nodeName, $onText, + $pushGlyphs, $text, $toHTML, XmlObject, @@ -167,6 +169,39 @@ class XhtmlObject extends XmlObject { } } + [$pushGlyphs](measure) { + const xfaFont = Object.create(null); + for (const [key, value] of this.style + .split(";") + .map(s => s.split(":", 2).map(part => part.trim()))) { + if (!key.startsWith("font-")) { + continue; + } + if (key === "font-family") { + xfaFont.typeface = value; + } else if (key === "font-size") { + xfaFont.size = getMeasurement(value); + } else if (key === "font-weight") { + xfaFont.weight = value; + } else if (key === "font-style") { + xfaFont.posture = value; + } + } + measure.pushFont(xfaFont); + if (this[$content]) { + measure.addString(this[$content]); + } else { + for (const child of this[$getChildren]()) { + if (child[$nodeName] === "#text") { + measure.addString(child[$content]); + continue; + } + child[$pushGlyphs](measure); + } + } + measure.popFont(); + } + [$toHTML](availableSpace) { const children = []; this[$extra] = { @@ -202,6 +237,12 @@ class B extends XhtmlObject { constructor(attributes) { super(attributes, "b"); } + + [$pushGlyphs](measure) { + measure.pushFont({ weight: "bold" }); + super[$pushGlyphs](measure); + measure.popFont(); + } } class Body extends XhtmlObject { @@ -230,6 +271,10 @@ class Br extends XhtmlObject { return "\n"; } + [$pushGlyphs](measure) { + measure.addString("\n"); + } + [$toHTML](availableSpace) { return HTMLResult.success({ name: "br", @@ -282,6 +327,12 @@ class I extends XhtmlObject { constructor(attributes) { super(attributes, "i"); } + + [$pushGlyphs](measure) { + measure.pushFont({ posture: "italic" }); + super[$pushGlyphs](measure); +
measure.popFont(); + } } class Li extends XhtmlObject { @@ -301,6 +352,11 @@ class P extends XhtmlObject { super(attributes, "p"); } + [$pushGlyphs](measure) { + super[$pushGlyphs](measure); + measure.addString("\n"); + } + [$text]() { return super[$text]() + "\n"; } diff --git a/test/pdfs/xfa_issue13500.pdf.link b/test/pdfs/xfa_issue13500.pdf.link new file mode 100644 index 00000000000000..828007a0da4e71 --- /dev/null +++ b/test/pdfs/xfa_issue13500.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/6602628/Acrobat.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index cbcc0874368bc3..8bc59304b7ef76 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -5327,5 +5327,13 @@ "type": "eq", "forms": true, "lastPage": 1 + }, + { "id": "xfa_issue13500", + "file": "pdfs/xfa_issue13500.pdf", + "md5": "b81274a19f5a95c1466db3648f1be491", + "link": true, + "rounds": 1, + "enableXfa": true, + "type": "eq" } ]