From b09f238436237c5b7d2c058972cb268c0bc1c5af Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 14 Dec 2023 20:09:47 +0100 Subject: [PATCH 1/3] Add iteration support in the `PDFObjects` class This (obviously) only includes "resolved" data, and will be used in an upcoming patch. --- src/display/api.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/display/api.js b/src/display/api.js index ada62580a3943..cd332473f1243 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -3125,7 +3125,7 @@ class PDFObjects { */ has(objId) { const obj = this.#objs[objId]; - return obj?.capability.settled || false; + return obj?.capability.settled ?? false; } /** @@ -3147,6 +3147,17 @@ class PDFObjects { } this.#objs = Object.create(null); } + + *[Symbol.iterator]() { + for (const objId in this.#objs) { + const { capability, data } = this.#objs[objId]; + + if (!capability.settled) { + continue; + } + yield [objId, data]; + } + } } /** From e547b198a3e44b0198afe58ab74b0c7c74d8006a Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 14 Dec 2023 21:32:08 +0100 Subject: [PATCH 2/3] Compute the length of the final image-bitmap/data on the worker-thread Currently this is done in the API, but moving it into the worker-thread will simplify upcoming changes. --- src/core/evaluator.js | 18 ++++++++++++++---- src/display/api.js | 14 ++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 41081c5f4e3ef..ec363c73d0bb6 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -546,6 +546,12 @@ class PartialEvaluator { } _sendImgData(objId, imgData, cacheGlobally = false) { + if ( + (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) && + imgData + ) { + assert(Number.isInteger(imgData.dataLen), "Expected dataLen to be set."); + } const transfers = imgData ? [imgData.bitmap || imgData.data.buffer] : null; if (this.parsingType3Font || cacheGlobally) { @@ -690,6 +696,10 @@ class PartialEvaluator { const objId = `mask_${this.idFactory.createObjId()}`; operatorList.addDependency(objId); + + imgData.dataLen = imgData.bitmap + ? imgData.width * imgData.height * 4 + : imgData.data.length; this._sendImgData(objId, imgData); args = [ @@ -790,12 +800,12 @@ class PartialEvaluator { /* isOffscreenCanvasSupported = */ this.options .isOffscreenCanvasSupported ); + imgData.dataLen = imgData.bitmap + ? imgData.width * imgData.height * 4 + : imgData.data.length; if (cacheKey && imageRef && cacheGlobally) { - const length = imgData.bitmap - ? imgData.width * imgData.height * 4 - : imgData.data.length; - this.globalImageCache.addByteSize(imageRef, length); + this.globalImageCache.addByteSize(imageRef, imgData.dataLen); } return this._sendImgData(objId, imgData, cacheGlobally); diff --git a/src/display/api.js b/src/display/api.js index cd332473f1243..d77830e58f0bc 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -2781,18 +2781,8 @@ class WorkerTransport { pageProxy.objs.resolve(id, imageData); // Heuristic that will allow us not to store large data. - if (imageData) { - let length; - if (imageData.bitmap) { - const { width, height } = imageData; - length = width * height * 4; - } else { - length = imageData.data?.length || 0; - } - - if (length > MAX_IMAGE_SIZE_TO_CACHE) { - pageProxy._maybeCleanupAfterRender = true; - } + if (imageData?.dataLen > MAX_IMAGE_SIZE_TO_CACHE) { + pageProxy._maybeCleanupAfterRender = true; } break; case "Pattern": From 9f02cc36d48d349011916ac1878dede898528934 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 14 Dec 2023 21:57:48 +0100 Subject: [PATCH 3/3] Attempt to further reduce re-parsing for globally cached images (PR 11912, 16108 follow-up) In PR 11912 we started caching images that occur on multiple pages globally, which improved performance a lot in many PDF documents. However, one slightly annoying limitation of the implementation is the need to re-parse the image once the global-caching threshold has been reached. Previously this was difficult to avoid, since large image-resources will cause cleanup to run on the main-thread after rendering has finished. In PR 16108 we started delaying this cleanup a little bit, to improve performance if a user e.g. zooms and/or rotates the document immediately after rendering completes. Taking those two PRs together, we now have a situation where it's much more likely that the main-thread has "globally used" images cached at the page-level. Hence we can instead attempt to *copy* a locally cached image into the global object-cache on the main-thread and thus reduce unnecessary re-parsing of large/complex global images, which significantly reduces the rendering time in many cases. For the PDF document in issue 11878, the rendering time of *the second page* changes as follows (on my computer): - With the `master`-branch it takes >600 ms to render. - With this patch that goes down to ~50 ms, which is one order of magnitude faster. (Note that all other pages are, as expected, completely unaffected by these changes.) This new main-thread copying is limited to "large" global images, since: - Re-parsing of small images, on the worker-thread, is usually fast enough to not be an issue. - With the delayed cleanup after rendering, it's still not guaranteed that an image is available in a page-level cache on the main-thread. - This forces the worker-thread to wait for the main-thread, which is a pattern that you always want to avoid unless absolutely necessary. --- src/core/evaluator.js | 36 +++++++++++++++++++++++++++++------- src/display/api.js | 32 ++++++++++++++++++++++++++++++-- test/unit/api_spec.js | 38 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 95 insertions(+), 11 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index ec363c73d0bb6..1a0ac5806a058 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -771,13 +771,15 @@ class PartialEvaluator { if (this.parsingType3Font) { objId = `${this.idFactory.getDocId()}_type3_${objId}`; - } else if (imageRef) { + } else if (cacheKey && imageRef) { cacheGlobally = this.globalImageCache.shouldCache( imageRef, this.pageIndex ); if (cacheGlobally) { + assert(!isInline, "Cannot cache an inline image globally."); + objId = `${this.idFactory.getDocId()}_${objId}`; } } @@ -785,6 +787,30 @@ class PartialEvaluator { // Ensure that the dependency is added before the image is decoded. operatorList.addDependency(objId); args = [objId, w, h]; + operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent); + + // For large images, at least 500x500 in size, that we'll cache globally + // check if the image is still cached locally on the main-thread to avoid + // having to re-parse the image (since that can be slow). + if (cacheGlobally && w * h > 250000) { + const localLength = await this.handler.sendWithPromise("commonobj", [ + objId, + "CopyLocalImage", + { imageRef }, + ]); + + if (localLength) { + this.globalImageCache.setData(imageRef, { + objId, + fn: OPS.paintImageXObject, + args, + optionalContent, + byteSize: 0, // Temporary entry, to avoid `setData` returning early. + }); + this.globalImageCache.addByteSize(imageRef, localLength); + return; + } + } PDFImage.buildImage({ xref: this.xref, @@ -803,11 +829,11 @@ class PartialEvaluator { imgData.dataLen = imgData.bitmap ? imgData.width * imgData.height * 4 : imgData.data.length; + imgData.ref = imageRef; - if (cacheKey && imageRef && cacheGlobally) { + if (cacheGlobally) { this.globalImageCache.addByteSize(imageRef, imgData.dataLen); } - return this._sendImgData(objId, imgData, cacheGlobally); }) .catch(reason => { @@ -816,8 +842,6 @@ class PartialEvaluator { return this._sendImgData(objId, /* imgData = */ null, cacheGlobally); }); - operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent); - if (cacheKey) { const cacheData = { fn: OPS.paintImageXObject, @@ -830,8 +854,6 @@ class PartialEvaluator { this._regionalImageCache.set(/* name = */ null, imageRef, cacheData); if (cacheGlobally) { - assert(!isInline, "Cannot cache an inline image globally."); - this.globalImageCache.setData(imageRef, { objId, fn: OPS.paintImageXObject, diff --git a/src/display/api.js b/src/display/api.js index d77830e58f0bc..037c7d0daa0b2 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -2704,11 +2704,11 @@ class WorkerTransport { messageHandler.on("commonobj", ([id, type, exportedData]) => { if (this.destroyed) { - return; // Ignore any pending requests if the worker was terminated. + return null; // Ignore any pending requests if the worker was terminated. } if (this.commonObjs.has(id)) { - return; + return null; } switch (type) { @@ -2750,6 +2750,23 @@ class WorkerTransport { this.commonObjs.resolve(id, font); }); break; + case "CopyLocalImage": + const { imageRef } = exportedData; + assert(imageRef, "The imageRef must be defined."); + + for (const pageProxy of this.#pageCache.values()) { + for (const [, data] of pageProxy.objs) { + if (data.ref !== imageRef) { + continue; + } + if (!data.dataLen) { + return null; + } + this.commonObjs.resolve(id, structuredClone(data)); + return data.dataLen; + } + } + break; case "FontPath": case "Image": case "Pattern": @@ -2758,6 +2775,8 @@ class WorkerTransport { default: throw new Error(`Got unknown common object type ${type}`); } + + return null; }); messageHandler.on("obj", ([id, pageIndex, type, imageData]) => { @@ -3166,6 +3185,15 @@ class RenderTask { * @type {function} */ this.onContinue = null; + + if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { + // For testing purposes. + Object.defineProperty(this, "getOperatorList", { + value: () => { + return this.#internalRenderTask.operatorList; + }, + }); + } } /** diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 2db8a25dd9743..e0a349506ac4b 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -3811,14 +3811,36 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) const loadingTask = getDocument( buildGetDocumentParams("issue11878.pdf", { isOffscreenCanvasSupported: false, + pdfBug: true, }) ); const pdfDoc = await loadingTask.promise; - let firstImgData = null; + let checkedCopyLocalImage = false, + firstImgData = null, + firstStatsOverall = null; for (let i = 1; i <= pdfDoc.numPages; i++) { const pdfPage = await pdfDoc.getPage(i); - const opList = await pdfPage.getOperatorList(); + const viewport = pdfPage.getViewport({ scale: 1 }); + + const canvasAndCtx = CanvasFactory.create( + viewport.width, + viewport.height + ); + const renderTask = pdfPage.render({ + canvasContext: canvasAndCtx.context, + viewport, + }); + + await renderTask.promise; + const opList = renderTask.getOperatorList(); + // The canvas is no longer necessary, since we only care about + // the image-data below. + CanvasFactory.destroy(canvasAndCtx); + + const [statsOverall] = pdfPage.stats.times + .filter(time => time.name === "Overall") + .map(time => time.end - time.start); const { commonObjs, objs } = pdfPage; const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject); @@ -3843,6 +3865,7 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) // Ensure that the actual image data is identical for all pages. if (i === 1) { firstImgData = objs.get(objId); + firstStatsOverall = statsOverall; expect(firstImgData.width).toEqual(EXPECTED_WIDTH); expect(firstImgData.height).toEqual(EXPECTED_HEIGHT); @@ -3854,6 +3877,8 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs; const currentImgData = objsPool.get(objId); + expect(currentImgData).not.toBe(firstImgData); + expect(currentImgData.width).toEqual(firstImgData.width); expect(currentImgData.height).toEqual(firstImgData.height); @@ -3866,11 +3891,20 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) return value === firstImgData.data[index]; }) ).toEqual(true); + + if (i === NUM_PAGES_THRESHOLD) { + checkedCopyLocalImage = true; + // Ensure that the image was copied in the main-thread, rather + // than being re-parsed in the worker-thread (which is slower). + expect(statsOverall).toBeLessThan(firstStatsOverall / 5); + } } } + expect(checkedCopyLocalImage).toBeTruthy(); await loadingTask.destroy(); firstImgData = null; + firstStatsOverall = null; }); it("render for printing, with `printAnnotationStorage` set", async function () {