From 65c827b0ebf566631f49dc602681e326513982c7 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 8 Nov 2023 09:51:52 +0100 Subject: [PATCH 1/2] Ensure that `fieldObjects` and `#collectFieldObjects` handles References correctly The `fieldObjects`-getter itself is called, from `src/core/worker.js`, in a way that'll ensure that any `MissingDataException`s are handled. However the problem is that the actual data-lookups in `fieldObjects` and `#collectFieldObjects` are done inside of a Promise, which means that `MissingDataException`s won't be handled and parsing could thus break. To address this we change all data-lookups to be asynchronous instead. --- src/core/document.js | 78 ++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index fe54c53f5cb32..c56378243f234 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -1711,13 +1711,15 @@ class PDFDocument { : clearGlobalCaches(); } - #collectFieldObjects(name, fieldRef, promises, annotationGlobals) { - const field = this.xref.fetchIfRef(fieldRef); + async #collectFieldObjects(name, fieldRef, promises, annotationGlobals) { + const { xref } = this; + + const field = await xref.fetchIfRefAsync(fieldRef); if (!(field instanceof Dict)) { return; } if (field.has("T")) { - const partName = stringToPDFString(field.get("T")); + const partName = stringToPDFString(await field.getAsync("T")); name = name === "" ? partName : `${name}.${partName}`; } @@ -1726,7 +1728,7 @@ class PDFDocument { } promises.get(name).push( AnnotationFactory.create( - this.xref, + xref, fieldRef, annotationGlobals, this._localIdFactory, @@ -1740,10 +1742,13 @@ class PDFDocument { }) ); - const kids = field.get("Kids"); + if (!field.has("Kids")) { + return; + } + const kids = await field.getAsync("Kids"); if (Array.isArray(kids)) { for (const kid of kids) { - this.#collectFieldObjects(name, kid, promises, annotationGlobals); + await this.#collectFieldObjects(name, kid, promises, annotationGlobals); } } } @@ -1753,39 +1758,40 @@ class PDFDocument { return shadow(this, "fieldObjects", Promise.resolve(null)); } - const promise = this.pdfManager - .ensureDoc("annotationGlobals") - .then(async annotationGlobals => { - if (!annotationGlobals) { - return null; - } + const promise = Promise.all([ + this.pdfManager.ensureDoc("annotationGlobals"), + this.pdfManager.ensureCatalog("acroForm"), + ]).then(async ([annotationGlobals, acroForm]) => { + if (!annotationGlobals) { + return null; + } - const allFields = Object.create(null); - const fieldPromises = new Map(); - for (const fieldRef of this.catalog.acroForm.get("Fields")) { - this.#collectFieldObjects( - "", - fieldRef, - fieldPromises, - annotationGlobals - ); - } + const allFields = Object.create(null); + const fieldPromises = new Map(); + for (const fieldRef of await acroForm.getAsync("Fields")) { + await this.#collectFieldObjects( + "", + fieldRef, + fieldPromises, + annotationGlobals + ); + } - const allPromises = []; - for (const [name, promises] of fieldPromises) { - allPromises.push( - Promise.all(promises).then(fields => { - fields = fields.filter(field => !!field); - if (fields.length > 0) { - allFields[name] = fields; - } - }) - ); - } + const allPromises = []; + for (const [name, promises] of fieldPromises) { + allPromises.push( + Promise.all(promises).then(fields => { + fields = fields.filter(field => !!field); + if (fields.length > 0) { + allFields[name] = fields; + } + }) + ); + } - await Promise.all(allPromises); - return allFields; - }); + await Promise.all(allPromises); + return allFields; + }); return shadow(this, "fieldObjects", promise); } From ff62fc8e2c7b805f5ce6ee36828b08cf85cb277a Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 8 Nov 2023 10:52:56 +0100 Subject: [PATCH 2/2] Skip `fieldObjects` that are not actually References The `fieldObjects`-getter is implemented in the `PDFDocument` class, which means that the `this._localIdFactory`-property that we pass to `AnnotationFactory.create` doesn't actually exist. The reason that this hasn't caused any bugs, that I'm aware of, is that all /Fields-entries need to be References to actually make sense. --- src/core/document.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index c56378243f234..a7b1342ef999a 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -1714,7 +1714,10 @@ class PDFDocument { async #collectFieldObjects(name, fieldRef, promises, annotationGlobals) { const { xref } = this; - const field = await xref.fetchIfRefAsync(fieldRef); + if (!(fieldRef instanceof Ref)) { + return; + } + const field = await xref.fetchAsync(fieldRef); if (!(field instanceof Dict)) { return; } @@ -1731,7 +1734,7 @@ class PDFDocument { xref, fieldRef, annotationGlobals, - this._localIdFactory, + /* idFactory = */ null, /* collectFields */ true, /* pageRef */ null )