From 6eebb89579a5ad7c0e7e74997a941e50c055e254 Mon Sep 17 00:00:00 2001 From: Arnaud TAMAILLON Date: Sun, 1 Sep 2024 14:34:00 +0200 Subject: [PATCH 1/4] Support not finding the Pages dictionary in lenient mode --- .../Integration/CatGeneticsTests.cs | 19 +++++++++++++++++++ src/UglyToad.PdfPig/Parser/CatalogFactory.cs | 12 ++++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs b/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs index 5072f4efa..7b235f9b6 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs @@ -1,6 +1,8 @@ namespace UglyToad.PdfPig.Tests.Integration { using Annotations; + using PdfPig.Core; + using System.IO; public class CatGeneticsTests { @@ -39,5 +41,22 @@ public void CanGetAnnotations() } } } + + [Fact] + public void CanSupportPageInformationNotFoundInLenientMode() + { + var path = IntegrationHelpers.GetSpecificTestDocumentPath("pages-indirect-to-null.pdf"); + // Lenient Parsing On -> can process + using (var document = PdfDocument.Open(path)) + { + // unable to parse + Assert.Equal(1, document.NumberOfPages); + Assert.NotNull(document.GetPage(1)); + } + + // Lenient Parsing Off -> throws + var ex = Assert.Throws(() => PdfDocument.Open(path, ParsingOptions.LenientParsingOff)); + Assert.Equal("Pages entry is null", ex.Message); + } } } diff --git a/src/UglyToad.PdfPig/Parser/CatalogFactory.cs b/src/UglyToad.PdfPig/Parser/CatalogFactory.cs index 8b2a35165..1fea5462a 100644 --- a/src/UglyToad.PdfPig/Parser/CatalogFactory.cs +++ b/src/UglyToad.PdfPig/Parser/CatalogFactory.cs @@ -46,6 +46,18 @@ public static Catalog Create(IndirectReference rootReference, DictionaryToken di pagesDictionary = DirectObjectFinder.Get(value, scanner); } + if (pagesDictionary == null) + { + if (isLenientParsing) + { + pagesDictionary = new DictionaryToken(new Dictionary()); + } + else + { + throw new PdfDocumentFormatException($"Pages entry is null"); + } + } + var pages = PagesFactory.Create(pagesReference, pagesDictionary, scanner, pageFactory, log, isLenientParsing); var namedDestinations = NamedDestinationsProvider.Read(dictionary, scanner, pages, null); From 6014827f44beb833f5f19aa45142aa15a1d4d6d8 Mon Sep 17 00:00:00 2001 From: Arnaud TAMAILLON Date: Sun, 1 Sep 2024 14:52:22 +0200 Subject: [PATCH 2/4] Add missing test file --- .../pages-indirect-to-null.pdf | Bin 0 -> 410 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/pages-indirect-to-null.pdf diff --git a/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/pages-indirect-to-null.pdf b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/pages-indirect-to-null.pdf new file mode 100644 index 0000000000000000000000000000000000000000..02025037682843d1cb122b08d73f445d0ad15ff6 GIT binary patch literal 410 zcmZuuK~BRk5WHLRAC^NTIDq3cb{nbcp`{fPAWDhEq3XdXZU{0lI7TY`3m(vyu}%~y z5!O8Gk{mEDtD;#C0%pTz_O157JC&Bru60qVsokw(gosSQFiSRn)dIPH|4P@Te*`fK zkj6W$o+){yWSUdLAd`@9JHs(>7)X$#6mo Date: Sun, 1 Sep 2024 15:03:55 +0200 Subject: [PATCH 3/4] Support Kids object not referencing a page object in lenient mode # Conflicts: # src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs --- .../Integration/CatGeneticsTests.cs | 18 +++++++++++++++++- .../pages-kids-not-page.pdf | Bin 0 -> 410 bytes src/UglyToad.PdfPig/Content/PagesFactory.cs | 10 +++++++--- 3 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/pages-kids-not-page.pdf diff --git a/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs b/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs index 7b235f9b6..56ce364ad 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs @@ -2,7 +2,6 @@ { using Annotations; using PdfPig.Core; - using System.IO; public class CatGeneticsTests { @@ -58,5 +57,22 @@ public void CanSupportPageInformationNotFoundInLenientMode() var ex = Assert.Throws(() => PdfDocument.Open(path, ParsingOptions.LenientParsingOff)); Assert.Equal("Pages entry is null", ex.Message); } + + [Fact] + public void CanSupportPageKidsObjectNotBeingAPage() + { + var path = IntegrationHelpers.GetSpecificTestDocumentPath("pages-kids-not-page.pdf"); + + using (var document = PdfDocument.Open(path)) + { + // unable to parse + Assert.Equal(1, document.NumberOfPages); + Assert.NotNull(document.GetPage(1)); + } + + // Lenient Parsing Off -> throws + var ex = Assert.Throws(() => PdfDocument.Open(path, ParsingOptions.LenientParsingOff)); + Assert.Equal("Pages entry is null", ex.Message); + } } } diff --git a/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/pages-kids-not-page.pdf b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/pages-kids-not-page.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b3f156ea2d602d6bd050acbb5e34932f90039bc7 GIT binary patch literal 410 zcmZuu%TB{E5WHLRKP-nxZ~)tB;xK0-guO|~q zbW?ORN<0)4msQbj2A;EJGxORw5WP#MX4kr?)YR-Z94Tp*!Z3@sf8D$|xLxU{bPte@ zJf(G3t0zicD4FJzFvuk2yFM@n0Rstglt7M@;|pEa2JVcli=z|BvCSMMH@+X{4o;!M z-fHyWkl^?IK&B~L?%)R$8FXW4eu45IP$a{^q{LZO)!KFoy{+CgNVYZx7vs0o7gN^t N8DbgZDWdsu@df|GWN!cf literal 0 HcmV?d00001 diff --git a/src/UglyToad.PdfPig/Content/PagesFactory.cs b/src/UglyToad.PdfPig/Content/PagesFactory.cs index 45ce51e89..9747776a0 100644 --- a/src/UglyToad.PdfPig/Content/PagesFactory.cs +++ b/src/UglyToad.PdfPig/Content/PagesFactory.cs @@ -143,15 +143,19 @@ private static PageTreeNode ProcessPagesNode(IndirectReference referenceInput, foreach (var kid in kids.Data) { + DictionaryToken? kidDictionaryToken = null; if (!(kid is IndirectReferenceToken kidRef)) { throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}."); } - - if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken? kidDictionaryToken)) + if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out kidDictionaryToken)) { - throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}."); + if (!isLenientParsing) + { + throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}."); + } } + kidDictionaryToken ??= new DictionaryToken(new Dictionary()); bool isChildPage = CheckIfIsPage(kidDictionaryToken, current.reference, false, pdfTokenScanner, isLenientParsing); From dc5eb6d9bfcf8c8c19b6af41141cb1760c29ddbc Mon Sep 17 00:00:00 2001 From: Arnaud TAMAILLON Date: Sun, 1 Sep 2024 15:40:28 +0200 Subject: [PATCH 4/4] Fixed test --- src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs b/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs index 56ce364ad..b8f5e7b3b 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs @@ -72,7 +72,7 @@ public void CanSupportPageKidsObjectNotBeingAPage() // Lenient Parsing Off -> throws var ex = Assert.Throws(() => PdfDocument.Open(path, ParsingOptions.LenientParsingOff)); - Assert.Equal("Pages entry is null", ex.Message); + Assert.Equal("Could not find dictionary associated with reference in pages kids array: 3 0.", ex.Message); } } }