From 4dc9c8b41691fcfb802d0f1a882d2a00b68cfdf5 Mon Sep 17 00:00:00 2001 From: Wayne Low Date: Fri, 19 Apr 2024 14:46:16 +0800 Subject: [PATCH] Fix /ObjStm stream index parsing when index entries are separated by newlines --- pdf-parser.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pdf-parser.py b/pdf-parser.py index 3f945ad..aceb7cb 100644 --- a/pdf-parser.py +++ b/pdf-parser.py @@ -1585,7 +1585,16 @@ def Main(): oPDFParseDictionary = cPDFParseDictionary(object.ContainsStream(), options.nocanonicalizedoutput) numberOfObjects = int(oPDFParseDictionary.Get('/N')[0]) offsetFirstObject = int(oPDFParseDictionary.Get('/First')[0]) - indexes = list(map(int, C2SIP3(object.Stream())[:offsetFirstObject].strip().split(' '))) + data_indexes = C2SIP3(object.Stream())[:offsetFirstObject].strip().split(' ') + # Index entries may be separated by newlines instead of spaces, e.g. b'12 0\n10 55\n17 169\n19' + final_indexes = [] + for d in data_indexes: + if '\n' in d: + s = d.split('\n') + final_indexes.extend(s) + else: + final_indexes.append(d) + indexes = list(map(int, final_indexes)) if len(indexes) % 2 != 0 or len(indexes) / 2 != numberOfObjects: raise Exception('Error in index of /ObjStm stream') streamObject = C2SIP3(object.Stream()[offsetFirstObject:])