From 4dc9c8b41691fcfb802d0f1a882d2a00b68cfdf5 Mon Sep 17 00:00:00 2001
From: Wayne Low <wayne.low@synapxe.sg>
Date: Fri, 19 Apr 2024 14:46:16 +0800
Subject: [PATCH] Fix parsing of /ObjStm stream indexes that may be separated
 by newlines

---
 pdf-parser.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pdf-parser.py b/pdf-parser.py
index 3f945ad..aceb7cb 100644
--- a/pdf-parser.py
+++ b/pdf-parser.py
@@ -1585,7 +1585,16 @@ def Main():
                 oPDFParseDictionary = cPDFParseDictionary(object.ContainsStream(), options.nocanonicalizedoutput)
                 numberOfObjects = int(oPDFParseDictionary.Get('/N')[0])
                 offsetFirstObject = int(oPDFParseDictionary.Get('/First')[0])
-                indexes = list(map(int, C2SIP3(object.Stream())[:offsetFirstObject].strip().split(' ')))
+                data_indexes = C2SIP3(object.Stream())[:offsetFirstObject].strip().split(' ')
+                # Index values may be separated by newlines as well as spaces, e.g. b'12 0\n10 55\n17 169\n19'
+                final_indexes = []
+                for d in data_indexes:
+                    if '\n' in d:
+                        s = d.split('\n')
+                        final_indexes.extend(s)
+                    else:
+                        final_indexes.append(d)
+                indexes = list(map(int, final_indexes))
                 if len(indexes) % 2 != 0 or len(indexes) / 2 != numberOfObjects:
                     raise Exception('Error in index of /ObjStm stream')
                 streamObject = C2SIP3(object.Stream()[offsetFirstObject:])