Skip to content

Commit

Permalink
Modified _get_ids_image and _get_image so they work with onlyoffice i…
Browse files Browse the repository at this point in the history
…mages
  • Loading branch information
Nathanaël Renaud committed May 17, 2024
1 parent 53a3781 commit d0493ae
Showing 1 changed file with 125 additions and 22 deletions.
147 changes: 125 additions & 22 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,19 +457,71 @@ def _get_ids_image(
if ancest is None:
ancest = []
lst: List[Union[str, List[str]]] = []
if PG.RESOURCES not in obj or RES.XOBJECT not in cast(
DictionaryObject, obj[PG.RESOURCES]
):
return self.inline_images_keys

x_object = obj[PG.RESOURCES][RES.XOBJECT].get_object() # type: ignore
for o in x_object:
if not isinstance(x_object[o], StreamObject):
continue
if x_object[o][IA.SUBTYPE] == "/Image":
lst.append(o if len(ancest) == 0 else ancest + [o])
else: # is a form with possible images inside
lst.extend(self._get_ids_image(x_object[o], ancest + [o], call_stack))

if PG.ANNOTS in obj:
for annot in cast(DictionaryObject, obj[PG.ANNOTS]):
if (
"/AP" in cast(DictionaryObject, annot.keys())
and "/N" in cast(DictionaryObject, annot["/AP"].keys())
and PG.RESOURCES in annot["/AP"]["/N"].get_object()
and RES.XOBJECT
in cast(DictionaryObject, annot["/AP"]["/N"][PG.RESOURCES])
and "/FRM"
in cast(
DictionaryObject, annot["/AP"]["/N"][PG.RESOURCES][RES.XOBJECT]
)
):
frame = annot["/AP"]["/N"][PG.RESOURCES][RES.XOBJECT]["/FRM"]

if PG.RESOURCES in frame.get_object() and RES.XOBJECT in cast(
DictionaryObject, frame[PG.RESOURCES]
):
x_object = frame[PG.RESOURCES][RES.XOBJECT]
for o in x_object:
if x_object[o][IA.SUBTYPE] == "/Image":
if not isinstance(x_object[o], StreamObject):
continue
lst.append(
f"{PG.ANNOTS}/{annot['/T']}{o}"
if len(ancest) == 0
else ancest + [o]
)

if PG.RESOURCES in obj:
if RES.PATTERN in cast(DictionaryObject, obj[PG.RESOURCES]):
for pattern_name, pattern in cast(
DictionaryObject,
cast(DictionaryObject, obj[PG.RESOURCES])[RES.PATTERN],
).items():
if PG.RESOURCES in pattern.get_object() and RES.XOBJECT in cast(
DictionaryObject, pattern[PG.RESOURCES]
):
x_object = pattern[PG.RESOURCES][RES.XOBJECT].get_object()
for o in x_object:
if not isinstance(x_object[o], StreamObject):
continue
if x_object[o][IA.SUBTYPE] == "/Image":
lst.append(
f"{RES.PATTERN}{pattern_name}{o}"
if len(ancest) == 0
else ancest + [o]
)

if RES.XOBJECT in cast(DictionaryObject, obj[PG.RESOURCES]):
x_object = cast(
DictionaryObject,
cast(DictionaryObject, obj[PG.RESOURCES])[RES.XOBJECT],
).get_object()
for o in x_object:
if not isinstance(x_object[o], StreamObject):
continue
if x_object[o][IA.SUBTYPE] == "/Image":
lst.append(o if len(ancest) == 0 else ancest + [o])
else: # is a form with possible images inside
lst.extend(
self._get_ids_image(x_object[o], ancest + [o], call_stack)
)

return lst + self.inline_images_keys

def _get_image(
Expand All @@ -484,9 +536,50 @@ def _get_image(
if isinstance(id, List) and len(id) == 1:
id = id[0]
try:
xobjs = cast(
DictionaryObject, cast(DictionaryObject, obj[PG.RESOURCES])[RES.XOBJECT]
)
if isinstance(id, str) and id.find(RES.PATTERN) == 0:
pattern_name = id[len(RES.PATTERN) : id.find("/", len(RES.PATTERN) + 1)]

patterns = cast(
DictionaryObject,
cast(DictionaryObject, obj[PG.RESOURCES])[RES.PATTERN],
)

xobjs = cast(
DictionaryObject,
cast(
DictionaryObject,
cast(DictionaryObject, patterns[pattern_name])[PG.RESOURCES],
)[RES.XOBJECT],
)
elif isinstance(id, str) and id.find(PG.ANNOTS) == 0:
annot_name = id[len(RES.PATTERN) : id.find("/", len(RES.PATTERN) + 1)]
annots = cast(DictionaryObject, obj[PG.ANNOTS])

for temp_annot in annots:
if temp_annot["/T"] == annot_name:
annot = temp_annot
break

frame_xobjs = cast(
DictionaryObject,
cast(
DictionaryObject,
cast(DictionaryObject, annot["/AP"]["/N"])[PG.RESOURCES],
)[RES.XOBJECT],
)

xobjs = cast(
DictionaryObject,
cast(
DictionaryObject,
cast(DictionaryObject, frame_xobjs["/FRM"])[PG.RESOURCES],
)[RES.XOBJECT],
)
else:
xobjs = cast(
DictionaryObject,
cast(DictionaryObject, obj[PG.RESOURCES])[RES.XOBJECT],
)
except KeyError:
if not (id[0] == "~" and id[-1] == "~"):
raise
Expand All @@ -498,15 +591,25 @@ def _get_image(
raise KeyError("no inline image can be found")
return self.inline_images[id]

imgd = _xobj_to_image(cast(DictionaryObject, xobjs[id]))
extension, byte_stream = imgd[:2]
f = ImageFile(
name=f"{id[1:]}{extension}",
if id.find("/Pattern") == 0:
image_identifier = id[id.rfind("/") :]
image_name = pattern_name[1:] + "_" + image_identifier[1:]
elif id.find("/Annot") == 0:
image_identifier = id[id.rfind("/") :]
image_name = annot_name + "_" + image_identifier[1:]
else:
image_identifier = str(id)
image_name = id[1:]

imgd = _xobj_to_image(cast(DictionaryObject, xobjs[image_identifier]))
image_extension, byte_stream = imgd[:2]

return ImageFile(
name=image_name + str(image_extension),
data=byte_stream,
image=imgd[2],
indirect_reference=xobjs[id].indirect_reference,
indirect_reference=xobjs[image_identifier].indirect_reference,
)
return f
else: # in a sub object
ids = id[1:]
return self._get_image(ids, cast(DictionaryObject, xobjs[id[0]]))
Expand Down

0 comments on commit d0493ae

Please sign in to comment.