Skip to content

Commit

Permalink
fix: check that the previous xref is not the just processed xref (#727)
Browse files Browse the repository at this point in the history
* fix: check that the previous xref is not the just processed xref

* test: add unit test for Issue 727

* fix: code style issue

* Update tests/PHPUnit/Integration/RawData/RawDataParserTest.php

Co-authored-by: Konrad Abicht <hi@inspirito.de>

---------

Co-authored-by: Konrad Abicht <hi@inspirito.de>
  • Loading branch information
tkegan and k00ni authored Aug 16, 2024
1 parent 7964d2e commit ac8e667
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 3 deletions.
Binary file added samples/bugs/Issue727.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion src/Smalot/PdfParser/RawData/FilterHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ protected function decodeFilterLZWDecode(string $data): string
// convert string to binary string
$bitstring = '';
for ($i = 0; $i < $data_length; ++$i) {
$bitstring .= sprintf('%08b', \ord($data[$i]));
$bitstring .= \sprintf('%08b', \ord($data[$i]));
}
// get the number of bits
$data_length = \strlen($bitstring);
Expand Down
7 changes: 5 additions & 2 deletions src/Smalot/PdfParser/RawData/RawDataParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,11 @@ protected function decodeXref(string $pdfData, int $startxref, array $xref = [])
}
}
if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
// get previous xref
$xref = $this->getXrefData($pdfData, (int) $matches[1], $xref);
$offset = (int) $matches[1];
if (0 != $offset) {
// get previous xref
$xref = $this->getXrefData($pdfData, $offset, $xref);
}
}
} else {
throw new \Exception('Unable to find trailer');
Expand Down
19 changes: 19 additions & 0 deletions tests/PHPUnit/Integration/RawData/RawDataParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,23 @@ public function testGetXrefDataIssue673(): void

self::assertStringContainsString('6 rue des Goutais', $text);
}

/**
* Regression test: handle self referencing xref without looping forever.
*
* It seems that some PDF creators output `Prev 0` when there is no previous xref.
* The sample file used here is one that previously sent the parser into an
* infinite loop, so the test passes as soon as parsing terminates.
*
* @see https://github.com/smalot/pdfparser/pull/727
*/
public function testDecodeXrefIssue727(): void
{
// Sample document attached to the bug report, resolved relative to the project root.
$filename = $this->rootDir.'/samples/bugs/Issue727.pdf';

// Parsing this document would previously cause an infinite loop
$parser = $this->getParserInstance();
$document = $parser->parseFile($filename);
$text = $document->getText();

// The needle is the empty string, so this assertion is expected to hold for
// any extracted text; it says nothing about the document's content. Reaching
// this line at all (parseFile() and getText() returned) is the actual check.
// NOTE(review): consider asserting on known text from the sample once its
// content is confirmed.
self::assertStringContainsString('', $text);
}
}

0 comments on commit ac8e667

Please sign in to comment.