Skip to content

Commit

Permalink
Merge pull request #11523 from Snuffleupagus/issue-10880
Browse files Browse the repository at this point in the history
Add a heuristic, in `src/core/jpg.js`, to handle JPEG images with a wildly incorrect SOF (Start of Frame) `scanLines` parameter (issue 10880)
  • Loading branch information
timvandermeij authored Mar 6, 2020
2 parents 001b0b2 + c3c3b8c commit 1a97c14
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 6 deletions.
3 changes: 2 additions & 1 deletion src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
this.xref,
resources,
this.pdfFunctionFactory
)
) &&
image.maybeValidDimensions
) {
// These JPEGs don't need any more processing so we can just send it.
return this.handler
Expand Down
3 changes: 2 additions & 1 deletion src/core/image_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class NativeImageDecoder {
this.xref,
this.resources,
this.pdfFunctionFactory
)
) &&
image.maybeValidDimensions
);
}

Expand Down
129 changes: 129 additions & 0 deletions src/core/jpeg_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,135 @@ const JpegStream = (function JpegStreamClosure() {
this.eof = true;
};

Object.defineProperty(JpegStream.prototype, "maybeValidDimensions", {
  /**
   * Heuristic check of the JPEG header against the image dictionary.
   *
   * Scans the underlying stream for the first SOF (Start of Frame) marker and
   * compares its `scanLines` field with the dictionary "Height"/"H" entry.
   * The stream position is always restored before returning, and the result
   * is cached on the instance through `shadow`.
   *
   * @returns {boolean} `false` when the SOF `scanLines` value is zero (a DNL
   *   marker is expected) or more than ten times the dictionary height;
   *   `true` otherwise, including when no SOF marker is found at all.
   */
  get: function JpegStream_maybeValidDimensions() {
    const { dict, stream } = this;
    const dictHeight = dict.get("Height", "H");
    const startPos = stream.pos;

    let validDimensions = true,
      foundSOF = false,
      b;
    try {
      while ((b = stream.getByte()) !== -1) {
        if (b !== 0xff) {
          // Not a valid marker.
          continue;
        }
        switch (stream.getByte()) {
          case 0xc0: // SOF0
          case 0xc1: // SOF1
          case 0xc2: {
            // SOF2
            // These three SOF{n} markers are the only ones that the built-in
            // PDF.js JPEG decoder currently supports.
            foundSOF = true;

            stream.pos += 2; // Skip marker length.
            stream.pos += 1; // Skip precision.
            const scanLines = stream.getUint16();

            // The "normal" case, where the image data and dictionary agree.
            if (scanLines === dictHeight) {
              break;
            }
            // A DNL (Define Number of Lines) marker is expected,
            // which browsers (usually) cannot decode natively.
            if (scanLines === 0) {
              validDimensions = false;
              break;
            }
            // The dimensions of the image, among other properties, should
            // always be taken from the image data *itself* rather than the
            // XObject dictionary. However there are cases of corrupt images
            // that browsers cannot decode natively, for example:
            //  - JPEG images with DNL markers, where the SOF `scanLines`
            //    parameter has an unexpected value (see issue 8614).
            //  - JPEG images with too large SOF `scanLines` parameter, where
            //    the EOI marker is encountered prematurely (see issue 10880).
            // In an attempt to handle these kinds of corrupt images, compare
            // the dimensions in the image data with the dictionary and
            // *always* let the PDF.js JPEG decoder (rather than the browser)
            // handle the image if the difference is larger than one order of
            // magnitude (since that would generally suggest that something
            // is off).
            if (scanLines > dictHeight * 10) {
              validDimensions = false;
            }
            break;
          }

          case 0xc3: // SOF3
          /* falls through */
          case 0xc5: // SOF5
          case 0xc6: // SOF6
          case 0xc7: // SOF7
          /* falls through */
          case 0xc9: // SOF9
          case 0xca: // SOF10
          case 0xcb: // SOF11
          /* falls through */
          case 0xcd: // SOF13
          case 0xce: // SOF14
          case 0xcf: // SOF15
            // A SOF variant whose dimensions are not inspected here; it only
            // ends the scan.
            foundSOF = true;
            break;

          case 0xc4: // DHT
          case 0xcc: // DAC
          /* falls through */
          case 0xda: // SOS
          case 0xdb: // DQT
          case 0xdc: // DNL
          case 0xdd: // DRI
          case 0xde: // DHP
          case 0xdf: // EXP
          /* falls through */
          case 0xe0: // APP0
          case 0xe1: // APP1
          case 0xe2: // APP2
          case 0xe3: // APP3
          case 0xe4: // APP4
          case 0xe5: // APP5
          case 0xe6: // APP6
          case 0xe7: // APP7
          case 0xe8: // APP8
          case 0xe9: // APP9
          case 0xea: // APP10
          case 0xeb: // APP11
          case 0xec: // APP12
          case 0xed: // APP13
          case 0xee: // APP14
          case 0xef: // APP15
          /* falls through */
          case 0xfe: {
            // COM
            // Remember where the length field starts, so that an unusable
            // length can be undone exactly. A relative `skip(-2)` assumes two
            // bytes were consumed; if the data ends right after the marker
            // and `getUint16` consumes nothing (NOTE(review): depends on the
            // stream implementation), it would rewind onto the marker itself
            // and rescan it forever.
            const lengthPos = stream.pos;
            const markerLength = stream.getUint16();
            if (markerLength > 2) {
              stream.skip(markerLength - 2); // Jump to the next marker.
            } else {
              // The marker length is invalid, resetting the stream position.
              stream.pos = lengthPos;
            }
            break;
          }

          case 0xff: // Fill byte.
            // Avoid skipping a valid marker, resetting the stream position.
            stream.skip(-1);
            break;

          case 0xd9: // EOI
            // End of image: nothing more to find, so stop scanning (the flag
            // is reused purely as the loop-exit condition).
            foundSOF = true;
            break;
        }
        if (foundSOF) {
          break;
        }
      }
    } finally {
      // Finally, don't forget to reset the stream position; doing so here
      // also covers the case where reading from the stream throws.
      stream.pos = startPos;
    }

    return shadow(this, "maybeValidDimensions", validDimensions);
  },
  configurable: true,
});

/**
 * Hands the raw JPEG bytes to `createObjectURL`, tagged as "image/jpeg".
 *
 * @param {boolean} [forceDataSchema=false] - Forwarded unchanged as the third
 *   argument of `createObjectURL`.
 * @returns {*} Whatever `createObjectURL` returns for these bytes.
 */
JpegStream.prototype.getIR = function(forceDataSchema = false) {
  const contentType = "image/jpeg";
  const { bytes } = this;
  return createObjectURL(bytes, contentType, forceDataSchema);
};
Expand Down
24 changes: 20 additions & 4 deletions src/core/jpg.js
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ var JpegImage = (function JpegImageClosure() {
if (bitsData === 0xff) {
var nextByte = data[offset++];
if (nextByte) {
if (nextByte === 0xdc && parseDNLMarker) {
if (nextByte === /* DNL = */ 0xdc && parseDNLMarker) {
offset += 2; // Skip marker length.

const scanLines = readUint16(data, offset);
Expand All @@ -159,7 +159,22 @@ var JpegImage = (function JpegImageClosure() {
scanLines
);
}
} else if (nextByte === 0xd9) {
} else if (nextByte === /* EOI = */ 0xd9) {
if (parseDNLMarker) {
// NOTE: only 8-bit JPEG images are supported in this decoder.
const maybeScanLines = blockRow * 8;
// Heuristic to attempt to handle corrupt JPEG images with too
// large `scanLines` parameter, by falling back to the currently
// parsed number of scanLines when it's at least one order of
// magnitude smaller than expected (fixes issue10880.pdf).
if (maybeScanLines > 0 && maybeScanLines < frame.scanLines / 10) {
throw new DNLMarkerError(
"Found EOI marker (0xFFD9) while parsing scan data, " +
"possibly caused by incorrect `scanLines` parameter",
maybeScanLines
);
}
}
throw new EOIMarkerError(
"Found EOI marker (0xFFD9) while parsing scan data"
);
Expand Down Expand Up @@ -337,17 +352,18 @@ var JpegImage = (function JpegImageClosure() {
}
}

let blockRow = 0;
function decodeMcu(component, decode, mcu, row, col) {
var mcuRow = (mcu / mcusPerLine) | 0;
var mcuCol = mcu % mcusPerLine;
var blockRow = mcuRow * component.v + row;
blockRow = mcuRow * component.v + row;
var blockCol = mcuCol * component.h + col;
var offset = getBlockBufferOffset(component, blockRow, blockCol);
decode(component, offset);
}

function decodeBlock(component, decode, mcu) {
var blockRow = (mcu / component.blocksPerLine) | 0;
blockRow = (mcu / component.blocksPerLine) | 0;
var blockCol = mcu % component.blocksPerLine;
var offset = getBlockBufferOffset(component, blockRow, blockCol);
decode(component, offset);
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/issue10880.pdf.link
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/3247065/B3-T-G5-50.pdf
9 changes: 9 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -3634,6 +3634,15 @@
"lastPage": 1,
"type": "eq"
},
{ "id": "issue10880",
"file": "pdfs/issue10880.pdf",
"md5": "244ee5ee3ab88db8d8eb51d4416e2c97",
"rounds": 1,
"link": true,
"firstPage": 7,
"lastPage": 7,
"type": "eq"
},
{ "id": "issue9650",
"file": "pdfs/issue9650.pdf",
"md5": "20d50bda6b1080b6d9088811299c791e",
Expand Down

0 comments on commit 1a97c14

Please sign in to comment.