Skip to content

Commit

Permalink
Update the token reading code to protect against obvious format abuses.
Browse files (browse the repository at this point in the history)
Update the xref loading code to protect against looping xref tables.
  • Loading branch information
michaelrsweet committed Dec 7, 2023
1 parent ed723a4 commit c992b2b
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 5 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,6 +12,9 @@ v1.2.0 (Month DD, YYYY)
functions (Issue #24)
- Renamed `pdfioContentTextNextLine` to `pdfioContentTextNewLine`.
- Now use autoconf to configure the PDFio sources (Issue #54)
- Updated the token reading code to protect against some obvious abuses of the
  PDF format.
- Updated the xref reading code to protect against loops.


v1.1.4 (December 3, 2023)
Expand Down
13 changes: 12 additions & 1 deletion pdfio-file.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -2091,8 +2091,19 @@ load_xref(
PDFIO_DEBUG_VALUE(&trailer);
PDFIO_DEBUG("\n");

if ((xref_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev")) <= 0)
off_t new_offset = (off_t)pdfioDictGetNumber(trailer.value.dict, "Prev");

if (new_offset <= 0)
{
done = true;
}
else if (new_offset == xref_offset)
{
_pdfioFileError(pdf, "Recursive xref table.");
return (false);
}

xref_offset = new_offset;
}

// Once we have all of the xref tables loaded, get the important objects and
Expand Down
51 changes: 47 additions & 4 deletions pdfio-token.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -208,9 +208,10 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufend, // End of buffer
state = '\0'; // Current state
bool saw_nul = false; // Did we see a nul character?
size_t count = 0; // Number of whitespace/comment bytes



//
// "state" is:
//
// - '\0' for idle
Expand All @@ -229,17 +230,38 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
// Skip leading whitespace...
while ((ch = get_char(tb)) != EOF)
{
count ++;

if (ch == '%')
{
// Skip comment
PDFIO_DEBUG("_pdfioTokenRead: Skipping comment...\n");
while ((ch = get_char(tb)) != EOF)
{
count ++;

if (ch == '\n' || ch == '\r')
{
break;
}
else if (count > 2048)
{
_pdfioFileError(tb->pdf, "Comment too long.");
*bufptr = '\0';
return (false);
}
}
}
else if (!isspace(ch))
{
break;
}
else if (count > 2048)
{
_pdfioFileError(tb->pdf, "Too much whitespace.");
*bufptr = '\0';
return (false);
}
}

if (ch == EOF)
Expand All @@ -266,6 +288,8 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr++ = (char)ch;
}

PDFIO_DEBUG("_pdfioTokenRead: state='%c'\n", state);

switch (state)
{
case '(' : // Literal string
Expand Down Expand Up @@ -431,6 +455,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
if (!isdigit(ch) && ch != '.')
{
// End of number...
PDFIO_DEBUG("_pdfioTokenRead: End of number with ch=0x%02x\n", ch);
tb->bufptr --;
break;
}
Expand Down Expand Up @@ -496,6 +521,13 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
return (false);
}
}

if (bufptr == (buffer + 1))
{
_pdfioFileError(tb->pdf, "Empty name.");
*bufptr = '\0';
return (false);
}
break;

case '<' : // Potential hex string
Expand All @@ -519,6 +551,8 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
return (false);
}

count = 0;

do
{
if (isxdigit(ch))
Expand All @@ -527,6 +561,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
{
// Hex digit
*bufptr++ = (char)ch;
count = 0;
}
else
{
Expand All @@ -542,6 +577,16 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack
*bufptr = '\0';
return (false);
}
else
{
count ++;
if (count > 2048)
{
_pdfioFileError(tb->pdf, "Too much whitespace.");
*bufptr = '\0';
return (false);
}
}
}
while ((ch = get_char(tb)) != EOF && ch != '>');

Expand Down Expand Up @@ -569,7 +614,7 @@ _pdfioTokenRead(_pdfio_token_t *tb, // I - Token buffer/stack

*bufptr = '\0';

// PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);
PDFIO_DEBUG("_pdfioTokenRead: Read '%s'.\n", buffer);

return (bufptr > buffer);
}
Expand Down Expand Up @@ -606,7 +651,6 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
tb->bufptr = tb->buffer;
tb->bufend = tb->buffer + bytes;

#if 0
#ifdef DEBUG
unsigned char *ptr; // Pointer into buffer

Expand All @@ -620,7 +664,6 @@ get_char(_pdfio_token_t *tb) // I - Token buffer
}
PDFIO_DEBUG("'\n");
#endif // DEBUG
#endif // 0
}

// Return the next character...
Expand Down

0 comments on commit c992b2b

Please sign in to comment.