Skip to content

Commit

Permalink
Avoid processing the body as headers (#942)
Browse files Browse the repository at this point in the history
* Avoid processing the body as headers

* Improve GetHeaderList() performance
by stopping the comparisons altogether after reaching body.

---------

Co-authored-by: Stephen Griffin <stephenegriffin@users.noreply.github.com>
  • Loading branch information
oh2fih and stephenegriffin authored Mar 18, 2024
1 parent e6cba07 commit 83a38a9
Showing 1 changed file with 49 additions and 39 deletions.
88 changes: 49 additions & 39 deletions src/Scripts/Headers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,55 +45,65 @@ export class HeaderModel {
}

public GetHeaderList(headers: string): Header[] {
// First, break up out input by lines.
const lines: string[] = headers.split(/[\n\r]+/);
// First, break up out input by lines.
// Keep empty lines for recognizing the boundary between the header section & the body.
const lines: string[] = headers.split(/\n|\r|\r\n/);

const headerList: Header[] = [];
let iNextHeader: number = 0;
let prevHeader: Header | undefined;
// Unfold lines
lines.forEach((line: string) => {
// Skip empty lines
if (line === "") return;
let body: boolean = false;
header_section: while(!body) {
unfold_lines: for (let line of lines) {
// Handling empty lines. The body is separated from the header section by an empty line (RFC 5322, 2.1).
// To avoid processing the body as headers we should stop there, as someone might paste an entire message.
// Empty lines at the beginning can be omitted, because that could be a common copy-paste error.
if (body) break header_section;
if (line === "") {
if (headerList.length > 0) body = true;
continue unfold_lines;
}

// Recognizing a header:
// - First colon comes before first white space.
// - We're not strictly honoring white space folding because initial white space
// - is commonly lost. Instead, we heuristically assume that space before a colon must have been folded.
// This expression will give us:
// match[1] - everything before the first colon, assuming no spaces (header).
// match[2] - everything after the first colon (value).
const match: RegExpMatchArray | null = line.match(/(^[\w-.]*?): ?(.*)/);
// Recognizing a header:
// - First colon comes before first white space.
// - We're not strictly honoring white space folding because initial white space
// - is commonly lost. Instead, we heuristically assume that space before a colon must have been folded.
// This expression will give us:
// match[1] - everything before the first colon, assuming no spaces (header).
// match[2] - everything after the first colon (value).
const match: RegExpMatchArray | null = line.match(/(^[\w-.]*?): ?(.*)/);

// There's one false positive we might get: if the time in a Received header has been
// folded to the next line, the line might start with something like "16:20:05 -0400".
// This matches our regular expression. The RFC does not preclude such a header, but I've
// never seen one in practice, so we check for and exclude 'headers' that
// consist only of 1 or 2 digits.
if (match && match[1] && !match[1].match(/^\d{1,2}$/)) {
headerList[iNextHeader] = new Header(match[1], match[2] ?? "");
prevHeader = headerList[iNextHeader];
iNextHeader++;
} else {
if (iNextHeader > 0) {
// Tack this line to the previous line
// All folding whitespace should collapse to a single space
line = line.replace(/^[\s]+/, "");
if (!line) return;
if (prevHeader) {
const separator: string = prevHeader.value ? " " : "";
prevHeader.value += separator + line;
}
// There's one false positive we might get: if the time in a Received header has been
// folded to the next line, the line might start with something like "16:20:05 -0400".
// This matches our regular expression. The RFC does not preclude such a header, but I've
// never seen one in practice, so we check for and exclude 'headers' that
// consist only of 1 or 2 digits.
if (match && match[1] && !match[1].match(/^\d{1,2}$/)) {
headerList[iNextHeader] = new Header(match[1], match[2] ?? "");
prevHeader = headerList[iNextHeader];
iNextHeader++;
} else {
// If we didn't have a previous line, go ahead and use this line
if (line.match(/\S/g)) {
headerList[iNextHeader] = new Header("", line);
prevHeader = headerList[iNextHeader];
iNextHeader++;
if (iNextHeader > 0) {
// Tack this line to the previous line
// All folding whitespace should collapse to a single space
line = line.replace(/^[\s]+/, "");
if (!line) continue unfold_lines;
if (prevHeader) {
const separator: string = prevHeader.value ? " " : "";
prevHeader.value += separator + line;
}
} else {
// If we didn't have a previous line, go ahead and use this line
if (line.match(/\S/g)) {
headerList[iNextHeader] = new Header("", line);
prevHeader = headerList[iNextHeader];
iNextHeader++;
}
}
}
}
});
break header_section;
}

// 2047 decode our headers now
headerList.forEach((header: Header) => {
Expand Down

0 comments on commit 83a38a9

Please sign in to comment.