Skip to content

Commit

Permalink
Improved MIME charset decoding #662
Browse files Browse the repository at this point in the history
  • Loading branch information
the-djmaze committed Nov 15, 2022
1 parent 456ade5 commit f23e0d7
Showing 1 changed file with 19 additions and 28 deletions.
47 changes: 19 additions & 28 deletions dev/Mime/Parser.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@

const
// RFC2045
QPDecodeIn = /=([0-9A-F]{2})/g,
QPDecodeOut = (...args) => String.fromCharCode(parseInt(args[1], 16)),

utf8To16 = data => new TextDecoder().decode(Uint8Array.from(data, c => c.charCodeAt(0)));
QPDecodeParams = [/=([0-9A-F]{2})/g, (...args) => String.fromCharCode(parseInt(args[1], 16))],
QPDecode = data => data.replace(/=\r?\n/g, '').replace(...QPDecodeParams),
decodeText = (charset, data) => {
try {
// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
return new TextDecoder(charset).decode(Uint8Array.from(data, c => c.charCodeAt(0)));
} catch (e) {
console.error({charset:charset,error:e});
}
};

export function ParseMime(text)
{
Expand Down Expand Up @@ -42,23 +48,14 @@ export function ParseMime(text)

get body() {
let body = this.bodyRaw,
// charset = this.header('content-type')?.params.charset,
charset = this.header('content-type')?.params.charset,
encoding = this.headerValue('content-transfer-encoding');
if ('quoted-printable' == encoding) {
body = utf8To16(body.replace(/=\r?\n/g, '').replace(QPDecodeIn, QPDecodeOut));
body = QPDecode(body);
} else if ('base64' == encoding) {
body = utf8To16(atob(body.replace(/\r?\n/g, '')));
body = atob(body.replace(/\r?\n/g, ''));
}
/*
// TODO: convert charsets other then utf8
try {
// https://developer.mozilla.org/en-US/docs/Web/API/Encoding_API/Encodings
return new TextDecoder(charset).decode(Uint8Array.from(body, c => c.charCodeAt(0)));
// return new TextDecoder(charset).decode(new TextEncoder().encode(body));
} catch (e) {
}
*/
return body;
return decodeText(charset, body);
}

get dataUrl() {
Expand All @@ -68,7 +65,7 @@ export function ParseMime(text)
body = body.replace(/\r?\n/g, '');
} else {
if ('quoted-printable' == encoding) {
body = body.replace(/=\r?\n/g, '').replace(QPDecodeIn, QPDecodeOut);
body = QPDecode(body);
}
body = btoa(body);
}
Expand Down Expand Up @@ -114,16 +111,10 @@ export function ParseMime(text)
[...header.matchAll(/;\s*([^;=]+)=\s*"?([^;"]+)"?/g)].forEach(param =>
params[param[1].trim().toLowerCase()] = param[2].trim()
);
// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
match[2] = match[2].trim().replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/g, (m, charset, encoding, text) => {
m = decodeURIComponent(
'B' == encoding ? escape(atob(text))
// else 'Q'
: text.replace(/=([A-Z0-9]{2})/, '%$1')
);
// TODO: convert when charset != utf-8
return m;
});
// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
match[2] = match[2].trim().replace(/=\?([^?]+)\?(B|Q)\?(.+?)\?=/g, (m, charset, encoding, text) =>
decodeText(charset, 'B' == encoding ? atob(text) : QPDecode(text))
);
headers[match[1].trim().toLowerCase()] = {
value: match[2],
params: params
Expand Down

0 comments on commit f23e0d7

Please sign in to comment.