Skip to content

Commit

Permalink
feat: convert html email to plain text if no plain text email part is…
Browse files Browse the repository at this point in the history
… available
  • Loading branch information
th0rgall committed Jun 20, 2024
1 parent a298ae3 commit 30e1be7
Show file tree
Hide file tree
Showing 6 changed files with 372 additions and 21 deletions.
6 changes: 6 additions & 0 deletions api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,12 @@ This example (run from the `api` folder) runs tests in the group that includes t
echo "node_modules/.bin/mocha -f sendMessageFromEmail" > runtests.sh && firebase --project demo-test emulators:exec --only auth,firestore --ui ./runtests.sh
```

Some unit tests can be run without starting the Firebase emulators, because they don't have Firebase dependencies, or because their dependencies (like `logger` from `functions-framework`) work standalone.

```
node_modules/.bin/mocha -w -f 'inbound email parser'
```

To prevent Firestore-triggered functions from running (and potentially slowly hitting SendGrid), the emulator example above adds `--only auth,firestore`. Remove this to run the functions anyway for more realistic side-effects.

## Deployment to Firebase
Expand Down
1 change: 1 addition & 0 deletions api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"es6-promise-pool": "^2.5.0",
"firebase-admin": "^11.9.0",
"firebase-functions": "^4.6.0",
"html-to-text": "^9.0.5",
"lodash": "^4.17.21",
"luxon": "^3.4.4",
"node-fetch": "^2",
Expand Down
40 changes: 22 additions & 18 deletions api/src/sendgrid/parseInboundEmail.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const EmailReplyParser = require('email-reply-parser');

// https://github.com/haraka/node-address-rfc2822
const addrparser = require('address-rfc2822');
const { htmlToText } = require('html-to-text');
const { MAX_MESSAGE_LENGTH, sendMessageFromEmail } = require('../chat');
const { sendEmailReplyError } = require('../mail');
const { auth, db } = require('../firebase');
Expand Down Expand Up @@ -76,7 +77,7 @@ const unpackInboundEmailRequest = (req) => {
* See https://docs.sendgrid.com/for-developers/parsing-email/setting-up-the-inbound-parse-webhook
* We use the non-raw webhook.
* @param {UnpackedInboundRequest} unpackedInboundRequest
* @returns {Promise<{
* @returns {{
* envelopeFromEmail?: string,
* headerFrom?: addrparser.Address,
* responseText?: string,
Expand All @@ -87,9 +88,9 @@ const unpackInboundEmailRequest = (req) => {
* },
* senderIP?: string,
* html?: string
* }>}
* }}
*/
const parseUnpackedInboundEmail = async (unpackedInboundRequest) => {
exports.parseUnpackedInboundEmail = (unpackedInboundRequest) => {
const {
text: emailPlainText,
html,
Expand Down Expand Up @@ -147,20 +148,28 @@ const parseUnpackedInboundEmail = async (unpackedInboundRequest) => {

// Attempt to parse the response text and Chat ID from the email
let chatId;
/**
 * Runs the plain-text email body through EmailReplyParser and splits it
 * into the visible reply and the quoted part.
 * @param {string} text plain-text email body
 * @returns {[string, string]} `[visibleText, quotedText]`; only the visible text is trimmed
 */
function parsePlainTextEmail(text) {
  const replyParser = new EmailReplyParser();
  const email = replyParser.read(text);
  // getVisibleText() does not trim its result, so trim it here
  const visibleText = email.getVisibleText().trim();
  const quotedText = email.getQuotedText();
  return [visibleText, quotedText];
}
try {
// Attempt parsing plain text if it exists
if (emailPlainText) {
const parsedEmail = new EmailReplyParser().read(emailPlainText);
// Trim is not done automatically
responsePlainText = parsedEmail.getVisibleText().trim();
quotedPlainText = parsedEmail.getQuotedText();
if (emailPlainText?.trim()) {
[responsePlainText, quotedPlainText] = parsePlainTextEmail(emailPlainText);
} else if (html) {
// TODO: plain text doesn't exist, attempt deriving plain text from HTML text, if that exists
logger.debug('TODO: Deriving plain text from HTML');
logger.debug('Deriving plain text from HTML');
const convertedPlainText = htmlToText(html, { wordwrap: null });
[responsePlainText, quotedPlainText] = parsePlainTextEmail(convertedPlainText);
}

// Find the chat ID from the quoted text
const chatIdRegex = /\/chat\/.+?\/([a-zA-Z0-9]+)>/;
//
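// The ID capture stops at the first non-alphanumeric character, so this regex
// relies on a delimiter such as > ] or " (or the end of the text) directly after the chat ID:
// > is used in Gmail plain text parts
// ] is used in html-to-text
// " is used in html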
const chatIdRegex = /\/chat\/.+?\/([a-zA-Z0-9]+)/;
let chatRegexResult;
const possibleSources = [
['parsed quoted', quotedPlainText],
Expand Down Expand Up @@ -206,11 +215,6 @@ Sender IP: ${senderIP}`);
};
};

/**
* @template T
* @typedef {T extends Promise<infer U> ? U : T} Unpacked
*/

/**
*
* See https://docs.sendgrid.com/for-developers/parsing-email/setting-up-the-inbound-parse-webhook
Expand All @@ -226,12 +230,12 @@ exports.parseInboundEmail = async (req, res) => {
}

/**
* @type {Unpacked<ReturnType<typeof parseUnpackedInboundEmail>>}
* @type {ReturnType<typeof exports.parseUnpackedInboundEmail>}
*/
let parsedEmail = { dkimResult: {} };
try {
try {
parsedEmail = await parseUnpackedInboundEmail(unpackInboundEmailRequest(req));
parsedEmail = this.parseUnpackedInboundEmail(unpackInboundEmailRequest(req));
} catch (parseError) {
logger.error(parseError);
throw new Error('Email error: unknown parsing error');
Expand Down
152 changes: 152 additions & 0 deletions api/test/input/240619_12_04.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body dir="auto">Hoi<div>Ik ben nu in IJsland. We moeten even afstemmen wat jullie willen. Sanitair en zo.&nbsp;</div><div>Ik ben ook niet zo vaak thuis dan. Want TAZ 😀</div><div>TestHost<br id="lineBreakAtBeginningOfSignature"><div dir="ltr">Verstuurd vanaf mijn iPhone</div><div dir="ltr"><br><blockquote type="cite">Op 19 jun 2024 om 08:00 heeft Welcome To My Garden &lt;support@welcometomygarden.org&gt; het volgende geschreven:<br><br></blockquote></div><blockquote type="cite"><div dir="ltr">


<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=1">
<!--[if !mso]><!-->
<meta http-equiv="X-UA-Compatible" content="IE=Edge">
<!--<![endif]-->
<!--[if (gte mso 9)|(IE)]>
<xml>
<o:OfficeDocumentSettings>
<o:AllowPNG/>
<o:PixelsPerInch>96</o:PixelsPerInch>
</o:OfficeDocumentSettings>
</xml>
<![endif]-->
<!--[if (gte mso 9)|(IE)]>
<style type="text/css">
body {width: 500px;margin: 0 auto;}
table {border-collapse: collapse;}
table, td {mso-table-lspace: 0pt;mso-table-rspace: 0pt;}
img {-ms-interpolation-mode: bicubic;}
</style>
<![endif]-->

<!--user entered Head Start--><link href="https://fonts.googleapis.com/css2?family=Montserrat" rel="stylesheet"><style>* { font-family: Montserrat, sans-serif; }</style><!--End Head user entered-->


<center class="wrapper" data-link-color="#C24F38" data-body-style="font-size:15px; font-family:arial,helvetica,sans-serif; color:#495747; background-color:#FFFFFF;">
<div class="webkit">
<table cellpadding="0" cellspacing="0" border="0" width="100%" class="wrapper" bgcolor="#FFFFFF">
<tbody><tr>
<td valign="top" bgcolor="#FFFFFF" width="100%">
<table width="100%" role="content-container" class="outer" align="center" cellpadding="0" cellspacing="0" border="0">
<tbody><tr>
<td width="100%">
<table width="100%" cellpadding="0" cellspacing="0" border="0">
<tbody><tr>
<td>
<!--[if mso]>
<center>
<table><tr><td width="500">
<![endif]-->
<table width="100%" cellpadding="0" cellspacing="0" border="0" style="width:100%; max-width:500px;" align="center">
<tbody><tr>
<td role="modules-container" style="padding:0px 0px 0px 0px; color:#495747; text-align:left;" bgcolor="#FFFFFF" width="100%" align="left"><table class="module preheader preheader-hide" role="module" data-type="preheader" border="0" cellpadding="0" cellspacing="0" width="100%" style="display: none !important; mso-hide: all; visibility: hidden; opacity: 0; color: transparent; height: 0; width: 0;">
<tbody><tr>
<td role="module-content">
<p></p>
</td>
</tr>
</tbody></table><table class="wrapper" role="module" data-type="image" border="0" cellpadding="0" cellspacing="0" width="100%" style="table-layout: fixed;" data-muid="489e2362-5304-48fa-ad54-9721d37d6c5b">
<tbody>
<tr>
<td style="font-size:6px; line-height:10px; padding:0px 0px 0px 0px;" valign="top" align="center">
<img class="max-width" border="0" style="display:block; color:#000000; text-decoration:none; font-family:Helvetica, arial, sans-serif; font-size:16px; max-width:40% !important; width:40%; height:auto !important;" width="200" alt="" data-proportionally-constrained="true" data-responsive="true" src="http://cdn.mcauto-images-production.sendgrid.net/4287b09c9b6674a6/c8989278-ff02-42ac-89f4-6fa9edb7758c/1256x744.png" data-unique-identifier="">
</td>
</tr>
</tbody>
</table><table class="module" role="module" data-type="text" border="0" cellpadding="0" cellspacing="0" width="100%" style="table-layout: fixed;" data-muid="398dceef-2423-4f3f-ae4f-0244223c3cb3" data-mc-module-version="2019-10-22">
<tbody>
<tr>
<td style="padding:18px 0px 18px 0px; line-height:22px; text-align:inherit;" height="100%" valign="top" bgcolor="" role="module-content"><div><div style="font-family: inherit; text-align: inherit">Hallo TestHost!</div>
<div style="font-family: inherit; text-align: inherit"><br></div>
<div style="font-family: inherit; text-align: inherit"><span style="color: #495747">Fred heeft je een bericht gestuurd op </span><a href="https://welcometomygarden.org/"><span style="color: #495747"><u>Welcome To My Garden</u></span></a><span style="color: #495747">. Je kunt antwoorden via de </span><a href="https://welcometomygarden.org/chat"><span style="color: #495747"><u>chat op de website</u></span></a><span style="color: #495747">. Let op: antwoord niet op deze email; anders kan de persoon die je gecontacteerd heeft je bericht niet lezen.</span></div><div></div></div></td>
</tr>
</tbody>
</table><table class="module" role="module" data-type="text" border="0" cellpadding="0" cellspacing="0" width="100%" style="table-layout: fixed;" data-muid="edfb45d4-c593-4173-af77-24487302b824" data-mc-module-version="2019-10-22">
<tbody>
<tr>
<td style="padding:0px 0px 30px 40px; line-height:22px; text-align:inherit;" height="100%" valign="top" bgcolor="" role="module-content"><div><div style="font-family: inherit; text-align: inherit"><span style="color: #696969">"Hallo TestHost! Wij zijn drie leuke testreizigers en Fred en wij zijn op zoek naar een slaapplek van 5 tot 10 augustus in de buurt van Oostende. We zouden die periode graag naar Theater Aan Zee gaan! We zijn allemaal zelf theaterstudenten tussen de 19 en 25 jaar.

Laat je iets weten?
Lieve groetjes,
Fred "</span></div><div></div></div></td>
</tr>
</tbody>
</table><table border="0" cellpadding="0" cellspacing="0" class="module" data-role="module-button" data-type="button" role="module" style="table-layout:fixed;" width="100%" data-muid="adb7a0a6-88f8-4c7f-ad4d-e69d6b4939c3">
<tbody>
<tr>
<td align="center" bgcolor="" class="outer-td" style="padding:10px 0px 25px 0px;">
<table border="0" cellpadding="0" cellspacing="0" class="wrapper-mobile" style="text-align:center;">
<tbody>
<tr>
<td align="center" bgcolor="#495747" class="inner-td" style="border-radius:6px; font-size:16px; text-align:center; background-color:inherit;">
<a href="https://welcometomygarden.org/chat/fred/59sd12zTmJJhDFSQfP" style="background-color:#495747; border:1px solid #333333; border-color:#333333; border-radius:6px; border-width:1px; color:#ffffff; display:inline-block; font-size:14px; font-weight:normal; letter-spacing:0px; line-height:normal; padding:10px 18px 12px 18px; text-align:center; text-decoration:none; border-style:solid;" target="_blank">Reageer op Fred</a>
</td>
</tr>
</tbody>
</table>
</td>
</tr>
</tbody>
</table><table class="module" role="module" data-type="text" border="0" cellpadding="0" cellspacing="0" width="100%" style="table-layout: fixed;" data-muid="16faa0b8-7818-48e7-a62c-bedbcffa9de5" data-mc-module-version="2019-10-22">
<tbody>
<tr>
<td style="padding:5px 0px 10px 0px; line-height:22px; text-align:inherit;" height="100%" valign="top" bgcolor="" role="module-content"><div><div style="font-family: inherit; text-align: inherit">Met trage groet,&nbsp;</div>
<div style="font-family: inherit; text-align: inherit"><br>
<strong>Het WTMG-team</strong></div>
<div style="font-family: inherit; text-align: inherit">Volg WTMG op <a href="https://www.facebook.com/Welcome2mygarden"><span style="color: #495747"><u>Facebook</u></span></a><span style="color: #495747"> en </span><a href="https://www.instagram.com/welcometomygarden_org/"><span style="color: #495747"><u>Instagram</u></span></a></div><div></div></div></td>
</tr>
</tbody>
</table><table class="module" role="module" data-type="divider" border="0" cellpadding="0" cellspacing="0" width="100%" style="table-layout: fixed;" data-muid="fd5bf1a5-ba3e-4ccf-9503-cfc90bed0d83">
<tbody>
<tr>
<td style="padding:10px 0px 10px 0px;" role="module-content" height="100%" valign="top" bgcolor="">
<table border="0" cellpadding="0" cellspacing="0" align="center" width="100%" height="4px" style="line-height:4px; font-size:4px;">
<tbody>
<tr>
<td style="padding:0px 0px 4px 0px;" bgcolor="#495747"></td>
</tr>
</tbody>
</table>
</td>
</tr>
</tbody>
</table><table class="module" role="module" data-type="text" border="0" cellpadding="0" cellspacing="0" width="100%" style="table-layout: fixed;" data-muid="cd9a643d-0206-4fc6-ac0b-c89abb295f17" data-mc-module-version="2019-10-22">
<tbody>
<tr>
<td style="padding:10px 0px 5px 0px; line-height:14px; text-align:inherit; background-color:#FFFFFF;" height="100%" valign="top" bgcolor="#FFFFFF" role="module-content"><div><div style="font-family: inherit; text-align: center"><span style="font-size: 14px">Verzonden met 💚 door </span><a href="https://mc.sendgrid.com/dynamic-templates/d-dsafh9sadfads-sdafsdfdsa" title="<span data-offset-key=&quot;4n9g4-1-0&quot; style=&quot;text-decoration: underline; color: rgb(73, 87, 71); font-size: 14px;&quot;><span data-text=&quot;true&quot;>Welcome To My Garden</span></span>"><span style="text-decoration-line: underline; text-decoration-style: solid; text-decoration-color: currentcolor; text-decoration-thickness: auto; color: #495747; font-size: 14px">Welcome To My Garden</span></a></div>
<div style="font-family: inherit; text-align: center"><span style="font-size: 12px; line-height: 20px"><em>Adres: Van Bortonnestraat, 1090 Jette<br>
</em></span></div><div></div></div></td>
</tr>
</tbody>
</table><table class="module" role="module" data-type="text" border="0" cellpadding="0" cellspacing="0" width="100%" style="table-layout: fixed;" data-muid="aa6e8aa6-b2e0-4f79-999e-97335590940f.1" data-mc-module-version="2019-10-22">
<tbody>
<tr>
<td style="padding:5px 0px 5px 0px; line-height:22px; text-align:inherit;" height="100%" valign="top" bgcolor="" role="module-content"><div><div style="font-family: inherit; text-align: center"><span style="font-size: 12px">Ontvang je liever geen emails meer van ons? Geen probleem! Je kan je </span><a href="https://welcometomygarden.org/account"><span style="font-size: 12px; color: #ed7f69"><u>hier uitschrijven</u></span></a><span style="font-size: 12px">.</span></div><div></div></div></td>
</tr>
</tbody>
</table></td>
</tr>
</tbody></table>
<!--[if mso]>
</td>
</tr>
</table>
</center>
<![endif]-->
</td>
</tr>
</tbody></table>
</td>
</tr>
</tbody></table>
</td>
</tr>
</tbody></table>
</div>
</center>
<img src="https://u119245.ct.sendgrid.net/wf/open?upn=sadfas.dsafdsafihe.iasehfasdifd" alt="" width="1" height="1" border="0" style="height:1px !important;width:1px !important;border-width:0 !important;margin-top:0 !important;margin-bottom:0 !important;margin-right:0 !important;margin-left:0 !important;padding-top:0 !important;padding-bottom:0 !important;padding-right:0 !important;padding-left:0 !important;" data-unique-identifier="">
</div></blockquote></div></body></html>
Loading

0 comments on commit 30e1be7

Please sign in to comment.