Merge branch 'main' into fix-17828

Expensify · May 17, 2023 · 13ea2b4 · 13ea2b4
2 parents 2716d0b + c20d572
commit 13ea2b4
Show file tree

Hide file tree

Showing 6 changed files with 104 additions and 19 deletions.
diff --git a/__tests__/ExpensiMark-HTML-test.js b/__tests__/ExpensiMark-HTML-test.js
@@ -401,7 +401,7 @@ test('Test url replacements', () => {
         + '<a href="https://expensify.cash/#/r/1234" target="_blank" rel="noreferrer noopener">https://expensify.cash/#/r/1234</a> '
         + '<a href="https://github.com/Expensify/ReactNativeChat/pull/6.45" target="_blank" rel="noreferrer noopener">https://github.com/Expensify/ReactNativeChat/pull/6.45</a> '
         + '<a href="https://github.com/Expensify/Expensify/issues/143,231" target="_blank" rel="noreferrer noopener">https://github.com/Expensify/Expensify/issues/143,231</a> '
-        + '<a href="https://testRareTLDs.beer" target="_blank" rel="noreferrer noopener">testRareTLDs.beer</a> '
+        + '<a href="https://testraretlds.beer" target="_blank" rel="noreferrer noopener">testRareTLDs.beer</a> '
         + '<a href="mailto:test@expensify.com">test@expensify.com</a> '
         + 'test.completelyFakeTLD '
         + '<a href="https://www.expensify.com/_devportal/tools/logSearch/#query=request_id:(%22Ufjjim%22)+AND+timestamp:[2021-01-08T03:48:10.389Z+TO+2021-01-08T05:48:10.389Z]&amp;index=logs_expensify-008878" target="_blank" rel="noreferrer noopener">https://www.expensify.com/_devportal/tools/logSearch/#query=request_id:(%22Ufjjim%22)+AND+timestamp:[2021-01-08T03:48:10.389Z+TO+2021-01-08T05:48:10.389Z]&amp;index=logs_expensify-008878</a>) '
@@ -747,6 +747,11 @@ test('Test for link with no content', () => {
     expect(parser.replace(testString)).toBe(resultString);
 });
 
+test('Test for link with emoji', () => {
+    const testString = '[😀](www.link.com)'; // the link text consists only of an emoji
+    const resultString = '[😀](<a href="https://www.link.com" target="_blank" rel="noreferrer noopener">www.link.com</a>)';
+    expect(parser.replace(testString)).toBe(resultString); // the markdown link syntax stays literal; only the bare URL is auto-linked
+});
 test('Test quotes markdown replacement with heading inside', () => {
     let testString = '> # heading';
     expect(parser.replace(testString)).toBe('<blockquote><h1>heading</h1></blockquote>');
@@ -910,3 +915,14 @@ test('Test for @here mention without space or supported styling character', () =
     const resultString = 'hi@<a href=\"mailto:username@expensify.com\">username@expensify.com</a>';
     expect(parser.replace(testString)).toBe(resultString);
 });
+
+test('Skip rendering invalid markdown',() => {
+    let testString = '_*test_*'; // interleaved markers: the underscore pair closes before the asterisk pair does
+    expect(parser.replace(testString)).toBe('<em>*test</em>*'); // only italic renders; the asterisks stay literal
+
+    testString = '*_test*_';
+    expect(parser.replace(testString)).toBe('*<em>test*</em>'); // again only the underscore pair renders
+
+    testString = '~*test~*';
+    expect(parser.replace(testString)).toBe('~<strong>test~</strong>'); // only bold renders; the tildes stay literal
+});
diff --git a/__tests__/Str-test.js b/__tests__/Str-test.js
@@ -84,3 +84,15 @@ describe('Str.isValidMention', () => {
         expect(Str.isValidMention('"@username@expensify.com"')).toBeTruthy();
     });
 });
+
+describe('Str.sanitizeURL', () => {
+    it('Normalize domain name to lower case and add missing https:// protocol', () => {
+        expect(Str.sanitizeURL('https://google.com')).toBe('https://google.com'); // already normalized: returned unchanged
+        expect(Str.sanitizeURL('google.com')).toBe('https://google.com'); // missing protocol: https:// is added
+        expect(Str.sanitizeURL('Https://google.com')).toBe('https://google.com'); // protocol is lowercased
+        expect(Str.sanitizeURL('https://GOOgle.com')).toBe('https://google.com'); // domain is lowercased
+        expect(Str.sanitizeURL('FOO.com/blah_BLAH')).toBe('https://foo.com/blah_BLAH'); // the path keeps its original case
+        expect(Str.sanitizeURL('http://FOO.com/blah_BLAH')).toBe('http://foo.com/blah_BLAH'); // an explicit http:// is not upgraded to https://
+        expect(Str.sanitizeURL('HTtp://FOO.com/blah_BLAH')).toBe('http://foo.com/blah_BLAH'); // mixed-case protocol and domain combined
+    });
+});
diff --git a/lib/CONST.jsx b/lib/CONST.jsx
@@ -331,6 +331,13 @@ export const CONST = {
          * @type String
          */
         MARKDOWN_EMAIL: "([a-zA-Z0-9.!#$%&'+/=?^`{|}-][a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]*@[a-zA-Z0-9-]+?(\\.[a-zA-Z]+)+)",
+
+        /**
+         * Regex matching emojis contained in a text.
+         *
+         * The `u` flag is required for the `\p{...}` property escape and the `\u{...}` code point escapes.
+         * The `g` flag makes this shared instance stateful (via `lastIndex`) when it is used with
+         * `RegExp.prototype.test` or `RegExp.prototype.exec`; `String.prototype.match` is not affected.
+         *
+         * @type RegExp
+         */
+        EMOJIS: /[\p{Extended_Pictographic}\u200d\u{1f1e6}-\u{1f1ff}\u{1f3fb}-\u{1f3ff}\u{e0020}-\u{e007f}\u20E3\uFE0F]|[#*0-9]\uFE0F?\u20E3/gu,
     },
 
     REPORT: {

diff --git a/lib/ExpensiMark.js b/lib/ExpensiMark.js
@@ -113,15 +113,11 @@ export default class ExpensiMark {
                     return this.modifyTextForUrlLinks(regex, textToProcess, replacement);
                 },
 
-                // We use a function here to check if there is already a https:// on the link.
-                // If there is not, we force the link to be absolute by prepending '//' to the target.
-                replacement: (match, g1, g2, g3) => {
-                    if (!g1.trim()) {
+                replacement: (match, g1, g2) => {
+                    if (g1.match(CONST.REG_EXP.EMOJIS) || !g1.trim()) {
                         return match;
                     }
-
-                    const href = g3 ? g2.replace(g3, g3.toLowerCase()) : `https://${g2}`;
-                    return `<a href="${href}" target="_blank" rel="noreferrer noopener">${g1.trim()}</a>`;
+                    return `<a href="${Str.sanitizeURL(g2)}" target="_blank" rel="noreferrer noopener">${g1.trim()}</a>`;
                 },
             },
 
@@ -154,10 +150,8 @@ export default class ExpensiMark {
                     return this.modifyTextForUrlLinks(regex, textToProcess, replacement);
                 },
 
-                // We use a function here to check if there is already a https:// on the link.
-                // If there is not, we force the link to be absolute by prepending '//' to the target.
-                replacement: (match, g1, g2, g3) => {
-                    const href = g3 ? g2.replace(g3, g3.toLowerCase()) : `https://${g2}`;
+                replacement: (match, g1, g2) => {
+                    const href = Str.sanitizeURL(g2);
                     return `${g1}<a href="${href}" target="_blank" rel="noreferrer noopener">${g2}</a>${g1}`;
                 },
             },
@@ -173,20 +167,20 @@ export default class ExpensiMark {
                  */
                 name: 'italic',
                 regex: /(?!_blank")[^\W_]?_((?![\s_])[\s\S]*?[^\s_])_(?![^\W_])(?![^<]*(<\/pre>|<\/code>|<\/a>|_blank))/g,
-                replacement: (match, g1) => (g1.includes('<pre>') ? match : `<em>${g1}</em>`),
+                replacement: (match, g1) => (g1.includes('<pre>') || this.containsNonPairTag(g1) ? match : `<em>${g1}</em>`),
             },
             {
                 // Use \B in this case because \b doesn't match * or ~.
                 // \B will match everything that \b doesn't, so it works
                 // for * and ~: https://www.rexegg.com/regex-boundaries.html#notb
                 name: 'bold',
                 regex: /\B\*((?=\S)(([^\s*]|\s(?!\*))+?))\*\B(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
-                replacement: (match, g1) => (g1.includes('<pre>') ? match : `<strong>${g1}</strong>`),
+                replacement: (match, g1) => (g1.includes('<pre>') || this.containsNonPairTag(g1) ? match : `<strong>${g1}</strong>`),
             },
             {
                 name: 'strikethrough',
                 regex: /\B~((?=\S)((~~(?!~)|[^\s~]|\s(?!~))+?))~\B(?![^<]*(<\/pre>|<\/code>|<\/a>))/g,
-                replacement: '<del>$1</del>',
+                replacement: (match, g1) => (this.containsNonPairTag(g1) ? match : `<del>${g1}</del>`),
             },
             {
                 name: 'quote',
@@ -465,7 +459,7 @@ export default class ExpensiMark {
                     filterRules: ['bold', 'strikethrough', 'italic'],
                     shouldEscapeText: false,
                 });
-                replacedText = replacedText.concat(replacement(match[0], linkText, match[2], match[4]));
+                replacedText = replacedText.concat(replacement(match[0], linkText, match[2]));
             }
             startIndex = match.index + (match[0].length);
 
@@ -685,4 +679,43 @@ export default class ExpensiMark {
         }
         return textToCheck;
     }
+
+    /**
+     * Checks whether the text contains an HTML tag without a properly nested counterpart,
+     * i.e. an opening tag that is never closed, a closing tag that was never opened, or a
+     * closing tag that does not match the most recently opened tag.
+     *
+     * Opening `br` tags (lowercase only) are ignored. Tag names are matched case-insensitively
+     * by the regex but compared case-sensitively, so `<B>x</b>` is reported as a non-pair tag.
+     * NOTE(review): other void or self-closing tags (e.g. `<hr>`, `<img />`) are treated as
+     * regular opening tags and therefore count as unclosed - confirm this is intended.
+     *
+     * @param {String} textToCheck - Text to check
+     *
+     * @returns {Boolean} true if at least one tag is unbalanced, false otherwise
+     */
+    containsNonPairTag(textToCheck) {
+        // Matches an opening tag (name captured in group 1) or a closing tag (name captured in group 2)
+        const tagRegExp = /<([a-z][a-z0-9-]*)\b[^>]*>|<\/([a-z][a-z0-9-]*)\s*>/gi;
+
+        // Stack of the names of the opening tags that have not been closed yet
+        const tagStack = [];
+
+        // Walk through all HTML tags in the string, in order of appearance
+        let match = tagRegExp.exec(textToCheck);
+        while (match) {
+            const openingTag = match[1];
+            const closingTag = match[2];
+
+            if (openingTag && openingTag !== 'br') {
+                // If it's an opening tag (other than br, which has no closing tag), push it onto the stack
+                tagStack.push(openingTag);
+            } else if (closingTag) {
+                // If it's a closing tag, pop the most recent unclosed opening tag (undefined when the stack is empty)
+                const expectedTag = tagStack.pop();
+
+                // If the closing tag doesn't match the expected opening tag, the text contains a non-pair tag: return true
+                if (closingTag !== expectedTag) {
+                    return true;
+                }
+            }
+            match = tagRegExp.exec(textToCheck);
+        }
+
+        // If there are any tags left in the stack, they're unclosed
+        return tagStack.length !== 0;
+    }
 }
diff --git a/lib/Url.js b/lib/Url.js
@@ -5,14 +5,14 @@ const URL_PROTOCOL_REGEX = '((ht|f)tps?:\\/\\/)';
 const URL_WEBSITE_REGEX = `${URL_PROTOCOL_REGEX}?((?:www\\.)?[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\\.)+(?:${TLD_REGEX})(?:\\:${ALLOWED_PORTS}|\\b|(?=_))`;
 const addEscapedChar = reg => `(?:${reg}|&(?:amp|quot|#x27);)`;
 const URL_PATH_REGEX = `(?:${addEscapedChar('[.,=(+$!*]')}?\\/${addEscapedChar('[-\\w$@.+!*:(),=%~]')}*${addEscapedChar('[-\\w~@:%)]')}|\\/)*`;
-const URL_PARAM_REGEX = `(?:\\?${addEscapedChar('[-\\w$@.+!*()\\/,=%{}:;\\[\\]\\|_]')}*)?`;
+const URL_PARAM_REGEX = `(?:\\?${addEscapedChar('[-\\w$@.+!*()\\/,=%{}:;\\[\\]\\|_|~]')}*)?`;
 const URL_FRAGMENT_REGEX = `(?:#${addEscapedChar('[-\\w$@.+!*()[\\],=%;\\/:~]')}*)?`;
-const URL_REGEX = `(${URL_WEBSITE_REGEX}${URL_PATH_REGEX}(?:${URL_PARAM_REGEX}|${URL_FRAGMENT_REGEX})*)`;
+const URL_REGEX = `((${URL_WEBSITE_REGEX})${URL_PATH_REGEX}(?:${URL_PARAM_REGEX}|${URL_FRAGMENT_REGEX})*)`;
 
 const URL_REGEX_WITH_REQUIRED_PROTOCOL = URL_REGEX.replace(`${URL_PROTOCOL_REGEX}?`, URL_PROTOCOL_REGEX);
 
 const LOOSE_URL_WEBSITE_REGEX = `${URL_PROTOCOL_REGEX}([-\\w]+(\\.[-\\w]+)*)(?:\\:${ALLOWED_PORTS}|\\b|(?=_))`;
-const LOOSE_URL_REGEX = `(${LOOSE_URL_WEBSITE_REGEX}${URL_PATH_REGEX}(?:${URL_PARAM_REGEX}|${URL_FRAGMENT_REGEX})*)`;
+const LOOSE_URL_REGEX = `((${LOOSE_URL_WEBSITE_REGEX})${URL_PATH_REGEX}(?:${URL_PARAM_REGEX}|${URL_FRAGMENT_REGEX})*)`;
 
 
 const MARKDOWN_URL_REGEX = `(${LOOSE_URL_REGEX}|${URL_REGEX})`;

diff --git a/lib/str.js b/lib/str.js
@@ -3,6 +3,7 @@ import _ from 'underscore';
 import {AllHtmlEntities} from 'html-entities';
 import replaceAll from 'string.prototype.replaceall';
 import {CONST} from './CONST';
+import {URL_REGEX} from './Url';
 
 const REMOVE_SMS_DOMAIN_PATTERN = new RegExp(`@${CONST.SMS.DOMAIN}`, 'gi');
 
@@ -1021,6 +1022,22 @@ const Str = {
         return (typeof url === 'string' && url.startsWith('/')) ? url : `/${url}`;
     },
 
+    /**
+     * Formats a URL by converting its website part (protocol and domain name) to lowercase and
+     * adding the 'https://' protocol when the URL has none. Input that does not fully match
+     * URL_REGEX (case-insensitively) is returned unchanged.
+     *
+     * Relies on the capture groups of URL_REGEX: match[1] is the full URL, match[2] is the website
+     * part (optional protocol plus domain) and match[3] is the protocol, undefined when missing.
+     *
+     * @param {String} url The URL to be formatted
+     * @returns {String} The formatted URL
+     */
+    sanitizeURL(url) {
+        const regex = new RegExp(`^${URL_REGEX}$`, 'i'); // anchored, so the whole input has to be a URL
+        const match = regex.exec(url);
+        if (!match) {
+            return url;
+        }
+        const website = match[3] ? match[2] : `https://${match[2]}`; // only prepend https:// when no protocol was captured
+        return website.toLowerCase() + this.cutBefore(match[1], match[2]); // cutBefore is defined outside this view; presumably it yields what follows the website part - TODO confirm
+    },
+
     /**
      * Checks if parameter is a string or function
      * if it is a function then we will call it with