From d4bdc89f1743fe3cdb7c3f24b06e4c875f349b0c Mon Sep 17 00:00:00 2001 From: Kerrie Niemasik Date: Thu, 20 Oct 2022 18:12:27 -0400 Subject: [PATCH] Fix html entity tab (#45) * fix: correct urls that did not sanitize html encoded colons * fix: replace html encoded tabs * update CHANGELOG.md Co-authored-by: Blade Barringer --- CHANGELOG.md | 5 +++++ src/__tests__/test.ts | 10 ++++++++++ src/index.ts | 4 +++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e5d9fa..e40cc26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # CHANGELOG +## unreleased + +- Fix issue where urls in the form `javascript&colon;alert('xss');` were not properly sanitized +- Fix issue where urls in the form `javasc&Tab;ript:alert('XSS');` were not properly sanitized + ## 6.0.0 **Breaking Changes** diff --git a/src/__tests__/test.ts b/src/__tests__/test.ts index 51cb224..365dd17 100644 --- a/src/__tests__/test.ts +++ b/src/__tests__/test.ts @@ -100,6 +100,7 @@ describe("sanitizeUrl", () => { "javascript:alert('XSS')", "jav ascript:alert('XSS');", "  javascript:alert('XSS');", + "javasc&Tab;ript: alert('XSS');", ]; attackVectors.forEach((vector) => { @@ -136,6 +137,15 @@ describe("sanitizeUrl", () => { ); }); + it(`disallows ${protocol} urls that use &colon; for the colon portion of the url`, () => { + expect(sanitizeUrl(`${protocol}&colon;alert(document.domain)`)).toBe( + "about:blank" + ); + expect(sanitizeUrl(`${protocol}&COLON;alert(document.domain)`)).toBe( + "about:blank" + ); + }); + it(`disregards capitalization for ${protocol} urls`, () => { // upper case every other letter in protocol name const mixedCapitalizationProtocol = protocol diff --git a/src/index.ts b/src/index.ts index 801dfae..3852569 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,8 +1,9 @@ const invalidProtocolRegex = /^([^\w]*)(javascript|data|vbscript)/im; const htmlEntitiesRegex = /&#(\w+)(^\w|;)?/g; +const htmlTabEntityRegex = /&tab;/gi; const ctrlCharactersRegex = 
/[\u0000-\u001F\u007F-\u009F\u2000-\u200D\uFEFF]/gim; -const urlSchemeRegex = /^([^:]+):/gm; +const urlSchemeRegex = /^.+(:|&colon;)/gim; const relativeFirstCharacters = [".", "/"]; function isRelativeUrlWithoutProtocol(url: string): boolean { @@ -11,6 +12,7 @@ function isRelativeUrlWithoutProtocol(url: string): boolean { // adapted from https://stackoverflow.com/a/29824550/2601552 function decodeHtmlCharacters(str: string) { + str = str.replace(htmlTabEntityRegex, "&#9;"); return str.replace(htmlEntitiesRegex, (match, dec) => { return String.fromCharCode(dec); });