diff --git a/CHANGELOG.md b/CHANGELOG.md index e12bc9e..e1a4dff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # CHANGELOG +## unreleased +- Fix issue where urls in the form `https://example.com&NewLine;&NewLine;/something` were not properly sanitized + ## 6.0.1 - Fix issue where urls in the form `javascript:alert('xss');` were not properly sanitized diff --git a/src/__tests__/test.ts b/src/__tests__/test.ts index 365dd17..08720ba 100644 --- a/src/__tests__/test.ts +++ b/src/__tests__/test.ts @@ -92,6 +92,12 @@ describe("sanitizeUrl", () => { ); }); + it("removes newline entities from urls", () => { + expect(sanitizeUrl("https://example.com&NewLine;&NewLine;/something")).toBe( + "https://example.com/something" + ); + }); + it("decodes html entities", () => { // all these decode to javascript:alert('xss'); const attackVectors = [ diff --git a/src/index.ts b/src/index.ts index 3852569..5f96750 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,6 +1,6 @@ const invalidProtocolRegex = /^([^\w]*)(javascript|data|vbscript)/im; const htmlEntitiesRegex = /&#(\w+)(^\w|;)?/g; -const htmlTabEntityRegex = /&tab;/gi; +const htmlCtrlEntityRegex = /&(newline|tab);/gi; const ctrlCharactersRegex = /[\u0000-\u001F\u007F-\u009F\u2000-\u200D\uFEFF]/gim; const urlSchemeRegex = /^.+(:|&colon;)/gim; @@ -12,7 +12,6 @@ function isRelativeUrlWithoutProtocol(url: string): boolean { // adapted from https://stackoverflow.com/a/29824550/2601552 function decodeHtmlCharacters(str: string) { - str = str.replace(htmlTabEntityRegex, " "); return str.replace(htmlEntitiesRegex, (match, dec) => { return String.fromCharCode(dec); }); @@ -20,6 +19,7 @@ function decodeHtmlCharacters(str: string) { export function sanitizeUrl(url?: string): string { const sanitizedUrl = decodeHtmlCharacters(url || "") + .replace(htmlCtrlEntityRegex, "") .replace(ctrlCharactersRegex, "") .trim();