Skip to content

Commit

Permalink
Fix html entity tab (#45)
Browse files Browse the repository at this point in the history
* fix: sanitize urls that contain html encoded colons

* fix: replace html encoded tabs

* update CHANGELOG.md

Co-authored-by: Blade Barringer <blade.barringer@paypal.com>
  • Loading branch information
kniemasik and crookedneighbor committed Oct 20, 2022
1 parent b70161d commit d4bdc89
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 1 deletion.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# CHANGELOG

## unreleased

- Fix issue where URLs using an HTML-encoded colon, such as `javascript&colon;alert('xss');`, were not properly sanitized
- Fix issue where URLs using an HTML-encoded tab, such as `javasc&Tab;ript:alert('XSS');`, were not properly sanitized

## 6.0.0

**Breaking Changes**
Expand Down
10 changes: 10 additions & 0 deletions src/__tests__/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ describe("sanitizeUrl", () => {
"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29",
"jav&#x09;ascript:alert('XSS');",
" &#14; javascript:alert('XSS');",
"javasc&Tab;ript: alert('XSS');",
];

attackVectors.forEach((vector) => {
Expand Down Expand Up @@ -136,6 +137,15 @@ describe("sanitizeUrl", () => {
);
});

it(`disallows ${protocol} urls that use &colon; for the colon portion of the url`, () => {
  // The named colon entity must be rejected in both its lowercase and
  // uppercase spellings, checked in that order.
  for (const colonEntity of ["&colon;", "&COLON;"]) {
    const maliciousUrl = `${protocol}${colonEntity}alert(document.domain)`;
    expect(sanitizeUrl(maliciousUrl)).toBe("about:blank");
  }
});

it(`disregards capitalization for ${protocol} urls`, () => {
// upper case every other letter in protocol name
const mixedCapitalizationProtocol = protocol
Expand Down
4 changes: 3 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
/**
 * Matches one of the blocked scheme names (`javascript`, `data`, `vbscript`),
 * case-insensitively, at the start of the input or of any line (`m` flag),
 * optionally preceded by any run of non-word characters.
 */
const invalidProtocolRegex = /^([^\w]*)(javascript|data|vbscript)/im;

/**
 * Matches numeric-style HTML entities: `&#` followed by one or more word
 * characters, with an optional trailing `;`. Group 1 is the text after `&#`.
 *
 * NOTE(review): the `^\w` alternative in group 2 is a start-of-input anchor
 * followed by a word character, not a negated class, so it cannot match after
 * `&#…` has been consumed; in practice group 2 only ever captures `;`.
 */
const htmlEntitiesRegex = /&#(\w+)(^\w|;)?/g;

/** Matches the named tab entity `&tab;` in any letter case (e.g. `&Tab;`). */
const htmlTabEntityRegex = /&tab;/gi;

/**
 * Matches C0 control characters, DEL and C1 control characters,
 * U+2000–U+200D (Unicode spaces and zero-width characters) and U+FEFF.
 */
const ctrlCharactersRegex =
  /[\u0000-\u001F\u007F-\u009F\u2000-\u200D\uFEFF]/gim;

/**
 * Matches everything from the start of a line up to and including the last
 * scheme separator on that line, where the separator is either a literal `:`
 * or the HTML entity `&colon;` (any letter case, via the `i` flag).
 *
 * This is the only declaration of `urlSchemeRegex`: the earlier pattern
 * `/^([^:]+):/gm` did not recognise `&colon;` and declaring both with `const`
 * is a redeclaration error.
 */
const urlSchemeRegex = /^.+(:|&colon;)/gim;

/** First characters that mark a URL as relative (no scheme). */
const relativeFirstCharacters = [".", "/"];

function isRelativeUrlWithoutProtocol(url: string): boolean {
Expand All @@ -11,6 +12,7 @@ function isRelativeUrlWithoutProtocol(url: string): boolean {

// adapted from https://stackoverflow.com/a/29824550/2601552
// Replaces HTML character entities in `str` with the characters they encode.
// `&tab;` (any letter case) is first rewritten to the numeric form `&#9;` so
// that it is handled by the same numeric-entity replacement below.
function decodeHtmlCharacters(str: string) {
str = str.replace(htmlTabEntityRegex, "&#9;");
return str.replace(htmlEntitiesRegex, (match, dec) => {
// `dec` is the captured text after `&#`; String.fromCharCode coerces it to a
// number. NOTE(review): a non-decimal capture such as `x6A` coerces to NaN and
// therefore yields U+0000 rather than the encoded character — confirm that
// callers rely on control characters being stripped afterwards.
return String.fromCharCode(dec);
});
Expand Down

0 comments on commit d4bdc89

Please sign in to comment.