Skip to content

Commit

Permalink
fix: remove newline entities (#46)
Browse files Browse the repository at this point in the history
* fix: remove newline entities

* strip out tabs and newlines

* not decoding, so just replace

* update changelog
  • Loading branch information
kniemasik committed Nov 9, 2022
1 parent ab8d43d commit a39ca11
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 2 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# CHANGELOG

## unreleased
- Fix issue where urls in the form `https://example.com&NewLine;&NewLine;/something` were not properly sanitized

## 6.0.1

- Fix issue where urls in the form `javascript&colon;alert('xss');` were not properly sanitized
Expand Down
6 changes: 6 additions & 0 deletions src/__tests__/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ describe("sanitizeUrl", () => {
);
});

// Regression test: `&NewLine;` entities embedded in a URL must be stripped
// rather than kept in the sanitized output. The entities are spelled
// out literally so the input stays a single-line string literal.
it("removes newline entities from urls", () => {
  expect(
    sanitizeUrl("https://example.com&NewLine;&NewLine;/something")
  ).toBe("https://example.com/something");
});

it("decodes html entities", () => {
// all these decode to javascript:alert('xss');
const attackVectors = [
Expand Down
4 changes: 2 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Matches `javascript`, `data` or `vbscript` (case-insensitive) at the start
// of the input or of any line, optionally preceded by non-word characters.
const invalidProtocolRegex = /^([^\w]*)(javascript|data|vbscript)/im;
// Matches a numeric character reference: `&#`, a run of word characters
// (captured), and an optional trailing `;`.
// NOTE(review): inside `(^\w|;)` the `^` is an anchor, not a negation, so that
// alternative cannot match after `&#…`; `[^\w]` may have been intended — confirm.
const htmlEntitiesRegex = /&#(\w+)(^\w|;)?/g;
// Matches the literal text `&tab;` in any letter case.
const htmlTabEntityRegex = /&tab;/gi;
// Matches the literal text `&newline;` or `&tab;` in any letter case.
const htmlCtrlEntityRegex = /&(newline|tab);/gi;
// Matches C0 controls, DEL and C1 controls, U+2000–U+200D and U+FEFF.
const ctrlCharactersRegex =
  /[\u0000-\u001F\u007F-\u009F\u2000-\u200D\uFEFF]/gim;
// Matches from the start of a line up to the last `:` on it, or, failing that,
// up to the last `&colon;` entity. The entity must be spelled out here: a bare
// `:` in both alternatives would leave `scheme&colon;…` undetected.
const urlSchemeRegex = /^.+(:|&colon;)/gim;
Expand All @@ -12,14 +12,14 @@ function isRelativeUrlWithoutProtocol(url: string): boolean {

/**
 * Decodes `&tab;` and numeric HTML character references (`&#106;`, `&#x6A;`,
 * with or without the trailing `;`) in `str`.
 *
 * Adapted from https://stackoverflow.com/a/29824550/2601552
 *
 * @param str - text that may contain HTML character references
 * @returns `str` with every matched reference replaced by the UTF-16 code
 *   unit it denotes; references whose body is not a valid number become U+0000
 */
function decodeHtmlCharacters(str: string): string {
  // Written as an escape so the replacement is visible in the source.
  const withTabs = str.replace(htmlTabEntityRegex, "\t");

  return withTabs.replace(htmlEntitiesRegex, (_match: string, dec: string) => {
    // Hex references arrive as "x6A". Passed to String.fromCharCode as-is they
    // coerce to NaN and come out as U+0000, so prefix "0" and let Number()
    // parse the "0x6A" form. Every non-hex body is converted exactly as before.
    const isHex = dec.startsWith("x") || dec.startsWith("X");
    const codeUnit = Number(isHex ? `0${dec}` : dec);
    return String.fromCharCode(codeUnit);
  });
}

export function sanitizeUrl(url?: string): string {
const sanitizedUrl = decodeHtmlCharacters(url || "")
.replace(htmlCtrlEntityRegex, "")
.replace(ctrlCharactersRegex, "")
.trim();

Expand Down

0 comments on commit a39ca11

Please sign in to comment.