Skip to content

Commit

Permalink
fix: escape matching closing tag in raw text
Browse files Browse the repository at this point in the history
This commit adds a logic to escape a matching closing tag in raw text.

For example, given `<style>#text(</style><script></script>)</style>`, the parent tag would by "style" and the raw text is "</style><script></script>". If we come across a matching closing tag (in out case `</style>`) - replace `<` with `&lt;` to avoid unexpected and unsafe behavior after de-serialization.
  • Loading branch information
AndrewKushnir committed Sep 29, 2023
1 parent 9212032 commit c3bcd83
Show file tree
Hide file tree
Showing 5 changed files with 1,997 additions and 1,243 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ var domino = require('domino');
Domino includes test from the [W3C DOM Conformance Suites](http://www.w3.org/DOM/Test/)
as well as tests from [HTML Working Group](http://www.w3.org/html/wg/wiki/Testing).

When you checkout this repository for the first time, run the following command to also check out code for the mentioned tests:

```
git submodule update --init --recursive
```

The tests can be run via `npm test` or directly though the [Mocha](http://mochajs.org/) command line:

![Screenshot](http://fgnass.github.com/images/domino.png)
Expand Down
81 changes: 79 additions & 2 deletions lib/NodeUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,76 @@ function attrname(a) {
return a.name;
}

/**
* Escapes matching closing tag in a raw text.
*
* For example, given `<style>#text(</style><script></script>)</style>`,
* the parent tag would by "style" and the raw text is
* "</style><script></script>". If we come across a matching closing tag
* (in out case `</style>`) - replace `<` with `&lt;` to avoid unexpected
* and unsafe behavior after de-serialization.
*/
function escapeMatchingClosingTag(rawText, parentTag) {
const regexp = new RegExp('</' + parentTag, 'ig');
const matches = rawText.matchAll(regexp);

let result = '';
let lastIndex = 0;
for (const match of matches) {
result += rawText.substring(lastIndex, match.index) + '&lt;';
// Skip over the `<` char, since we've replaced it above.
// Start the next iteration with a char following the `<` one.
lastIndex = match.index + 1;
}
// Add remaining contents of a string after the last `<` char.
if (lastIndex < rawText.length) {
result += rawText.substring(lastIndex);
}
return result;
}

const CLOSING_COMMENT_REGEXP = /--!?>/;
const CLOSING_COMMENT_REGEXP_GLOBAL = /--!?>/g;

/**
* Escapes closing comment tag in a comment content.
*
* For example, given `#comment('-->')`, the content of a comment would be
* updated to `--&gt;` to avoid unexpected and unsafe behavior after
* de-serialization.
*/
function escapeClosingCommentTag(rawContent) {
if (!CLOSING_COMMENT_REGEXP.test(rawContent)) {
return rawContent; // fast path
}

const matches = rawContent.matchAll(CLOSING_COMMENT_REGEXP_GLOBAL);

let result = '';
let lastIndex = 0;
for (const match of matches) {
const lastMatchingCharIndex = match.index + match[0].length;
result += rawContent.substring(lastIndex, lastMatchingCharIndex) + '&gt;';
// Skip over the `>` char, since we've replaced it above.
// Start the next iteration with a char following the `>` one.
lastIndex = lastMatchingCharIndex + 1;
}
// Add remaining contents of a string after the last `>` char.
if (lastIndex < rawContent.length) {
result += rawContent.substring(lastIndex);
}
return result;
}

/**
* Escapes processing instruction content by replacing `>` with `&gt`.
*/
function escapeProcessingInstructionContent(rawContent) {
return rawContent.includes('>')
? rawContent.replaceAll('>', '&gt;')
: rawContent;
}

function serializeOne(kid, parent) {
var s = '';
switch(kid.nodeType) {
Expand All @@ -117,6 +187,12 @@ function serializeOne(kid, parent) {

if (!(html && emptyElements[tagname])) {
var ss = kid.serialize();
// If an element can have raw content and serialized child content
// has a substring that looks like a closing tag for the element -
// perform escaping to avoid XSS.
if (hasRawContent[tagname.toUpperCase()] && ss.includes('</' + tagname)) {
ss = escapeMatchingClosingTag(ss, tagname);
}
if (html && extraNewLine[tagname] && ss.charAt(0)==='\n') s += '\n';
// Serialize children and add end tag for all others
s += ss;
Expand All @@ -140,10 +216,11 @@ function serializeOne(kid, parent) {
}
break;
case 8: //COMMENT_NODE
s += '<!--' + kid.data + '-->';
s += '<!--' + escapeClosingCommentTag(kid.data) + '-->';
break;
case 7: //PROCESSING_INSTRUCTION_NODE
s += '<?' + kid.target + ' ' + kid.data + '?>';
const content = escapeProcessingInstructionContent(kid.data);
s += '<?' + kid.target + ' ' + content + '?>';
break;
case 10: //DOCUMENT_TYPE_NODE
s += '<!DOCTYPE ' + kid.name;
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"devDependencies": {
"jquery": "^3.5.1",
"mocha": "^6.2.3",
"puppeteer": "^21.3.5",
"should": "^13.2.3"
}
}
174 changes: 174 additions & 0 deletions test/xss.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'use strict';
var domino = require('../lib');
var puppeteer = require("puppeteer");

exports = exports.xss = {};

Expand All @@ -9,6 +10,32 @@ exports = exports.xss = {};
// If we change HTML serialization such that any of these tests fail, please
// review the change very carefully for potential XSS vectors!

async function alertFired(html) {
let alerted = false;
const page = await incognito.newPage();
page.on("dialog", async dialog => {
alerted = true;
await dialog.accept();
});
await page.goto("data:text/html," + html, {waitUntil: 'load'});
return alerted;
}

/** @type {puppeteer.Browser} */
let browser;
/** @type {puppeteer.BrowserContext} */
let incognito;

exports.before = async function() {
browser = await puppeteer.launch({headless:"new"});
incognito = await browser.createIncognitoBrowserContext();
}

exports.after = async function() {
await incognito.close();
await browser.close();
}

exports.fp170_31 = function() {
var document = domino.createDocument(
'<img src="test.jpg" alt="``onload=xss()" />'
Expand Down Expand Up @@ -83,3 +110,150 @@ exports.fp170_37 = function() {
'<p><svg><style>*{font-family:\'&lt;/style&gt;&lt;img/src=x\tonerror=xss()//\'}</style></svg></p>'
);
};

exports.styleMatchingClosingTagInRawText = function() {
const document = domino.createDocument('');
const style = document.createElement("style");
style.textContent = "abc</style><script>alert(1)</script>";
document.body.appendChild(style);

// Ensure that HTML entities are properly encoded inside <style>
document.body.serialize().should.equal(
'<style>abc&lt;/style><script>alert(1)</script></style>'
);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
};

exports.styleMatchingClosingTagSkipsCommentedContent = function() {

const document = domino.createDocument('');
const style = document.createElement("style");
style.textContent = "abc<!--</style>--><script>alert(1)</script>";
document.body.appendChild(style);

document.body.serialize().should.equal(
'<style>abc<!--&lt;/style>--><script>alert(1)</script></style>'
);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
};

exports.styleMatchingClosingTagAfterClosingComment = function() {
const document = domino.createDocument('');
const style = document.createElement("style");
style.textContent = "abc--></style><script>alert(1)</script>";
document.body.appendChild(style);

// Ensure that HTML entities are properly encoded inside <style>
document.body.serialize().should.equal(
'<style>abc-->&lt;/style><script>alert(1)</script></style>'
);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
};

exports.styleMatchingClosingTagSkipsUnclosedCommentedContent = function() {
const document = domino.createDocument('');
const style = document.createElement("style");
style.textContent = "abc<!--</style><script>alert(1)</script>";
document.body.appendChild(style);

document.body.serialize().should.equal(
'<style>abc<!--&lt;/style><script>alert(1)</script></style>'
);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
};

exports.scriptMatchingClosingTagInRawText = function() {
const document = domino.createDocument('');
const script = document.createElement("script");
script.textContent = "abc</script><script>alert(1)</script>";
document.body.appendChild(script);

// Ensure that HTML entities are properly encoded inside <script>
// Note: the `</script>` is encoded in both places.
document.body.serialize().should.equal(
'<script>abc&lt;/script><script>alert(1)&lt;/script></script>'
);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
};

exports.oneRawTextTagInsideAnotherOne = function() {
const document = domino.createDocument('');
const xmp = document.createElement("xmp");
const style = document.createElement("style");
xmp.textContent = "</style><script>alert(1)</script>";
style.appendChild(xmp);
document.body.appendChild(style);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
}

exports.xssInAttributeInsideRawTextTag = function() {
const document = domino.createDocument('');
const xmp = document.createElement("xmp");
const div = document.createElement("div");
div.title = "</xmp><script>alert(1)</script>";
xmp.appendChild(div);
document.body.appendChild(xmp);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
}

exports.commentNodeInsideRawTextTag = function() {
const document = domino.createDocument('');
const xmp = document.createElement("xmp");
const comment = document.createComment('</xmp><script>alert(1)</script>');
xmp.appendChild(comment);
document.body.appendChild(xmp);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
}

exports.alternativeEndTagForRawTextTag = function() {
const document = domino.createDocument('');
const style = document.createElement("style");
style.textContent = "</style /foobar><script>alert(1)</script>";
document.body.appendChild(style);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
}

exports.badCommentNode = function() {
const document = domino.createDocument('');
const comment = document.createComment('--><script>alert(1)</script>');
document.body.appendChild(comment);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
}

exports.anotherBadCommentNode = function() {
const document = domino.createDocument('');
const comment = document.createComment('--!><script>alert(1)</script>');
document.body.appendChild(comment);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
}

exports.badProcessingInstruction = function() {
const document = domino.createDocument('');
const pi = document.createProcessingInstruction("bad", "><script>alert(1)</script>");
document.body.appendChild(pi);

const html = document.serialize();
return alertFired(html).should.eventually.be.false('alert fired for: ' + html);
}
Loading

0 comments on commit c3bcd83

Please sign in to comment.