Skip to content

Commit

Permalink
fix: escape unsafe patterns in elements, comment nodes and processing…
Browse files Browse the repository at this point in the history
… instructions (#18)

This commit updates the logic to espace unsafe patterns:

- for elements that can have raw text: all matching closing tags are escaped
- for comment nodes: all closing comment tags are escaped
- for processing instructions: all `>` symbols are excaped
  • Loading branch information
AndrewKushnir authored Oct 2, 2023
1 parent 77f745f commit f2435fe
Show file tree
Hide file tree
Showing 6 changed files with 1,387 additions and 157 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ var domino = require('domino');
Domino includes test from the [W3C DOM Conformance Suites](http://www.w3.org/DOM/Test/)
as well as tests from [HTML Working Group](http://www.w3.org/html/wg/wiki/Testing).

When you checkout this repository for the first time, run the following command to also check out code for the mentioned tests:

```
git submodule update --init --recursive
```

The tests can be run via `npm test` or directly though the [Mocha](http://mochajs.org/) command line:

![Screenshot](http://fgnass.github.com/images/domino.png)
Expand Down
66 changes: 63 additions & 3 deletions lib/NodeUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,14 @@ module.exports = {
// `NodeUtils.serializeOne` to get to the function and reduce pressure
// on the megamorphic stub cache.
// See https://github.com/fgnass/domino/pull/142 for more information.
serializeOne: serializeOne
serializeOne: serializeOne,

// Export util functions so that we can run extra test for them.
// Note: we prefix function names with `ɵ`, similar to what we do
// with internal functions in Angular packages.
ɵescapeMatchingClosingTag: escapeMatchingClosingTag,
ɵescapeClosingCommentTag: escapeClosingCommentTag,
ɵescapeProcessingInstructionContent: escapeProcessingInstructionContent
};

var utils = require('./utils');
Expand Down Expand Up @@ -116,6 +123,53 @@ function attrname(a) {
return a.name;
}

/**
* Escapes matching closing tag in a raw text.
*
* For example, given `<style>#text(</style><script></script>)</style>`,
* the parent tag would by "style" and the raw text is
* "</style><script></script>". If we come across a matching closing tag
* (in out case `</style>`) - replace `<` with `&lt;` to avoid unexpected
* and unsafe behavior after de-serialization.
*/
function escapeMatchingClosingTag(rawText, parentTag) {
const parentClosingTag = '</' + parentTag;
if (!rawText.toLowerCase().includes(parentClosingTag)) {
return rawText; // fast path
}
const result = [...rawText];
const matches = rawText.matchAll(new RegExp(parentClosingTag, 'ig'));
for (const match of matches) {
result[match.index] = '&lt;';
}
return result.join('');
}

const CLOSING_COMMENT_REGEXP = /--!?>/;

/**
* Escapes closing comment tag in a comment content.
*
* For example, given `#comment('-->')`, the content of a comment would be
* updated to `--&gt;` to avoid unexpected and unsafe behavior after
* de-serialization.
*/
function escapeClosingCommentTag(rawContent) {
if (!CLOSING_COMMENT_REGEXP.test(rawContent)) {
return rawContent; // fast path
}
return rawContent.replace(/(--\!?)>/g, '$1&gt;');
}

/**
* Escapes processing instruction content by replacing `>` with `&gt`.
*/
function escapeProcessingInstructionContent(rawContent) {
return rawContent.includes('>')
? rawContent.replaceAll('>', '&gt;')
: rawContent;
}

function serializeOne(kid, parent) {
var s = '';
switch(kid.nodeType) {
Expand All @@ -135,6 +189,11 @@ function serializeOne(kid, parent) {

if (!(html && emptyElements[tagname])) {
var ss = kid.serialize();
// If an element can have raw content, this content may
// potentially require escaping to avoid XSS.
if (hasRawContent[tagname.toUpperCase()]) {
ss = escapeMatchingClosingTag(ss, tagname);
}
if (html && extraNewLine[tagname] && ss.charAt(0)==='\n') s += '\n';
// Serialize children and add end tag for all others
s += ss;
Expand All @@ -158,10 +217,11 @@ function serializeOne(kid, parent) {
}
break;
case 8: //COMMENT_NODE
s += '<!--' + kid.data + '-->';
s += '<!--' + escapeClosingCommentTag(kid.data) + '-->';
break;
case 7: //PROCESSING_INSTRUCTION_NODE
s += '<?' + kid.target + ' ' + kid.data + '?>';
const content = escapeProcessingInstructionContent(kid.data);
s += '<?' + kid.target + ' ' + content + '?>';
break;
case 10: //DOCUMENT_TYPE_NODE
s += '<!DOCTYPE ' + kid.name;
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"devDependencies": {
"jquery": "^3.5.1",
"mocha": "^6.2.3",
"puppeteer": "^21.3.5",
"should": "^13.2.3"
}
}
Loading

0 comments on commit f2435fe

Please sign in to comment.