Skip to content

Commit

Permalink
fix: escape matching closing tag in raw text
Browse files Browse the repository at this point in the history
This commit adds a logic to escape a matching closing tag in raw text.

For example, given `<style>#text(</style><script></script>)</style>`, the parent tag would by "style" and the raw text is "</style><script></script>". If we come across a matching closing tag (in out case `</style>`) - replace `<` with `&lt;` to avoid unexpected and unsafe behavior after de-serialization.
  • Loading branch information
AndrewKushnir committed Sep 29, 2023
1 parent 77f745f commit 1c8f504
Show file tree
Hide file tree
Showing 6 changed files with 2,151 additions and 1,393 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ var domino = require('domino');
Domino includes test from the [W3C DOM Conformance Suites](http://www.w3.org/DOM/Test/)
as well as tests from [HTML Working Group](http://www.w3.org/html/wg/wiki/Testing).

When you checkout this repository for the first time, run the following command to also check out code for the mentioned tests:

```
git submodule update --init --recursive
```

The tests can be run via `npm test` or directly though the [Mocha](http://mochajs.org/) command line:

![Screenshot](http://fgnass.github.com/images/domino.png)
Expand Down
86 changes: 84 additions & 2 deletions lib/NodeUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,82 @@ function attrname(a) {
return a.name;
}

/**
* Escapes matching closing tag in a raw text.
*
* For example, given `<style>#text(</style><script></script>)</style>`,
* the parent tag would by "style" and the raw text is
* "</style><script></script>". If we come across a matching closing tag
* (in out case `</style>`) - replace `<` with `&lt;` to avoid unexpected
* and unsafe behavior after de-serialization.
*/
function escapeMatchingClosingTag(rawText, parentTag) {
const regexp = new RegExp('</' + parentTag, 'ig');

if (!regexp.test(rawText)) {
return rawText; // fast path
}

regexp.lastIndex = 0; // reset an index to start from the beginning of a string
const matches = rawText.matchAll(regexp);

let result = '';
let lastIndex = 0;
for (const match of matches) {
result += rawText.substring(lastIndex, match.index) + '&lt;';
// Skip over the `<` char, since we've replaced it above.
// Start the next iteration with a char following the `<` one.
lastIndex = match.index + 1;
}
// Add remaining contents of a string after the last `<` char.
if (lastIndex < rawText.length) {
result += rawText.substring(lastIndex);
}
return result;
}

const CLOSING_COMMENT_REGEXP = /--!?>/;
const CLOSING_COMMENT_REGEXP_GLOBAL = /--!?>/g;

/**
* Escapes closing comment tag in a comment content.
*
* For example, given `#comment('-->')`, the content of a comment would be
* updated to `--&gt;` to avoid unexpected and unsafe behavior after
* de-serialization.
*/
function escapeClosingCommentTag(rawContent) {
if (!CLOSING_COMMENT_REGEXP.test(rawContent)) {
return rawContent; // fast path
}

const matches = rawContent.matchAll(CLOSING_COMMENT_REGEXP_GLOBAL);

let result = '';
let lastIndex = 0;
for (const match of matches) {
const lastMatchingCharIndex = match.index + match[0].length;
result += rawContent.substring(lastIndex, lastMatchingCharIndex) + '&gt;';
// Skip over the `>` char, since we've replaced it above.
// Start the next iteration with a char following the `>` one.
lastIndex = lastMatchingCharIndex + 1;
}
// Add remaining contents of a string after the last `>` char.
if (lastIndex < rawContent.length) {
result += rawContent.substring(lastIndex);
}
return result;
}

/**
* Escapes processing instruction content by replacing `>` with `&gt`.
*/
function escapeProcessingInstructionContent(rawContent) {
return rawContent.includes('>')
? rawContent.replaceAll('>', '&gt;')
: rawContent;
}

function serializeOne(kid, parent) {
var s = '';
switch(kid.nodeType) {
Expand All @@ -135,6 +211,11 @@ function serializeOne(kid, parent) {

if (!(html && emptyElements[tagname])) {
var ss = kid.serialize();
// If an element can have raw content, this content may
// potentially require escaping to avoid XSS.
if (hasRawContent[tagname.toUpperCase()]) {
ss = escapeMatchingClosingTag(ss, tagname);
}
if (html && extraNewLine[tagname] && ss.charAt(0)==='\n') s += '\n';
// Serialize children and add end tag for all others
s += ss;
Expand All @@ -158,10 +239,11 @@ function serializeOne(kid, parent) {
}
break;
case 8: //COMMENT_NODE
s += '<!--' + kid.data + '-->';
s += '<!--' + escapeClosingCommentTag(kid.data) + '-->';
break;
case 7: //PROCESSING_INSTRUCTION_NODE
s += '<?' + kid.target + ' ' + kid.data + '?>';
const content = escapeProcessingInstructionContent(kid.data);
s += '<?' + kid.target + ' ' + content + '?>';
break;
case 10: //DOCUMENT_TYPE_NODE
s += '<!DOCTYPE ' + kid.name;
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"devDependencies": {
"jquery": "^3.5.1",
"mocha": "^6.2.3",
"puppeteer": "^21.3.5",
"should": "^13.2.3"
}
}
Loading

0 comments on commit 1c8f504

Please sign in to comment.