From 58d66e59d1ad1a149bed2da6553d49642f75aa86 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Fri, 8 Nov 2024 19:38:08 -0700 Subject: [PATCH] fix: escape html in renderer (#3495) BREAKING CHANGE: escape html in renderers for all tokens. --- src/Lexer.ts | 7 +- src/Parser.ts | 4 +- src/Renderer.ts | 18 +++-- src/Tokenizer.ts | 26 +++--- src/Tokens.ts | 3 +- test/unit/Lexer.test.js | 175 ++++++++++++++++++++++++++++------------ 6 files changed, 148 insertions(+), 85 deletions(-) diff --git a/src/Lexer.ts b/src/Lexer.ts index 9aa79cdbf9..bed5886c6b 100644 --- a/src/Lexer.ts +++ b/src/Lexer.ts @@ -355,12 +355,7 @@ export class _Lexer { if (token = this.tokenizer.tag(src)) { src = src.substring(token.raw.length); lastToken = tokens[tokens.length - 1]; - if (lastToken && token.type === 'text' && lastToken.type === 'text') { - lastToken.raw += token.raw; - lastToken.text += token.text; - } else { - tokens.push(token); - } + tokens.push(token); continue; } diff --git a/src/Parser.ts b/src/Parser.ts index 8a6716196d..ebaaa2113c 100644 --- a/src/Parser.ts +++ b/src/Parser.ts @@ -98,7 +98,7 @@ export class _Parser { let textToken = token; let body = this.renderer.text(textToken); while (i + 1 < tokens.length && tokens[i + 1].type === 'text') { - textToken = tokens[++i] as Tokens.Text | Tokens.Tag; + textToken = tokens[++i] as Tokens.Text; body += '\n' + this.renderer.text(textToken); } if (top) { @@ -106,7 +106,7 @@ export class _Parser { type: 'paragraph', raw: body, text: body, - tokens: [{ type: 'text', raw: body, text: body }], + tokens: [{ type: 'text', raw: body, text: body, escaped: true }], }); } else { out += body; diff --git a/src/Renderer.ts b/src/Renderer.ts index c2a2645ced..8d11f19c42 100644 --- a/src/Renderer.ts +++ b/src/Renderer.ts @@ -79,13 +79,15 @@ export class _Renderer { if (item.tokens.length > 0 && item.tokens[0].type === 'paragraph') { item.tokens[0].text = checkbox + ' ' + item.tokens[0].text; if (item.tokens[0].tokens && item.tokens[0].tokens.length > 0 && item.tokens[0].tokens[0].type === 'text') { - item.tokens[0].tokens[0].text = checkbox + ' ' + item.tokens[0].tokens[0].text; + item.tokens[0].tokens[0].text = checkbox + ' ' + escape(item.tokens[0].tokens[0].text); + item.tokens[0].tokens[0].escaped = true; } } else { item.tokens.unshift({ type: 'text', raw: checkbox + ' ', text: checkbox + ' ', + escaped: true, }); } } else { @@ -164,7 +166,7 @@ export class _Renderer { } codespan({ text }: Tokens.Codespan): string { - return `${text}`; + return `${escape(text, true)}`; } br(token: Tokens.Br): string { @@ -184,7 +186,7 @@ export class _Renderer { href = cleanHref; let out = ''; return out; @@ -193,19 +195,21 @@ export class _Renderer { image({ href, title, text }: Tokens.Image): string { const cleanHref = cleanUrl(href); if (cleanHref === null) { - return text; + return escape(text); } href = cleanHref; let out = `${text}, raw: string, lexer: _Lexer): Tokens.Link | Tokens.Image { const href = link.href; - const title = link.title ? escape(link.title) : null; + const title = link.title || null; const text = cap[1].replace(/\\([\[\]])/g, '$1'); if (cap[0].charAt(0) !== '!') { @@ -33,7 +32,7 @@ function outputLink(cap: string[], link: Pick, ra raw, href, title, - text: escape(text), + text, }; } @@ -583,7 +582,7 @@ export class _Tokenizer { return { type: 'escape', raw: cap[0], - text: escape(cap[1]), + text: cap[1], }; } } @@ -766,7 +765,6 @@ export class _Tokenizer { if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) { text = text.substring(1, text.length - 1); } - text = escape(text, true); return { type: 'codespan', raw: cap[0], @@ -802,10 +800,10 @@ export class _Tokenizer { if (cap) { let text, href; if (cap[2] === '@') { - text = escape(cap[1]); + text = cap[1]; href = 'mailto:' + text; } else { - text = escape(cap[1]); + text = cap[1]; href = text; } @@ -830,7 +828,7 @@ export class _Tokenizer { if (cap = this.rules.inline.url.exec(src)) { let text, href; if (cap[2] === '@') { - text = escape(cap[0]); + text = cap[0]; href = 'mailto:' + text; } else { // do extended autolink path validation @@ -839,7 +837,7 @@ export class _Tokenizer { prevCapZero = cap[0]; cap[0] = this.rules.inline._backpedal.exec(cap[0])?.[0] ?? ''; } while (prevCapZero !== cap[0]); - text = escape(cap[0]); + text = cap[0]; if (cap[1] === 'www.') { href = 'http://' + cap[0]; } else { @@ -865,16 +863,12 @@ export class _Tokenizer { inlineText(src: string): Tokens.Text | undefined { const cap = this.rules.inline.text.exec(src); if (cap) { - let text; - if (this.lexer.state.inRawBlock) { - text = cap[0]; - } else { - text = escape(cap[0]); - } + const escaped = this.lexer.state.inRawBlock; return { type: 'text', raw: cap[0], - text, + text: cap[0], + escaped, }; } } diff --git a/src/Tokens.ts b/src/Tokens.ts index e5f8402e6a..0e8bf346f3 100644 --- a/src/Tokens.ts +++ b/src/Tokens.ts @@ -125,6 +125,7 @@ export namespace Tokens { raw: string; text: string; tokens?: Token[]; + escaped?: boolean; } export interface Def { @@ -142,7 +143,7 @@ export namespace Tokens { } export interface Tag { - type: 'text' | 'html'; + type: 'html'; raw: string; inLink: boolean; inRawBlock: boolean; diff --git a/test/unit/Lexer.test.js b/test/unit/Lexer.test.js index 82a2d6df65..a1dee78081 100644 --- a/test/unit/Lexer.test.js +++ b/test/unit/Lexer.test.js @@ -35,14 +35,14 @@ describe('Lexer', () => { type: 'paragraph', raw: 'paragraph 1', text: 'paragraph 1', - tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1' }], + tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1', escaped: false }], }, { type: 'space', raw: '\n\n' }, { type: 'paragraph', raw: 'paragraph 2', text: 'paragraph 2', - tokens: [{ type: 'text', raw: 'paragraph 2', text: 'paragraph 2' }], + tokens: [{ type: 'text', raw: 'paragraph 2', text: 'paragraph 2', escaped: false }], }, ], }); @@ -110,56 +110,56 @@ lheading 2 raw: '# heading 1\n\n', depth: 1, text: 'heading 1', - tokens: [{ type: 'text', raw: 'heading 1', text: 'heading 1' }], + tokens: [{ type: 'text', raw: 'heading 1', text: 'heading 1', escaped: false }], }, { type: 'heading', raw: '## heading 2\n\n', depth: 2, text: 'heading 2', - tokens: [{ type: 'text', raw: 'heading 2', text: 'heading 2' }], + tokens: [{ type: 'text', raw: 'heading 2', text: 'heading 2', escaped: false }], }, { type: 'heading', raw: '### heading 3\n\n', depth: 3, text: 'heading 3', - tokens: [{ type: 'text', raw: 'heading 3', text: 'heading 3' }], + tokens: [{ type: 'text', raw: 'heading 3', text: 'heading 3', escaped: false }], }, { type: 'heading', raw: '#### heading 4\n\n', depth: 4, text: 'heading 4', - tokens: [{ type: 'text', raw: 'heading 4', text: 'heading 4' }], + tokens: [{ type: 'text', raw: 'heading 4', text: 'heading 4', escaped: false }], }, { type: 'heading', raw: '##### heading 5\n\n', depth: 5, text: 'heading 5', - tokens: [{ type: 'text', raw: 'heading 5', text: 'heading 5' }], + tokens: [{ type: 'text', raw: 'heading 5', text: 'heading 5', escaped: false }], }, { type: 'heading', raw: '###### heading 6\n\n', depth: 6, text: 'heading 6', - tokens: [{ type: 'text', raw: 'heading 6', text: 'heading 6' }], + tokens: [{ type: 'text', raw: 'heading 6', text: 'heading 6', escaped: false }], }, { type: 'heading', raw: 'lheading 1\n==========\n\n', depth: 1, text: 'lheading 1', - tokens: [{ type: 'text', raw: 'lheading 1', text: 'lheading 1' }], + tokens: [{ type: 'text', raw: 'lheading 1', text: 'lheading 1', escaped: false }], }, { type: 'heading', raw: 'lheading 2\n----------\n', depth: 2, text: 'lheading 2', - tokens: [{ type: 'text', raw: 'lheading 2', text: 'lheading 2' }], + tokens: [{ type: 'text', raw: 'lheading 2', text: 'lheading 2', escaped: false }], }, ], }); @@ -172,7 +172,7 @@ lheading 2 type: 'paragraph', raw: '####### heading 7', text: '####### heading 7', - tokens: [{ type: 'text', raw: '####### heading 7', text: '####### heading 7' }], + tokens: [{ type: 'text', raw: '####### heading 7', text: '####### heading 7', escaped: false }], }], }); }); @@ -196,13 +196,13 @@ lheading 2 header: [ { text: 'a', - tokens: [{ type: 'text', raw: 'a', text: 'a' }], + tokens: [{ type: 'text', raw: 'a', text: 'a', escaped: false }], header: true, align: null, }, { text: 'b', - tokens: [{ type: 'text', raw: 'b', text: 'b' }], + tokens: [{ type: 'text', raw: 'b', text: 'b', escaped: false }], header: true, align: null, }, @@ -211,13 +211,13 @@ lheading 2 [ { text: '1', - tokens: [{ type: 'text', raw: '1', text: '1' }], + tokens: [{ type: 'text', raw: '1', text: '1', escaped: false }], header: false, align: null, }, { text: '2', - tokens: [{ type: 'text', raw: '2', text: '2' }], + tokens: [{ type: 'text', raw: '2', text: '2', escaped: false }], header: false, align: null, }, @@ -242,7 +242,7 @@ paragraph 1 type: 'paragraph', raw: 'paragraph 1\n', text: 'paragraph 1', - tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1' }], + tokens: [{ type: 'text', raw: 'paragraph 1', text: 'paragraph 1', escaped: false }], }, { type: 'table', @@ -251,13 +251,13 @@ paragraph 1 header: [ { text: 'a', - tokens: [{ type: 'text', raw: 'a', text: 'a' }], + tokens: [{ type: 'text', raw: 'a', text: 'a', escaped: false }], header: true, align: null, }, { text: 'b', - tokens: [{ type: 'text', raw: 'b', text: 'b' }], + tokens: [{ type: 'text', raw: 'b', text: 'b', escaped: false }], header: true, align: null, }, @@ -266,13 +266,13 @@ paragraph 1 [ { text: '1', - tokens: [{ type: 'text', raw: '1', text: '1' }], + tokens: [{ type: 'text', raw: '1', text: '1', escaped: false }], header: false, align: null, }, { text: '2', - tokens: [{ type: 'text', raw: '2', text: '2' }], + tokens: [{ type: 'text', raw: '2', text: '2', escaped: false }], header: false, align: null, }, @@ -300,19 +300,19 @@ paragraph 1 header: [ { text: 'a', - tokens: [{ type: 'text', raw: 'a', text: 'a' }], + tokens: [{ type: 'text', raw: 'a', text: 'a', escaped: false }], header: true, align: 'left', }, { text: 'b', - tokens: [{ type: 'text', raw: 'b', text: 'b' }], + tokens: [{ type: 'text', raw: 'b', text: 'b', escaped: false }], header: true, align: 'center', }, { text: 'c', - tokens: [{ type: 'text', raw: 'c', text: 'c' }], + tokens: [{ type: 'text', raw: 'c', text: 'c', escaped: false }], header: true, align: 'right', }, @@ -321,19 +321,19 @@ paragraph 1 [ { text: '1', - tokens: [{ type: 'text', raw: '1', text: '1' }], + tokens: [{ type: 'text', raw: '1', text: '1', escaped: false }], header: false, align: 'left', }, { text: '2', - tokens: [{ type: 'text', raw: '2', text: '2' }], + tokens: [{ type: 'text', raw: '2', text: '2', escaped: false }], header: false, align: 'center', }, { text: '3', - tokens: [{ type: 'text', raw: '3', text: '3' }], + tokens: [{ type: 'text', raw: '3', text: '3', escaped: false }], header: false, align: 'right', }, @@ -361,13 +361,13 @@ a | b header: [ { text: 'a', - tokens: [{ type: 'text', raw: 'a', text: 'a' }], + tokens: [{ type: 'text', raw: 'a', text: 'a', escaped: false }], header: true, align: null, }, { text: 'b', - tokens: [{ type: 'text', raw: 'b', text: 'b' }], + tokens: [{ type: 'text', raw: 'b', text: 'b', escaped: false }], header: true, align: null, }, @@ -376,13 +376,13 @@ a | b [ { text: '1', - tokens: [{ type: 'text', raw: '1', text: '1' }], + tokens: [{ type: 'text', raw: '1', text: '1', escaped: false }], header: false, align: null, }, { text: '2', - tokens: [{ type: 'text', raw: '2', text: '2' }], + tokens: [{ type: 'text', raw: '2', text: '2', escaped: false }], header: false, align: null, }, @@ -418,7 +418,7 @@ a | b raw: 'blockquote', text: 'blockquote', tokens: [ - { type: 'text', raw: 'blockquote', text: 'blockquote' }, + { type: 'text', raw: 'blockquote', text: 'blockquote', escaped: false }, ], }], }, @@ -439,7 +439,7 @@ a | b raw: 'blockquote', text: 'blockquote', tokens: [ - { type: 'text', raw: 'blockquote', text: 'blockquote' }, + { type: 'text', raw: 'blockquote', text: 'blockquote', escaped: false }, ], }], }, @@ -475,7 +475,7 @@ a | b raw: 'blockquote', text: 'blockquote', tokens: [ - { type: 'text', raw: 'blockquote', text: 'blockquote' }, + { type: 'text', raw: 'blockquote', text: 'blockquote', escaped: false }, ], }, ], @@ -519,7 +519,7 @@ a | b type: 'text', raw: 'item 1', text: 'item 1', - tokens: [{ type: 'text', raw: 'item 1', text: 'item 1' }], + tokens: [{ type: 'text', raw: 'item 1', text: 'item 1', escaped: false }], }], }, { @@ -533,7 +533,7 @@ a | b type: 'text', raw: 'item 2', text: 'item 2', - tokens: [{ type: 'text', raw: 'item 2', text: 'item 2' }], + tokens: [{ type: 'text', raw: 'item 2', text: 'item 2', escaped: false }], }], }, ], @@ -577,6 +577,7 @@ a | b type: 'text', raw: 'item 1', text: 'item 1', + escaped: false, }, ], }, @@ -599,6 +600,7 @@ a | b type: 'text', raw: 'item 2', text: 'item 2', + escaped: false, }, ], }, @@ -645,6 +647,7 @@ a | b type: 'text', raw: 'item 1', text: 'item 1', + escaped: false, }, ], }, @@ -667,6 +670,7 @@ a | b type: 'text', raw: 'item 2', text: 'item 2', + escaped: false, }, ], }, @@ -715,6 +719,7 @@ paragraph type: 'text', raw: 'item 1', text: 'item 1', + escaped: false, }, ], }, @@ -737,6 +742,7 @@ paragraph type: 'text', raw: 'item 2', text: 'item 2', + escaped: false, }, ], }, @@ -757,6 +763,7 @@ paragraph type: 'text', raw: 'paragraph', text: 'paragraph', + escaped: false, }, ], }, @@ -799,6 +806,7 @@ paragraph type: 'text', raw: 'item 1', text: 'item 1', + escaped: false, }, ], }, @@ -821,6 +829,7 @@ paragraph type: 'text', raw: 'item 2', text: 'item 2', + escaped: false, }, ], }, @@ -868,6 +877,7 @@ paragraph type: 'text', raw: 'item 1', text: 'item 1', + escaped: false, }, ], }, @@ -890,6 +900,7 @@ paragraph type: 'text', raw: 'item 2', text: 'item 2', + escaped: false, }, ], }, @@ -939,6 +950,7 @@ paragraph type: 'text', raw: 'item 1', text: 'item 1', + escaped: false, }, ], }, @@ -961,6 +973,7 @@ paragraph type: 'text', raw: 'item 2', text: 'item 2', + escaped: false, }, ], }, @@ -977,6 +990,7 @@ paragraph type: 'text', raw: 'item 2a', text: 'item 2a', + escaped: false, }, ], }, @@ -999,6 +1013,7 @@ paragraph type: 'text', raw: 'item 3', text: 'item 3', + escaped: false, }, ], }, @@ -1045,6 +1060,7 @@ paragraph type: 'text', raw: 'item 1', text: 'item 1', + escaped: false, }, ], }, @@ -1072,6 +1088,7 @@ paragraph type: 'text', raw: 'item 2', text: 'item 2', + escaped: false, }, ], }, @@ -1122,6 +1139,7 @@ paragraph type: 'text', raw: 'item 1', text: 'item 1', + escaped: false, }, ], }, @@ -1144,6 +1162,7 @@ paragraph type: 'text', raw: 'item 2', text: 'item 2', + escaped: false, }, ], }, @@ -1214,7 +1233,7 @@ paragraph expectInlineTokens({ md: '\\>', tokens: [ - { type: 'escape', raw: '\\>', text: '>' }, + { type: 'escape', raw: '\\>', text: '>' }, ], }); }); @@ -1228,7 +1247,7 @@ paragraph raw: '**strong text\\[**', text: 'strong text\\[', tokens: [ - { type: 'text', raw: 'strong text', text: 'strong text' }, + { type: 'text', raw: 'strong text', text: 'strong text', escaped: false }, { type: 'escape', raw: '\\[', text: '[' }, ], }, @@ -1243,11 +1262,11 @@ paragraph raw: '_em\\sis_', text: 'em\\sis', tokens: [ - { type: 'text', raw: 'em', text: 'em' }, - { type: 'escape', raw: '\\<', text: '<' }, - { type: 'text', raw: 'pha', text: 'pha' }, - { type: 'escape', raw: '\\>', text: '>' }, - { type: 'text', raw: 'sis', text: 'sis' }, + { type: 'text', raw: 'em', text: 'em', escaped: false }, + { type: 'escape', raw: '\\<', text: '<' }, + { type: 'text', raw: 'pha', text: 'pha', escaped: false }, + { type: 'escape', raw: '\\>', text: '>' }, + { type: 'text', raw: 'sis', text: 'sis', escaped: false }, ], }, ], @@ -1259,7 +1278,7 @@ paragraph md: '
html
', tokens: [ { type: 'html', raw: '
', inLink: false, inRawBlock: false, block: false, text: '
' }, - { type: 'text', raw: 'html', text: 'html' }, + { type: 'text', raw: 'html', text: 'html', escaped: false }, { type: 'html', raw: '
', inLink: false, inRawBlock: false, block: false, text: '
' }, ], }); @@ -1276,7 +1295,12 @@ paragraph title: null, text: 'link', tokens: [ - { type: 'text', raw: 'link', text: 'link' }, + { + type: 'text', + raw: 'link', + text: 'link', + escaped: false, + }, ], }, ], @@ -1294,7 +1318,12 @@ paragraph title: 'title', text: 'link', tokens: [ - { type: 'text', raw: 'link', text: 'link' }, + { + type: 'text', + raw: 'link', + text: 'link', + escaped: false, + }, ], }, ], @@ -1349,6 +1378,7 @@ paragraph type: 'text', raw: 'link', text: 'link', + escaped: false, }], }, ], @@ -1372,6 +1402,7 @@ paragraph type: 'text', raw: 'link', text: 'link', + escaped: false, }], }, ], @@ -1382,7 +1413,11 @@ paragraph expectInlineTokens({ md: '[link]', tokens: [ - { type: 'text', raw: '[link]', text: '[link]' }, + { + type: 'text', + raw: '[link]', + text: '[link]', + }, ], }); }); @@ -1397,7 +1432,12 @@ paragraph raw: '**strong**', text: 'strong', tokens: [ - { type: 'text', raw: 'strong', text: 'strong' }, + { + type: 'text', + raw: 'strong', + text: 'strong', + escaped: false, + }, ], }, ], @@ -1413,7 +1453,12 @@ paragraph raw: '*em*', text: 'em', tokens: [ - { type: 'text', raw: 'em', text: 'em' }, + { + type: 'text', + raw: 'em', + text: 'em', + escaped: false, + }, ], }, ], @@ -1521,6 +1566,7 @@ paragraph raw: 'a', text: 'a', type: 'text', + escaped: false, }, { raw: '\n', @@ -1530,6 +1576,7 @@ paragraph raw: 'b', text: 'b', type: 'text', + escaped: false, }, ], }); @@ -1544,7 +1591,12 @@ paragraph raw: '~~del~~', text: 'del', tokens: [ - { type: 'text', raw: 'del', text: 'del' }, + { + type: 'text', + raw: 'del', + text: 'del', + escaped: false, + }, ], }, ], @@ -1562,7 +1614,11 @@ paragraph text: 'https://example.com', href: 'https://example.com', tokens: [ - { type: 'text', raw: 'https://example.com', text: 'https://example.com' }, + { + type: 'text', + raw: 'https://example.com', + text: 'https://example.com', + }, ], }, ], @@ -1580,7 +1636,11 @@ paragraph text: 'test@example.com', href: 'mailto:test@example.com', tokens: [ - { type: 'text', raw: 'test@example.com', text: 'test@example.com' }, + { + type: 'text', + raw: 'test@example.com', + text: 'test@example.com', + }, ], }, ], @@ -1597,7 +1657,11 @@ paragraph text: 'https://example.com', href: 'https://example.com', tokens: [ - { type: 'text', raw: 'https://example.com', text: 'https://example.com' }, + { + type: 'text', + raw: 'https://example.com', + text: 'https://example.com', + }, ], }, ], @@ -1615,7 +1679,11 @@ paragraph text: 'test@example.com', href: 'mailto:test@example.com', tokens: [ - { type: 'text', raw: 'test@example.com', text: 'test@example.com' }, + { + type: 'text', + raw: 'test@example.com', + text: 'test@example.com', + }, ], }, ], @@ -1631,6 +1699,7 @@ paragraph type: 'text', raw: 'text', text: 'text', + escaped: false, }, ], });