From 8a73726d4a997a169507bd94ff356796a060d6b2 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Thu, 23 Apr 2020 22:59:45 -0500 Subject: [PATCH 1/7] add test --- test/specs/new/codespan_newline.html | 5 +++++ test/specs/new/codespan_newline.md | 5 +++++ 2 files changed, 10 insertions(+) create mode 100644 test/specs/new/codespan_newline.html create mode 100644 test/specs/new/codespan_newline.md diff --git a/test/specs/new/codespan_newline.html b/test/specs/new/codespan_newline.html new file mode 100644 index 0000000000..2ebfcdd713 --- /dev/null +++ b/test/specs/new/codespan_newline.html @@ -0,0 +1,5 @@ +

code code

+ + diff --git a/test/specs/new/codespan_newline.md b/test/specs/new/codespan_newline.md new file mode 100644 index 0000000000..4e5c7c13a7 --- /dev/null +++ b/test/specs/new/codespan_newline.md @@ -0,0 +1,5 @@ +`code +code` + +- `code +code` From 8b638b7c829efecf4bac8efd474433577ab7d4a9 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Thu, 23 Apr 2020 23:00:10 -0500 Subject: [PATCH 2/7] fix codespan with newline --- src/Lexer.js | 20 ++++++++++++++++---- src/Tokenizer.js | 37 ++++++++++++++++++++++++------------- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/src/Lexer.js b/src/Lexer.js index 577f40f966..b961706ee7 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -112,7 +112,7 @@ module.exports = class Lexer { */ blockTokens(src, tokens = [], top = true) { src = src.replace(/^ +$/gm, ''); - let token, i, l; + let token, i, l, lastToken; while (src) { // newline @@ -127,7 +127,13 @@ module.exports = class Lexer { // code if (token = this.tokenizer.code(src, tokens)) { src = src.substring(token.raw.length); - tokens.push(token); + if (token.type) { + tokens.push(token); + } else { + lastToken = tokens[tokens.length - 1]; + lastToken.raw += '\n' + token.raw; + lastToken.text += '\n' + token.text; + } continue; } @@ -219,9 +225,15 @@ module.exports = class Lexer { } // text - if (token = this.tokenizer.text(src)) { + if (token = this.tokenizer.text(src, tokens)) { src = src.substring(token.raw.length); - tokens.push(token); + if (token.type) { + tokens.push(token); + } else { + lastToken = tokens[tokens.length - 1]; + lastToken.raw += '\n' + token.raw; + lastToken.text += '\n' + token.text; + } continue; } diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 56656f52f9..a5d9a01d7d 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -56,21 +56,21 @@ module.exports = class Tokenizer { const lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph. if (lastToken && lastToken.type === 'paragraph') { - tokens.pop(); - lastToken.text += '\n' + cap[0].trimRight(); - lastToken.raw += '\n' + cap[0]; - return lastToken; - } else { - const text = cap[0].replace(/^ {4}/gm, ''); return { - type: 'code', raw: cap[0], - codeBlockStyle: 'indented', - text: !this.options.pedantic - ? rtrim(text, '\n') - : text + text: cap[0].trimRight() }; } + + const text = cap[0].replace(/^ {4}/gm, ''); + return { + type: 'code', + raw: cap[0], + codeBlockStyle: 'indented', + text: !this.options.pedantic + ? rtrim(text, '\n') + : text + }; } } @@ -343,9 +343,17 @@ module.exports = class Tokenizer { } } - text(src) { + text(src, tokens) { const cap = this.rules.block.text.exec(src); if (cap) { + const lastToken = tokens[tokens.length - 1]; + if (lastToken && lastToken.type === 'text') { + return { + raw: cap[0], + text: cap[0] + }; + } + return { type: 'text', raw: cap[0], @@ -473,10 +481,13 @@ module.exports = class Tokenizer { codespan(src) { const cap = this.rules.inline.code.exec(src); if (cap) { + const text = escape(cap[2].trim(), true); return { type: 'codespan', raw: cap[0], - text: escape(cap[2].trim(), true) + text: !this.options.pedantic + ? text.replace(/\n/g, ' ') + : text }; } } From 9721105c3fdf2069297a2a2f4ed81f391d51b38c Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Thu, 23 Apr 2020 23:36:54 -0500 Subject: [PATCH 3/7] update docs --- docs/USING_PRO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/USING_PRO.md b/docs/USING_PRO.md index 40aadea661..9b6bd7ac7c 100644 --- a/docs/USING_PRO.md +++ b/docs/USING_PRO.md @@ -155,7 +155,7 @@ console.log(marked('$ latex code $\n\n` other code `')); - table(*string* src) - lheading(*string* src) - paragraph(*string* src) -- text(*string* src) +- text(*string* src, *array* tokens) ### Inline level tokenizer methods From 143ae5d09a3a897c7327dacd5972531d6facec82 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Fri, 24 Apr 2020 00:39:47 -0500 Subject: [PATCH 4/7] fix codespan spaces --- src/Tokenizer.js | 8 ++++++- test/unit/Lexer-spec.js | 50 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/src/Tokenizer.js b/src/Tokenizer.js index a5d9a01d7d..9cb3eedebe 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -481,7 +481,13 @@ module.exports = class Tokenizer { codespan(src) { const cap = this.rules.inline.code.exec(src); if (cap) { - const text = escape(cap[2].trim(), true); + let text = cap[2]; + const hasNonSpaceChars = /\S/.test(text); + const hasSpaceCharsOnBothEnds = /^\s/.test(text) && /\s$/.test(text); + if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) { + text = text.substring(1, text.length - 1); + } + text = escape(text, true); return { type: 'codespan', raw: cap[0], diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js index d8a802061b..ccb55d3e56 100644 --- a/test/unit/Lexer-spec.js +++ b/test/unit/Lexer-spec.js @@ -752,12 +752,50 @@ a | b }); }); - it('code', () => { - expectInlineTokens({ - md: '`code`', - tokens: [ - { type: 'codespan', raw: '`code`', text: 'code' } - ] + describe('codespan', () => { + it('code', () => { + expectInlineTokens({ + md: '`code`', + tokens: [ + { type: 'codespan', raw: '`code`', text: 'code' } + ] + }); + }); + + it('only spaces', () => { + expectInlineTokens({ + md: '` `', + tokens: [ + { type: 'codespan', raw: '` `', text: ' ' } + ] + }); + }); + + it('beginning space', () => { + expectInlineTokens({ + md: '` a`', + tokens: [ + { type: 'codespan', raw: '` a`', text: ' a' } + ] + }); + }); + + it('end space', () => { + expectInlineTokens({ + md: '`a `', + tokens: [ + { type: 'codespan', raw: '`a `', text: 'a ' } + ] + }); + }); + + it('begin and end space', () => { + expectInlineTokens({ + md: '` a `', + tokens: [ + { type: 'codespan', raw: '` a `', text: 'a' } + ] + }); }); }); From 60d810f3de7a9b947eb9a613e0ad2fcdb19d5c88 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Fri, 24 Apr 2020 00:44:02 -0500 Subject: [PATCH 5/7] test only remove one space --- test/unit/Lexer-spec.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js index ccb55d3e56..dee93b6bfe 100644 --- a/test/unit/Lexer-spec.js +++ b/test/unit/Lexer-spec.js @@ -797,6 +797,15 @@ a | b ] }); }); + + it('begin and end multiple space', () => { + expectInlineTokens({ + md: '` a `', + tokens: [ + { type: 'codespan', raw: '` a `', text: ' a ' } + ] + }); + }); }); it('br', () => { From 6ad20acfc132f82bfa990cd7242261a5e28d2671 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Fri, 24 Apr 2020 00:51:16 -0500 Subject: [PATCH 6/7] add newline to space test --- test/unit/Lexer-spec.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js index dee93b6bfe..79e5f8ecf9 100644 --- a/test/unit/Lexer-spec.js +++ b/test/unit/Lexer-spec.js @@ -806,6 +806,15 @@ a | b ] }); }); + + it('newline to space', () => { + expectInlineTokens({ + md: '`a\nb`', + tokens: [ + { type: 'codespan', raw: '`a\nb`', text: 'a b' } + ] + }); + }); }); it('br', () => { From c2d6d2fa310a94d06c0e8c776c560b0f2d27aaf4 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Fri, 24 Apr 2020 01:00:27 -0500 Subject: [PATCH 7/7] only strip spaces --- src/Tokenizer.js | 10 ++++------ test/unit/Lexer-spec.js | 37 ++++++++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 9cb3eedebe..c507538aab 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -481,9 +481,9 @@ module.exports = class Tokenizer { codespan(src) { const cap = this.rules.inline.code.exec(src); if (cap) { - let text = cap[2]; - const hasNonSpaceChars = /\S/.test(text); - const hasSpaceCharsOnBothEnds = /^\s/.test(text) && /\s$/.test(text); + let text = cap[2].replace(/\n/g, ' '); + const hasNonSpaceChars = /[^ ]/.test(text); + const hasSpaceCharsOnBothEnds = text.startsWith(' ') && text.endsWith(' '); if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) { text = text.substring(1, text.length - 1); } @@ -491,9 +491,7 @@ module.exports = class Tokenizer { return { type: 'codespan', raw: cap[0], - text: !this.options.pedantic - ? text.replace(/\n/g, ' ') - : text + text }; } } diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js index 79e5f8ecf9..9b2b5d7612 100644 --- a/test/unit/Lexer-spec.js +++ b/test/unit/Lexer-spec.js @@ -762,7 +762,7 @@ a | b }); }); - it('only spaces', () => { + it('only spaces not stripped', () => { expectInlineTokens({ md: '` `', tokens: [ @@ -771,7 +771,7 @@ a | b }); }); - it('beginning space', () => { + it('beginning space only not stripped', () => { expectInlineTokens({ md: '` a`', tokens: [ @@ -780,7 +780,7 @@ a | b }); }); - it('end space', () => { + it('end space only not stripped', () => { expectInlineTokens({ md: '`a `', tokens: [ @@ -789,7 +789,7 @@ a | b }); }); - it('begin and end space', () => { + it('begin and end spaces are stripped', () => { expectInlineTokens({ md: '` a `', tokens: [ @@ -798,7 +798,34 @@ a | b }); }); - it('begin and end multiple space', () => { + it('begin and end newlines are stripped', () => { + expectInlineTokens({ + md: '`\na\n`', + tokens: [ + { type: 'codespan', raw: '`\na\n`', text: 'a' } + ] + }); + }); + + it('begin and end tabs are not stripped', () => { + expectInlineTokens({ + md: '`\ta\t`', + tokens: [ + { type: 'codespan', raw: '`\ta\t`', text: '\ta\t' } + ] + }); + }); + + it('begin and end newlines', () => { + expectInlineTokens({ + md: '`\na\n`', + tokens: [ + { type: 'codespan', raw: '`\na\n`', text: 'a' } + ] + }); + }); + + it('begin and end multiple spaces only one stripped', () => { expectInlineTokens({ md: '` a `', tokens: [