Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Full Commonmark compliance for Lists #2112

Merged
merged 23 commits into from
Aug 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
4482e0c
Rewrite List Tokenizer, Fix incorrect "new" spec tests
calculuschild Jun 18, 2021
ec216ae
cleanup
calculuschild Jun 18, 2021
5cfa5de
more cleanup
calculuschild Jun 18, 2021
1697a2b
Merge remote-tracking branch 'upstream/master' into cleanUpLists
calculuschild Jun 18, 2021
26a56fa
Passing all Spec tests
calculuschild Jun 21, 2021
65907ee
Fix some unit tests (lists no longer consume blank lines at end of list)
calculuschild Jun 21, 2021
3dd2aff
Fix more "lists consuming blank lines" unit tests.
calculuschild Jun 21, 2021
4fe6e7c
All unit tests passing!
calculuschild Jun 21, 2021
41f9bde
Lint
calculuschild Jun 21, 2021
bb8fe00
Two more commonmark examples fixed
calculuschild Jun 21, 2021
d5ba0f3
List and List Items fully Commonmark Compliant!!!
calculuschild Jun 22, 2021
3a409a1
bundles
calculuschild Jun 22, 2021
724ea9e
Replace \h with ' '
calculuschild Jun 23, 2021
c934790
Clean comments out of Rules.js
calculuschild Jun 23, 2021
8007f8b
Don't rebuild "next bullet regex" for each line / replace 'match' wit…
calculuschild Jun 23, 2021
a79772b
Replace more .match and .exec with .test for speed
calculuschild Jun 23, 2021
7014613
Merge remote-tracking branch 'upstream/master' into cleanUpLists
calculuschild Jun 25, 2021
f78d06e
update to commonmark 0.30. Still passing all tests...
calculuschild Jun 25, 2021
7140744
Rebase onto #2124
calculuschild Aug 6, 2021
26c58fd
Merge branch 'cleanUpLists' of https://github.com/calculuschild/marke…
calculuschild Aug 6, 2021
7d31421
Finish rebase
calculuschild Aug 6, 2021
823dccf
lint
calculuschild Aug 6, 2021
2dfda0e
update packaged lib
calculuschild Aug 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
962 changes: 453 additions & 509 deletions lib/marked.esm.js

Large diffs are not rendered by default.

986 changes: 481 additions & 505 deletions lib/marked.js

Large diffs are not rendered by default.

12 changes: 8 additions & 4 deletions src/Lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,9 @@ module.exports = class Lexer {
src = src.substring(token.raw.length);
lastToken = tokens[tokens.length - 1];
// An indented code block cannot interrupt a paragraph.
if (lastToken && lastToken.type === 'paragraph') {
if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
this.inlineQueue.pop();
this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
} else {
tokens.push(token);
Expand Down Expand Up @@ -217,9 +216,14 @@ module.exports = class Lexer {
}

// def
if (this.state.top && (token = this.tokenizer.def(src))) {
if (token = this.tokenizer.def(src)) {
src = src.substring(token.raw.length);
if (!this.tokens.links[token.tag]) {
lastToken = tokens[tokens.length - 1];
if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.raw;
this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
} else if (!this.tokens.links[token.tag]) {
this.tokens.links[token.tag] = {
href: token.href,
title: token.title
Expand Down
2 changes: 1 addition & 1 deletion src/Parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ module.exports = class Parser {
if (item.task) {
checkbox = this.renderer.checkbox(checked);
if (loose) {
if (item.tokens.length > 0 && item.tokens[0].type === 'text') {
if (item.tokens.length > 0 && item.tokens[0].type === 'paragraph') {
item.tokens[0].text = checkbox + ' ' + item.tokens[0].text;
if (item.tokens[0].tokens && item.tokens[0].tokens.length > 0 && item.tokens[0].tokens[0].type === 'text') {
item.tokens[0].tokens[0].text = checkbox + ' ' + item.tokens[0].tokens[0].text;
Expand Down
214 changes: 109 additions & 105 deletions src/Tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -164,145 +164,149 @@ module.exports = class Tokenizer {
}

list(src) {
const cap = this.rules.block.list.exec(src);
let cap = this.rules.block.list.exec(src);
if (cap) {
let raw = cap[0];
const bull = cap[2];
let raw, istask, ischecked, indent, i, blankLine, endsWithBlankLine,
line, lines, itemContents;

let bull = cap[1].trim();
const isordered = bull.length > 1;

const list = {
type: 'list',
raw,
raw: '',
ordered: isordered,
start: isordered ? +bull.slice(0, -1) : '',
loose: false,
items: []
};

// Get each top-level item.
const itemMatch = cap[0].match(this.rules.block.item);

let next = false,
item,
space,
bcurr,
bnext,
addBack,
loose,
istask,
ischecked,
endMatch;

let l = itemMatch.length;
bcurr = this.rules.block.listItemStart.exec(itemMatch[0]);
for (let i = 0; i < l; i++) {
item = itemMatch[i];
raw = item;

if (!this.options.pedantic) {
// Determine if current item contains the end of the list
endMatch = item.match(new RegExp('\\n\\s*\\n {0,' + (bcurr[0].length - 1) + '}\\S'));
if (endMatch) {
addBack = item.length - endMatch.index + itemMatch.slice(i + 1).join('\n').length;
list.raw = list.raw.substring(0, list.raw.length - addBack);

item = item.substring(0, endMatch.index);
raw = item;
l = i + 1;
}
bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`;

if (this.options.pedantic) {
bull = isordered ? bull : '[*+-]';
}

// Get next list item
const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*| *)(?:\\n[^\\n]*)*(?:\\n|$))`);

// Get each top-level item
while (src) {
if (this.rules.block.hr.test(src)) { // End list if we encounter an HR (possibly move into itemRegex?)
break;
}

// Determine whether the next list item belongs here.
// Backpedal if it does not belong in this list.
if (i !== l - 1) {
bnext = this.rules.block.listItemStart.exec(itemMatch[i + 1]);
if (
!this.options.pedantic
? bnext[1].length >= bcurr[0].length || bnext[1].length > 3
: bnext[1].length > bcurr[1].length
) {
// nested list or continuation
itemMatch.splice(i, 2, itemMatch[i] + (!this.options.pedantic && bnext[1].length < bcurr[0].length && !itemMatch[i].match(/\n$/) ? '' : '\n') + itemMatch[i + 1]);
i--;
l--;
continue;
} else if (
// different bullet style
!this.options.pedantic || this.options.smartLists
? bnext[2][bnext[2].length - 1] !== bull[bull.length - 1]
: isordered === (bnext[2].length === 1)
) {
addBack = itemMatch.slice(i + 1).join('\n').length;
list.raw = list.raw.substring(0, list.raw.length - addBack);
i = l - 1;
}
bcurr = bnext;
if (!(cap = itemRegex.exec(src))) {
break;
}

// Remove the list item's bullet
// so it is seen as the next token.
space = item.length;
item = item.replace(/^ *([*+-]|\d+[.)]) ?/, '');

// Outdent whatever the
// list item contains. Hacky.
if (~item.indexOf('\n ')) {
space -= item.length;
item = !this.options.pedantic
? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
: item.replace(/^ {1,4}/gm, '');
lines = cap[2].split('\n');

if (this.options.pedantic) {
indent = 2;
itemContents = lines[0].trimLeft();
} else {
indent = cap[2].search(/[^ ]/); // Find first non-space char
indent = cap[1].length + (indent > 4 ? 1 : indent); // more than 4 spaces starts an indented code block; in that case the indent is always 1
itemContents = lines[0].slice(indent - cap[1].length);
}

// trim item newlines at end
item = rtrim(item, '\n');
if (i !== l - 1) {
raw = raw + '\n';
blankLine = false;
raw = cap[0];

if (!lines[0] && /^ *$/.test(lines[1])) { // items begin with at most one blank line
raw = cap[1] + lines.slice(0, 2).join('\n') + '\n';
list.loose = true;
lines = [];
}

// Determine whether item is loose or not.
// Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
// for discount behavior.
loose = next || /\n\n(?!\s*$)/.test(raw);
if (i !== l - 1) {
next = raw.slice(-2) === '\n\n';
if (!loose) loose = next;
const nextBulletRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])`);

for (i = 1; i < lines.length; i++) {
line = lines[i];

if (this.options.pedantic) { // Re-align to follow commonmark nesting rules
line = line.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
}

// End list item if found start of new bullet
if (nextBulletRegex.test(line)) {
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
break;
}

// Until we encounter a blank line, item contents do not need indentation
if (!blankLine) {
if (!line.trim()) { // Check if current line is empty
blankLine = true;
}

// Dedent if possible
if (line.search(/[^ ]/) >= indent) {
itemContents += '\n' + line.slice(indent);
} else {
itemContents += '\n' + line;
}
continue;
}

// Dedent this line
if (line.search(/[^ ]/) >= indent || !line.trim()) {
itemContents += '\n' + line.slice(indent);
continue;
} else { // Line was not properly indented; end of this item
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
break;
}
}

if (loose) {
list.loose = true;
if (!list.loose) {
// If the previous item ended with a blank line, the list is loose
if (endsWithBlankLine) {
list.loose = true;
} else if (/\n *\n *$/.test(raw)) {
endsWithBlankLine = true;
}
}

// Check for task list items
if (this.options.gfm) {
istask = /^\[[ xX]\] /.test(item);
ischecked = undefined;
istask = /^\[[ xX]\] /.exec(itemContents);
if (istask) {
ischecked = item[1] !== ' ';
item = item.replace(/^\[[ xX]\] +/, '');
ischecked = istask[0] !== '[ ] ';
itemContents = itemContents.replace(/^\[[ xX]\] +/, '');
}
}

this.lexer.state.top = false;

const token = {
list.items.push({
type: 'list_item',
raw,
task: istask,
raw: raw,
task: !!istask,
checked: ischecked,
loose: loose,
text: item,
tokens: this.lexer.blockTokens(item, [])
};
loose: false,
text: itemContents
});

// this.lexer.inline(token.text, )
list.items.push(token);
list.raw += raw;
src = src.slice(raw.length);
}

// l2 = token.items.length;
// for (j = 0; j < l2; j++) {
// this.inline(token.items[j].tokens);
// }
// break;
// Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
list.items[list.items.length - 1].raw = raw.trimRight();
list.items[list.items.length - 1].text = itemContents.trimRight();
list.raw = list.raw.trimRight();

const l = list.items.length;

// Item child tokens handled here at end because we needed to have the final item to trim it first
for (i = 0; i < l; i++) {
this.lexer.state.top = false;
UziTech marked this conversation as resolved.
Show resolved Hide resolved
list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);
if (list.items[i].tokens.some(t => t.type === 'space')) {
list.loose = true;
list.items[i].loose = true;
}
}

return list;
}
Expand Down
9 changes: 2 additions & 7 deletions src/rules.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ const {
const block = {
newline: /^(?: *(?:\n|$))+/,
code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/,
fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?! {0,3}bull )\n*|\s*$)/,
list: /^( {0,3}bull)( [^\n]+?)?(?:\n|$)/,
html: '^ {0,3}(?:' // optional indentation
+ '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
+ '|comment[^\\n]*(\\n+|$)' // (2)
Expand Down Expand Up @@ -42,11 +42,6 @@ block.def = edit(block.def)
.getRegex();

block.bullet = /(?:[*+-]|\d{1,9}[.)])/;
block.item = /^( *)(bull) ?[^\n]*(?:\n(?! *bull ?)[^\n]*)*/;
block.item = edit(block.item, 'gm')
.replace(/bull/g, block.bullet)
.getRegex();

block.listItemStart = edit(/^( *)(bull) */)
.replace('bull', block.bullet)
.getRegex();
Expand Down
Loading