Date: Wed, 17 Jun 2020 16:09:42 -0400
Subject: [PATCH 12/24] Remove extra tests accidentally left in
---
test/specs/what/em_left_square_bracket.html | 4 ----
test/specs/what/em_left_square_bracket.md | 10 ----------
test/specs/what/em_left_square_bracket0.html | 1 -
test/specs/what/em_left_square_bracket0.md | 1 -
test/specs/what/em_left_square_bracket00.html | 1 -
test/specs/what/em_left_square_bracket00.md | 1 -
test/specs/what/em_left_square_bracket000.html | 1 -
test/specs/what/em_left_square_bracket000.md | 1 -
test/specs/what/em_left_square_bracket0000.html | 1 -
test/specs/what/em_left_square_bracket0000.md | 1 -
test/specs/whats/strong_and_em_together.html | 7 -------
test/specs/whats/strong_and_em_together.md | 7 -------
12 files changed, 36 deletions(-)
delete mode 100644 test/specs/what/em_left_square_bracket.html
delete mode 100644 test/specs/what/em_left_square_bracket.md
delete mode 100644 test/specs/what/em_left_square_bracket0.html
delete mode 100644 test/specs/what/em_left_square_bracket0.md
delete mode 100644 test/specs/what/em_left_square_bracket00.html
delete mode 100644 test/specs/what/em_left_square_bracket00.md
delete mode 100644 test/specs/what/em_left_square_bracket000.html
delete mode 100644 test/specs/what/em_left_square_bracket000.md
delete mode 100644 test/specs/what/em_left_square_bracket0000.html
delete mode 100644 test/specs/what/em_left_square_bracket0000.md
delete mode 100644 test/specs/whats/strong_and_em_together.html
delete mode 100644 test/specs/whats/strong_and_em_together.md
diff --git a/test/specs/what/em_left_square_bracket.html b/test/specs/what/em_left_square_bracket.html
deleted file mode 100644
index 7132caf872..0000000000
--- a/test/specs/what/em_left_square_bracket.html
+++ /dev/null
@@ -1,4 +0,0 @@
-[[punctuation, asterisk, punctuation should work
-[space, asterisk, punctuation should work
-pnon-punctuation, asterisk, non-punctuation should work
-p*[non-punctuation, asterisk, punctuation should NOT work*
diff --git a/test/specs/what/em_left_square_bracket.md b/test/specs/what/em_left_square_bracket.md
deleted file mode 100644
index 4f6d7a70de..0000000000
--- a/test/specs/what/em_left_square_bracket.md
+++ /dev/null
@@ -1,10 +0,0 @@
-[*[punctuation, asterisk, punctuation should work*
-
-
- *[space, asterisk, punctuation should work*
-
-
-p*non-punctuation, asterisk, non-punctuation should work*
-
-
-p*[non-punctuation, asterisk, punctuation should NOT work*
diff --git a/test/specs/what/em_left_square_bracket0.html b/test/specs/what/em_left_square_bracket0.html
deleted file mode 100644
index 8f88af1862..0000000000
--- a/test/specs/what/em_left_square_bracket0.html
+++ /dev/null
@@ -1 +0,0 @@
-foo bar
diff --git a/test/specs/what/em_left_square_bracket0.md b/test/specs/what/em_left_square_bracket0.md
deleted file mode 100644
index 300738b412..0000000000
--- a/test/specs/what/em_left_square_bracket0.md
+++ /dev/null
@@ -1 +0,0 @@
-*foo *bar**
diff --git a/test/specs/what/em_left_square_bracket00.html b/test/specs/what/em_left_square_bracket00.html
deleted file mode 100644
index 67ef9766e2..0000000000
--- a/test/specs/what/em_left_square_bracket00.html
+++ /dev/null
@@ -1 +0,0 @@
-foo bar baz
diff --git a/test/specs/what/em_left_square_bracket00.md b/test/specs/what/em_left_square_bracket00.md
deleted file mode 100644
index bc4e9514af..0000000000
--- a/test/specs/what/em_left_square_bracket00.md
+++ /dev/null
@@ -1 +0,0 @@
-*foo **bar** baz*
diff --git a/test/specs/what/em_left_square_bracket000.html b/test/specs/what/em_left_square_bracket000.html
deleted file mode 100644
index 2a71b393e9..0000000000
--- a/test/specs/what/em_left_square_bracket000.html
+++ /dev/null
@@ -1 +0,0 @@
-foo _
diff --git a/test/specs/what/em_left_square_bracket000.md b/test/specs/what/em_left_square_bracket000.md
deleted file mode 100644
index 7bc24b7d2e..0000000000
--- a/test/specs/what/em_left_square_bracket000.md
+++ /dev/null
@@ -1 +0,0 @@
-foo *_*
diff --git a/test/specs/what/em_left_square_bracket0000.html b/test/specs/what/em_left_square_bracket0000.html
deleted file mode 100644
index cd620e6acf..0000000000
--- a/test/specs/what/em_left_square_bracket0000.html
+++ /dev/null
@@ -1 +0,0 @@
-(foo)
diff --git a/test/specs/what/em_left_square_bracket0000.md b/test/specs/what/em_left_square_bracket0000.md
deleted file mode 100644
index 261a3189a0..0000000000
--- a/test/specs/what/em_left_square_bracket0000.md
+++ /dev/null
@@ -1 +0,0 @@
-*(**foo**)*
diff --git a/test/specs/whats/strong_and_em_together.html b/test/specs/whats/strong_and_em_together.html
deleted file mode 100644
index 71ec78c709..0000000000
--- a/test/specs/whats/strong_and_em_together.html
+++ /dev/null
@@ -1,7 +0,0 @@
-This is strong and em.
-
-So is this word.
-
-This is strong and em.
-
-So is this word.
diff --git a/test/specs/whats/strong_and_em_together.md b/test/specs/whats/strong_and_em_together.md
deleted file mode 100644
index 95ee690dbe..0000000000
--- a/test/specs/whats/strong_and_em_together.md
+++ /dev/null
@@ -1,7 +0,0 @@
-***This is strong and em.***
-
-So is ***this*** word.
-
-___This is strong and em.___
-
-So is ___this___ word.
From 54218fe1644cf216a014fa15efa51e4ab1ccf056 Mon Sep 17 00:00:00 2001
From: Trevor Buckner
Date: Wed, 17 Jun 2020 17:09:18 -0400
Subject: [PATCH 13/24] Remove straggling "shouldfail: false"
---
test/specs/gfm/commonmark.0.29.json | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/test/specs/gfm/commonmark.0.29.json b/test/specs/gfm/commonmark.0.29.json
index 2200dde2cb..8b53f95a0d 100644
--- a/test/specs/gfm/commonmark.0.29.json
+++ b/test/specs/gfm/commonmark.0.29.json
@@ -2974,8 +2974,7 @@
"example": 367,
"start_line": 6455,
"end_line": 6459,
- "section": "Emphasis and strong emphasis",
- "shouldFail":false
+ "section": "Emphasis and strong emphasis"
},
{
"markdown": "*(*foo*)*\n",
From 2a45677bf054b818bee4a8261f6c3cc93f5639c5 Mon Sep 17 00:00:00 2001
From: Trevor Buckner
Date: Thu, 18 Jun 2020 10:00:27 -0400
Subject: [PATCH 14/24] Remove redundant regex symbols
Found a few more cases of redundant symbols in addition to fixes suggested during review.
---
src/rules.js | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/rules.js b/src/rules.js
index ab2f963817..3b081227cf 100644
--- a/src/rules.js
+++ b/src/rules.js
@@ -168,16 +168,16 @@ const inline = {
link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
- preStrong: /^(?:\*\*)|(?:__)/,
- strong: /^(?:(\*\*(?=[`\]\*punctuation]))|\*\*)(?![\s])((?:(?:(?!emSkip)(?:[^\*]|[\\\s]\*)|emSkip)|(?:(?:(?!emSkip)(?:[^\*]|[\\\s]\*)|emSkip)*?(?
Date: Sat, 20 Jun 2020 10:25:48 -0500
Subject: [PATCH 15/24] mask reflinks
---
src/Tokenizer.js | 40 +++++++++++++++++-----------------------
src/rules.js | 6 ++++++
2 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index d0c86d3855..3b678e124f 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -58,6 +58,21 @@ function indentCodeCompensation(raw, text) {
.join('\n');
}
+function maskReflinks(text, links) {
+ if (links) {
+ links = Object.keys(links).filter(l => l.match(/[*_]/));
+ if (links.length > 0) {
+ let match;
+ while ((match = this.rules.inline.reflinkSearch.exec(text)) != null) {
+ if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
+ text = text.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + text.slice(this.rules.inline.reflinkSearch.lastIndex);
+ }
+ }
+ }
+ }
+ return text;
+}
+
/**
* Tokenizer
*/
@@ -493,17 +508,7 @@ module.exports = class Tokenizer {
let cap = this.rules.inline.preStrong.exec(src);
if (cap) {
- let text = src;
- if (links) {
- links = Object.keys(links);
- const reg = /(?:\[.*?\]\[.*?\])|(?:\[.*?\](?!\())/g;
- let match;
- while ((match = reg.exec(text)) != null) {
- if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
- text = text.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + text.slice(reg.lastIndex);
- }
- }
- }
+ const text = maskReflinks(src, links);
cap = this.rules.inline.strong.exec(text);
@@ -523,18 +528,7 @@ module.exports = class Tokenizer {
let cap = this.rules.inline.preEm.exec(src);
if (cap) {
- let text = src;
-
- if (links) {
- links = Object.keys(links);
- const reg = /(?:\[.*?\]\[.*?\])|(?:\[.*?\](?!\())/g;
- let match;
- while ((match = reg.exec(text)) != null) {
- if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
- text = text.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + text.slice(reg.lastIndex);
- }
- }
- }
+ const text = maskReflinks(src, links);
cap = this.rules.inline.em.exec(text);
diff --git a/src/rules.js b/src/rules.js
index 3b081227cf..19b420e97f 100644
--- a/src/rules.js
+++ b/src/rules.js
@@ -168,6 +168,7 @@ const inline = {
link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
+ reflinkSearch: 'reflink|nolink(?!\\()',
preStrong: /^(?:\*\*|__)/,
strong: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])((?:(?:(?!emSkip)(?:[^*]|[\\\s]\*)|emSkip)|(?:(?:(?!emSkip)(?:[^*]|[\\\s]\*)|emSkip)*?(?
Date: Tue, 30 Jun 2020 17:49:11 -0400
Subject: [PATCH 16/24] Links are masked only once per inline string
---
src/Lexer.js | 18 ++++++++++++++++--
src/Tokenizer.js | 29 ++++++-----------------------
test/specs/new/em_and_reflinks.html | 1 +
test/specs/new/em_and_reflinks.md | 4 ++++
4 files changed, 27 insertions(+), 25 deletions(-)
diff --git a/src/Lexer.js b/src/Lexer.js
index 4bb2750dc2..587fb9b03c 100644
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -322,6 +322,20 @@ module.exports = class Lexer {
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false, prevChar = '') {
let token;
+ // String with links masked to avoid interference with em and strong
+ let maskedSrc = src;
+ if (this.tokens.links) {
+ const links = Object.keys(this.tokens.links);
+ if (links.length > 0) {
+ let match;
+ while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
+ if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
+ maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
+ }
+ }
+ }
+ }
+
while (src) {
// escape
if (token = this.tokenizer.escape(src)) {
@@ -360,7 +374,7 @@ module.exports = class Lexer {
}
// strong
- if (token = this.tokenizer.strong(src, prevChar, this.tokens.links)) {
+ if (token = this.tokenizer.strong(src, maskedSrc, prevChar)) {
src = src.substring(token.raw.length);
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
tokens.push(token);
@@ -368,7 +382,7 @@ module.exports = class Lexer {
}
// em
- if (token = this.tokenizer.em(src, prevChar, this.tokens.links)) {
+ if (token = this.tokenizer.em(src, maskedSrc, prevChar)) {
src = src.substring(token.raw.length);
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
tokens.push(token);
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 3b678e124f..419b8bb711 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -58,21 +58,6 @@ function indentCodeCompensation(raw, text) {
.join('\n');
}
-function maskReflinks(text, links) {
- if (links) {
- links = Object.keys(links).filter(l => l.match(/[*_]/));
- if (links.length > 0) {
- let match;
- while ((match = this.rules.inline.reflinkSearch.exec(text)) != null) {
- if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
- text = text.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + text.slice(this.rules.inline.reflinkSearch.lastIndex);
- }
- }
- }
- }
- return text;
-}
-
/**
* Tokenizer
*/
@@ -504,13 +489,12 @@ module.exports = class Tokenizer {
}
}
- strong(src, prevChar = '', links) {
+ strong(src, maskedSrc, prevChar = '') {
let cap = this.rules.inline.preStrong.exec(src);
if (cap) {
- const text = maskReflinks(src, links);
-
- cap = this.rules.inline.strong.exec(text);
+ maskedSrc = maskedSrc.slice(-1*src.length);
+ cap = this.rules.inline.strong.exec(maskedSrc);
if (cap) {
if (!cap[1] || (cap[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
@@ -524,13 +508,12 @@ module.exports = class Tokenizer {
}
}
- em(src, prevChar = '', links) {
+ em(src, maskedSrc, prevChar = '') {
let cap = this.rules.inline.preEm.exec(src);
if (cap) {
- const text = maskReflinks(src, links);
-
- cap = this.rules.inline.em.exec(text);
+ maskedSrc = maskedSrc.slice(-1*src.length);
+ cap = this.rules.inline.em.exec(maskedSrc);
if (cap) {
if (!cap[1] || (cap[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
diff --git a/test/specs/new/em_and_reflinks.html b/test/specs/new/em_and_reflinks.html
index 45953c61e3..32eb3d41aa 100644
--- a/test/specs/new/em_and_reflinks.html
+++ b/test/specs/new/em_and_reflinks.html
@@ -2,3 +2,4 @@
Hello [notreflink] guys*!
Hello [notareflink] guys!
Helloreflink*bottomguys!
+Helloreflinknoemguys!
diff --git a/test/specs/new/em_and_reflinks.md b/test/specs/new/em_and_reflinks.md
index 19ddd9df3c..09c9b66bb3 100644
--- a/test/specs/new/em_and_reflinks.md
+++ b/test/specs/new/em_and_reflinks.md
@@ -8,4 +8,8 @@
*Hello [reflink*bottom] guys*!
+*Hello [reflinknoem] guys*!
+
[reflink*bottom]: theaddress
+
+[reflinknoem]: theaddress
From 4e7902ec11639ef02bea37f8d50b8416b8ce31e5 Mon Sep 17 00:00:00 2001
From: Trevor Buckner
Date: Tue, 30 Jun 2020 17:50:19 -0400
Subject: [PATCH 17/24] Gaaaah lint
---
src/Tokenizer.js | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 419b8bb711..410255acc7 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -493,7 +493,7 @@ module.exports = class Tokenizer {
let cap = this.rules.inline.preStrong.exec(src);
if (cap) {
- maskedSrc = maskedSrc.slice(-1*src.length);
+ maskedSrc = maskedSrc.slice(-1 * src.length);
cap = this.rules.inline.strong.exec(maskedSrc);
if (cap) {
@@ -512,7 +512,7 @@ module.exports = class Tokenizer {
let cap = this.rules.inline.preEm.exec(src);
if (cap) {
- maskedSrc = maskedSrc.slice(-1*src.length);
+ maskedSrc = maskedSrc.slice(-1 * src.length);
cap = this.rules.inline.em.exec(maskedSrc);
if (cap) {
From bd4f8c464befad2b304d51e33e89e567326e62e0 Mon Sep 17 00:00:00 2001
From: Trevor Buckner
Date: Thu, 2 Jul 2020 11:59:31 -0400
Subject: [PATCH 18/24] Fix unrestricted "any character" for REDOS
And remove redundant unused capture group.
---
src/rules.js | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/rules.js b/src/rules.js
index 19b420e97f..6c9269ef6f 100644
--- a/src/rules.js
+++ b/src/rules.js
@@ -173,7 +173,7 @@ const inline = {
strong: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])((?:(?:(?!emSkip)(?:[^*]|[\\\s]\*)|emSkip)|(?:(?:(?!emSkip)(?:[^*]|[\\\s]\*)|emSkip)*?(??@\\[\\]`^{|}~';
inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();
// sequences em should skip over [title](link), `code`,
-inline._emSkip = '\\[.*?\\]\\(.*?\\)|`.*?`|<.*?>';
+inline._emSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
inline.em = edit(inline.em)
.replace(/punctuation/g, inline._punctuation)
From 211b9f9a201df6846c3943a403064ab9d13ac146 Mon Sep 17 00:00:00 2001
From: Trevor Buckner
Date: Wed, 8 Jul 2020 16:00:12 -0400
Subject: [PATCH 19/24] Removed Lookbehinds
The beginning and end delimiters for em and strong must now be searched for with separate regexes so that matching works without lookbehinds. This invalidates the regex that skipped over blocks (code, HTML, etc.) that take precedence over em or strong blocks.
To get around this, we must now mask not only reflinks but also all enclosed blocks that were previously just skipped over in the regex.
Also add one check for an overlapping strong block when testing em; CommonMark examples 390 and 471 now pass.
---
src/Lexer.js | 8 ++++-
src/Tokenizer.js | 40 ++++++++++++++++++----
src/rules.js | 38 +++++++++++++++++---
test/specs/commonmark/commonmark.0.29.json | 6 ++--
test/specs/gfm/commonmark.0.29.json | 6 ++--
5 files changed, 78 insertions(+), 20 deletions(-)
diff --git a/src/Lexer.js b/src/Lexer.js
index 587fb9b03c..d04a4e6f74 100644
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -324,10 +324,12 @@ module.exports = class Lexer {
// String with links masked to avoid interference with em and strong
let maskedSrc = src;
+ let match;
+
+ // Mask out reflinks
if (this.tokens.links) {
const links = Object.keys(this.tokens.links);
if (links.length > 0) {
- let match;
while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
@@ -335,6 +337,10 @@ module.exports = class Lexer {
}
}
}
+ // Mask out other blocks
+ while ((match = this.tokenizer.rules.inline.emSkip.exec(maskedSrc)) != null) {
+ maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.emSkip.lastIndex);
+ }
while (src) {
// escape
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 410255acc7..9d7ca9cce5 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -490,11 +490,25 @@ module.exports = class Tokenizer {
}
strong(src, maskedSrc, prevChar = '') {
- let cap = this.rules.inline.preStrong.exec(src);
+ let match = this.rules.inline.strStart.exec(src);
- if (cap) {
+ if (match) {
maskedSrc = maskedSrc.slice(-1 * src.length);
- cap = this.rules.inline.strong.exec(maskedSrc);
+ let strEnd;
+
+ if(match[0] == "**")
+ strEnd = this.rules.inline.strEndAst;
+ else
+ strEnd = this.rules.inline.strEndUnd;
+
+ strEnd.lastIndex = 0;
+
+ let cap;
+ while ((match = strEnd.exec(maskedSrc)) != null) {
+ cap = this.rules.inline.strong.exec(maskedSrc.slice(0,match.index+3));
+ if (cap)
+ break;
+ }
if (cap) {
if (!cap[1] || (cap[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
@@ -509,11 +523,25 @@ module.exports = class Tokenizer {
}
em(src, maskedSrc, prevChar = '') {
- let cap = this.rules.inline.preEm.exec(src);
+ let match = this.rules.inline.emStart.exec(src);
- if (cap) {
+ if (match) {
maskedSrc = maskedSrc.slice(-1 * src.length);
- cap = this.rules.inline.em.exec(maskedSrc);
+ let emEnd;
+
+ if(match[0] == "*")
+ emEnd = this.rules.inline.emEndAst;
+ else
+ emEnd = this.rules.inline.emEndUnd;
+
+ emEnd.lastIndex = 0;
+
+ let cap;
+ while ((match = emEnd.exec(maskedSrc)) != null) {
+ cap = this.rules.inline.em.exec(maskedSrc.slice(0,match.index+2));
+ if (cap)
+ break;
+ }
if (cap) {
if (!cap[1] || (cap[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
diff --git a/src/rules.js b/src/rules.js
index 6c9269ef6f..cd4de697de 100644
--- a/src/rules.js
+++ b/src/rules.js
@@ -169,11 +169,15 @@ const inline = {
reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
reflinkSearch: 'reflink|nolink(?!\\()',
- preStrong: /^(?:\*\*|__)/,
- strong: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])((?:(?:(?!emSkip)(?:[^*]|[\\\s]\*)|emSkip)|(?:(?:(?!emSkip)(?:[^*]|[\\\s]\*)|emSkip)*?(?
inline._emSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
+inline._strSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
+inline._evSkip = '__[^_]*?__';
inline.em = edit(inline.em)
.replace(/punctuation/g, inline._punctuation)
- .replace(/emSkip/g, inline._emSkip)
+ .replace(/evSkip/g, inline._evSkip)
+ .getRegex();
+
+inline.emEndAst = edit(inline.emEndAst, 'g')
+ .replace(/punctuation/g, inline._punctuation)
+ .getRegex();
+
+inline.emEndUnd = edit(inline.emEndUnd, 'g')
+ .replace(/punctuation/g, inline._punctuation)
+ .getRegex();
+
+inline.emSkip = edit(inline._emSkip, 'g')
+ .getRegex();
+
+inline.evSkip = edit(inline._evSkip, 'g')
.getRegex();
inline.strong = edit(inline.strong)
@@ -199,6 +219,14 @@ inline.strong = edit(inline.strong)
.replace(/emSkip/g, inline._emSkip)
.getRegex();
+inline.strEndAst = edit(inline.strEndAst, 'g')
+ .replace(/punctuation/g, inline._punctuation)
+ .getRegex();
+
+inline.strEndUnd = edit(inline.strEndUnd, 'g')
+ .replace(/punctuation/g, inline._punctuation)
+ .getRegex();
+
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
diff --git a/test/specs/commonmark/commonmark.0.29.json b/test/specs/commonmark/commonmark.0.29.json
index 8f24dcacb8..b49e4ac4a0 100644
--- a/test/specs/commonmark/commonmark.0.29.json
+++ b/test/specs/commonmark/commonmark.0.29.json
@@ -3160,8 +3160,7 @@
"example": 390,
"start_line": 6672,
"end_line": 6676,
- "section": "Emphasis and strong emphasis",
- "shouldFail": true
+ "section": "Emphasis and strong emphasis"
},
{
"markdown": "**(**foo)\n",
@@ -3828,8 +3827,7 @@
"example": 471,
"start_line": 7355,
"end_line": 7359,
- "section": "Emphasis and strong emphasis",
- "shouldFail": true
+ "section": "Emphasis and strong emphasis"
},
{
"markdown": "*[bar*](/url)\n",
diff --git a/test/specs/gfm/commonmark.0.29.json b/test/specs/gfm/commonmark.0.29.json
index 8b53f95a0d..192186f19f 100644
--- a/test/specs/gfm/commonmark.0.29.json
+++ b/test/specs/gfm/commonmark.0.29.json
@@ -3160,8 +3160,7 @@
"example": 390,
"start_line": 6672,
"end_line": 6676,
- "section": "Emphasis and strong emphasis",
- "shouldFail": true
+ "section": "Emphasis and strong emphasis"
},
{
"markdown": "**(**foo)\n",
@@ -3828,8 +3827,7 @@
"example": 471,
"start_line": 7355,
"end_line": 7359,
- "section": "Emphasis and strong emphasis",
- "shouldFail": true
+ "section": "Emphasis and strong emphasis"
},
{
"markdown": "*[bar*](/url)\n",
From cc778ade42ec052f0b28315551d67a57b4681944 Mon Sep 17 00:00:00 2001
From: Trevor Buckner
Date: Wed, 8 Jul 2020 16:58:58 -0400
Subject: [PATCH 20/24] Removed redundancy in "startEM" check
---
src/Lexer.js | 4 ++--
src/Tokenizer.js | 30 +++++++++++++-----------------
src/rules.js | 39 +++++++++++++++++++++++----------------
3 files changed, 38 insertions(+), 35 deletions(-)
diff --git a/src/Lexer.js b/src/Lexer.js
index d04a4e6f74..bef990b5bc 100644
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -338,8 +338,8 @@ module.exports = class Lexer {
}
}
// Mask out other blocks
- while ((match = this.tokenizer.rules.inline.emSkip.exec(maskedSrc)) != null) {
- maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.emSkip.lastIndex);
+ while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
+ maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
}
while (src) {
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 9d7ca9cce5..452a04bf22 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -491,8 +491,8 @@ module.exports = class Tokenizer {
strong(src, maskedSrc, prevChar = '') {
let match = this.rules.inline.strStart.exec(src);
-
- if (match) {
+
+ if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
maskedSrc = maskedSrc.slice(-1 * src.length);
let strEnd;
@@ -511,13 +511,11 @@ module.exports = class Tokenizer {
}
if (cap) {
- if (!cap[1] || (cap[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
- return {
- type: 'strong',
- raw: src.slice(0, cap[0].length),
- text: src.slice(2, cap[0].length - 2)
- };
- }
+ return {
+ type: 'strong',
+ raw: src.slice(0, cap[0].length),
+ text: src.slice(2, cap[0].length - 2)
+ };
}
}
}
@@ -525,7 +523,7 @@ module.exports = class Tokenizer {
em(src, maskedSrc, prevChar = '') {
let match = this.rules.inline.emStart.exec(src);
- if (match) {
+ if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
maskedSrc = maskedSrc.slice(-1 * src.length);
let emEnd;
@@ -544,13 +542,11 @@ module.exports = class Tokenizer {
}
if (cap) {
- if (!cap[1] || (cap[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
- return {
- type: 'em',
- raw: src.slice(0, cap[0].length),
- text: src.slice(1, cap[0].length - 1)
- };
- }
+ return {
+ type: 'em',
+ raw: src.slice(0, cap[0].length),
+ text: src.slice(1, cap[0].length - 1)
+ };
}
}
}
diff --git a/src/rules.js b/src/rules.js
index cd4de697de..f79b74c1e4 100644
--- a/src/rules.js
+++ b/src/rules.js
@@ -169,15 +169,15 @@ const inline = {
reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
reflinkSearch: 'reflink|nolink(?!\\()',
- strStart: /^\*\*|__/,
- strEndAst: /[^punctuation\s]\*\*(?!\*)|[punctuation]\*\*(?!\*)(?:(?=[punctuation\s]|$))/,
- strEndUnd: /[^\s]__(?!_)(?:(?=[punctuation\s])|$)/,
- strong: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])((?:(?:(?!evSkip)(?:[^*]|\\\*)|evSkip)|(?:(?:(?!evSkip)(?:[^*]|\\\*)|evSkip)*?(??@\\[\\]`^{|}~';
inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();
// sequences em should skip over [title](link), `code`,
-inline._emSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
-inline._strSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
-inline._evSkip = '__[^_]*?__';
+inline._blockSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
+inline._overlapSkip = '__[^_]*?__|\\*\\*\\[^\\*\\]*?\\*\\*';
inline.em = edit(inline.em)
.replace(/punctuation/g, inline._punctuation)
- .replace(/evSkip/g, inline._evSkip)
+ .replace(/overlapSkip/g, inline._overlapSkip)
.getRegex();
+inline.emStart = edit(inline.emStart)
+ .replace(/punctuation/g, inline._punctuation)
+ .getRegex();
+
inline.emEndAst = edit(inline.emEndAst, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();
@@ -208,17 +211,21 @@ inline.emEndUnd = edit(inline.emEndUnd, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();
-inline.emSkip = edit(inline._emSkip, 'g')
+inline.blockSkip = edit(inline._blockSkip, 'g')
.getRegex();
-inline.evSkip = edit(inline._evSkip, 'g')
+inline.overlapSkip = edit(inline._overlapSkip, 'g')
.getRegex();
inline.strong = edit(inline.strong)
.replace(/punctuation/g, inline._punctuation)
- .replace(/emSkip/g, inline._emSkip)
+ .replace(/blockSkip/g, inline._blockSkip)
.getRegex();
+inline.strStart = edit(inline.strStart)
+ .replace(/punctuation/g, inline._punctuation)
+ .getRegex();
+
inline.strEndAst = edit(inline.strEndAst, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();
From 226bbe70b70dc325232be5606b152a62a3f09487 Mon Sep 17 00:00:00 2001
From: Trevor Buckner
Date: Wed, 8 Jul 2020 17:01:42 -0400
Subject: [PATCH 21/24] Lint
---
src/Tokenizer.js | 50 ++++++++++++++++++------------------------------
src/rules.js | 10 +++++-----
2 files changed, 24 insertions(+), 36 deletions(-)
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 452a04bf22..e331c8baf9 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -491,31 +491,25 @@ module.exports = class Tokenizer {
strong(src, maskedSrc, prevChar = '') {
let match = this.rules.inline.strStart.exec(src);
-
+
if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
maskedSrc = maskedSrc.slice(-1 * src.length);
let strEnd;
- if(match[0] == "**")
- strEnd = this.rules.inline.strEndAst;
- else
- strEnd = this.rules.inline.strEndUnd;
+ if (match[0] === '**') { strEnd = this.rules.inline.strEndAst; } else { strEnd = this.rules.inline.strEndUnd; }
strEnd.lastIndex = 0;
let cap;
while ((match = strEnd.exec(maskedSrc)) != null) {
- cap = this.rules.inline.strong.exec(maskedSrc.slice(0,match.index+3));
- if (cap)
- break;
- }
-
- if (cap) {
- return {
- type: 'strong',
- raw: src.slice(0, cap[0].length),
- text: src.slice(2, cap[0].length - 2)
- };
+ cap = this.rules.inline.strong.exec(maskedSrc.slice(0, match.index + 3));
+ if (cap) {
+ return {
+ type: 'strong',
+ raw: src.slice(0, cap[0].length),
+ text: src.slice(2, cap[0].length - 2)
+ };
+ }
}
}
}
@@ -527,26 +521,20 @@ module.exports = class Tokenizer {
maskedSrc = maskedSrc.slice(-1 * src.length);
let emEnd;
- if(match[0] == "*")
- emEnd = this.rules.inline.emEndAst;
- else
- emEnd = this.rules.inline.emEndUnd;
+ if (match[0] === '*') { emEnd = this.rules.inline.emEndAst; } else { emEnd = this.rules.inline.emEndUnd; }
emEnd.lastIndex = 0;
let cap;
while ((match = emEnd.exec(maskedSrc)) != null) {
- cap = this.rules.inline.em.exec(maskedSrc.slice(0,match.index+2));
- if (cap)
- break;
- }
-
- if (cap) {
- return {
- type: 'em',
- raw: src.slice(0, cap[0].length),
- text: src.slice(1, cap[0].length - 1)
- };
+ cap = this.rules.inline.em.exec(maskedSrc.slice(0, match.index + 2));
+ if (cap) {
+ return {
+ type: 'em',
+ raw: src.slice(0, cap[0].length),
+ text: src.slice(1, cap[0].length - 1)
+ };
+ }
}
}
}
diff --git a/src/rules.js b/src/rules.js
index f79b74c1e4..63b99a8dec 100644
--- a/src/rules.js
+++ b/src/rules.js
@@ -176,7 +176,7 @@ const inline = {
emStart: /^(?:(\*(?=[punctuation]))|\*)(?![*\s])|_/, // (1) returns if starts w/ punctuation
emEndAst: /[^punctuation\s]\*(?!\*)|[punctuation]\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline)
emEndUnd: /[^\s]_(?!_)(?:(?=[punctuation\s])|$)/, // last char can't be a space, and final _ must preceed punct or \s (or endline)
- // ⬐ skip overlapping Strong ⬐repeat logic for inner *'s (must be in pairs)| Underscores ⬐ skip overlapping Strong ⬐repeat logic for inner _'s (must be in pairs)⬎
+ // ⬐ skip overlapping Strong ⬐repeat logic for inner *'s (must be in pairs)| Underscores ⬐ skip overlapping Strong ⬐repeat logic for inner _'s (must be in pairs)⬎
em: /^\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*$|^_(?![_\s])(?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?_$/,
code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
br: /^( {2,}|\\)\n(?!\s*$)/,
@@ -200,8 +200,8 @@ inline.em = edit(inline.em)
.getRegex();
inline.emStart = edit(inline.emStart)
- .replace(/punctuation/g, inline._punctuation)
- .getRegex();
+ .replace(/punctuation/g, inline._punctuation)
+ .getRegex();
inline.emEndAst = edit(inline.emEndAst, 'g')
.replace(/punctuation/g, inline._punctuation)
@@ -223,8 +223,8 @@ inline.strong = edit(inline.strong)
.getRegex();
inline.strStart = edit(inline.strStart)
- .replace(/punctuation/g, inline._punctuation)
- .getRegex();
+ .replace(/punctuation/g, inline._punctuation)
+ .getRegex();
inline.strEndAst = edit(inline.strEndAst, 'g')
.replace(/punctuation/g, inline._punctuation)
From 1fb141d2755d9a6081fbc608d207ad894a42258a Mon Sep 17 00:00:00 2001
From: Trevor Buckner
Date: Thu, 9 Jul 2020 10:53:48 -0400
Subject: [PATCH 22/24] Make strEnd const
Co-authored-by: Tony Brix
---
src/Tokenizer.js | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index e331c8baf9..9c2e974875 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -494,9 +494,7 @@ module.exports = class Tokenizer {
if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
maskedSrc = maskedSrc.slice(-1 * src.length);
- let strEnd;
-
- if (match[0] === '**') { strEnd = this.rules.inline.strEndAst; } else { strEnd = this.rules.inline.strEndUnd; }
+ const strEnd = match[0] === '**' ? this.rules.inline.strEndAst : this.rules.inline.strEndUnd;
strEnd.lastIndex = 0;
From ad720c1cba4e5cb884785f4d4550e7fadb8d3be1 Mon Sep 17 00:00:00 2001
From: Trevor Buckner
Date: Thu, 9 Jul 2020 10:54:08 -0400
Subject: [PATCH 23/24] Make emEnd const
Co-authored-by: Tony Brix
---
src/Tokenizer.js | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 9c2e974875..80d9f58398 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -517,9 +517,7 @@ module.exports = class Tokenizer {
if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
maskedSrc = maskedSrc.slice(-1 * src.length);
- let emEnd;
-
- if (match[0] === '*') { emEnd = this.rules.inline.emEndAst; } else { emEnd = this.rules.inline.emEndUnd; }
+ const emEnd = match[0] === '*' ? this.rules.inline.emEndAst : this.rules.inline.emEndUnd;
emEnd.lastIndex = 0;
From e27e6f960f0b5a052e6fde496a7109a5acaf9e27 Mon Sep 17 00:00:00 2001
From: Trevor Buckner
Date: Thu, 9 Jul 2020 19:35:22 -0400
Subject: [PATCH 24/24] Sorted strong and em into sub-objects
---
src/Tokenizer.js | 24 +++++++++---------
src/rules.js | 63 +++++++++++++++++++++++++++++-------------------
2 files changed, 50 insertions(+), 37 deletions(-)
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index e331c8baf9..c7d22c8869 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -490,19 +490,19 @@ module.exports = class Tokenizer {
}
strong(src, maskedSrc, prevChar = '') {
- let match = this.rules.inline.strStart.exec(src);
+ let match = this.rules.inline.strong.start.exec(src);
if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
maskedSrc = maskedSrc.slice(-1 * src.length);
- let strEnd;
+ let endReg;
- if (match[0] === '**') { strEnd = this.rules.inline.strEndAst; } else { strEnd = this.rules.inline.strEndUnd; }
+ if (match[0] === '**') { endReg = this.rules.inline.strong.endAst; } else { endReg = this.rules.inline.strong.endUnd; }
- strEnd.lastIndex = 0;
+ endReg.lastIndex = 0;
let cap;
- while ((match = strEnd.exec(maskedSrc)) != null) {
- cap = this.rules.inline.strong.exec(maskedSrc.slice(0, match.index + 3));
+ while ((match = endReg.exec(maskedSrc)) != null) {
+ cap = this.rules.inline.strong.middle.exec(maskedSrc.slice(0, match.index + 3));
if (cap) {
return {
type: 'strong',
@@ -515,19 +515,19 @@ module.exports = class Tokenizer {
}
em(src, maskedSrc, prevChar = '') {
- let match = this.rules.inline.emStart.exec(src);
+ let match = this.rules.inline.em.start.exec(src);
if (match && (!match[1] || (match[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar))))) {
maskedSrc = maskedSrc.slice(-1 * src.length);
- let emEnd;
+ let endReg;
- if (match[0] === '*') { emEnd = this.rules.inline.emEndAst; } else { emEnd = this.rules.inline.emEndUnd; }
+ if (match[0] === '*') { endReg = this.rules.inline.em.endAst; } else { endReg = this.rules.inline.em.endUnd; }
- emEnd.lastIndex = 0;
+ endReg.lastIndex = 0;
let cap;
- while ((match = emEnd.exec(maskedSrc)) != null) {
- cap = this.rules.inline.em.exec(maskedSrc.slice(0, match.index + 2));
+ while ((match = endReg.exec(maskedSrc)) != null) {
+ cap = this.rules.inline.em.middle.exec(maskedSrc.slice(0, match.index + 2));
if (cap) {
return {
type: 'em',
diff --git a/src/rules.js b/src/rules.js
index 63b99a8dec..d4a67278b5 100644
--- a/src/rules.js
+++ b/src/rules.js
@@ -169,15 +169,18 @@ const inline = {
reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
reflinkSearch: 'reflink|nolink(?!\\()',
- strStart: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])|__/, // (1) returns if starts w/ punctuation
- strEndAst: /[^punctuation\s]\*\*(?!\*)|[punctuation]\*\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline)
- strEndUnd: /[^\s]__(?!_)(?:(?=[punctuation\s])|$)/, // last char can't be a space, and final _ must preceed punct or \s (or endline)
- strong: /^\*\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*\*$|^__(?![\s])((?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?)__$/,
- emStart: /^(?:(\*(?=[punctuation]))|\*)(?![*\s])|_/, // (1) returns if starts w/ punctuation
- emEndAst: /[^punctuation\s]\*(?!\*)|[punctuation]\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline)
- emEndUnd: /[^\s]_(?!_)(?:(?=[punctuation\s])|$)/, // last char can't be a space, and final _ must preceed punct or \s (or endline)
- // ⬐ skip overlapping Strong ⬐repeat logic for inner *'s (must be in pairs)| Underscores ⬐ skip overlapping Strong ⬐repeat logic for inner _'s (must be in pairs)⬎
- em: /^\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*$|^_(?![_\s])(?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?_$/,
+ strong: {
+ start: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])|__/, // (1) returns if starts w/ punctuation
+ middle: /^\*\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*\*$|^__(?![\s])((?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?)__$/,
+ endAst: /[^punctuation\s]\*\*(?!\*)|[punctuation]\*\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline)
+ endUnd: /[^\s]__(?!_)(?:(?=[punctuation\s])|$)/ // last char can't be a space, and final _ must precede punct or \s (or endline)
+ },
+ em: {
+ start: /^(?:(\*(?=[punctuation]))|\*)(?![*\s])|_/, // (1) returns if starts w/ punctuation
+ middle: /^\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*$|^_(?![_\s])(?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?_$/,
+ endAst: /[^punctuation\s]\*(?!\*)|[punctuation]\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline)
+ endUnd: /[^\s]_(?!_)(?:(?=[punctuation\s])|$)/ // last char can't be a space, and final _ must precede punct or \s (or endline)
+ },
code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
br: /^( {2,}|\\)\n(?!\s*$)/,
del: noopTest,
@@ -194,44 +197,44 @@ inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._pu
inline._blockSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
inline._overlapSkip = '__[^_]*?__|\\*\\*\\[^\\*\\]*?\\*\\*';
-inline.em = edit(inline.em)
+inline.em.start = edit(inline.em.start)
.replace(/punctuation/g, inline._punctuation)
- .replace(/overlapSkip/g, inline._overlapSkip)
.getRegex();
-inline.emStart = edit(inline.emStart)
+inline.em.middle = edit(inline.em.middle)
.replace(/punctuation/g, inline._punctuation)
+ .replace(/overlapSkip/g, inline._overlapSkip)
.getRegex();
-inline.emEndAst = edit(inline.emEndAst, 'g')
+inline.em.endAst = edit(inline.em.endAst, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();
-inline.emEndUnd = edit(inline.emEndUnd, 'g')
+inline.em.endUnd = edit(inline.em.endUnd, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();
-inline.blockSkip = edit(inline._blockSkip, 'g')
- .getRegex();
-
-inline.overlapSkip = edit(inline._overlapSkip, 'g')
+inline.strong.start = edit(inline.strong.start)
+ .replace(/punctuation/g, inline._punctuation)
.getRegex();
-inline.strong = edit(inline.strong)
+inline.strong.middle = edit(inline.strong.middle)
.replace(/punctuation/g, inline._punctuation)
.replace(/blockSkip/g, inline._blockSkip)
.getRegex();
-inline.strStart = edit(inline.strStart)
+inline.strong.endAst = edit(inline.strong.endAst, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();
-inline.strEndAst = edit(inline.strEndAst, 'g')
+inline.strong.endUnd = edit(inline.strong.endUnd, 'g')
.replace(/punctuation/g, inline._punctuation)
.getRegex();
-inline.strEndUnd = edit(inline.strEndUnd, 'g')
- .replace(/punctuation/g, inline._punctuation)
+inline.blockSkip = edit(inline._blockSkip, 'g')
+ .getRegex();
+
+inline.overlapSkip = edit(inline._overlapSkip, 'g')
.getRegex();
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;
@@ -280,8 +283,18 @@ inline.normal = merge({}, inline);
*/
inline.pedantic = merge({}, inline.normal, {
- strong: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
- em: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
+ strong: {
+ start: /^__|\*\*/,
+ middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
+ endAst: /\*\*(?!\*)/g,
+ endUnd: /__(?!_)/g
+ },
+ em: {
+ start: /^_|\*/,
+ middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
+ endAst: /\*(?!\*)/g,
+ endUnd: /_(?!_)/g
+ },
link: edit(/^!?\[(label)\]\((.*?)\)/)
.replace('label', inline._label)
.getRegex(),