Skip to content

Commit

Permalink
Merge pull request #315 from orchitech/flanking-whitespace-improvements
Browse files Browse the repository at this point in the history
Correct Unicode whitespace collapsing and improve flanking white space
  • Loading branch information
pavelhoral authored Dec 2, 2020
2 parents 8ba0446 + 27bdac0 commit dcf2cf6
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 14 deletions.
35 changes: 22 additions & 13 deletions src/node.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,33 @@ function isBlank (node) {
}

function flankingWhitespace (node) {
var leading = ''
var trailing = ''
if (node.isBlock) return { leading: '', trailing: '' }

if (!node.isBlock) {
var hasLeading = /^\s/.test(node.textContent)
var hasTrailing = /\s$/.test(node.textContent)
var blankWithSpaces = node.isBlank && hasLeading && hasTrailing
var edges = edgeWhitespace(node.textContent)

if (hasLeading && !isFlankedByWhitespace('left', node)) {
leading = ' '
}
// abandon leading ASCII WS if left-flanked by ASCII WS
if (edges.leadingAscii && isFlankedByWhitespace('left', node)) {
edges.leading = edges.leadingNonAscii
}

if (!blankWithSpaces && hasTrailing && !isFlankedByWhitespace('right', node)) {
trailing = ' '
}
// abandon trailing ASCII WS if right-flanked by ASCII WS
if (edges.trailingAscii && isFlankedByWhitespace('right', node)) {
edges.trailing = edges.trailingNonAscii
}

return { leading: leading, trailing: trailing }
return { leading: edges.leading, trailing: edges.trailing }
}

/**
 * Splits the whitespace found at both edges of a string into its ASCII and
 * non-ASCII parts.
 *
 * "ASCII whitespace" here means space, tab, CR and LF (`[ \t\r\n]`); anything
 * else matched by `\s` (e.g. a non-breaking space) counts as non-ASCII.
 *
 * @param {string} string - Text to inspect.
 * @returns {{leading: string, leadingAscii: string, leadingNonAscii: string,
 *   trailing: string, trailingNonAscii: string, trailingAscii: string}}
 *   `leading` is `leadingAscii + leadingNonAscii` and `trailing` is
 *   `trailingNonAscii + trailingAscii`. For a whitespace-only string the
 *   whole string is reported as `leading` and `trailing` is empty.
 */
function edgeWhitespace (string) {
  // The middle part is optional and, when present, must start and end with a
  // non-whitespace character. Anchoring it this way (instead of a lazy
  // `[\s\S]*?`) keeps the captured groups identical while avoiding the
  // quadratic backtracking the lazy form shows on long interior whitespace
  // runs.
  var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/)
  return {
    leading: m[1], // whole string for whitespace-only strings
    leadingAscii: m[2],
    leadingNonAscii: m[3],
    trailing: m[4], // empty for whitespace-only strings
    trailingNonAscii: m[5],
    trailingAscii: m[6]
  }
}

function isFlankedByWhitespace (side, node) {
Expand Down
55 changes: 54 additions & 1 deletion test/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,7 @@ <h2>This is a header.</h2>
<div class="input">
<p>Foo<span>&nbsp;</span>Bar</p>
</div>
<pre class="expected">Foo Bar</pre>
<pre class="expected">Foo&nbsp;Bar</pre>
</div>

<div class="case" data-name="triple tildes inside code" data-options='{"codeBlockStyle": "fenced", "fence": "~~~"}'>
Expand Down Expand Up @@ -989,6 +989,59 @@ <h2>This is a header.</h2>
```</pre>
</div>

<div class="case" data-name="text separated by ASCII and nonASCII space in an element">
<div class="input">
<p>Foo<span> &nbsp; </span>Bar</p>
</div>
<pre class="expected">Foo &nbsp; Bar</pre>
</div>

<div class="case" data-name="list-like text with non-breaking spaces">
<div class="input">&nbsp;1. First<br>&nbsp;2. Second</div>
<pre class="expected">&nbsp;1. First <!-- hard break -->
&nbsp;2. Second</pre>
</div>

<div class="case" data-name="element with trailing nonASCII WS followed by nonWS">
<div class="input"><i>foo&nbsp;</i>bar</div>
<pre class="expected">_foo_&nbsp;bar</pre>
</div>

<div class="case" data-name="element with trailing nonASCII WS followed by nonASCII WS">
<div class="input"><i>foo&nbsp;</i>&nbsp;bar</div>
<pre class="expected">_foo_&nbsp;&nbsp;bar</pre>
</div>

<div class="case" data-name="element with trailing ASCII WS followed by nonASCII WS">
<div class="input"><i>foo </i>&nbsp;bar</div>
<pre class="expected">_foo_ &nbsp;bar</pre>
</div>

<div class="case" data-name="element with trailing nonASCII WS followed by ASCII WS">
<div class="input"><i>foo&nbsp;</i> bar</div>
<pre class="expected">_foo_&nbsp; bar</pre>
</div>

<div class="case" data-name="nonWS followed by element with leading nonASCII WS">
<div class="input">foo<i>&nbsp;bar</i></div>
<pre class="expected">foo&nbsp;_bar_</pre>
</div>

<div class="case" data-name="nonASCII WS followed by element with leading nonASCII WS">
<div class="input">foo&nbsp;<i>&nbsp;bar</i></div>
<pre class="expected">foo&nbsp;&nbsp;_bar_</pre>
</div>

<div class="case" data-name="nonASCII WS followed by element with leading ASCII WS">
<div class="input">foo&nbsp;<i> bar</i></div>
<pre class="expected">foo&nbsp; _bar_</pre>
</div>

<div class="case" data-name="ASCII WS followed by element with leading nonASCII WS">
<div class="input">foo <i>&nbsp;bar</i></div>
<pre class="expected">foo &nbsp;_bar_</pre>
</div>

<!-- /TEST CASES -->

<script src="turndown-test.browser.js"></script>
Expand Down

0 comments on commit dcf2cf6

Please sign in to comment.