From b73068c82d54db6ff54ad448729e805b3bb94c3a Mon Sep 17 00:00:00 2001 From: Kevin Dew Date: Wed, 3 Apr 2019 20:17:20 +0100 Subject: [PATCH] Consistently handle inline elements with spaces This resolves some odd situations that can occur when a sentence contains inline elements that consist of whitespace. The first situation is when there is an element that contains only a space between two words, for example 'Test<span> </span>content'. This would previously have produced a two-space result, 'Test  content', because this element would have matched both the leading and the trailing whitespace tests. The second situation is when there is an element that contains a whitespace character not covered by the tests, as is the case with a non-breaking space character (Unicode U+00A0); the space is then removed. An example of this is 'Test<span>&nbsp;</span>content', which would result in 'Testcontent' because it wouldn't match the tests for leading/trailing whitespace. This resolves these problems by changing the whitespace tests to use \s rather than a subset of space characters (which is consistent with the blank test [1]) and by emitting only the leading space, not the trailing one, when a blank element passes both the leading and trailing whitespace tests. 
[1]: https://github.com/domchristie/turndown/blob/80297cebeae4b35c8d299b1741b383c74eddc7c1/src/node.js#L14 --- src/node.js | 8 +++++--- test/index.html | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/node.js b/src/node.js index 8ca8fca3..675e9799 100644 --- a/src/node.js +++ b/src/node.js @@ -22,13 +22,15 @@ function flankingWhitespace (node) { var trailing = '' if (!node.isBlock) { - var hasLeading = /^[ \r\n\t]/.test(node.textContent) - var hasTrailing = /[ \r\n\t]$/.test(node.textContent) + var hasLeading = /^\s/.test(node.textContent) + var hasTrailing = /\s$/.test(node.textContent) + var blankWithSpaces = node.isBlank && hasLeading && hasTrailing if (hasLeading && !isFlankedByWhitespace('left', node)) { leading = ' ' } - if (hasTrailing && !isFlankedByWhitespace('right', node)) { + + if (!blankWithSpaces && hasTrailing && !isFlankedByWhitespace('right', node)) { trailing = ' ' } } diff --git a/test/index.html b/test/index.html index b52c5928..b4d6215e 100644 --- a/test/index.html +++ b/test/index.html @@ -888,6 +888,20 @@

This is a header.

![](http://example.com/logo.png)
+
+
+

Foo Bar

+
+
Foo Bar
+
+ +
+
+

Foo Bar

+
+
Foo Bar
+
+