From 2002186eeddb5bbbab14ad625956734517baa9ed Mon Sep 17 00:00:00 2001 From: fredck Date: Wed, 18 Dec 2019 18:32:01 +0100 Subject: [PATCH 1/5] Added test for code blocks with triple/quadruple ticks inside. --- tests/gfmdataprocessor/code.js | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/gfmdataprocessor/code.js b/tests/gfmdataprocessor/code.js index b3dd7df..9af7bc9 100644 --- a/tests/gfmdataprocessor/code.js +++ b/tests/gfmdataprocessor/code.js @@ -276,5 +276,41 @@ describe( 'GFMDataProcessor', () => { '`code `` code ``` `' ); } ); + + it( 'should handle triple ticks inside code', () => { + testDataProcessor( + '````\n' + + '```\n' + + 'Code\n' + + '```\n' + + '````', + + '
' +
+				'```\n' +
+				'Code\n' +
+				'```' +
+				'
' + ); + } ); + + it( 'should handle triple and quatruple ticks inside code', () => { + testDataProcessor( + '`````\n' + + '````\n' + + '```\n' + + 'Code\n' + + '```\n' + + '````\n' + + '`````', + + '
' +
+				'````\n' +
+				'```\n' +
+				'Code\n' +
+				'```\n' +
+				'````' +
+				'
' + ); + } ); } ); } ); From 72174d9aac1733b0922cbec8269aac64c95660e3 Mon Sep 17 00:00:00 2001 From: fredck Date: Tue, 25 Feb 2020 16:09:20 +0100 Subject: [PATCH 2/5] Fixed the html->markdown conversion of code blocks with multiple ticks inside. --- src/html2markdown/html2markdown.js | 39 +++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/src/html2markdown/html2markdown.js b/src/html2markdown/html2markdown.js index 88d4afd..2bffb93 100644 --- a/src/html2markdown/html2markdown.js +++ b/src/html2markdown/html2markdown.js @@ -27,7 +27,8 @@ const turndownService = new TurndownService( { turndownService.use( [ gfm, - todoList + todoList, + fencedCodeBlock ] ); export default function html2markdown( html ) { @@ -47,3 +48,39 @@ function todoList( turndownService ) { } } ); } + +// This one fixes https://github.com/domchristie/turndown/issues/300. It's based on: +// https://github.com/domchristie/turndown/blob/cae7098f97bcf14118a916e13e807536f432f3ac/src/commonmark-rules.js#L101-L121 +function fencedCodeBlock( turndownService ) { + turndownService.addRule( 'taskListItems', { + filter( node, options ) { + return ( + options.codeBlockStyle === 'fenced' && + node.nodeName === 'PRE' && + node.firstChild && + node.firstChild.nodeName === 'CODE' + ); + }, + + replacement( content, node ) { + const className = node.firstChild.className || ''; + const language = ( className.match( /language-(\S+)/ ) || [ null, '' ] )[ 1 ]; + const code = node.firstChild.textContent; + let fenceSize = 3; + + for ( const match of code.matchAll( /^`{3,}/gm ) ) { + if ( match[ 0 ].length >= fenceSize ) { + fenceSize = match[ 0 ].length + 1; + } + } + + const fence = '`'.repeat( fenceSize ); + + return ( + '\n\n' + fence + language + '\n' + + code + + '\n' + fence + '\n\n' + ); + } + } ); +} From 19446ee6ed2a9ffc9985316d36722f96e8093ad0 Mon Sep 17 00:00:00 2001 From: fredck Date: Mon, 9 Mar 2020 13:10:12 +0100 Subject: [PATCH 3/5] Fixed the html->markdown 
conversion to avoid escaping urls in text nodes. --- src/html2markdown/html2markdown.js | 39 ++++++++++++++++++++++++--- tests/gfmdataprocessor/text.js | 43 ++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 tests/gfmdataprocessor/text.js diff --git a/src/html2markdown/html2markdown.js b/src/html2markdown/html2markdown.js index 2bffb93..51bf635 100644 --- a/src/html2markdown/html2markdown.js +++ b/src/html2markdown/html2markdown.js @@ -10,12 +10,43 @@ import { gfm } from 'turndown-plugin-gfm'; { const originalEscape = TurndownService.prototype.escape; TurndownService.prototype.escape = function( string ) { - string = originalEscape( string ); + // Urls should not be escaped. Our strategy is using a regex to find them and escape everything + // which is outside of the matched parts. - // Escape "<". - string = string.replace( /]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()[\]{};:'".,<>?«»“”‘’])/g; - return string; + let escaped = ''; + let lastIndex = 0; + let m; + do { + m = regex.exec( string ); + + // The substring should go to the matched index or, if nothing found, the end of the string. + const index = m ? m.index : string.length; + + // Append the substring between the last match and the current one (if anything). + if ( index > lastIndex ) { + escaped += escape( string.substring( lastIndex, index ) ); + } + + // Append the match itself now, if anything. + m && ( escaped += m[ 0 ] ); + + lastIndex = regex.lastIndex; + } + while ( m ); + + return escaped; + + function escape( string ) { + string = originalEscape( string ); + + // Escape "<". + string = string.replace( / { + describe( 'text', () => { + describe( 'urls', () => { + it( 'should not escape urls', () => { + testDataProcessor( + 'escape\\_this https://test.com/do_[not]-escape escape\\_this', + '

escape_this https://test.com/do_[not]-escape escape_this

' + ); + } ); + + it( 'should not escape urls (at start)', () => { + testDataProcessor( + 'https://test.com/do_[not]-escape escape\\_this', + '

https://test.com/do_[not]-escape escape_this

' + ); + } ); + + it( 'should not escape urls (at end)', () => { + testDataProcessor( + 'escape\\_this https://test.com/do_[not]-escape', + '

escape_this https://test.com/do_[not]-escape

' + ); + } ); + + [ + 'https://test.com/do_[not]-escape', + 'http://test.com/do_[not]-escape', + 'www.test.com/do_[not]-escape' + ].forEach( url => { + it( `should not escape urls (${ url })`, () => { + testDataProcessor( url, `

${ url }

` ); + } ); + } ); + } ); + } ); +} ); From 88004bd11b98ff48993f896a70ed99b0258360f5 Mon Sep 17 00:00:00 2001 From: fredck Date: Fri, 13 Mar 2020 17:09:44 +0100 Subject: [PATCH 4/5] Upgraded to Turndown v6.0.0. --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index bf502d4..1b9dc46 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ "dependencies": { "@ckeditor/ckeditor5-engine": "^17.0.0", "marked": "^0.7.0", - "turndown": "^5.0.3", + "turndown": "^6.0.0", "turndown-plugin-gfm": "^1.0.2" }, "devDependencies": { From 43c79726ad47875d6c2bfa4f5af0bf3275045bac Mon Sep 17 00:00:00 2001 From: fredck Date: Fri, 13 Mar 2020 17:10:54 +0100 Subject: [PATCH 5/5] Revert "Fixed the html->markdown conversion of code blocks with multiple ticks inside.". Turndown v6.0.0 upgrade follow-up. This reverts commit 72174d9a. --- src/html2markdown/html2markdown.js | 39 +----------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/src/html2markdown/html2markdown.js b/src/html2markdown/html2markdown.js index 2bffb93..88d4afd 100644 --- a/src/html2markdown/html2markdown.js +++ b/src/html2markdown/html2markdown.js @@ -27,8 +27,7 @@ const turndownService = new TurndownService( { turndownService.use( [ gfm, - todoList, - fencedCodeBlock + todoList ] ); export default function html2markdown( html ) { @@ -48,39 +47,3 @@ function todoList( turndownService ) { } } ); } - -// This one fixes https://github.com/domchristie/turndown/issues/300. 
It's based on: -// https://github.com/domchristie/turndown/blob/cae7098f97bcf14118a916e13e807536f432f3ac/src/commonmark-rules.js#L101-L121 -function fencedCodeBlock( turndownService ) { - turndownService.addRule( 'taskListItems', { - filter( node, options ) { - return ( - options.codeBlockStyle === 'fenced' && - node.nodeName === 'PRE' && - node.firstChild && - node.firstChild.nodeName === 'CODE' - ); - }, - - replacement( content, node ) { - const className = node.firstChild.className || ''; - const language = ( className.match( /language-(\S+)/ ) || [ null, '' ] )[ 1 ]; - const code = node.firstChild.textContent; - let fenceSize = 3; - - for ( const match of code.matchAll( /^`{3,}/gm ) ) { - if ( match[ 0 ].length >= fenceSize ) { - fenceSize = match[ 0 ].length + 1; - } - } - - const fence = '`'.repeat( fenceSize ); - - return ( - '\n\n' + fence + language + '\n' + - code + - '\n' + fence + '\n\n' - ); - } - } ); -}