diff --git a/packages/blocks/src/api/index.js b/packages/blocks/src/api/index.js index d9fb8f5668964a..0bdf1d01abd6be 100644 --- a/packages/blocks/src/api/index.js +++ b/packages/blocks/src/api/index.js @@ -11,7 +11,7 @@ export { getBlockAttributes, parseWithAttributeSchema, } from './parser'; -export { default as rawHandler, getPhrasingContentSchema } from './raw-handling'; +export { pasteHandler, rawHandler, getPhrasingContentSchema } from './raw-handling'; export { default as serialize, getBlockContent, diff --git a/packages/blocks/src/api/raw-handling/index.js b/packages/blocks/src/api/raw-handling/index.js index 82488e70380776..c38cdab46e0dcc 100644 --- a/packages/blocks/src/api/raw-handling/index.js +++ b/packages/blocks/src/api/raw-handling/index.js @@ -65,6 +65,51 @@ function getRawTransformations() { } ); } +/** + * Converts HTML directly to blocks. Looks for a matching transform for each + * top-level tag. The HTML should be filtered to not have any text between + * top-level tags and formatted in a way that blocks can handle the HTML. + * + * @param {Object} $1 Named parameters. + * @param {string} $1.html HTML to convert. + * @param {Array} $1.rawTransforms Transforms that can be used. + * + * @return {Array} An array of blocks. + */ +function htmlToBlocks( { html, rawTransforms } ) { + const doc = document.implementation.createHTMLDocument( '' ); + + doc.body.innerHTML = html; + + return Array.from( doc.body.children ).map( ( node ) => { + const rawTransform = findTransform( rawTransforms, ( { isMatch } ) => isMatch( node ) ); + + if ( ! rawTransform ) { + console.warn( + 'A block registered a raw transformation schema for `' + node.nodeName + '` but did not match it. ' + + 'Make sure there is a `selector` or `isMatch` property that can match the schema.\n' + + 'Sanitized HTML: `' + node.outerHTML + '`' + ); + + return; + } + + const { transform, blockName } = rawTransform; + + if ( transform ) { + return transform( node ); + } + + return createBlock( + blockName, + getBlockAttributes( + getBlockType( blockName ), + node.outerHTML + ) + ); + } ); +} + /** * Converts an HTML string to known blocks. Strips everything else. * @@ -79,7 +124,7 @@ function getRawTransformations() { * * @return {Array|string} A list of blocks or a string, depending on `handlerMode`. */ -export default function rawHandler( { HTML = '', plainText = '', mode = 'AUTO', tagName, canUserUseUnfilteredHTML = false } ) { +export function pasteHandler( { HTML = '', plainText = '', mode = 'AUTO', tagName, canUserUseUnfilteredHTML = false } ) { // First of all, strip any meta tags. HTML = HTML.replace( /]+>/, '' ); @@ -137,9 +182,9 @@ export default function rawHandler( { HTML = '', plainText = '', mode = 'AUTO', return filterInlineHTML( HTML ); } - const rawTransformations = getRawTransformations(); + const rawTransforms = getRawTransformations(); const phrasingContentSchema = getPhrasingContentSchema(); - const blockContentSchema = getBlockContentSchema( rawTransformations ); + const blockContentSchema = getBlockContentSchema( rawTransforms ); const blocks = compact( flatMap( pieces, ( piece ) => { // Already a block from shortcode. @@ -176,37 +221,7 @@ export default function rawHandler( { HTML = '', plainText = '', mode = 'AUTO', // Allows us to ask for this information when we get a report. console.log( 'Processed HTML piece:\n\n', piece ); - const doc = document.implementation.createHTMLDocument( '' ); - - doc.body.innerHTML = piece; - - return Array.from( doc.body.children ).map( ( node ) => { - const rawTransformation = findTransform( rawTransformations, ( { isMatch } ) => isMatch( node ) ); - - if ( ! rawTransformation ) { - console.warn( - 'A block registered a raw transformation schema for `' + node.nodeName + '` but did not match it. ' + - 'Make sure there is a `selector` or `isMatch` property that can match the schema.\n' + - 'Sanitized HTML: `' + node.outerHTML + '`' - ); - - return; - } - - const { transform, blockName } = rawTransformation; - - if ( transform ) { - return transform( node ); - } - - return createBlock( - blockName, - getBlockAttributes( - getBlockType( blockName ), - node.outerHTML - ) - ); - } ); + return htmlToBlocks( { html: piece, rawTransforms } ); } ) ); // If we're allowed to return inline content and there is only one block @@ -225,3 +240,48 @@ export default function rawHandler( { HTML = '', plainText = '', mode = 'AUTO', return blocks; } + +/** + * Converts an HTML string to known blocks. + * + * @param {string} $1.HTML The HTML to convert. + * + * @return {Array} A list of blocks. + */ +export function rawHandler( { HTML = '' } ) { + // If we detect block delimiters, parse entirely as blocks. + if ( HTML.indexOf( ' +

Howdy

+ + + +
\\"\\"/
+ + + +

This is a paragraph.

+ + + +

Preserve me!

+ + + +

More tag

+ + + + + + + +

Shortcode

+ + + + +" +`; diff --git a/test/integration/blocks-raw-handling.spec.js b/test/integration/blocks-raw-handling.spec.js index 26bee315fc5a51..b0997d3b757d9a 100644 --- a/test/integration/blocks-raw-handling.spec.js +++ b/test/integration/blocks-raw-handling.spec.js @@ -9,11 +9,16 @@ import path from 'path'; */ import { getBlockContent, + pasteHandler, rawHandler, serialize, } from '@wordpress/blocks'; import { registerCoreBlocks } from '@wordpress/block-library'; +function readFile( filePath ) { + return fs.existsSync( filePath ) ? fs.readFileSync( filePath, 'utf8' ).trim() : ''; +} + describe( 'Blocks raw handling', () => { beforeAll( () => { // Load all hooks that modify blocks @@ -22,7 +27,7 @@ describe( 'Blocks raw handling', () => { } ); it( 'should filter inline content', () => { - const filtered = rawHandler( { + const filtered = pasteHandler( { HTML: '

test

', mode: 'INLINE', } ); @@ -32,7 +37,7 @@ describe( 'Blocks raw handling', () => { } ); it( 'should parse Markdown', () => { - const filtered = rawHandler( { + const filtered = pasteHandler( { HTML: '* one
* two
* three', plainText: '* one\n* two\n* three', mode: 'AUTO', @@ -43,7 +48,7 @@ describe( 'Blocks raw handling', () => { } ); it( 'should parse inline Markdown', () => { - const filtered = rawHandler( { + const filtered = pasteHandler( { HTML: 'Some **bold** text.', plainText: 'Some **bold** text.', mode: 'AUTO', @@ -54,7 +59,7 @@ describe( 'Blocks raw handling', () => { } ); it( 'should parse HTML in plainText', () => { - const filtered = rawHandler( { + const filtered = pasteHandler( { HTML: '<p>Some <strong>bold</strong> text.</p>', plainText: '

Some bold text.

', mode: 'AUTO', @@ -65,7 +70,7 @@ describe( 'Blocks raw handling', () => { } ); it( 'should parse Markdown with HTML', () => { - const filtered = rawHandler( { + const filtered = pasteHandler( { HTML: '', plainText: '# Some heading\n\nA paragraph.', mode: 'AUTO', @@ -76,7 +81,7 @@ describe( 'Blocks raw handling', () => { } ); it( 'should break up forced inline content', () => { - const filtered = rawHandler( { + const filtered = pasteHandler( { HTML: '

test

test

', mode: 'INLINE', } ); @@ -86,7 +91,7 @@ describe( 'Blocks raw handling', () => { } ); it( 'should normalize decomposed characters', () => { - const filtered = rawHandler( { + const filtered = pasteHandler( { HTML: 'schön', mode: 'INLINE', } ); @@ -96,7 +101,7 @@ describe( 'Blocks raw handling', () => { } ); it( 'should treat single list item as inline text', () => { - const filtered = rawHandler( { + const filtered = pasteHandler( { HTML: '', plainText: 'Some bold text.\n', mode: 'AUTO', @@ -107,7 +112,7 @@ describe( 'Blocks raw handling', () => { } ); it( 'should treat multiple list items as a block', () => { - const filtered = rawHandler( { + const filtered = pasteHandler( { HTML: '', plainText: 'One\nTwo\nThree\n', mode: 'AUTO', @@ -117,11 +122,7 @@ describe( 'Blocks raw handling', () => { expect( console ).toHaveLogged(); } ); - describe( 'serialize', () => { - function readFile( filePath ) { - return fs.existsSync( filePath ) ? fs.readFileSync( filePath, 'utf8' ).trim() : ''; - } - + describe( 'pasteHandler', () => { [ 'plain', 'classic', @@ -143,7 +144,7 @@ describe( 'Blocks raw handling', () => { const HTML = readFile( path.join( __dirname, `fixtures/${ type }-in.html` ) ); const plainText = readFile( path.join( __dirname, `fixtures/${ type }-in.txt` ) ); const output = readFile( path.join( __dirname, `fixtures/${ type }-out.html` ) ); - const converted = rawHandler( { HTML, plainText, canUserUseUnfilteredHTML: true } ); + const converted = pasteHandler( { HTML, plainText, canUserUseUnfilteredHTML: true } ); const serialized = typeof converted === 'string' ? converted : serialize( converted ); expect( serialized ).toBe( output ); @@ -154,4 +155,11 @@ describe( 'Blocks raw handling', () => { } ); } ); } ); + + describe( 'rawHandler', () => { + it( 'should convert HTML post to blocks with minimal content changes', () => { + const HTML = readFile( path.join( __dirname, 'fixtures/wordpress-convert.html' ) ); + expect( serialize( rawHandler( { HTML } ) ) ).toMatchSnapshot(); + } ); + } ); } ); diff --git a/test/integration/fixtures/wordpress-convert.html b/test/integration/fixtures/wordpress-convert.html new file mode 100644 index 00000000000000..1cb01568c37059 --- /dev/null +++ b/test/integration/fixtures/wordpress-convert.html @@ -0,0 +1,8 @@ +

Howdy

+ +

This is a paragraph.

+

Preserve me!

+

More tag

+

+

Shortcode

+

[gallery ids="1"]