Skip to content

Commit

Permalink
Avoid regexes, use private api
Browse files Browse the repository at this point in the history
  • Loading branch information
ellatrix committed Oct 10, 2023
1 parent ca18b17 commit a50954a
Show file tree
Hide file tree
Showing 14 changed files with 187 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { useRef } from '@wordpress/element';
import { useRefEffect } from '@wordpress/compose';
import {
pasteHandler,
getClipboardEventData,
privateApis,
findTransform,
getBlockTransforms,
} from '@wordpress/blocks';
Expand All @@ -17,6 +17,9 @@ import { isURL } from '@wordpress/url';
*/
import { addActiveFormats } from './utils';
import { splitValue } from './split-value';
import { unlock } from '../../lock-unlock';

const { getClipboardEventData } = unlock( privateApis );

/** @typedef {import('@wordpress/rich-text').RichTextValue} RichTextValue */

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import {
serialize,
pasteHandler,
getClipboardEventData,
privateApis,
createBlock,
findTransform,
getBlockTransforms,
Expand All @@ -22,6 +22,9 @@ import { useRefEffect } from '@wordpress/compose';
*/
import { store as blockEditorStore } from '../../store';
import { useNotifyCopy } from '../../utils/use-notify-copy';
import { unlock } from '../../lock-unlock';

const { getClipboardEventData } = unlock( privateApis );

export default function useClipboardHandler() {
const {
Expand Down
4 changes: 4 additions & 0 deletions packages/blocks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,10 @@ _Returns_

- `Array|string`: A list of blocks or a string, depending on `handlerMode`.

### privateApis

Private @wordpress/blocks APIs.

### rawHandler

Converts an HTML string to known blocks.
Expand Down
42 changes: 10 additions & 32 deletions packages/blocks/src/api/raw-handling/get-clipboard-event-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,12 @@
import { getFilesFromDataTransfer } from '@wordpress/dom';

/**
* Normalizes a given string of HTML to remove the Windows-specific "Fragment"
* comments and any preceding and trailing content.
*
* @param {string} html the html to be normalized
* @return {string} the normalized html
* Internal dependencies
*/
/**
 * Strips the Windows clipboard wrapper from an HTML string: the opening
 * `<html>`/`<body>` tags with an optional "StartFragment" comment at the very
 * start, and the optional "EndFragment" comment with the closing
 * `</body>`/`</html>` tags at the very end.
 *
 * @param {string} html The HTML to normalize.
 * @return {string} The HTML without the leading and trailing wrapper markup.
 */
export function removeWindowsFragments( html ) {
	// Anchored at the start of the string, so the wrapper is only removed
	// when `<body>` directly follows `<html>` (whitespace aside).
	const leadingWrapper =
		/^\s*<html[^>]*>\s*<body[^>]*>(?:\s*<!--\s*StartFragment\s*-->)?/i;
	// Anchored at the end of the string; the EndFragment comment is optional.
	const trailingWrapper =
		/(?:<!--\s*EndFragment\s*-->\s*)?<\/body>\s*<\/html>\s*$/i;

	const withoutLeadingWrapper = html.replace( leadingWrapper, '' );
	return withoutLeadingWrapper.replace( trailingWrapper, '' );
}

/**
 * Strips every `<meta ...>` tag from an HTML string. This targets the charset
 * meta tag that Chromium inserts into clipboard HTML.
 * See:
 * - https://github.com/WordPress/gutenberg/issues/33585
 * - https://bugs.chromium.org/p/chromium/issues/detail?id=1264616#c4
 *
 * @param {string} html The HTML to strip meta tags from.
 * @return {string} The HTML with all matched meta tags removed.
 */
export function removeMetaTags( html ) {
	// Global + case-insensitive: every occurrence is removed, whatever its
	// casing. At least one character is required between `<meta` and `>`.
	const metaTagPattern = /<meta[^>]+>/gi;
	const cleanedHtml = html.replace( metaTagPattern, '' );
	return cleanedHtml;
}
import { deepFilterHTML } from './utils';
import wrapperRemover from './wrapper-remover';
import msFragmentRemover from './ms-fragment-remover';
import metaRemover from './meta-remover';

export function getClipboardEventData( event ) {
const { clipboardData } = event;
Expand All @@ -58,8 +33,11 @@ export function getClipboardEventData( event ) {
}
}

html = removeWindowsFragments( html );
html = removeMetaTags( html );
html = deepFilterHTML( html, [
wrapperRemover,
msFragmentRemover,
metaRemover,
] );

const files = getFilesFromDataTransfer( clipboardData );

Expand Down
13 changes: 13 additions & 0 deletions packages/blocks/src/api/raw-handling/meta-remover.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Node filter that drops `META` elements, such as the charset meta tag that
 * Chromium adds to clipboard HTML. Any other node is left untouched.
 * See:
 * - https://github.com/WordPress/gutenberg/issues/33585
 * - https://bugs.chromium.org/p/chromium/issues/detail?id=1264616#c4
 *
 * @param {Node} node The node to be processed.
 */
export default function metaRemover( node ) {
	const isMetaElement = node.nodeName === 'META';

	// Nothing to do for any node other than a META element.
	if ( ! isMetaElement ) {
		return;
	}

	node.remove();
}
18 changes: 18 additions & 0 deletions packages/blocks/src/api/raw-handling/ms-fragment-remover.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/**
* WordPress dependencies
*/
import { remove } from '@wordpress/dom';

/**
 * Node filter for the Windows-specific "Fragment" comments: a comment node
 * whose trimmed text is exactly `StartFragment` or `EndFragment` is handed to
 * `remove` from `@wordpress/dom`. Every other node is left alone.
 *
 * @param {Node} node The node to be processed.
 */
export default function msFragmentRemover( node ) {
	// Only comment nodes can be fragment markers.
	if ( node.nodeType !== node.COMMENT_NODE ) {
		return;
	}

	const marker = node.textContent.trim();

	if ( marker === 'StartFragment' || marker === 'EndFragment' ) {
		remove( node );
	}
}
14 changes: 8 additions & 6 deletions packages/blocks/src/api/raw-handling/paste-handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,9 @@ import brRemover from './br-remover';
import { deepFilterHTML, isPlain, getBlockContentSchema } from './utils';
import emptyParagraphRemover from './empty-paragraph-remover';
import slackParagraphCorrector from './slack-paragraph-corrector';
import {
removeMetaTags,
removeWindowsFragments,
} from './get-clipboard-event-data';
import wrapperRemover from './wrapper-remover';
import msFragmentRemover from './ms-fragment-remover';
import metaRemover from './meta-remover';

/**
* Browser dependencies
Expand Down Expand Up @@ -94,8 +93,11 @@ export function pasteHandler( {
tagName,
preserveWhiteSpace,
} ) {
HTML = removeMetaTags( HTML );
HTML = removeWindowsFragments( HTML );
HTML = deepFilterHTML( HTML, [
wrapperRemover,
msFragmentRemover,
metaRemover,
] );

// If we detect block delimiters in HTML, parse entirely as blocks.
if ( mode !== 'INLINE' ) {
Expand Down
10 changes: 10 additions & 0 deletions packages/blocks/src/api/raw-handling/wrapper-remover.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/**
* WordPress dependencies
*/
import { unwrap } from '@wordpress/dom';

/**
 * Node filter that passes `BODY` and `HTML` nodes to `unwrap` from
 * `@wordpress/dom`, so the document wrapper elements are dropped from the
 * content being processed. Any other node is left untouched.
 *
 * @param {Node} node The node to be processed.
 */
export default function wrapperRemover( node ) {
	const { nodeName } = node;
	const isDocumentWrapper = nodeName === 'BODY' || nodeName === 'HTML';

	if ( isDocumentWrapper ) {
		unwrap( node );
	}
}
1 change: 1 addition & 0 deletions packages/blocks/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
export { store } from './store';
export * from './api';
export * from './deprecated';
export { privateApis } from './private-apis';
13 changes: 13 additions & 0 deletions packages/blocks/src/private-apis.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
* Internal dependencies
*/
import { lock } from './lock-unlock';
import { getClipboardEventData } from './api/raw-handling/get-clipboard-event-data';

/**
 * Private @wordpress/blocks APIs.
 */
export const privateApis = {};

// The members below are attached through `lock` rather than set as plain
// properties; presumably consumers read them back with the matching `unlock`
// helper (verify against ./lock-unlock).
const lockedMembers = { getClipboardEventData };
lock( privateApis, lockedMembers );
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ exports[`Blocks raw handling pasteHandler iframe-embed 1`] = `""`;

exports[`Blocks raw handling pasteHandler markdown 1`] = `"This is a heading with <em>italic</em><br>This is a paragraph with a <a href="https://w.org/">link</a>, <strong>bold</strong>, and <del>strikethrough</del>.<br>Preserve<br>line breaks please.<br>Lists<br>A<br>Bulleted Indented<br>List<br>One<br>Two<br>Three<br>Table<br>First Header<br>Second Header<br>Content from cell 1<br>Content from cell 2<br>Content in the first column<br>Content in the second column<br><br><br><br>Table with empty cells.<br>Quote<br>First<br>Second<br>Code<br>Inline <code>code</code> tags should work.<br><code>This is a code block.</code>"`;

exports[`Blocks raw handling pasteHandler ms-excel 1`] = `"<br><br><br>Cell 1<br>Cell 2<br>Cell 3<br>Cell 4"`;

exports[`Blocks raw handling pasteHandler ms-word 1`] = `"This is a title<br>&nbsp;<br>This is a subtitle<br>&nbsp;<br>This is a heading level 1<br>&nbsp;<br>This is a heading level 2<br>&nbsp;<br>This is a <strong>paragraph</strong> with a <a href="https://w.org/">link</a>.<br>&nbsp;<br>A<br>Bulleted<br>Indented<br>List<br>&nbsp;<br>One<br>Two<br>Three<br>&nbsp;<br>One<br>Two<br>Three<br>1<br>2<br>3<br>I<br>II<br>III<br>&nbsp;<br>An image:<br>&nbsp;<br><img width="451" height="338" src="file:LOW-RES.png"><br><a href="#anchor">This is an anchor link</a> that leads to the next paragraph.<br><a id="anchor">This is the paragraph with the anchor.</a><br><a href="#nowhere">This is an anchor link</a> that leads nowhere.<br><a>This is a paragraph with an anchor with no link pointing to it.</a><br>This is a reference to a footnote<a href="#_ftn1" id="_ftnref1">[1]</a>.<br>This is a reference to an endnote<a href="#_edn1" id="_ednref1">[i]</a>.<br><br><br><a href="#_ftnref1" id="_ftn1">[1]</a> This is a footnote.<br><br><br><a href="#_ednref1" id="_edn1">[i]</a> This is an endnote."`;

exports[`Blocks raw handling pasteHandler ms-word-list 1`] = `"<a>This is a headline?</a><br>This is a text:<br>One<br>Two<br>Three<br><a>Lorem Ipsum.</a><br>&nbsp;"`;
Expand Down
1 change: 1 addition & 0 deletions test/integration/blocks-raw-handling.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ describe( 'Blocks raw handling', () => {
'google-docs-table-with-rowspan',
'google-docs-table-with-comments',
'google-docs-with-comments',
'ms-excel',
'ms-word',
'ms-word-list',
'ms-word-styled',
Expand Down
96 changes: 96 additions & 0 deletions test/integration/fixtures/documents/ms-excel-in.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
<html xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:x="urn:schemas-microsoft-com:office:excel"
xmlns="http://www.w3.org/TR/REC-html40">

<head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=ProgId content=Excel.Sheet>
<meta name=Generator content="Microsoft Excel 15">
<link id=Main-File rel=Main-File
href="file:///C:/Users/TETSUA~1.HAM/AppData/Local/Temp/msohtmlclip1/01/clip.htm">
<link rel=File-List
href="file:///C:/Users/TETSUA~1.HAM/AppData/Local/Temp/msohtmlclip1/01/clip_filelist.xml">
<style>
<!--table
{mso-displayed-decimal-separator:"\.";
mso-displayed-thousand-separator:"\,";}
@page
{margin:.75in .7in .75in .7in;
mso-header-margin:.3in;
mso-footer-margin:.3in;}
.font5
{color:windowtext;
font-size:6.0pt;
font-weight:400;
font-style:normal;
text-decoration:none;
font-family:游ゴシック, monospace;
mso-font-charset:128;}
tr
{mso-height-source:auto;
mso-ruby-visibility:none;}
col
{mso-width-source:auto;
mso-ruby-visibility:none;}
br
{mso-data-placement:same-cell;}
td
{padding-top:1px;
padding-right:1px;
padding-left:1px;
mso-ignore:padding;
color:black;
font-size:11.0pt;
font-weight:400;
font-style:normal;
text-decoration:none;
font-family:游ゴシック, monospace;
mso-font-charset:128;
mso-number-format:General;
text-align:general;
vertical-align:bottom;
border:none;
mso-background-source:auto;
mso-pattern:auto;
mso-protection:locked visible;
white-space:nowrap;
mso-rotate:0;}
.xl65
{font-family:"Noto Sans JP", sans-serif;
mso-font-charset:128;}
ruby
{ruby-align:left;}
rt
{color:windowtext;
font-size:6.0pt;
font-weight:400;
font-style:normal;
text-decoration:none;
font-family:游ゴシック, monospace;
mso-font-charset:128;
mso-char-type:katakana;
display:none;}
-->
</style>
</head>

<body link="#0563C1" vlink="#954F72">

<table border=0 cellpadding=0 cellspacing=0 width=140 style='border-collapse:
collapse;width:106pt'>
<!--StartFragment-->
<col width=70 span=2 style='width:53pt'>
<tr height=24 style='height:18.0pt'>
<td height=24 class=xl65 width=70 style='height:18.0pt;width:53pt'>Cell 1</td>
<td class=xl65 width=70 style='width:53pt'>Cell 2</td>
</tr>
<tr height=24 style='height:18.0pt'>
<td height=24 class=xl65 style='height:18.0pt'>Cell 3</td>
<td class=xl65>Cell 4</td>
</tr>
<!--EndFragment-->
</table>

</body>

</html>
3 changes: 3 additions & 0 deletions test/integration/fixtures/documents/ms-excel-out.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<!-- wp:table -->
<figure class="wp-block-table"><table><tbody><tr><td>Cell 1</td><td>Cell 2</td></tr><tr><td>Cell 3</td><td>Cell 4</td></tr></tbody></table></figure>
<!-- /wp:table -->

0 comments on commit a50954a

Please sign in to comment.