Skip to content

Commit

Permalink
[Data Liberation] Block markup consumers and producers (#2121)
Browse files Browse the repository at this point in the history
A part of #1894

Introduces a standardized API for converting between static data formats
and blocks+metadata.

* The `data format -> blocks+metadata` operation is represented by the
WP_Data_Format_Consumer interface
* The `blocks+metadata -> data format` operation is represented by the
WP_Data_Format_Producer interface

This PR also ships a few initial consumers and producers:

* `WP_Annotated_Block_Markup_Consumer` – for consuming static block
markup with `<meta>` tags.
* `WP_Markup_Processor_Consumer` – for consuming an HTML/XHTML markup
processor instance. It handles just the regular HTML/XHTML markup, not
block markup.
* `WP_Annotated_Block_Markup_Producer` – for serializing block markup +
metadata array as block markup with `<meta>` tags.

## Example

The two-way conversion pipeline shipped in this PR goes between this:

```php
$block_markup = <<<BLOCKS
<!-- wp:paragraph -->
<p>Hello <b>world</b>!</p>
<!-- /wp:paragraph -->
BLOCKS;

$metadata =  array(
     'post_title' => array( 'My first post' ),
);
```

And this:

```html
<meta name="post_title" content="My first post">
<!-- wp:paragraph -->
<p>Hello <b>world</b>!</p>
<!-- /wp:paragraph -->
```

## Other changes

This PR also ships the block parser from WordPress core to enable
running unit tests – we need to call `parse_blocks()` now.

## Testing

The code isn't used anywhere yet – just rely on the CI.
  • Loading branch information
adamziel authored Jan 10, 2025
1 parent cbd8ea4 commit 3443569
Show file tree
Hide file tree
Showing 23 changed files with 3,965 additions and 181 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,46 +21,26 @@
use League\CommonMark\Extension\Table\TableRow;
use League\CommonMark\Extension\Table\TableSection;

class WP_Markdown_To_Blocks implements WP_Block_Markup_Converter {
const STATE_READY = 'STATE_READY';
const STATE_COMPLETE = 'STATE_COMPLETE';

private $state = self::STATE_READY;
class WP_Markdown_Consumer implements WP_Data_Format_Consumer {
private $root_block;
private $block_stack = array();
private $current_block = null;

private $frontmatter = array();
private $markdown;
private $parsed_blocks = array();
private $block_markup = '';
private $parsed;

public function __construct( $markdown ) {
$this->markdown = $markdown;
}

public function convert() {
if ( self::STATE_READY !== $this->state ) {
return false;
}
$this->convert_markdown_to_blocks();
$this->block_markup = WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks );
return true;
}

public function get_all_metadata() {
return $this->frontmatter;
}

public function get_first_meta_value( $key ) {
if ( ! array_key_exists( $key, $this->frontmatter ) ) {
return null;
}
return $this->frontmatter[ $key ][0];
}

public function get_block_markup() {
return $this->block_markup;
public function consume() {
if( ! $this->parsed ) {
$this->convert_markdown_to_blocks();
$this->parsed = new WP_Blocks_With_Metadata( WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks ), $this->frontmatter );
}
return $this->parsed;
}

private function convert_markdown_to_blocks() {
Expand All @@ -82,7 +62,7 @@ private function convert_markdown_to_blocks() {
$document = $parser->parse( $this->markdown );
$this->frontmatter = array();
foreach ( $document->data as $key => $value ) {
// Use an array as a value to comply with the WP_Block_Markup_Converter interface.
// Use an array as a value to comply with the WP_Data_Format_Consumer interface.
$this->frontmatter[ $key ] = array( $value );
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function ( $cursor = null ) use ( $markdown_directory ) {
'allowed_extensions' => array( 'md' ),
'index_file_patterns' => array( '#^index\.md$#' ),
'markup_converter_factory' => function ( $content ) {
return new WP_Markdown_To_Blocks( $content );
return new WP_Markdown_Consumer( $content );
},
)
);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php

require_once __DIR__ . '/WP_Markdown_Importer.php';
require_once __DIR__ . '/WP_Markdown_To_Blocks.php';
require_once __DIR__ . '/WP_Markdown_Consumer.php';

require_once __DIR__ . '/../vendor/autoload.php';
12 changes: 10 additions & 2 deletions packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,22 @@
require_once __DIR__ . '/src/wordpress-core-html-api/html5-named-character-references.php';
}

require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Converter.php';
require_once __DIR__ . '/src/Data_Liberation_Exception.php';
require_once __DIR__ . '/src/data-format-consumers/WP_Blocks_With_Metadata.php';
require_once __DIR__ . '/src/data-format-consumers/WP_Data_Format_Consumer.php';
require_once __DIR__ . '/src/data-format-consumers/WP_Markup_Processor_Consumer.php';
require_once __DIR__ . '/src/data-format-consumers/WP_Annotated_Block_Markup_Consumer.php';

require_once __DIR__ . '/src/data-format-producers/WP_Data_Format_Producer.php';
require_once __DIR__ . '/src/data-format-producers/WP_Annotated_Block_Markup_Producer.php';

require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Processor.php';
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Url_Processor.php';
require_once __DIR__ . '/src/block-markup/WP_URL_In_Text_Processor.php';
require_once __DIR__ . '/src/block-markup/WP_URL.php';
require_once __DIR__ . '/src/block-markup/WP_HTML_To_Blocks.php';

require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_Blocks_With_Metadata_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_EPub_Entity_Reader.php';
require_once __DIR__ . '/src/entity-readers/WP_WXR_Entity_Reader.php';
Expand Down
3 changes: 1 addition & 2 deletions packages/playground/data-liberation/phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
<testsuites>
<testsuite name="Application Test Suite">
<file>tests/WPHTMLEntityReaderTests.php</file>
<file>tests/WPHTMLToBlocksTests.php</file>
<file>tests/WPWXRReaderTests.php</file>
<file>tests/WPRewriteUrlsTests.php</file>
<file>tests/WPHTMLToBlocksTests.php</file>
<file>tests/WPMarkupProcessorConsumerTests.php</file>
<file>tests/WPHTMLEntityReaderTests.php</file>
<file>tests/WPURLInTextProcessorTests.php</file>
<file>tests/WPBlockMarkupProcessorTests.php</file>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?php

/**
 * Exception type for errors that occur during the data liberation process.
 *
 * The class body is intentionally empty: everything (message, code, previous
 * exception) is inherited unchanged from RuntimeException.
 */
class Data_Liberation_Exception extends RuntimeException {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?php

/**
 * HTML processor extension that can report the raw markup enclosed by the
 * currently matched opening tag.
 */
class WP_Data_Liberation_HTML_Processor extends WP_HTML_Processor {

	/**
	 * Returns the slice of the input document that sits between the end of
	 * the currently matched opening tag and the start of the token the
	 * processor stops on after skip_to_closer().
	 *
	 * The processor is moved back to the opening tag before returning the
	 * slice, and both temporary bookmarks are released.
	 *
	 * @return string|false The enclosed markup, or false when the current
	 *                      token is not an opening tag or a bookmark could
	 *                      not be set.
	 */
	public function get_inner_html() {
		// Only an opening tag can have inner HTML.
		if ( '#tag' !== $this->get_token_type() || $this->is_tag_closer() ) {
			return false;
		}

		if ( false === WP_HTML_Tag_Processor::set_bookmark( 'tag-start' ) ) {
			return false;
		}

		// The return value is not inspected here; the bookmark call below is
		// the only bail-out after this point.
		$this->skip_to_closer();

		if ( false === WP_HTML_Tag_Processor::set_bookmark( 'tag-end' ) ) {
			WP_HTML_Tag_Processor::release_bookmark( 'tag-start' );
			return false;
		}

		$opener         = $this->bookmarks['tag-start'];
		$closer         = $this->bookmarks['tag-end'];
		$content_start  = $opener->start + $opener->length;
		$content_length = $closer->start - $content_start;

		// Rewind to the opening tag, then drop the temporary bookmarks.
		WP_HTML_Tag_Processor::seek( 'tag-start' );
		foreach ( array( 'tag-start', 'tag-end' ) as $bookmark_name ) {
			WP_HTML_Tag_Processor::release_bookmark( $bookmark_name );
		}

		return substr( $this->html, $content_start, $content_length );
	}

	/**
	 * Advances token by token until reaching a closing tag whose depth is one
	 * less than the depth at which this method was called.
	 *
	 * @return bool True when such a closing tag was reached, false when
	 *              next_token() ran out of tokens first.
	 */
	public function skip_to_closer() {
		$target_depth = $this->get_current_depth() - 1;

		while ( $this->next_token() ) {
			if ( '#tag' !== $this->get_token_type() ) {
				continue;
			}
			if ( ! $this->is_tag_closer() ) {
				continue;
			}
			if ( $this->get_current_depth() === $target_depth ) {
				return true;
			}
		}

		return false;
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?php
/**
 * Converts metadata-annotated block markup into a block markup + metadata pair.
 *
 * Example input:
 *
 *     <meta name="post_title" content="My first post">
 *     <!-- wp:paragraph -->
 *     <p>Hello <b>world</b>!</p>
 *     <!-- /wp:paragraph -->
 *
 * Becomes the following block markup:
 *
 *     <!-- wp:paragraph -->
 *     <p>Hello <b>world</b>!</p>
 *     <!-- /wp:paragraph -->
 *
 * With the following metadata:
 *
 *     array(
 *         'post_title' => array( 'My first post' ),
 *     )
 */
class WP_Annotated_Block_Markup_Consumer implements WP_Data_Format_Consumer {

	/**
	 * The annotated block markup passed to the constructor.
	 *
	 * @var string
	 */
	private $original_html;

	/**
	 * Cached outcome of consume(); null until consume() has run once.
	 *
	 * @var WP_Blocks_With_Metadata|null
	 */
	private $result;

	/**
	 * @param string $original_html Block markup, optionally annotated with <meta> tags.
	 */
	public function __construct( $original_html ) {
		$this->original_html = $original_html;
	}

	/**
	 * Splits the input into named blocks and freeform chunks and rebuilds it
	 * as block markup plus metadata.
	 *
	 * Named blocks are re-serialized with serialize_block(). Freeform chunks
	 * (blocks whose name is null) are handed to WP_Markup_Processor_Consumer,
	 * and both the markup and the metadata it reports are collected. Every
	 * piece of markup is followed by a newline.
	 *
	 * The result is computed once and then reused on subsequent calls.
	 *
	 * @return WP_Blocks_With_Metadata The consumed block markup and metadata.
	 */
	public function consume() {
		if ( null !== $this->result ) {
			return $this->result;
		}

		$block_markup = '';
		$metadata     = array();
		foreach ( parse_blocks( $this->original_html ) as $block ) {
			if ( null !== $block['blockName'] ) {
				$block_markup .= serialize_block( $block ) . "\n";
				continue;
			}

			$html_converter = new WP_Markup_Processor_Consumer( WP_HTML_Processor::create_fragment( $block['innerHTML'] ) );
			$consumed       = $html_converter->consume();
			$block_markup  .= $consumed->get_block_markup() . "\n";

			// Metadata is shaped as key => list of values. array_merge() would
			// let a later freeform chunk replace the values collected for the
			// same key from an earlier chunk and would renumber numeric keys,
			// so the values are appended per key instead.
			foreach ( $consumed->get_all_metadata() as $key => $values ) {
				if ( ! array_key_exists( $key, $metadata ) ) {
					$metadata[ $key ] = array();
				}
				foreach ( (array) $values as $value ) {
					$metadata[ $key ][] = $value;
				}
			}
		}

		$this->result = new WP_Blocks_With_Metadata(
			$block_markup,
			$metadata
		);

		return $this->result;
	}
}
Original file line number Diff line number Diff line change
@@ -1,25 +1,39 @@
<?php

/**
* Represents a {Data Format} -> Block Markup + Metadata converter.
*
* Used by the Data Liberation importers to accept data formatted as HTML, Markdown, etc.
* and convert them to WordPress posts.
* Represents the result of a {data format} -> block markup conversion.
*/
interface WP_Block_Markup_Converter {
/**
* Converts the input document specified in the constructor to block markup.
*
* @return bool Whether the conversion was successful.
*/
public function convert();
class WP_Blocks_With_Metadata {

private $block_markup;
private $metadata;

public function __construct( $block_markup, $metadata = array() ) {
$this->block_markup = $block_markup;
$this->metadata = $metadata;
}

/**
* Gets the block markup generated by the convert() method.
* Gets the first metadata value for a given key.
*
* @return string The block markup.
* Example:
*
* Metadata:
* array(
* 'post_title' => array( 'The Name of the Wind' ),
* 'post_author' => array( 'Patrick Rothfuss', 'Betsy Wollheim' )
* )
*
* get_first_meta_value( 'post_author' ) returns 'Patrick Rothfuss'.
*
* @param string $key The metadata key.
* @return mixed The metadata value.
*/
public function get_block_markup();
public function get_first_meta_value( $key ) {
if ( ! array_key_exists( $key, $this->metadata ) ) {
return null;
}
return $this->metadata[ $key ][0];
}

/**
* Gets all the metadata sourced from the input document by the convert() method.
Expand All @@ -35,23 +49,16 @@ public function get_block_markup();
*
* @return array The metadata sourced from the input document.
*/
public function get_all_metadata();
public function get_all_metadata() {
return $this->metadata;
}

/**
* Gets the first metadata value for a given key.
*
* Example:
*
* Metadata:
* array(
* 'post_title' => array( 'The Name of the Wind' ),
* 'post_author' => array( 'Patrick Rothfuss', 'Betsy Wollheim' )
* )
*
* get_first_meta_value( 'post_author' ) returns 'Patrick Rothfuss'.
* Gets the block markup generated by the convert() method.
*
* @param string $key The metadata key.
* @return mixed The metadata value.
* @return string The block markup.
*/
public function get_first_meta_value( $key );
public function get_block_markup() {
return $this->block_markup;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?php

/**
* Represents a {Data Format} -> Block Markup + Metadata consumer.
*
* Used by the Data Liberation importers to accept data formatted as HTML, Markdown, etc.
* and convert them to WordPress posts.
*
* The interface declares a single method, consume(), which takes no arguments:
* the input document is expected to be supplied up front, e.g. through the
* implementing class's constructor.
*/
interface WP_Data_Format_Consumer {
/**
* Converts the input document specified in the constructor to block markup
* and returns it together with the metadata sourced from that document.
*
* @return WP_Blocks_With_Metadata The consumed block markup and metadata.
*/
public function consume();
}
Loading

0 comments on commit 3443569

Please sign in to comment.