Skip to content

Commit

Permalink
Add more markdown support
Browse files (browse the repository at this point in the history)
  • Loading branch information
dmsnell committed Apr 3, 2024
1 parent 407a433 commit e9165f6
Showing 1 changed file with 69 additions and 19 deletions.
88 changes: 69 additions & 19 deletions src/wp-includes/html-api/class-wp-html-to-markdown-converter.php
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,26 +2,18 @@

class WP_HTML_To_Markdown_Converter {
public static function convert( $html ) {
$processor = WP_HTML_Processor::create_fragment( $html );
$md = '';
$list_items = array();
$depth = 0;
echo "\n";

echo "\e[90mFound these nodes…\e[m\n";
$node_count = 8;
$processor = WP_HTML_Processor::create_fragment( $html );
$md = '';
$list_items = array();
$depth = 0;
$blockquotes = array();
$link = null;

while ( $processor->next_token() ) {
$indent = str_pad( '', $depth * 2, ' ' );
$token_name = $processor->get_token_name();
$breadcrumbs = $processor->get_breadcrumbs();

$closer = $processor->is_tag_closer() ? '/' : '';
if ( 0 === --$node_count ) {
$node_count = 8;
echo "\n";
}
echo "\e[36m{$closer}\e[32m{$token_name}\e[m ";

if ( $processor->is_tag_closer() ) {
switch ( $token_name ) {
case 'H1':
Expand All @@ -33,9 +25,24 @@ public static function convert( $html ) {
$md .= "\n";
break;

case 'A':
$md .= "]({$link})";
$link = null;
break;

case 'B':
case 'STRONG':
$md .= '*';
$md .= '**';
break;

case 'BLOCKQUOTE':
$blockquote_at = array_pop( $blockquotes );
$blockquote = substr( $md, $blockquote_at );
$blockquote = implode( "\n", array_map( fn ( $l ) => "> {$l}", explode( "\n", $blockquote ) ) );
$md = substr( $md, 0, $blockquote_at ) . "\n" . $blockquote;
if ( 0 === count( $blockquotes ) ) {
$md .= "\n";
}
break;

case 'I':
Expand All @@ -56,7 +63,19 @@ public static function convert( $html ) {

switch ( $token_name ) {
case '#text':
$md .= $processor->get_modifiable_text();
$text_chunk = $processor->get_modifiable_text();

// Skip inter-element whitespace.
// @todo: Detect this properly, ensuring it's actually inter-element.
if ( '' === trim( $text_chunk, "\t\r\n\f" ) ) {
break;
}

if ( null !== $link ) {
$text_chunk = str_replace( ']', '\\]', $text_chunk );
}

$md .= $text_chunk;
break;

case 'P':
Expand All @@ -74,6 +93,12 @@ public static function convert( $html ) {
$md .= "\n\n{$hashes} ";
break;

case 'A':
$href = $processor->get_attribute( 'href' );
$link = $href;
$md .= '[';
break;

case 'B':
case 'STRONG':
$md .= '*';
Expand All @@ -84,6 +109,16 @@ public static function convert( $html ) {
$md .= '_';
break;

case 'IMG':
$src = $processor->get_attribute( 'src' );
$src = str_replace( ')', '%29', $src );
$md .= "![]({$src})";
break;

case 'BLOCKQUOTE':
$blockquotes[] = strlen( $md );
break;

case 'LI':
$list_item = end( $list_items );
$md .= "\n{$indent}{$list_item} ";
Expand Down Expand Up @@ -132,9 +167,24 @@ public static function convert( $html ) {
$md .= "\n";
break;

case 'A':
$md .= "]({$link})";
$link = null;
break;

case 'B':
case 'STRONG':
$md .= '*';
$md .= '**';
break;

case 'BLOCKQUOTE':
$blockquote_at = array_pop( $blockquotes );
$blockquote = substr( $md, $blockquote_at );
$blockquote = implode( "\n", array_map( fn ( $l ) => "> {$l}", explode( "\n", $blockquote ) ) );
$md = substr( $md, 0, $blockquote_at ) . "\n" . $blockquote;
if ( 0 === count( $blockquotes ) ) {
$md .= "\n";
}
break;

case 'I':
Expand All @@ -150,6 +200,6 @@ public static function convert( $html ) {
}
}

return $md;
return trim( $md );
}
}

0 comments on commit e9165f6

Please sign in to comment.