diff --git a/Mf2/Parser.php b/Mf2/Parser.php index cce846e..7048794 100644 --- a/Mf2/Parser.php +++ b/Mf2/Parser.php @@ -130,7 +130,7 @@ function unicodeTrim($str) { function mfNamesFromClass($class, $prefix='h-') { $class = str_replace(array(' ', ' ', "\n"), ' ', $class); $classes = explode(' ', $class); - $classes = preg_grep('#^[a-z\-]+$#', $classes); + $classes = preg_grep('#^(h|p|u|dt|e)-([a-z0-9]+-)?[a-z]+(-[a-z]+)*$#', $classes); $matches = array(); foreach ($classes as $classname) { @@ -1257,16 +1257,15 @@ public function parseRelsAndAlternates() { // Iterate through all a, area and link elements with rel attributes foreach ($this->xpath->query('//a[@rel and @href] | //link[@rel and @href] | //area[@rel and @href]') as $hyperlink) { - if ($hyperlink->getAttribute('rel') == '') { + // Parse the set of rels for the current link + $linkRels = array_unique(array_filter(preg_split('/[\t\n\f\r ]/', $hyperlink->getAttribute('rel')))); + if (count($linkRels) === 0) { continue; } // Resolve the href $href = $this->resolveUrl($hyperlink->getAttribute('href')); - // Split up the rel into space-separated values - $linkRels = array_filter(explode(' ', $hyperlink->getAttribute('rel'))); - $rel_attributes = array(); if ($hyperlink->hasAttribute('media')) { @@ -1285,8 +1284,8 @@ public function parseRelsAndAlternates() { $rel_attributes['type'] = $hyperlink->getAttribute('type'); } - if ($hyperlink->nodeValue) { - $rel_attributes['text'] = $hyperlink->nodeValue; + if (strlen($hyperlink->textContent) > 0) { + $rel_attributes['text'] = $hyperlink->textContent; } if ($this->enableAlternates) { @@ -1303,16 +1302,34 @@ public function parseRelsAndAlternates() { } foreach ($linkRels as $rel) { - $rels[$rel][] = $href; + if (!array_key_exists($rel, $rels)) { + $rels[$rel] = array($href); + } elseif (!in_array($href, $rels[$rel])) { + $rels[$rel][] = $href; + } } - if (!in_array($href, $rel_urls)) { - $rel_urls[$href] = array_merge( - $rel_attributes, - array('rels' => $linkRels) - ); + if (!array_key_exists($href, $rel_urls)) { + $rel_urls[$href] = array('rels' => array()); } + // Add the attributes collected only if they were not already set + $rel_urls[$href] = array_merge( + $rel_attributes, + $rel_urls[$href] + ); + + // Merge current rels with those already set + $rel_urls[$href]['rels'] = array_merge( + $rel_urls[$href]['rels'], + $linkRels + ); + } + + // Alphabetically sort the rels arrays after removing duplicates + foreach ($rel_urls as $href => $object) { + $rel_urls[$href]['rels'] = array_unique($rel_urls[$href]['rels']); + sort($rel_urls[$href]['rels']); } if (empty($rels) and $this->jsonMode) { @@ -1321,8 +1338,8 @@ public function parseRelsAndAlternates() { if (empty($rel_urls) and $this->jsonMode) { $rel_urls = new stdClass(); - } - + } + return array($rels, $rel_urls, $alternates); } diff --git a/tests/Mf2/ParserTest.php b/tests/Mf2/ParserTest.php index 751dcad..e5767ab 100644 --- a/tests/Mf2/ParserTest.php +++ b/tests/Mf2/ParserTest.php @@ -703,5 +703,68 @@ public function testNoErrantWhitespaceOnEHtml() $output = Mf2\parse($input); $this->assertEquals('

1

2

', $output['items'][0]['properties']['content'][0]['html']); } + + /** + * @see https://github.com/indieweb/php-mf2/issues/158 + */ + public function testPrefixWithNumbers() { + $input = '
  • + +
    + Coffee +
    +
  • '; + $output = Mf2\parse($input); + + $this->assertArrayHasKey('p3k-drank', $output['items'][0]['properties']); + $this->assertCount(1, $output['items'][0]['properties']['p3k-drank']); + $this->assertEquals('h-p3k-food', $output['items'][0]['properties']['p3k-drank'][0]['type'][0]); + } + + /** + * @see https://github.com/indieweb/php-mf2/issues/160 + */ + public function testConsecutiveDashes() { + $input = '
    +

    http://example.com/post posted:

    +Too many dashes +leading dash +middle dash +trailing dash +
    '; + $output = Mf2\parse($input); + + $this->assertCount(1, $output['items'][0]['type']); + $this->assertEquals('h-entry', $output['items'][0]['type'][0]); + $this->assertCount(1, $output['items'][0]['properties']); + $this->assertArrayHasKey('name', $output['items'][0]['properties']); + } + + /** + * Additional test from mf2py. Covers consecutive dashes, numbers in vendor prefix, and capital letters. + * Added markup for numbers-only prefix and capital letter in prefix + * @see https://github.com/kartikprabhu/mf2py/blob/experimental/test/examples/class_names_format.html + * @see https://github.com/indieweb/php-mf2/issues/160 + * @see https://github.com/indieweb/php-mf2/issues/158 + */ + public function testMfClassRegex() { + $input = '
    + URL + name +
    '; + $output = Mf2\parse($input); + + $this->assertCount(3, $output['items'][0]['type']); + $this->assertContains('h-feed', $output['items'][0]['type']); + $this->assertContains('h-p3k-entry', $output['items'][0]['type']); + $this->assertContains('h-x-test', $output['items'][0]['type']); + $this->assertCount(5, $output['items'][0]['properties']); + $this->assertArrayHasKey('url', $output['items'][0]['properties']); + $this->assertArrayHasKey('p3k-url', $output['items'][0]['properties']); + $this->assertArrayHasKey('name', $output['items'][0]['properties']); + $this->assertArrayHasKey('p3k-name', $output['items'][0]['properties']); + $this->assertArrayHasKey('123-url', $output['items'][0]['properties']); + } + } diff --git a/tests/Mf2/RelTest.php b/tests/Mf2/RelTest.php index 55c08f0..2d737a2 100644 --- a/tests/Mf2/RelTest.php +++ b/tests/Mf2/RelTest.php @@ -176,4 +176,47 @@ public function testRelURLs() { $this->assertArrayHasKey('rels', $output['rel-urls']['http://example.com/articles.atom']); } + /** + * @see https://github.com/microformats/microformats2-parsing/issues/29 + * @see https://github.com/microformats/microformats2-parsing/issues/30 + */ + public function testRelURLsRelsUniqueAndSorted() { + $input = ' +'; + $parser = new Parser($input); + $output = $parser->parse(); + $this->assertEquals($output['rel-urls']['#']['rels'], array('archived', 'bookmark', 'me')); + } + + public function testRelURLsInfoMergesCorrectly() { + $input = 'This nodeValue +Not this nodeValue'; + $parser = new Parser($input); + $output = $parser->parse(); + $this->assertEquals($output['rel-urls']['#']['hreflang'], 'en'); + $this->assertArrayNotHasKey('media', $output['rel-urls']['#']); + $this->assertArrayNotHasKey('title', $output['rel-urls']['#']); + $this->assertArrayNotHasKey('type', $output['rel-urls']['#']); + $this->assertEquals($output['rel-urls']['#']['text'], 'This nodeValue'); + } + + public function testRelURLsNoDuplicates() { + $input = ' + +'; + $parser = new Parser($input); + $output = $parser->parse(); + $this->assertEquals($output['rels']['a'], array('#a', '#b')); + } + + public function testRelURLsFalsyTextVSEmpty() { + $input = '0 +'; + $parser = new Parser($input); + $output = $parser->parse(); + $this->assertArrayHasKey('text', $output['rel-urls']['#a']); + $this->assertEquals($output['rel-urls']['#a']['text'], '0'); + $this->assertArrayNotHasKey('text', $output['rel-urls']['#b']); + } + }