diff --git a/Mf2/Parser.php b/Mf2/Parser.php
index cce846e..7048794 100644
--- a/Mf2/Parser.php
+++ b/Mf2/Parser.php
@@ -130,7 +130,7 @@ function unicodeTrim($str) {
function mfNamesFromClass($class, $prefix='h-') {
$class = str_replace(array(' ', ' ', "\n"), ' ', $class);
$classes = explode(' ', $class);
- $classes = preg_grep('#^[a-z\-]+$#', $classes);
+ $classes = preg_grep('#^(h|p|u|dt|e)-([a-z0-9]+-)?[a-z]+(-[a-z]+)*$#', $classes);
$matches = array();
foreach ($classes as $classname) {
@@ -1257,16 +1257,15 @@ public function parseRelsAndAlternates() {
// Iterate through all a, area and link elements with rel attributes
foreach ($this->xpath->query('//a[@rel and @href] | //link[@rel and @href] | //area[@rel and @href]') as $hyperlink) {
- if ($hyperlink->getAttribute('rel') == '') {
+ // Parse the set of rels for the current link: split on runs of HTML ASCII whitespace.
+ // PREG_SPLIT_NO_EMPTY discards only empty tokens, so a literal "0" rel value is kept
+ // (array_filter() without a callback would drop it as falsy).
+ $linkRels = array_unique(preg_split('/[\t\n\f\r ]+/', $hyperlink->getAttribute('rel'), -1, PREG_SPLIT_NO_EMPTY));
+ if (count($linkRels) === 0) {
continue;
}
// Resolve the href
$href = $this->resolveUrl($hyperlink->getAttribute('href'));
- // Split up the rel into space-separated values
- $linkRels = array_filter(explode(' ', $hyperlink->getAttribute('rel')));
-
$rel_attributes = array();
if ($hyperlink->hasAttribute('media')) {
@@ -1285,8 +1284,8 @@ public function parseRelsAndAlternates() {
$rel_attributes['type'] = $hyperlink->getAttribute('type');
}
- if ($hyperlink->nodeValue) {
- $rel_attributes['text'] = $hyperlink->nodeValue;
+ if (strlen($hyperlink->textContent) > 0) {
+ $rel_attributes['text'] = $hyperlink->textContent;
}
if ($this->enableAlternates) {
@@ -1303,16 +1302,34 @@ public function parseRelsAndAlternates() {
}
// Record the href under every rel value, listing each href at most once per rel.
// The membership check is strict: a loose == comparison treats numeric-looking
// strings such as "1" and "01" as equal, which would drop a distinct URL.
foreach ($linkRels as $rel) {
- $rels[$rel][] = $href;
+ if (!array_key_exists($rel, $rels)) {
+ $rels[$rel] = array($href);
+ } elseif (!in_array($href, $rels[$rel], true)) {
+ $rels[$rel][] = $href;
+ }
}
- if (!in_array($href, $rel_urls)) {
- $rel_urls[$href] = array_merge(
- $rel_attributes,
- array('rels' => $linkRels)
- );
+ if (!array_key_exists($href, $rel_urls)) {
+ $rel_urls[$href] = array('rels' => array());
}
+ // Add the attributes collected only if they were not already set
+ $rel_urls[$href] = array_merge(
+ $rel_attributes,
+ $rel_urls[$href]
+ );
+
+ // Merge current rels with those already set
+ $rel_urls[$href]['rels'] = array_merge(
+ $rel_urls[$href]['rels'],
+ $linkRels
+ );
+ }
+
+ // Alphabetically sort the rels arrays after removing duplicates
+ foreach ($rel_urls as $href => $object) {
+ $rel_urls[$href]['rels'] = array_unique($rel_urls[$href]['rels']);
+ sort($rel_urls[$href]['rels']);
}
if (empty($rels) and $this->jsonMode) {
@@ -1321,8 +1338,8 @@ public function parseRelsAndAlternates() {
if (empty($rel_urls) and $this->jsonMode) {
$rel_urls = new stdClass();
- }
-
+ }
+
return array($rels, $rel_urls, $alternates);
}
diff --git a/tests/Mf2/ParserTest.php b/tests/Mf2/ParserTest.php
index 751dcad..e5767ab 100644
--- a/tests/Mf2/ParserTest.php
+++ b/tests/Mf2/ParserTest.php
@@ -703,5 +703,68 @@ public function testNoErrantWhitespaceOnEHtml()
$output = Mf2\parse($input);
$this->assertEquals('
1
2
', $output['items'][0]['properties']['content'][0]['html']);
}
+
+ /**
+ * A vendor prefix containing digits ("p3k") must be accepted both in a property
+ * class name (p3k-drank) and in a root class name (h-p3k-food).
+ *
+ * @see https://github.com/indieweb/php-mf2/issues/158
+ */
+ public function testPrefixWithNumbers() {
+ $input = '
+
+
+ Coffee
+
+';
+ $parsed = Mf2\parse($input);
+ $properties = $parsed['items'][0]['properties'];
+
+ $this->assertArrayHasKey('p3k-drank', $properties);
+ $drank = $properties['p3k-drank'];
+ $this->assertCount(1, $drank);
+ $this->assertEquals('h-p3k-food', $drank[0]['type'][0]);
+ }
+
+ /**
+ * Class names with leading, trailing or consecutive dashes must be ignored:
+ * the first item keeps the single type h-entry and the single property "name".
+ *
+ * @see https://github.com/indieweb/php-mf2/issues/160
+ */
+ public function testConsecutiveDashes() {
+ $input = '
+
http://example.com/post posted:
+
Too many dashes
+
leading dash
+
middle dash
+
trailing dash
+
';
+ $parsed = Mf2\parse($input);
+ $item = $parsed['items'][0];
+
+ $this->assertCount(1, $item['type']);
+ $this->assertEquals('h-entry', $item['type'][0]);
+ $this->assertCount(1, $item['properties']);
+ $this->assertArrayHasKey('name', $item['properties']);
+ }
+
+ /**
+ * Additional test taken from mf2py. It covers consecutive dashes, numbers in the
+ * vendor prefix, and capital letters; markup for a numbers-only prefix and for a
+ * capital letter in the prefix was added on top of the mf2py example.
+ *
+ * @see https://github.com/kartikprabhu/mf2py/blob/experimental/test/examples/class_names_format.html
+ * @see https://github.com/indieweb/php-mf2/issues/160
+ * @see https://github.com/indieweb/php-mf2/issues/158
+ */
+ public function testMfClassRegex() {
+ $input = '
+ URL
+ name
+';
+ $parsed = Mf2\parse($input);
+ $types = $parsed['items'][0]['type'];
+ $properties = $parsed['items'][0]['properties'];
+
+ // Exactly these three root types are recognised
+ $this->assertCount(3, $types);
+ foreach (array('h-feed', 'h-p3k-entry', 'h-x-test') as $expectedType) {
+ $this->assertContains($expectedType, $types);
+ }
+
+ // Exactly these five properties are recognised
+ $this->assertCount(5, $properties);
+ foreach (array('url', 'p3k-url', 'name', 'p3k-name', '123-url') as $expectedProperty) {
+ $this->assertArrayHasKey($expectedProperty, $properties);
+ }
+ }
+
}
diff --git a/tests/Mf2/RelTest.php b/tests/Mf2/RelTest.php
index 55c08f0..2d737a2 100644
--- a/tests/Mf2/RelTest.php
+++ b/tests/Mf2/RelTest.php
@@ -176,4 +176,47 @@ public function testRelURLs() {
$this->assertArrayHasKey('rels', $output['rel-urls']['http://example.com/articles.atom']);
}
+ /**
+ * The "rels" list of the rel-urls entry for "#" must hold each rel value once,
+ * in alphabetical order.
+ *
+ * @see https://github.com/microformats/microformats2-parsing/issues/29
+ * @see https://github.com/microformats/microformats2-parsing/issues/30
+ */
+ public function testRelURLsRelsUniqueAndSorted() {
+ $input = '
+';
+ $parser = new Parser($input);
+ $output = $parser->parse();
+ // assertEquals() takes the expected value first, then the actual value
+ $this->assertEquals(array('archived', 'bookmark', 'me'), $output['rel-urls']['#']['rels']);
+ }
+
+ /**
+ * Checks the attributes recorded for the rel-urls entry "#": hreflang and text
+ * carry the expected values, while media, title and type are absent.
+ */
+ public function testRelURLsInfoMergesCorrectly() {
+ $input = 'This nodeValue
+Not this nodeValue';
+ $parser = new Parser($input);
+ $output = $parser->parse();
+ // assertEquals() takes the expected value first, then the actual value
+ $this->assertEquals('en', $output['rel-urls']['#']['hreflang']);
+ $this->assertArrayNotHasKey('media', $output['rel-urls']['#']);
+ $this->assertArrayNotHasKey('title', $output['rel-urls']['#']);
+ $this->assertArrayNotHasKey('type', $output['rel-urls']['#']);
+ $this->assertEquals('This nodeValue', $output['rel-urls']['#']['text']);
+ }
+
+ /**
+ * The URL list stored under rel "a" must be exactly "#a" followed by "#b",
+ * with no URL repeated.
+ */
+ public function testRelURLsNoDuplicates() {
+ $input = '
+
+';
+ $parser = new Parser($input);
+ $output = $parser->parse();
+ // assertEquals() takes the expected value first, then the actual value
+ $this->assertEquals(array('#a', '#b'), $output['rels']['a']);
+ }
+
+ /**
+ * Link text that is falsy but non-empty ("0") must still be reported as "text"
+ * for "#a", whereas the entry for "#b" must have no "text" key at all.
+ */
+ public function testRelURLsFalsyTextVSEmpty() {
+ $input = '0
+';
+ $parser = new Parser($input);
+ $output = $parser->parse();
+ $this->assertArrayHasKey('text', $output['rel-urls']['#a']);
+ // assertEquals() takes the expected value first, then the actual value
+ $this->assertEquals('0', $output['rel-urls']['#a']['text']);
+ $this->assertArrayNotHasKey('text', $output['rel-urls']['#b']);
+ }
+
}