Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix implied URL parsing #192

Merged
merged 2 commits into from
Aug 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 23 additions & 27 deletions Mf2/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -1083,35 +1083,31 @@ public function parseH(\DOMElement $e, $is_backcompat = false, $has_nested_mf =

}

// Check for u-url
if (!array_key_exists('url', $return) && !$is_backcompat) {
$url = null;
// Look for img @src
if ($e->tagName == 'a' or $e->tagName == 'area') {
$url = $e->getAttribute('href');
}

// Look for nested a @href
foreach ($this->xpath->query('./a[count(preceding-sibling::a)+count(following-sibling::a)=0]', $e) as $em) {
$emNames = mfNamesFromElement($em, 'h-');
if (empty($emNames)) {
$url = $em->getAttribute('href');
break;
}
}

// Look for nested area @src
foreach ($this->xpath->query('./area[count(preceding-sibling::area)+count(following-sibling::area)=0]', $e) as $em) {
$emNames = mfNamesFromElement($em, 'h-');
if (empty($emNames)) {
$url = $em->getAttribute('href');
break;
// Do we need to imply a url property?
// if no explicit "url" property, and no other explicit u-* properties, and no nested microformats
if (!array_key_exists('url', $return) && !in_array('u-', $prefixes) && !$has_nested_mf && !$is_backcompat) {
// a.h-x[href] or area.h-x[href]
if (($e->tagName === 'a' || $e->tagName === 'area') && $e->hasAttribute('href')) {
$return['url'][] = $this->resolveUrl($e->getAttribute('href'));
} else {
$xpaths = array(
// .h-x>a[href]:only-of-type:not[.h-*]
'./a[not(contains(concat(" ", @class), " h-")) and count(../a) = 1 and @href]',
// .h-x>area[href]:only-of-type:not[.h-*]
'./area[not(contains(concat(" ", @class), " h-")) and count(../area) = 1 and @href]',
// .h-x>:only-child:not[.h-*]>a[href]:only-of-type:not[.h-*]
'./*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(a) = 1]/a[not(contains(concat(" ", @class), " h-")) and @href]',
// .h-x>:only-child:not[.h-*]>area[href]:only-of-type:not[.h-*]
'./*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(area) = 1]/area[not(contains(concat(" ", @class), " h-")) and @href]'
);
foreach ($xpaths as $xpath) {
$url = $this->xpath->query($xpath, $e);
if ($url !== false && $url->length === 1) {
$return['url'][] = $this->resolveUrl($url->item(0)->getAttribute('href'));
break;
}
}
}

if (!is_null($url)) {
$return['url'][] = $this->resolveUrl($url);
}
}

// Make sure things are unique and in alphabetical order
Expand Down
2 changes: 1 addition & 1 deletion tests/Mf2/ClassicMicroformatsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,7 @@ public function testMixedMf2andMf1Case3() {
$parser = new Parser($input);
$result = $parser->parse();

$this->assertCount(3, $result['items'][0]['properties']);
$this->assertCount(2, $result['items'][0]['properties']);
$this->assertArrayNotHasKey('street-address', $result['items'][0]['properties']);
$this->assertArrayNotHasKey('locality', $result['items'][0]['properties']);
$this->assertArrayNotHasKey('country-name', $result['items'][0]['properties']);
Expand Down
13 changes: 13 additions & 0 deletions tests/Mf2/ParseImpliedTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -331,5 +331,18 @@ public function testBackcompatNoImpliedUrl() {
$this->assertArrayHasKey('content', $result['items'][0]['properties']);
}


/**
 * An h-entry that carries an explicit u-* property (here u-syndication)
 * must not receive an implied "url" property, even though it contains
 * a nested link inside its p-name heading.
 *
 * @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
 * @see https://github.com/microformats/php-mf2/issues/183
 */
public function testNoImpliedUrl() {
    // Markup under test: one h-entry with p-name, e-content and u-syndication.
    $html = '<div class="h-entry"> <h1 class="p-name"><a href="https://example.com/this-post">Title</a></h1> <div class="e-content"> <p> blah blah blah </p> </div> <a href="https://example.org/syndicate" class="u-syndication"></a> </div>';

    $parsed = Mf2\parse($html);
    $properties = $parsed['items'][0]['properties'];

    // No "url" key may have been implied for the first parsed item.
    $this->assertArrayNotHasKey('url', $properties);
}

}