-
Notifications
You must be signed in to change notification settings - Fork 29
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
EZP-29289: Migrating ezxmltext with invalid name or id attributes #47
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -157,9 +157,44 @@ protected function reportNonUniqueIds(DOMDocument $document, $contentFieldId) | |
$id = $node->attributes->getNamedItem('id')->nodeValue; | ||
// id has format "duplicated_id_foo_bar_idm45226413447104" where "foo_bar" is the duplicated id | ||
$duplicatedId = substr($id, strlen('duplicated_id_'), strrpos($id, '_') - strlen('duplicated_id_')); | ||
if ($this->logger !== null) { | ||
$this->logger->warning("Duplicated id in original ezxmltext for contentobject_attribute.id=$contentFieldId, automatically generated new id : $duplicatedId --> $id"); | ||
} | ||
$this->logger->warning("Duplicated id in original ezxmltext for contentobject_attribute.id=$contentFieldId, automatically generated new id : $duplicatedId --> $id"); | ||
} | ||
} | ||
|
||
/** | ||
* Rewrites xml:id values that do not match the accepted character set and logs each rewrite. | ||
* | ||
* An id is considered invalid when its first character is not an ASCII letter or '_', | ||
* or when any later character is not an ASCII letter, a digit, '_' or '-'. | ||
* Matching elements get their id replaced in place by 'rewrite_' followed by the | ||
* original value, with every non-whitelisted byte turned into '_'. | ||
* | ||
* @param DOMDocument $document Document whose id attributes are checked and modified in place. | ||
* @param null|int $contentFieldId Only used in the log message; reported as '[unknown]' when null. | ||
*/ | ||
protected function validateAttributeValues(DOMDocument $document, $contentFieldId) | ||
{ | ||
$xpath = new DOMXPath($document); | ||
$whitelist1st = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'; | ||
$replaceStr1st = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; | ||
|
||
$whitelist = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-'; | ||
$replaceStr = ''; | ||
/* | ||
* We want to pick elements whose xml:id value | ||
* #1 does not start with a..z, A..Z or '_', or | ||
* #2 contains anything other than a..z, A..Z, 0..9, '_' or '-' after the 1st character. | ||
* XPath 2.0 (and its regex functions) is not at our disposal, so the check is built from XPath 1.0 string functions. | ||
* 1st line : take the 1st character of the id (substring), map it to 'a' if it is in the 1st-character whitelist (translate), | ||
* : test whether the result starts with 'a' (starts-with), then invert the result (not). | ||
* : In other words: the element is selected if its 1st character is not whitelisted. | ||
* 2nd line : take the rest of the id, omitting the 1st character (substring), remove every character that *is* whitelisted (translate), | ||
* : then check whether any non-whitelisted characters are left (string-length). | ||
* 3rd line : because of the not() in the 1st line, elements without any xml:id would also match. | ||
* : So we make sure we only pick elements which actually have a non-empty xml:id attribute. | ||
*/ | ||
$nodes = $xpath->query("//*[ | ||
( | ||
not(starts-with(translate(substring(@xml:id, 1, 1), '$whitelist1st', '$replaceStr1st'), 'a')) | ||
or string-length(translate(substring(@xml:id, 2), '$whitelist', '$replaceStr')) > 0 | ||
) and string-length(@xml:id) > 0]"); | ||
|
||
// The field id only appears in the log message; use a placeholder when the caller did not supply one. | ||
if ($contentFieldId === null) { | ||
$contentFieldId = '[unknown]'; | ||
} | ||
foreach ($nodes as $node) { | ||
$orgValue = $node->attributes->getNamedItem('id')->nodeValue; | ||
// The 'rewrite_' prefix gives the new id a whitelisted first character. | ||
$newValue = 'rewrite_' . $node->attributes->getNamedItem('id')->nodeValue; | ||
// The pattern has no "u" modifier, so a multi-byte UTF-8 character is replaced by one '_' per byte. | ||
$newValue = preg_replace("/[^$whitelist]/", '_', $newValue); | ||
$node->attributes->getNamedItem('id')->nodeValue = $newValue; | ||
$this->logger->warning("Replaced non-validating id value in richtext for contentobject_attribute.id=$contentFieldId, changed from : $orgValue --> $newValue"); | ||
} | ||
} | ||
|
||
|
@@ -317,10 +352,11 @@ protected function checkEmptyEmbedTags(DOMDocument $inputDocument) | |
* | ||
* @param DOMDocument $inputDocument | ||
* @param bool $checkDuplicateIds | ||
* @param bool $checkIdValues | ||
* @param null|int $contentFieldId | ||
* @return string | ||
*/ | ||
public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false, $contentFieldId = null) | ||
public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false, $checkIdValues = false, $contentFieldId = null) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick: Missing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch!. Fixed |
||
{ | ||
$this->removeComments($inputDocument); | ||
|
||
|
@@ -329,6 +365,9 @@ public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false, | |
if ($checkDuplicateIds) { | ||
$this->reportNonUniqueIds($convertedDocument, $contentFieldId); | ||
} | ||
if ($checkIdValues) { | ||
$this->validateAttributeValues($convertedDocument, $contentFieldId); | ||
} | ||
|
||
// Needed by some disabled output escaping (eg. legacy ezxml paragraph <line/> elements) | ||
$convertedDocumentNormalized = new DOMDocument(); | ||
|
@@ -339,7 +378,7 @@ public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false, | |
|
||
$result = $convertedDocumentNormalized->saveXML(); | ||
|
||
if (!empty($errors) && $this->logger !== null) { | ||
if (!empty($errors)) { | ||
$this->logger->error( | ||
"Validation errors when converting ezxmltext for contentobject_attribute.id=$contentFieldId", | ||
['result' => $result, 'errors' => $errors, 'xmlString' => $inputDocument->saveXML()] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
33 changes: 33 additions & 0 deletions
33
tests/lib/FieldType/Converter/_fixtures/richtext/input/041-anchor_invalid_id.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<section | ||
xmlns:image="http://ez.no/namespaces/ezpublish3/image/" | ||
xmlns:xhtml="http://ez.no/namespaces/ezpublish3/xhtml/" | ||
xmlns:custom="http://ez.no/namespaces/ezpublish3/custom/"> | ||
<paragraph align="justify">Here is an anchor | ||
<anchor name="1name"/> | ||
</paragraph> | ||
<paragraph align="justify">Here is an anchor | ||
<anchor name="n1ame"/> | ||
</paragraph> | ||
<paragraph align="justify">Here is an anchor | ||
<anchor name="-1name"/> | ||
</paragraph> | ||
<paragraph align="justify">Here is an anchor | ||
<anchor name="_name"/> | ||
</paragraph> | ||
<paragraph align="justify">Here is an anchor | ||
<anchor name="aname"/> | ||
</paragraph> | ||
<paragraph align="justify">Here is an anchor | ||
<anchor name="#aname"/> | ||
</paragraph> | ||
<paragraph align="justify">Here is an anchor | ||
<anchor name="a@name"/> | ||
</paragraph> | ||
<paragraph align="justify">Here is an anchor | ||
<anchor name="an£ame"/> | ||
</paragraph> | ||
<paragraph align="justify">Here is an anchor | ||
<anchor name="aname["/> | ||
</paragraph> | ||
</section> |
2 changes: 2 additions & 0 deletions
2
tests/lib/FieldType/Converter/_fixtures/richtext/log/001-dupid-title.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Duplicated id in original ezxmltext for contentobject_attribute.id=[unknown], automatically generated new id : inv5 --> duplicated_id_inv5_* | ||
Duplicated id in original ezxmltext for contentobject_attribute.id=[unknown], automatically generated new id : inv5 --> duplicated_id_inv5_* |
1 change: 1 addition & 0 deletions
1
tests/lib/FieldType/Converter/_fixtures/richtext/log/002-dupid-embed.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Duplicated id in original ezxmltext for contentobject_attribute.id=[unknown], automatically generated new id : myembed_id --> duplicated_id_myembed_id_idm* |
1 change: 1 addition & 0 deletions
1
tests/lib/FieldType/Converter/_fixtures/richtext/log/003-dupid-anchor.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Duplicated id in original ezxmltext for contentobject_attribute.id=[unknown], automatically generated new id : anchor --> duplicated_id_anchor_idm* |
6 changes: 6 additions & 0 deletions
6
tests/lib/FieldType/Converter/_fixtures/richtext/log/041-anchor_invalid_id.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
Replaced non-validating id value in richtext for contentobject_attribute.id=[unknown], changed from : 1name --> rewrite_1name | ||
Replaced non-validating id value in richtext for contentobject_attribute.id=[unknown], changed from : -1name --> rewrite_-1name | ||
Replaced non-validating id value in richtext for contentobject_attribute.id=[unknown], changed from : #aname --> rewrite__aname | ||
Replaced non-validating id value in richtext for contentobject_attribute.id=[unknown], changed from : a@name --> rewrite_a_name | ||
Replaced non-validating id value in richtext for contentobject_attribute.id=[unknown], changed from : an£ame --> rewrite_an__ame | ||
Replaced non-validating id value in richtext for contentobject_attribute.id=[unknown], changed from : aname[ --> rewrite_aname_ |
1 change: 1 addition & 0 deletions
1
tests/lib/FieldType/Converter/_fixtures/richtext/log/121-embed-no_idref.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Warning: ezxmltext for contentobject_attribute.id=contains embed or embed-inline tag(s) without node_id or object_id |
1 change: 1 addition & 0 deletions
1
tests/lib/FieldType/Converter/_fixtures/richtext/log/132-embed-inline-no_idref.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Warning: ezxmltext for contentobject_attribute.id=contains embed or embed-inline tag(s) without node_id or object_id |
34 changes: 34 additions & 0 deletions
34
tests/lib/FieldType/Converter/_fixtures/richtext/output/041-anchor_invalid_id.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<section | ||
xmlns="http://docbook.org/ns/docbook" | ||
xmlns:xlink="http://www.w3.org/1999/xlink" | ||
xmlns:ezxhtml="http://ez.no/xmlns/ezpublish/docbook/xhtml" | ||
xmlns:ezcustom="http://ez.no/xmlns/ezpublish/docbook/custom" version="5.0-variant ezpublish-1.0"> | ||
<para ezxhtml:textalign="justify">Here is an anchor | ||
<anchor xml:id="rewrite_1name"/> | ||
</para> | ||
<para ezxhtml:textalign="justify">Here is an anchor | ||
<anchor xml:id="n1ame"/> | ||
</para> | ||
<para ezxhtml:textalign="justify">Here is an anchor | ||
<anchor xml:id="rewrite_-1name"/> | ||
</para> | ||
<para ezxhtml:textalign="justify">Here is an anchor | ||
<anchor xml:id="_name"/> | ||
</para> | ||
<para ezxhtml:textalign="justify">Here is an anchor | ||
<anchor xml:id="aname"/> | ||
</para> | ||
<para ezxhtml:textalign="justify">Here is an anchor | ||
<anchor xml:id="rewrite__aname"/> | ||
</para> | ||
<para ezxhtml:textalign="justify">Here is an anchor | ||
<anchor xml:id="rewrite_a_name"/> | ||
</para> | ||
<para ezxhtml:textalign="justify">Here is an anchor | ||
<anchor xml:id="rewrite_an__ame"/> | ||
</para> | ||
<para ezxhtml:textalign="justify">Here is an anchor | ||
<anchor xml:id="rewrite_aname_"/> | ||
</para> | ||
</section> |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great to see this comment! 🙂