Skip to content

Commit

Permalink
EZP-29289: Migrating ezxmltext with invalid name or id attributes
Browse files Browse the repository at this point in the history
  • Loading branch information
vidarl committed Jun 15, 2018
1 parent 1075fdd commit a7895b1
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 5 deletions.
12 changes: 9 additions & 3 deletions bundle/Command/ConvertXmlTextToRichTextCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ protected function configure()
InputOption::VALUE_NONE,
'Disable the check for duplicate html ids in every attribute. This might increase execution time on large databases'
)
->addOption(
'disable-id-value-check',
null,
InputOption::VALUE_NONE,
'Disable the check for non-validating id/name values. This might increase execution time on large databases'
)
->addOption(
'test-content-object',
null,
Expand Down Expand Up @@ -123,7 +129,7 @@ protected function execute(InputInterface $input, OutputInterface $output)
$dryRun = true;
}

$this->convertFields($dryRun, $testContentId, !$input->getOption('disable-duplicate-id-check'), $output);
$this->convertFields($dryRun, $testContentId, !$input->getOption('disable-duplicate-id-check'), !$input->getOption('disable-id-value-check'), $output);
}

protected function getContentTypeIds($contentTypeIdentifiers)
Expand Down Expand Up @@ -330,7 +336,7 @@ protected function updateFieldRow($dryRun, $id, $version, $datatext)
}
}

protected function convertFields($dryRun, $contentId, $checkDuplicateIds, OutputInterface $output)
protected function convertFields($dryRun, $contentId, $checkDuplicateIds, $checkIdValues, OutputInterface $output)
{
$count = $this->getRowCountOfContentObjectAttributes('ezxmltext', $contentId);

Expand All @@ -345,7 +351,7 @@ protected function convertFields($dryRun, $contentId, $checkDuplicateIds, Output
$inputValue = $row['data_text'];
}

$converted = $this->converter->convert($this->createDocument($inputValue), $checkDuplicateIds, $row['id']);
$converted = $this->converter->convert($this->createDocument($inputValue), $checkDuplicateIds, $checkIdValues, $row['id']);

$this->updateFieldRow($dryRun, $row['id'], $row['version'], $converted);

Expand Down
25 changes: 24 additions & 1 deletion lib/FieldType/XmlText/Converter/RichText.php
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,26 @@ protected function reportNonUniqueIds(DOMDocument $document, $contentFieldId)
}
}

/**
 * Rewrites xml:id values whose first character is not an ASCII letter or an underscore.
 *
 * Every element of $document that carries a non-empty xml:id starting with any other
 * character gets that id prefixed with "rewrite_" (e.g. "1name" becomes "rewrite_1name").
 * The document is modified in place, and each rewrite is reported as a warning when a
 * logger is available.
 *
 * Only the first character of the id is inspected; the remaining characters are not checked.
 *
 * @param DOMDocument $document Document whose xml:id attributes are checked and fixed in place.
 * @param null|int $contentFieldId Value reported as contentobject_attribute.id in the log
 *                                 message; "[unknown]" is logged when null.
 */
protected function ValidateAttributeValues(DOMDocument $document, $contentFieldId)
{
    $xmlNamespace = 'http://www.w3.org/XML/1998/namespace';
    $whitelist = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_';
    // XPath 1.0 has no regular expressions, so translate() folds every whitelisted character
    // to "a" and the predicate then tests for a leading "a". translate() drops characters that
    // have no counterpart in the replacement string, so both strings must have the same length;
    // deriving it from the whitelist keeps them in sync.
    $replaceStr = str_repeat('a', strlen($whitelist));

    $xpath = new DOMXPath($document);
    $nodes = $xpath->query("//*[not(starts-with(translate(substring(@xml:id, 1, 1), '$whitelist', '$replaceStr'), 'a')) and string-length(@xml:id) > 0]");
    if ($nodes === false) {
        // query() signals an invalid expression or context with false; there is nothing to iterate.
        return;
    }

    if ($contentFieldId === null) {
        $contentFieldId = '[unknown]';
    }

    foreach ($nodes as $node) {
        // Fetch exactly the attribute the XPath matched (xml:id), not any attribute named "id".
        $idAttribute = $node->getAttributeNodeNS($xmlNamespace, 'id');
        if (!$idAttribute instanceof \DOMAttr) {
            continue;
        }

        $orgValue = $idAttribute->nodeValue;
        $newValue = 'rewrite_' . $orgValue;
        $idAttribute->nodeValue = $newValue;

        if ($this->logger !== null) {
            $this->logger->warning("Replaced non-validating id value in richtext for contentobject_attribute.id=$contentFieldId, changed from : $orgValue --> $newValue");
        }
    }
}

/**
* @param $id
* @param bool $isContentId Whether the provided $id is a content id or a location id
Expand Down Expand Up @@ -307,14 +327,17 @@ public function tagEmbeddedImages(DOMDocument $richtextDocument, $contentFieldId
* @param null|int $contentFieldId
* @return string
*/
public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false, $contentFieldId = null)
public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false, $checkIdValues = false, $contentFieldId = null)
{
$this->removeComments($inputDocument);

$convertedDocument = $this->getConverter()->convert($inputDocument);
if ($checkDuplicateIds) {
$this->reportNonUniqueIds($convertedDocument, $contentFieldId);
}
if ($checkIdValues) {
$this->ValidateAttributeValues($convertedDocument, $contentFieldId);
}

// Needed by some disabled output escaping (eg. legacy ezxml paragraph <line/> elements)
$convertedDocumentNormalized = new DOMDocument();
Expand Down
2 changes: 1 addition & 1 deletion tests/lib/FieldType/Converter/RichTextTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ public function testConvert($inputFilePath, $outputFilePath)
$richText = new RichText($apiRepositoryStub, $logger);
$richText->setImageContentTypes([27]);

$result = $richText->convert($inputDocument, true);
$result = $richText->convert($inputDocument, true, true);

$convertedDocument = $this->createDocument($result, false);
$expectedDocument = $this->createDocument($outputFilePath);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="utf-8"?>
<section
xmlns:image="http://ez.no/namespaces/ezpublish3/image/"
xmlns:xhtml="http://ez.no/namespaces/ezpublish3/xhtml/"
xmlns:custom="http://ez.no/namespaces/ezpublish3/custom/">
<paragraph align="justify">Here is an anchor
<anchor name="1name"/>
</paragraph>
<paragraph align="justify">Here is an anchor
<anchor name="n1ame"/>
</paragraph>
<paragraph align="justify">Here is an anchor
<anchor name="-1name"/>
</paragraph>
<paragraph align="justify">Here is an anchor
<anchor name="_name"/>
</paragraph>
</section>
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<section
xmlns="http://docbook.org/ns/docbook"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:ezxhtml="http://ez.no/xmlns/ezpublish/docbook/xhtml"
xmlns:ezcustom="http://ez.no/xmlns/ezpublish/docbook/custom" version="5.0-variant ezpublish-1.0">
<para ezxhtml:textalign="justify">Here is an anchor
<anchor xml:id="rewrite_1name"/>
</para>
<para ezxhtml:textalign="justify">Here is an anchor
<anchor xml:id="n1ame"/>
</para>
<para ezxhtml:textalign="justify">Here is an anchor
<anchor xml:id="rewrite_-1name"/>
</para>
<para ezxhtml:textalign="justify">Here is an anchor
<anchor xml:id="_name"/>
</para>
</section>

0 comments on commit a7895b1

Please sign in to comment.