diff --git a/lib/FieldType/XmlText/Converter/RichText.php b/lib/FieldType/XmlText/Converter/RichText.php index 87f9400d..48001378 100644 --- a/lib/FieldType/XmlText/Converter/RichText.php +++ b/lib/FieldType/XmlText/Converter/RichText.php @@ -169,6 +169,17 @@ protected function validateAttributeValues(DOMDocument $document, $contentFieldI $whitelist = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-'; $replaceStr = ''; + /* + * We want to pick elements which have an id value + * #1 not starting with a..z or '_' + * #2 containing anything other than a..z, '0..9', '_' or '-' after the 1st character + * So, we have no XPath 2.0 at our disposal... + * 1st line : we take the 1st char (substring) of the id, convert it to 'a' if it is in the whitelist (translate), then check if the string now starts with 'a' (starts-with), then we invert the result (not) + * : So we replace the first char with 'a' if it is whitelisted, then we select the element if the id value does not start with 'a' + * 2nd line: now we take the remaining part of the string, omitting the 1st char (substring), remove any character that *is* whitelisted (translate), then check if there are any non-whitelisted characters left (string-length) + * 3rd line: Due to the not() in the 1st line, we pick all elements not matching that 1st line. That also includes elements not having an xml:id at all. + * : So, we want to make sure we only pick elements which have an xml:id attribute. + */ $nodes = $xpath->query("//*[ ( not(starts-with(translate(substring(@xml:id, 1, 1), '$whitelist1st', '$replaceStr1st'), 'a'))