From 299f4e4409ac668b2ed6372f6fb9701e78cd5dbc Mon Sep 17 00:00:00 2001 From: Harmen Wessels <97173058+harmen-xb@users.noreply.github.com> Date: Wed, 6 Mar 2024 11:10:42 +0100 Subject: [PATCH] Added apache commons text dependency. Updated XMLUtils to use StringEscapeUtils to escape and unescape XML. Updated XmlDecomposer to rely on the XML bytes and unescape/escape XML chars for the formalized extended attributes. --- PowerDeComposer/pom.xml | 6 ++++++ .../com/xbreeze/xml/decompose/XmlDecomposer.java | 11 ++++++++--- .../main/java/com/xbreeze/xml/utils/XMLUtils.java | 15 +++++++++++++-- .../xml/test/decompose/ExtendedAttributes.feature | 1 - 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/PowerDeComposer/pom.xml b/PowerDeComposer/pom.xml index 2d76775..415c2ca 100644 --- a/PowerDeComposer/pom.xml +++ b/PowerDeComposer/pom.xml @@ -108,6 +108,12 @@ commons-io 2.11.0 + + + org.apache.commons + commons-text + 1.11.0 + jakarta.xml.bind diff --git a/PowerDeComposer/src/main/java/com/xbreeze/xml/decompose/XmlDecomposer.java b/PowerDeComposer/src/main/java/com/xbreeze/xml/decompose/XmlDecomposer.java index 2f5ea18..534e2df 100644 --- a/PowerDeComposer/src/main/java/com/xbreeze/xml/decompose/XmlDecomposer.java +++ b/PowerDeComposer/src/main/java/com/xbreeze/xml/decompose/XmlDecomposer.java @@ -447,7 +447,11 @@ private VTDNav formalizeExtendedAttributesText(VTDNav nv) throws Exception { if (nv.getTokenType(extAttrsTextNodeIndex) == VTDNav.TOKEN_STARTING_TAG) { // Get the extended attribute text. // Replace LF with CRLF, since VTD-Nav removes the carriage returns in the file (and PowerDesigner always has CRLF). 
- String extendedAttributesText = nv.toString(nv.getText()).replace("\n", "\r\n"); + int extendedAttributeTextIndex = nv.getText(); + //String extendedAttributesText = nv.toString(extendedAttributeTextIndex).replace("\n", "\r\n"); + String extendedAttributesText = new String(nv.getXML().getBytes(nv.getTokenOffset(extendedAttributeTextIndex), nv.getTokenLength(extendedAttributeTextIndex))); + // We unescape the XML characters here, so the length property in the extended attributes can be used (cause it doesn't account for escaped XML characters). + extendedAttributesText = XMLUtils.unescapeXMLChars(extendedAttributesText); logger.fine(String.format("Found extended attributes text: %s", extendedAttributesText.replaceAll("\n", "[LF]\n").replaceAll("\r", "[CR]"))); // The extended attribute text needs to be parsed so we can create the new XML elements. @@ -473,7 +477,8 @@ private VTDNav formalizeExtendedAttributesText(VTDNav nv) throws Exception { if (extAttrsEnd > extendedAttributesText.length()) throw new Exception("Error while formalizing extended attributes text: The extended attribute length is longer then the contents of the string. This should never happen!"); - String extExtAttrContent = extendedAttributesText.substring(extAttrStart, extAttrsEnd); + // Get the extended attribute contents and escape XML chars, so we can make valid XML. + String extExtAttrContent = XMLUtils.escapeXMLChars(extendedAttributesText.substring(extAttrStart, extAttrsEnd)); // If we are inside a extension section, so currentExtensionExtAttrEnd != -1. And we find a match where the the end index is after the end of extension section we have a problem. // The end of an extension section should always be equal or after any child sections. @@ -785,7 +790,7 @@ private Path parseAndWriteDocumentParts(VTDNav nv, Charset fileCharset, TargetFi // Loop through the include attributes to add the min the include tag. 
for (String includeAttributeName : includeAttributesWithValues.keySet()) { // Insert the include sub element in the include tag. - includeElementStringBuffer.append(String.format(" %s=\"%s\"", includeAttributeName, XMLUtils.excapeXMLChars(includeAttributesWithValues.get(includeAttributeName)))); + includeElementStringBuffer.append(String.format(" %s=\"%s\"", includeAttributeName, XMLUtils.escapeXMLChars(includeAttributesWithValues.get(includeAttributeName)))); } includeElementStringBuffer.append(" />"); diff --git a/PowerDeComposer/src/main/java/com/xbreeze/xml/utils/XMLUtils.java b/PowerDeComposer/src/main/java/com/xbreeze/xml/utils/XMLUtils.java index 7c8d9d2..314b885 100644 --- a/PowerDeComposer/src/main/java/com/xbreeze/xml/utils/XMLUtils.java +++ b/PowerDeComposer/src/main/java/com/xbreeze/xml/utils/XMLUtils.java @@ -29,6 +29,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.text.StringEscapeUtils; + import com.ximpleware.AutoPilot; import com.ximpleware.ModifyException; import com.ximpleware.NavException; @@ -49,8 +51,17 @@ public class XMLUtils { * @param input The text to escape. * @return The escaped input. */ - public static String excapeXMLChars(String input) { - return input.replaceAll("\\<", "<").replaceAll("\\>", ">"); + public static String escapeXMLChars(String input) { + return StringEscapeUtils.escapeXml10(input); + } + + /** + * Unescape XML characters. + * @param input The xml input with escaped XML. + * @return The xml with unescaped chars. 
+ */ + public static String unescapeXMLChars(String input) { + return StringEscapeUtils.unescapeXml(input); } /** diff --git a/PowerDeComposer/src/test/resources/com/xbreeze/xml/test/decompose/ExtendedAttributes.feature b/PowerDeComposer/src/test/resources/com/xbreeze/xml/test/decompose/ExtendedAttributes.feature index ded8149..900d5d7 100644 --- a/PowerDeComposer/src/test/resources/com/xbreeze/xml/test/decompose/ExtendedAttributes.feature +++ b/PowerDeComposer/src/test/resources/com/xbreeze/xml/test/decompose/ExtendedAttributes.feature @@ -150,7 +150,6 @@ Feature: Decompose Extended Attributes """ - @Debug Scenario: Formalize extended attributes with XML chars Given the config file: """