Skip to content

Commit

Permalink
Remove special chars from xml output
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Michael Osipov <1983-01-06@gmx.net>
(cherry picked from commit c9d72af)
  • Loading branch information
slawekjaranowski committed May 20, 2024
1 parent 4274b2e commit be6885b
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
import java.util.regex.Pattern;

/**
* Implementation of XMLWriter which emits nicely formatted documents.
*
* <p>Implementation of XMLWriter which emits nicely formatted documents.</p>
*
* <p>C0 control characters except <code>\n</code>, <code>\r</code>, and <code>\t</code> are omitted from output.</p>
*/
public class PrettyPrintXMLWriter implements XMLWriter {
/** Line separator ("\n" on UNIX) */
Expand Down Expand Up @@ -185,7 +185,7 @@ private void writeText(String text, boolean escapeXml) {
finishTag();

if (escapeXml) {
text = escapeXml(text);
text = escapeXmlText(text);
}

write(StringUtils.unifyLineSeparators(text, lineSeparator));
Expand Down Expand Up @@ -225,10 +225,12 @@ private static String escapeXml(String text) {

private static final Pattern crlf = Pattern.compile(crlf_str);

private static final Pattern lowers = Pattern.compile("([\000-\037])");
private static final Pattern lowers = Pattern.compile("([\\x00-\\x1F])");

private static final Pattern illegalC0Characters = Pattern.compile("([\\x00-\\x08\\x0B-\\x0C\\x0E-\\x1F])");

private static String escapeXmlAttribute(String text) {
text = escapeXml(text);
text = escapeXmlText(text);

// Windows
Matcher crlfmatcher = crlf.matcher(text);
Expand All @@ -246,6 +248,19 @@ private static String escapeXmlAttribute(String text) {
return b.toString();
}

/**
 * Escapes text for XML output and removes control characters that must not be written.
 *
 * <p>The text is first passed through {@code escapeXml}; every character matched by
 * {@code illegalC0Characters} (C0 control characters other than tab, line feed and
 * carriage return) is then deleted from the result rather than escaped.</p>
 *
 * @param text the raw text to escape
 * @return the escaped text with all {@code illegalC0Characters} matches removed
 */
private static String escapeXmlText(String text) {
    // replaceAll("") is the library form of the find/appendReplacement/appendTail loop:
    // same output, without the legacy StringBuffer or the redundant matcher reassignment.
    return illegalC0Characters.matcher(escapeXml(text)).replaceAll("");
}

/** {@inheritDoc} */
@Override
public void addAttribute(String key, String value) {
Expand Down
17 changes: 4 additions & 13 deletions src/main/java/org/codehaus/plexus/util/xml/pull/MXSerializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* <li>PROPERTY_SERIALIZER_INDENTATION
* <li>PROPERTY_SERIALIZER_LINE_SEPARATOR
* </ul>
* <p>C0 control characters except <code>\n</code>, <code>\r</code>, and <code>\t</code> are omitted from output.</p>
*/
public class MXSerializer implements XmlSerializer {
protected static final String XML_URI = "http://www.w3.org/XML/1998/namespace";
Expand Down Expand Up @@ -943,19 +944,9 @@ protected void writeElementContent(String text, Writer out) throws IOException {
// out.write(';');
// pos = i + 1;
} else {
throw new IllegalStateException(
"character " + Integer.toString(ch) + " is not allowed in output" + getLocation());
// in XML 1.1 legal are [#x1-#xD7FF]
// if(ch > 0) {
// if(i > pos) out.write(text.substring(pos, i));
// out.write("&#");
// out.write(Integer.toString(ch));
// out.write(';');
// pos = i + 1;
// } else {
// throw new IllegalStateException(
// "character zero is not allowed in XML 1.1 output"+getLocation());
// }
// skip special char
if (i > pos) out.write(text.substring(pos, i));
pos = i + 1;
}
}
if (seenBracket) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ private String createExpectedXML(boolean escape) {
buf.append(LS);
buf.append(" </el6>");
buf.append(LS);
if (escape) {
buf.append(" <el8>special-char-</el8>");
} else {
buf.append(" <el8>special-char-" + (char) 7 + "</el8>");
}
buf.append(LS);
buf.append("</root>");

return buf.toString();
Expand All @@ -95,7 +101,7 @@ private Xpp3Dom createXpp3Dom() {
dom.addChild(el1);

Xpp3Dom el2 = new Xpp3Dom("el2");
el2.setAttribute("att2", "attribute2\nnextline");
el2.setAttribute("att2", "attribute2\nnextline" + (char) 7);
dom.addChild(el2);

Xpp3Dom el3 = new Xpp3Dom("el3");
Expand All @@ -119,6 +125,10 @@ private Xpp3Dom createXpp3Dom() {
el7.setValue("element7\n&\"\'<>");
el6.addChild(el7);

Xpp3Dom el8 = new Xpp3Dom("el8");
el8.setValue("special-char-" + (char) 7);

dom.addChild(el8);
return dom;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package org.codehaus.plexus.util.xml.pull;

import java.io.StringReader;
import java.io.StringWriter;
import java.util.Arrays;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests how {@link MXSerializer} treats C0 control characters in element text.
 */
class MXSerializerTest {

    /**
     * Writes one {@code <char>} element per selected control character and compares the
     * result with {@link #expectedOutput()}, in which backspace, line tabulation and
     * shift-in are absent while tab, line feed and carriage return are kept.
     */
    @Test
    void testSerialize() throws Exception {
        StringWriter output = new StringWriter();
        MXSerializer serializer = new MXSerializer();
        serializer.setOutput(output);

        serializer.startDocument(null, Boolean.TRUE);
        serializer.startTag(null, "root");
        for (int codePoint : Arrays.asList(8, 9, 10, 11, 13, 15)) {
            // Element text is "<Unicode name>: <the character itself>".
            String content = Character.getName(codePoint) + ": " + ((char) codePoint);
            serializer.startTag(null, "char");
            serializer.text(content);
            serializer.endTag(null, "char");
        }
        serializer.endTag(null, "root");
        serializer.endDocument();

        assertEquals(expectedOutput(), output.toString());
    }

    /**
     * Feeds the expected serializer output to {@link MXParser} and pulls events until the
     * end of the document. There are no assertions; the test passes when parsing completes
     * without throwing.
     */
    @Test
    void testDeserialize() throws Exception {
        MXParser parser = new MXParser();
        parser.setInput(new StringReader(expectedOutput()));

        for (int event = parser.getEventType(); event != XmlPullParser.END_DOCUMENT; event = parser.next()) {
            // Nothing to inspect per event; the loop only drains the parser.
        }
    }

    /** Returns the document {@link #testSerialize()} expects the serializer to produce. */
    private String expectedOutput() {
        return String.join(
                "",
                "<?xml version=\"1.0\" standalone=\"yes\"?>",
                "<root>",
                "<char>BACKSPACE: </char>",
                "<char>CHARACTER TABULATION: \t</char>",
                "<char>LINE FEED (LF): \n</char>",
                "<char>LINE TABULATION: </char>",
                "<char>CARRIAGE RETURN (CR): \r</char>",
                "<char>SHIFT IN: </char>",
                "</root>");
    }
}

0 comments on commit be6885b

Please sign in to comment.