diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java index 430c2236..4ce9bf0c 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java @@ -11,6 +11,7 @@ import java.io.EOFException; import java.io.IOException; +import java.io.InputStreamReader; import java.io.Reader; import java.io.UnsupportedEncodingException; @@ -122,6 +123,8 @@ private String newStringIntern( char[] cbuf, int off, int len ) // private String elValue[]; private int elNamespaceCount[]; + private String fileEncoding = "UTF8"; + /** * Make sure that we have enough space to keep element stack if passed size. It will always create one additional * slot then current depth @@ -659,6 +662,15 @@ public void setInput( Reader in ) { reset(); reader = in; + + if ( reader instanceof InputStreamReader ) + { + InputStreamReader isr = (InputStreamReader) reader; + if ( isr.getEncoding() != null ) + { + fileEncoding = isr.getEncoding().toUpperCase(); + } + } } @Override @@ -1771,6 +1783,17 @@ private int parseProlog() // skipping UNICODE int Order Mark (so called BOM) ch = more(); } + else if ( ch == '\uFFFD' ) + { + // UTF-16 BOM in an UTF-8 encoded file? + // This is a hack...not the best way to check for BOM in UTF-16 + ch = more(); + if ( ch == '\uFFFD' ) + { + throw new XmlPullParserException( "UTF-16 BOM in a UTF-8 encoded file is incompatible", this, + null ); + } + } } seenMarkup = false; boolean gotS = false; @@ -2723,18 +2746,19 @@ else if ( ch >= 'A' && ch <= 'F' ) } posEnd = pos - 1; - int codePoint = Integer.parseInt( sb.toString(), isHex ? 16 : 10 ); - boolean isValidCodePoint = isValidCodePoint( codePoint ); - if ( isValidCodePoint ) + boolean isValidCodePoint = true; + try { - try + int codePoint = Integer.parseInt( sb.toString(), isHex ? 16 : 10 ); + isValidCodePoint = isValidCodePoint( codePoint ); + if ( isValidCodePoint ) { charRefOneCharBuf = Character.toChars( codePoint ); } - catch ( IllegalArgumentException e ) - { - isValidCodePoint = false; - } + } + catch ( IllegalArgumentException e ) + { + isValidCodePoint = false; } if ( !isValidCodePoint ) @@ -3328,6 +3352,17 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd ) // TODO reconcile with setInput encodingName inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart ); + + if ( "UTF8".equals( fileEncoding ) && inputEncoding.toUpperCase().startsWith( "ISO-" ) ) + { + throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + inputEncoding + " is incompatible", + this, null ); + } + else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF-8" )) + { + throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible", + this, null ); + } } ch = more(); diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java new file mode 100644 index 00000000..cf1fe16a --- /dev/null +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java @@ -0,0 +1,278 @@ +package org.codehaus.plexus.util.xml.pull; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.StandardCharsets; + +import org.junit.Before; +import org.junit.Test; + +/** + * Test class that execute a particular set of tests associated to a TESCASES tag from the XML W3C Conformance Tests. + * TESCASES PROFILE:
Bjoern Hoehrmann via HST 2013-09-18+ * XML test files base folder:
xmlconf/eduni/misc/+ * + * @author Gabriel Belingueres + */ +public class eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test +{ + + final static File testResourcesDir = new File("src/test/resources/", "xmlconf/eduni/misc/"); + + MXParser parser; + + @Before + public void setUp() + { + parser = new MXParser(); + } + + /** + * Test ID:
hst-bh-001+ * Test URI:
001.xml+ * Comment:
decimal charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow+ * Sections:
2.2 [2], 4.1 [66]+ * Version: + * + * @throws IOException if there is an I/O error + */ + @Test + public void testhst_bh_001() + throws IOException + { + try ( Reader reader = new FileReader( new File( testResourcesDir, "001.xml" ) ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "decimal charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "character reference (with hex value FF000000F6) is invalid" ) ); + } + } + + /** + * Test ID:
hst-bh-002+ * Test URI:
002.xml+ * Comment:
hex charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow+ * Sections:
2.2 [2], 4.1 [66]+ * Version: + * + * @throws IOException if there is an I/O error + */ + @Test + public void testhst_bh_002() + throws IOException + { + try ( Reader reader = new FileReader( new File( testResourcesDir, "002.xml" ) ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "hex charref > 10FFFF, indeed > max 32 bit integer, checking for recovery from possible overflow" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "character reference (with decimal value 4294967542) is invalid" ) ); + } + } + + /** + * Test ID:
hst-bh-003+ * Test URI:
003.xml+ * Comment:
decimal charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow+ * Sections:
2.2 [2], 4.1 [66]+ * Version: + * + * @throws IOException if there is an I/O error + */ + @Test + public void testhst_bh_003() + throws IOException + { + try ( Reader reader = new FileReader( new File( testResourcesDir, "003.xml" ) ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "decimal charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "character reference (with hex value FFFFFFFF000000F6) is invalid" ) ); + } + } + + /** + * Test ID:
hst-bh-004+ * Test URI:
004.xml+ * Comment:
hex charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow+ * Sections:
2.2 [2], 4.1 [66]+ * Version: + * + * @throws IOException if there is an I/O error + */ + @Test + public void testhst_bh_004() + throws IOException + { + try ( Reader reader = new FileReader( new File( testResourcesDir, "004.xml" ) ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "hex charref > 10FFFF, indeed > max 64 bit integer, checking for recovery from possible overflow" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "character reference (with decimal value 18446744073709551862) is invalid" ) ); + } + } + + /** + * Test ID:
hst-bh-005+ * Test URI:
005.xml+ * Comment:
xmlns:xml is an attribute as far as validation is concerned and must be declared+ * Sections:
3.1 [41]+ * Version: + * + * @throws IOException if there is an I/O error + * + * NOTE: This test is SKIPPED as MXParser do not supports DOCDECL parsing. + */ + // @Test + public void testhst_bh_005() + throws IOException + { + try ( Reader reader = new FileReader( new File( testResourcesDir, "005.xml" ) ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "xmlns:xml is an attribute as far as validation is concerned and must be declared" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( true ); + } + } + + /** + * Test ID:
hst-bh-006+ * Test URI:
006.xml+ * Comment:
xmlns:foo is an attribute as far as validation is concerned and must be declared+ * Sections:
3.1 [41]+ * Version: + * + * @throws IOException if there is an I/O error + * + * NOTE: This test is SKIPPED as MXParser do not supports DOCDECL parsing. + */ + // @Test + public void testhst_bh_006() + throws IOException + { + try ( Reader reader = new FileReader( new File( testResourcesDir, "006.xml" ) ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "xmlns:foo is an attribute as far as validation is concerned and must be declared" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( true ); + } + } + + /** + * Test ID:
hst-lhs-007+ * Test URI:
007.xml+ * Comment:
UTF-8 BOM plus xml decl of iso-8859-1 incompatible+ * Sections:
4.3.3+ * Version: + * + * @throws IOException if there is an I/O error + */ + @Test + public void testhst_lhs_007() + throws IOException + { + try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) ); + InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "UTF-8 BOM plus xml decl of iso-8859-1 incompatible" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of iso-8859-1 is incompatible" ) ); + } + } + + /** + * Test ID:
hst-lhs-008+ * Test URI:
008.xml+ * Comment:
UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible+ * Sections:
4.3.3+ * Version: + * + * @throws IOException if there is an I/O error + */ + @Test + public void testhst_lhs_008() + throws IOException + { + try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) ); + InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_16 ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "UTF-16 BOM plus xml decl of utf-8 is incompatible" ) ); + } + } + + /** + * Test ID:
hst-lhs-009+ * Test URI:
009.xml+ * Comment:
UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible+ * Sections:
4.3.3+ * Version: + * + * @throws IOException if there is an I/O error + */ + @Test + public void testhst_lhs_009() + throws IOException + { + try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) ); + InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) ) + { + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible" ); + } + catch ( XmlPullParserException e ) + { + assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) ); + } + } + +} \ No newline at end of file diff --git a/src/test/resources/xmlconf/eduni/misc/001.xml b/src/test/resources/xmlconf/eduni/misc/001.xml new file mode 100644 index 00000000..76de9900 --- /dev/null +++ b/src/test/resources/xmlconf/eduni/misc/001.xml @@ -0,0 +1,4 @@ + +]> +
Fail
diff --git a/src/test/resources/xmlconf/eduni/misc/002.xml b/src/test/resources/xmlconf/eduni/misc/002.xml new file mode 100644 index 00000000..943d284e --- /dev/null +++ b/src/test/resources/xmlconf/eduni/misc/002.xml @@ -0,0 +1,4 @@ + +]> +Fail
diff --git a/src/test/resources/xmlconf/eduni/misc/003.xml b/src/test/resources/xmlconf/eduni/misc/003.xml new file mode 100644 index 00000000..c2fb6990 --- /dev/null +++ b/src/test/resources/xmlconf/eduni/misc/003.xml @@ -0,0 +1,4 @@ + +]> +Fail
diff --git a/src/test/resources/xmlconf/eduni/misc/004.xml b/src/test/resources/xmlconf/eduni/misc/004.xml new file mode 100644 index 00000000..1e83a946 --- /dev/null +++ b/src/test/resources/xmlconf/eduni/misc/004.xml @@ -0,0 +1,4 @@ + +]> +Fail
diff --git a/src/test/resources/xmlconf/eduni/misc/005.xml b/src/test/resources/xmlconf/eduni/misc/005.xml new file mode 100644 index 00000000..d353623a --- /dev/null +++ b/src/test/resources/xmlconf/eduni/misc/005.xml @@ -0,0 +1,2 @@ + ]> +