Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#57 Uncaught IllegalArgumentException due to malformed unicode entity ref #58

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -2664,13 +2664,16 @@ protected char[] parseEntityRef()
entityRefName = null;
posStart = pos;
char ch = more();
StringBuilder sb = new StringBuilder();
if ( ch == '#' )
{
// parse character reference

char charRef = 0;
ch = more();
if ( ch == 'x' )
StringBuilder sb = new StringBuilder();
boolean isHex = ( ch == 'x' );

if ( isHex )
{
// encoded in hex
while ( true )
Expand Down Expand Up @@ -2710,6 +2713,7 @@ else if ( ch >= 'A' && ch <= 'F' )
if ( ch >= '0' && ch <= '9' )
{
charRef = (char) ( charRef * 10 + ( ch - '0' ) );
sb.append( ch );
}
else if ( ch == ';' )
{
Expand All @@ -2724,20 +2728,19 @@ else if ( ch >= 'A' && ch <= 'F' )
}
}
posEnd = pos - 1;
if ( sb.length() > 0 )
try
{
char[] tmp = toChars( Integer.parseInt( sb.toString(), 16 ) );
charRefOneCharBuf = tmp;
if ( tokenize )
{
text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length );
}
return charRefOneCharBuf;
charRefOneCharBuf = toChars( Integer.parseInt( sb.toString(), isHex ? 16 : 10 ) );
}
charRefOneCharBuf[0] = charRef;
catch ( IllegalArgumentException e )
{
throw new XmlPullParserException( "character reference (with " + ( isHex ? "hex" : "decimal" )
+ " value " + sb.toString() + ") is invalid", this, null );
}

if ( tokenize )
{
text = newString( charRefOneCharBuf, 0, 1 );
text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length );
}
return charRefOneCharBuf;
}
Expand Down Expand Up @@ -3996,15 +3999,21 @@ private static boolean isHighSurrogate( char ch )
return ( MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch );
}

// Lowest value of the Unicode code point range (same value as Character.MIN_CODE_POINT).
private static final int MIN_CODE_POINT = 0x000000;

// Highest value of the Unicode code point range (same value as Character.MAX_CODE_POINT).
private static final int MAX_CODE_POINT = 0x10FFFF;

// First code point above 0xFFFF (same value as Character.MIN_SUPPLEMENTARY_CODE_POINT).
private static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;

/**
 * Check if the provided parameter is a valid Char, according to the
 * <a href="https://www.w3.org/TR/REC-xml/#NT-Char">Char production of the XML 1.0 recommendation</a>.
 * <p>
 * Being inside the Unicode range is not sufficient: most C0 control characters, the surrogate block
 * (0xD800-0xDFFF) and the two values 0xFFFE and 0xFFFF are rejected as well.
 *
 * @param codePoint the numeric value to check
 * @return true if it is a valid numeric character reference. False otherwise.
 */
private static boolean isValidCodePoint( int codePoint )
{
    // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    return codePoint == 0x9 || codePoint == 0xA || codePoint == 0xD
        || ( 0x20 <= codePoint && codePoint <= 0xD7FF )
        || ( 0xE000 <= codePoint && codePoint <= 0xFFFD )
        || ( 0x10000 <= codePoint && codePoint <= 0x10FFFF );
}

private static boolean isSupplementaryCodePoint( int codePoint )
Expand Down
129 changes: 129 additions & 0 deletions src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
*/

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.io.StringReader;
Expand Down Expand Up @@ -156,6 +158,133 @@ public void testUnicodeEntities()
assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
}

/**
 * A hexadecimal character reference whose value (0x110000) lies above the last Unicode code point
 * must make the parser throw an XmlPullParserException describing the offending value.
 *
 * @throws Exception on any unexpected parser failure
 */
@Test
public void testInvalidCharacterReferenceHexa()
    throws Exception
{
    final String xml = "<root>&#x110000;</root>";
    final MXParser mxParser = new MXParser();
    mxParser.setInput( new StringReader( xml ) );

    try
    {
        int token = mxParser.nextToken();
        assertEquals( XmlPullParser.START_TAG, token );

        // Reading the reference itself is what is expected to blow up.
        token = mxParser.nextToken();
        assertEquals( XmlPullParser.ENTITY_REF, token );

        fail( "Should fail since &#x110000; is an illegal character reference" );
    }
    catch ( XmlPullParserException expected )
    {
        final String message = expected.getMessage();
        assertTrue( message.contains( "character reference (with hex value 110000) is invalid" ) );
    }
}

/**
 * Checks that legal hexadecimal character references (edge and interior values of the ranges accepted
 * by the XML Char production) are each reported as an ENTITY_REF token whose text starts with the
 * referenced code point.
 * <p>
 * Parser exceptions are deliberately left to propagate, so that a failure report carries the
 * original message and stack trace.
 *
 * @throws Exception if the parser rejects one of the references or the input cannot be read
 */
@Test
public void testValidCharacterReferenceHexa()
    throws Exception
{
    MXParser parser = new MXParser();
    String input = "<root>&#x9;&#xA;&#xD;&#x20;&#x200;&#xD7FF;&#xE000;&#xFFA2;&#xFFFD;&#x10000;&#x10FFFD;&#x10FFFF;</root>";
    parser.setInput( new StringReader( input ) );

    // Expected code points, in the order the references appear in the input.
    int[] expectedCodePoints =
        { 0x9, 0xA, 0xD, 0x20, 0x200, 0xD7FF, 0xE000, 0xFFA2, 0xFFFD, 0x10000, 0x10FFFD, 0x10FFFF };

    assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
    for ( int expected : expectedCodePoints )
    {
        // Name the reference in the message: inside a loop the line number no longer identifies it.
        String reference = "&#x" + Integer.toHexString( expected ).toUpperCase() + ";";
        assertEquals( reference, XmlPullParser.ENTITY_REF, parser.nextToken() );
        assertEquals( reference, expected, parser.getText().codePointAt( 0 ) );
    }
    assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
}

@Test
public void testInvalidCharacterReferenceDecimal()
hboutemy marked this conversation as resolved.
Show resolved Hide resolved
throws Exception
{
MXParser parser = new MXParser();
String input = "<root>&#1114112;</root>";
parser.setInput( new StringReader( input ) );

try
{
assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
fail( "Should fail since &#1114112; is an illegal character reference" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "character reference (with decimal value 1114112) is invalid" ) );
}
}

/**
 * Checks that legal decimal character references (edge and interior values of the ranges accepted by
 * the XML Char production) are each reported as an ENTITY_REF token whose text starts with the
 * referenced code point.
 * <p>
 * Parser exceptions are deliberately left to propagate, so that a failure report carries the
 * original message and stack trace.
 *
 * @throws Exception if the parser rejects one of the references or the input cannot be read
 */
@Test
public void testValidCharacterReferenceDecimal()
    throws Exception
{
    MXParser parser = new MXParser();
    String input =
        "<root>&#9;&#10;&#13;&#32;&#512;&#55295;&#57344;&#65442;&#65533;&#65536;&#1114109;&#1114111;</root>";
    parser.setInput( new StringReader( input ) );

    // Expected code points, in the order the references appear in the input.
    int[] expectedCodePoints = { 9, 10, 13, 32, 512, 55295, 57344, 65442, 65533, 65536, 1114109, 1114111 };

    assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
    for ( int expected : expectedCodePoints )
    {
        // Name the reference in the message: inside a loop the line number no longer identifies it.
        String reference = "&#" + expected + ";";
        assertEquals( reference, XmlPullParser.ENTITY_REF, parser.nextToken() );
        assertEquals( reference, expected, parser.getText().codePointAt( 0 ) );
    }
    assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
}

@Test
public void testProcessingInstruction()
throws Exception
Expand Down