Skip to content
This repository has been archived by the owner on Dec 4, 2018. It is now read-only.

Commit

Permalink
Add additional checks for valid characters to the HTTP request line
Browse files Browse the repository at this point in the history
parsing so invalid request lines are rejected sooner.

git-svn-id: https://svn.apache.org/repos/asf/tomcat/tc8.0.x/trunk@1767653 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
markt-asf committed Nov 2, 2016
1 parent 273caa0 commit 779d5d3
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 99 deletions.
54 changes: 1 addition & 53 deletions java/org/apache/coyote/http11/AbstractInputBuffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,62 +30,10 @@

public abstract class AbstractInputBuffer<S> implements InputBuffer{

protected static final boolean[] HTTP_TOKEN_CHAR = new boolean[128];

/**
* The string manager for this package.
*/
protected static final StringManager sm =
StringManager.getManager(Constants.Package);


static {

This comment has been minimized.

Copy link
@ZwergNaseXXL

ZwergNaseXXL Nov 29, 2016

Correct me if I'm wrong, but don't you reject some chars which are only deemed unsafe in obsolete RFC1738 ('"', '{', '}', ''), but OTOH you accept some chars which are deemed unsafe in RFC3986 or even in both ('#', '!', '$', '&', "'", '*', '+')?

This comment has been minimized.

Copy link
@markt-asf

markt-asf Nov 29, 2016

Author Contributor

You are wrong, on several levels.
First your analysis of the code's behaviour is incorrect. For example, '#' is not accepted in the request target.
Secondly, your understanding of what characters are, and are not, valid in a request target is not correct.
Look at the ABNF for request-target in RFC7230 and follow all the possible definitions (you'll need to refer to RFC 3986 as well) and you'll see that Tomcat is correctly validating the request target and only rejecting request targets that contain characters that are never valid in the request target. Note that some components of the request target are more restricted than the request target as a whole. That validation is not performed at this point.

This comment has been minimized.

Copy link
@ZwergNaseXXL

ZwergNaseXXL Nov 30, 2016

Note to self: Never react to customer escalations before the first coffee has reached it's destination.
I actually checked this part here, which was replaced by your commit. The red background color didn't deter me...

As for valid chars per RFC3986: You're right that I hadn't followed the grammar, I went with the textual description in section 2, which has some gaps plus some differences with the previous RFC and the existing implementations. In a nutshell, I guess I have to percent-encode every char that isn't either "unreserved" or a delimiter.

Sorry to bother you.

for (int i = 0; i < 128; i++) {
if (i < 32) {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == 127) {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '(') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == ')') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '<') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '>') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '@') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == ',') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == ';') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == ':') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '\\') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '\"') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '/') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '[') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == ']') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '?') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '=') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '{') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == '}') {
HTTP_TOKEN_CHAR[i] = false;
} else if (i == ' ') {
HTTP_TOKEN_CHAR[i] = false;
} else {
HTTP_TOKEN_CHAR[i] = true;
}
}
}
protected static final StringManager sm = StringManager.getManager(Constants.Package);


/**
Expand Down
14 changes: 9 additions & 5 deletions java/org/apache/coyote/http11/AbstractNioInputBuffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import org.apache.coyote.Request;
import org.apache.tomcat.util.buf.MessageBytes;
import org.apache.tomcat.util.http.parser.HttpParser;

public abstract class AbstractNioInputBuffer<S> extends AbstractInputBuffer<S> {

Expand Down Expand Up @@ -228,7 +229,7 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
if (buf[pos] == Constants.SP || buf[pos] == Constants.HT) {
space = true;
request.method().setBytes(buf, parsingRequestLineStart, pos - parsingRequestLineStart);
} else if (!HTTP_TOKEN_CHAR[buf[pos]]) {
} else if (!HttpParser.isToken(buf[pos])) {
throw new IllegalArgumentException(sm.getString("iib.invalidmethod"));
}
pos++;
Expand Down Expand Up @@ -276,9 +277,10 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
parsingRequestLineEol = true;
space = true;
end = pos;
} else if ((buf[pos] == Constants.QUESTION)
&& (parsingRequestLineQPos == -1)) {
} else if ((buf[pos] == Constants.QUESTION) && (parsingRequestLineQPos == -1)) {
parsingRequestLineQPos = pos;
} else if (HttpParser.isNotRequestTarget(buf[pos])) {
throw new IllegalArgumentException(sm.getString("iib.invalidRequestTarget"));
}
pos++;
}
Expand Down Expand Up @@ -315,7 +317,7 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
if (parsingRequestLinePhase == 6) {
//
// Reading the protocol
// Protocol is always US-ASCII
// Protocol is always "HTTP/" DIGIT "." DIGIT
//
while (!parsingRequestLineEol) {
// Read new bytes if needed
Expand All @@ -330,6 +332,8 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
if (end == 0)
end = pos;
parsingRequestLineEol = true;
} else if (!HttpParser.isHttpProtocol(buf[pos])) {
throw new IllegalArgumentException(sm.getString("iib.invalidHttpProtocol"));
}
pos++;
}
Expand Down Expand Up @@ -470,7 +474,7 @@ private HeaderParseStatus parseHeader()
headerData.realPos = pos;
headerData.lastSignificantChar = pos;
break;
} else if (chr < 0 || !HTTP_TOKEN_CHAR[chr]) {
} else if (!HttpParser.isToken(chr)) {
// If a non-token header is detected, skip the line and
// ignore the header
headerData.lastSignificantChar = pos;
Expand Down
14 changes: 9 additions & 5 deletions java/org/apache/coyote/http11/InternalAprInputBuffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.tomcat.jni.Status;
import org.apache.tomcat.util.buf.ByteChunk;
import org.apache.tomcat.util.buf.MessageBytes;
import org.apache.tomcat.util.http.parser.HttpParser;
import org.apache.tomcat.util.net.AbstractEndpoint;
import org.apache.tomcat.util.net.SocketWrapper;

Expand Down Expand Up @@ -181,7 +182,7 @@ public boolean parseRequestLine(boolean useAvailableData)
if (buf[pos] == Constants.SP || buf[pos] == Constants.HT) {
space = true;
request.method().setBytes(buf, start, pos - start);
} else if (!HTTP_TOKEN_CHAR[buf[pos]]) {
} else if (!HttpParser.isToken(buf[pos])) {
throw new IllegalArgumentException(sm.getString("iib.invalidmethod"));
}

Expand Down Expand Up @@ -232,9 +233,10 @@ public boolean parseRequestLine(boolean useAvailableData)
eol = true;
space = true;
end = pos;
} else if ((buf[pos] == Constants.QUESTION)
&& (questionPos == -1)) {
} else if ((buf[pos] == Constants.QUESTION) && (questionPos == -1)) {
questionPos = pos;
} else if (HttpParser.isNotRequestTarget(buf[pos])) {
throw new IllegalArgumentException(sm.getString("iib.invalidRequestTarget"));
}

pos++;
Expand Down Expand Up @@ -270,7 +272,7 @@ public boolean parseRequestLine(boolean useAvailableData)

//
// Reading the protocol
// Protocol is always US-ASCII
// Protocol is always "HTTP/" DIGIT "." DIGIT
//

while (!eol) {
Expand All @@ -287,6 +289,8 @@ public boolean parseRequestLine(boolean useAvailableData)
if (end == 0)
end = pos;
eol = true;
} else if (!HttpParser.isHttpProtocol(buf[pos])) {
throw new IllegalArgumentException(sm.getString("iib.invalidHttpProtocol"));
}

pos++;
Expand Down Expand Up @@ -385,7 +389,7 @@ private boolean parseHeader()
if (buf[pos] == Constants.COLON) {
colon = true;
headerValue = headers.addValue(buf, start, pos - start);
} else if (buf[pos] < 0 || !HTTP_TOKEN_CHAR[buf[pos]]) {
} else if (!HttpParser.isToken(buf[pos])) {
// If a non-token header is detected, skip the line and
// ignore the header
skipLine(start);
Expand Down
15 changes: 9 additions & 6 deletions java/org/apache/coyote/http11/InternalInputBuffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.juli.logging.LogFactory;
import org.apache.tomcat.util.buf.ByteChunk;
import org.apache.tomcat.util.buf.MessageBytes;
import org.apache.tomcat.util.http.parser.HttpParser;
import org.apache.tomcat.util.net.AbstractEndpoint;
import org.apache.tomcat.util.net.SocketWrapper;

Expand Down Expand Up @@ -142,7 +143,7 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
if (buf[pos] == Constants.SP || buf[pos] == Constants.HT) {
space = true;
request.method().setBytes(buf, start, pos - start);
} else if (!HTTP_TOKEN_CHAR[buf[pos]]) {
} else if (!HttpParser.isToken(buf[pos])) {
throw new IllegalArgumentException(sm.getString("iib.invalidmethod"));
}

Expand Down Expand Up @@ -193,9 +194,10 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
eol = true;
space = true;
end = pos;
} else if ((buf[pos] == Constants.QUESTION)
&& (questionPos == -1)) {
} else if ((buf[pos] == Constants.QUESTION) && (questionPos == -1)) {
questionPos = pos;
} else if (HttpParser.isNotRequestTarget(buf[pos])) {
throw new IllegalArgumentException(sm.getString("iib.invalidRequestTarget"));
}

pos++;
Expand Down Expand Up @@ -230,9 +232,8 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)

//
// Reading the protocol
// Protocol is always US-ASCII
// Protocol is always "HTTP/" DIGIT "." DIGIT
//

while (!eol) {

// Read new bytes if needed
Expand All @@ -247,6 +248,8 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
if (end == 0)
end = pos;
eol = true;
} else if (!HttpParser.isHttpProtocol(buf[pos])) {
throw new IllegalArgumentException(sm.getString("iib.invalidHttpProtocol"));
}

pos++;
Expand Down Expand Up @@ -345,7 +348,7 @@ private boolean parseHeader()
if (buf[pos] == Constants.COLON) {
colon = true;
headerValue = headers.addValue(buf, start, pos - start);
} else if (buf[pos] < 0 || !HTTP_TOKEN_CHAR[buf[pos]]) {
} else if (!HttpParser.isToken(buf[pos])) {
// If a non-token header is detected, skip the line and
// ignore the header
skipLine(start);
Expand Down
4 changes: 3 additions & 1 deletion java/org/apache/coyote/http11/LocalStrings.properties
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ iib.available.readFail=A non-blocking read failed while attempting to determine
iib.eof.error=Unexpected EOF read on the socket
iib.failedread.apr=Read failed with APR/native error code [{0}]
iib.filter.npe=You may not add a null filter
iib.invalidheader=The HTTP header line [{0}] does not conform to RFC 2616 and has been ignored.
iib.invalidheader=The HTTP header line [{0}] does not conform to RFC 7230 and has been ignored.
iib.invalidmethod=Invalid character found in method name. HTTP method names must be tokens
iib.invalidRequestTarget=Invalid character found in the request target. The valid characters are defined in RFC 7230 and RFC 3986
iib.invalidHttpProtocol=Invalid character found in the HTTP protocol
iib.parseheaders.ise.error=Unexpected state: headers already parsed. Buffer not recycled?
iib.readtimeout=Timeout attempting to read data from the socket
iib.requestheadertoolarge.error=Request header is too large
Expand Down
44 changes: 43 additions & 1 deletion java/org/apache/tomcat/util/http/parser/HttpParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public class HttpParser {
private static final boolean[] IS_SEPARATOR = new boolean[ARRAY_SIZE];
private static final boolean[] IS_TOKEN = new boolean[ARRAY_SIZE];
private static final boolean[] IS_HEX = new boolean[ARRAY_SIZE];
private static final boolean[] IS_NOT_REQUEST_TARGET = new boolean[ARRAY_SIZE];
private static final boolean[] IS_HTTP_PROTOCOL = new boolean[ARRAY_SIZE];

static {
for (int i = 0; i < ARRAY_SIZE; i++) {
Expand All @@ -65,6 +67,21 @@ public class HttpParser {
if ((i >= '0' && i <='9') || (i >= 'a' && i <= 'f') || (i >= 'A' && i <= 'F')) {
IS_HEX[i] = true;
}

// Not valid for request target.
// Combination of multiple rules from RFC7230 and RFC 3986. Must be
// ASCII, no controls plus a few additional characters excluded
if (IS_CONTROL[i] || i > 127 ||
i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>' || i == '\\' ||
i == '^' || i == '`' || i == '{' || i == '|' || i == '}') {

This comment has been minimized.

Copy link
@encounter

encounter Nov 29, 2016

The | character was not rejected under the old rules, so this is a breaking change.

This comment has been minimized.

Copy link
@markt-asf

markt-asf Nov 29, 2016

Author Contributor

It is only a breaking change for clients that are sending non-specification compliant requests. Clients that aren't percent encoding | need to be fixed so that they do.

IS_NOT_REQUEST_TARGET[i] = true;
}

// Not valid for HTTP protocol
// "HTTP/" DIGIT "." DIGIT
if (i == 'H' || i == 'T' || i == 'P' || i == '/' || i == '.' || (i >= '0' && i <= '9')) {
IS_HTTP_PROTOCOL[i] = true;
}
}
}

Expand Down Expand Up @@ -99,6 +116,7 @@ public static String unquote(String input) {
return result.toString();
}


public static boolean isToken(int c) {
// Fast for correct values, slower for incorrect ones
try {
Expand All @@ -108,15 +126,39 @@ public static boolean isToken(int c) {
}
}


public static boolean isHex(int c) {
// Fast for correct values, slower for incorrect ones
// Fast for correct values, slower for some incorrect ones
try {
return IS_HEX[c];
} catch (ArrayIndexOutOfBoundsException ex) {
return false;
}
}


public static boolean isNotRequestTarget(int c) {
// Fast for valid request target characters, slower for some incorrect
// ones
try {
return IS_NOT_REQUEST_TARGET[c];
} catch (ArrayIndexOutOfBoundsException ex) {
return true;
}
}


public static boolean isHttpProtocol(int c) {
// Fast for valid HTTP protocol characters, slower for some incorrect
// ones
try {
return IS_HTTP_PROTOCOL[c];
} catch (ArrayIndexOutOfBoundsException ex) {
return false;
}
}


// Skip any LWS and return the next char
static int skipLws(StringReader input, boolean withReset) throws IOException {

Expand Down
Loading

1 comment on commit 779d5d3

@albinowax
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies if you are already aware of this and don't care, but Internet Explorer 11 will happily place " and assorted other characters unencoded in the query string. Old versions of Chrome will too, if I recall correctly.

Please sign in to comment.