Add additional checks for valid characters to the HTTP request line

parsing so invalid request lines are rejected sooner. git-svn-id: https://svn.apache.org/repos/asf/tomcat/tc8.0.x/trunk@1767653 13f79535-47bb-0310-9956-ffa450edef68
apache · Nov 2, 2016 · 779d5d3 · ZwergNaseXXL · Nov 29, 2016 · markt-asf
1 parent 273caa0
commit 779d5d3
Show file tree

Hide file tree

Showing 8 changed files with 109 additions and 99 deletions.
diff --git a/java/org/apache/coyote/http11/AbstractInputBuffer.java b/java/org/apache/coyote/http11/AbstractInputBuffer.java
@@ -30,62 +30,10 @@
 
 public abstract class AbstractInputBuffer<S> implements InputBuffer{
 
-    protected static final boolean[] HTTP_TOKEN_CHAR = new boolean[128];
-
     /**
      * The string manager for this package.
      */
-    protected static final StringManager sm =
-        StringManager.getManager(Constants.Package);
-
-
-    static {
-        for (int i = 0; i < 128; i++) {
-            if (i < 32) {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == 127) {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '(') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == ')') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '<') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '>') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '@') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == ',') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == ';') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == ':') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '\\') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '\"') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '/') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '[') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == ']') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '?') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '=') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '{') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == '}') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else if (i == ' ') {
-                HTTP_TOKEN_CHAR[i] = false;
-            } else {
-                HTTP_TOKEN_CHAR[i] = true;
-            }
-        }
-    }
+    protected static final StringManager sm = StringManager.getManager(Constants.Package);
 
 
     /**

diff --git a/java/org/apache/coyote/http11/AbstractNioInputBuffer.java b/java/org/apache/coyote/http11/AbstractNioInputBuffer.java
@@ -21,6 +21,7 @@
 
 import org.apache.coyote.Request;
 import org.apache.tomcat.util.buf.MessageBytes;
+import org.apache.tomcat.util.http.parser.HttpParser;
 
 public abstract class AbstractNioInputBuffer<S> extends AbstractInputBuffer<S> {
 
@@ -228,7 +229,7 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
                 if (buf[pos] == Constants.SP || buf[pos] == Constants.HT) {
                     space = true;
                     request.method().setBytes(buf, parsingRequestLineStart, pos - parsingRequestLineStart);
-                } else if (!HTTP_TOKEN_CHAR[buf[pos]]) {
+                } else if (!HttpParser.isToken(buf[pos])) {
                     throw new IllegalArgumentException(sm.getString("iib.invalidmethod"));
                 }
                 pos++;
@@ -276,9 +277,10 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
                     parsingRequestLineEol = true;
                     space = true;
                     end = pos;
-                } else if ((buf[pos] == Constants.QUESTION)
-                           && (parsingRequestLineQPos == -1)) {
+                } else if ((buf[pos] == Constants.QUESTION) && (parsingRequestLineQPos == -1)) {
                     parsingRequestLineQPos = pos;
+                } else if (HttpParser.isNotRequestTarget(buf[pos])) {
+                    throw new IllegalArgumentException(sm.getString("iib.invalidRequestTarget"));
                 }
                 pos++;
             }
@@ -315,7 +317,7 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
         if (parsingRequestLinePhase == 6) {
             //
             // Reading the protocol
-            // Protocol is always US-ASCII
+            // Protocol is always "HTTP/" DIGIT "." DIGIT
             //
             while (!parsingRequestLineEol) {
                 // Read new bytes if needed
@@ -330,6 +332,8 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
                     if (end == 0)
                         end = pos;
                     parsingRequestLineEol = true;
+                } else if (!HttpParser.isHttpProtocol(buf[pos])) {
+                    throw new IllegalArgumentException(sm.getString("iib.invalidHttpProtocol"));
                 }
                 pos++;
             }
@@ -470,7 +474,7 @@ private HeaderParseStatus parseHeader()
                 headerData.realPos = pos;
                 headerData.lastSignificantChar = pos;
                 break;
-            } else if (chr < 0 || !HTTP_TOKEN_CHAR[chr]) {
+            } else if (!HttpParser.isToken(chr)) {
                 // If a non-token header is detected, skip the line and
                 // ignore the header
                 headerData.lastSignificantChar = pos;

diff --git a/java/org/apache/coyote/http11/InternalAprInputBuffer.java b/java/org/apache/coyote/http11/InternalAprInputBuffer.java
@@ -32,6 +32,7 @@
 import org.apache.tomcat.jni.Status;
 import org.apache.tomcat.util.buf.ByteChunk;
 import org.apache.tomcat.util.buf.MessageBytes;
+import org.apache.tomcat.util.http.parser.HttpParser;
 import org.apache.tomcat.util.net.AbstractEndpoint;
 import org.apache.tomcat.util.net.SocketWrapper;
 
@@ -181,7 +182,7 @@ public boolean parseRequestLine(boolean useAvailableData)
             if (buf[pos] == Constants.SP || buf[pos] == Constants.HT) {
                 space = true;
                 request.method().setBytes(buf, start, pos - start);
-            } else if (!HTTP_TOKEN_CHAR[buf[pos]]) {
+            } else if (!HttpParser.isToken(buf[pos])) {
                 throw new IllegalArgumentException(sm.getString("iib.invalidmethod"));
             }
 
@@ -232,9 +233,10 @@ public boolean parseRequestLine(boolean useAvailableData)
                 eol = true;
                 space = true;
                 end = pos;
-            } else if ((buf[pos] == Constants.QUESTION)
-                       && (questionPos == -1)) {
+            } else if ((buf[pos] == Constants.QUESTION) && (questionPos == -1)) {
                 questionPos = pos;
+            } else if (HttpParser.isNotRequestTarget(buf[pos])) {
+                throw new IllegalArgumentException(sm.getString("iib.invalidRequestTarget"));
             }
 
             pos++;
@@ -270,7 +272,7 @@ public boolean parseRequestLine(boolean useAvailableData)
 
         //
         // Reading the protocol
-        // Protocol is always US-ASCII
+        // Protocol is always "HTTP/" DIGIT "." DIGIT
         //
 
         while (!eol) {
@@ -287,6 +289,8 @@ public boolean parseRequestLine(boolean useAvailableData)
                 if (end == 0)
                     end = pos;
                 eol = true;
+            } else if (!HttpParser.isHttpProtocol(buf[pos])) {
+                throw new IllegalArgumentException(sm.getString("iib.invalidHttpProtocol"));
             }
 
             pos++;
@@ -385,7 +389,7 @@ private boolean parseHeader()
             if (buf[pos] == Constants.COLON) {
                 colon = true;
                 headerValue = headers.addValue(buf, start, pos - start);
-            } else if (buf[pos] < 0 || !HTTP_TOKEN_CHAR[buf[pos]]) {
+            } else if (!HttpParser.isToken(buf[pos])) {
                 // If a non-token header is detected, skip the line and
                 // ignore the header
                 skipLine(start);

diff --git a/java/org/apache/coyote/http11/InternalInputBuffer.java b/java/org/apache/coyote/http11/InternalInputBuffer.java
@@ -28,6 +28,7 @@
 import org.apache.juli.logging.LogFactory;
 import org.apache.tomcat.util.buf.ByteChunk;
 import org.apache.tomcat.util.buf.MessageBytes;
+import org.apache.tomcat.util.http.parser.HttpParser;
 import org.apache.tomcat.util.net.AbstractEndpoint;
 import org.apache.tomcat.util.net.SocketWrapper;
 
@@ -142,7 +143,7 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
             if (buf[pos] == Constants.SP || buf[pos] == Constants.HT) {
                 space = true;
                 request.method().setBytes(buf, start, pos - start);
-            } else if (!HTTP_TOKEN_CHAR[buf[pos]]) {
+            } else if (!HttpParser.isToken(buf[pos])) {
                 throw new IllegalArgumentException(sm.getString("iib.invalidmethod"));
             }
 
@@ -193,9 +194,10 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
                 eol = true;
                 space = true;
                 end = pos;
-            } else if ((buf[pos] == Constants.QUESTION)
-                       && (questionPos == -1)) {
+            } else if ((buf[pos] == Constants.QUESTION) && (questionPos == -1)) {
                 questionPos = pos;
+            } else if (HttpParser.isNotRequestTarget(buf[pos])) {
+                throw new IllegalArgumentException(sm.getString("iib.invalidRequestTarget"));
             }
 
             pos++;
@@ -230,9 +232,8 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
 
         //
         // Reading the protocol
-        // Protocol is always US-ASCII
+        // Protocol is always "HTTP/" DIGIT "." DIGIT
         //
-
         while (!eol) {
 
             // Read new bytes if needed
@@ -247,6 +248,8 @@ public boolean parseRequestLine(boolean useAvailableDataOnly)
                 if (end == 0)
                     end = pos;
                 eol = true;
+            } else if (!HttpParser.isHttpProtocol(buf[pos])) {
+                throw new IllegalArgumentException(sm.getString("iib.invalidHttpProtocol"));
             }
 
             pos++;
@@ -345,7 +348,7 @@ private boolean parseHeader()
             if (buf[pos] == Constants.COLON) {
                 colon = true;
                 headerValue = headers.addValue(buf, start, pos - start);
-            } else if (buf[pos] < 0 || !HTTP_TOKEN_CHAR[buf[pos]]) {
+            } else if (!HttpParser.isToken(buf[pos])) {
                 // If a non-token header is detected, skip the line and
                 // ignore the header
                 skipLine(start);

diff --git a/java/org/apache/coyote/http11/LocalStrings.properties b/java/org/apache/coyote/http11/LocalStrings.properties
@@ -33,8 +33,10 @@ iib.available.readFail=A non-blocking read failed while attempting to determine
 iib.eof.error=Unexpected EOF read on the socket
 iib.failedread.apr=Read failed with APR/native error code [{0}]
 iib.filter.npe=You may not add a null filter
-iib.invalidheader=The HTTP header line [{0}] does not conform to RFC 2616 and has been ignored.
+iib.invalidheader=The HTTP header line [{0}] does not conform to RFC 7230 and has been ignored.
 iib.invalidmethod=Invalid character found in method name. HTTP method names must be tokens
+iib.invalidRequestTarget=Invalid character found in the request target. The valid characters are defined in RFC 7230 and RFC 3986
+iib.invalidHttpProtocol=Invalid character found in the HTTP protocol
 iib.parseheaders.ise.error=Unexpected state: headers already parsed. Buffer not recycled?
 iib.readtimeout=Timeout attempting to read data from the socket
 iib.requestheadertoolarge.error=Request header is too large

diff --git a/java/org/apache/tomcat/util/http/parser/HttpParser.java b/java/org/apache/tomcat/util/http/parser/HttpParser.java
@@ -40,6 +40,8 @@ public class HttpParser {
     private static final boolean[] IS_SEPARATOR = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_TOKEN = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_HEX = new boolean[ARRAY_SIZE];
+    private static final boolean[] IS_NOT_REQUEST_TARGET = new boolean[ARRAY_SIZE];
+    private static final boolean[] IS_HTTP_PROTOCOL = new boolean[ARRAY_SIZE];
 
     static {
         for (int i = 0; i < ARRAY_SIZE; i++) {
@@ -65,6 +67,21 @@ public class HttpParser {
             if ((i >= '0' && i <='9') || (i >= 'a' && i <= 'f') || (i >= 'A' && i <= 'F')) {
                 IS_HEX[i] = true;
             }
+
+            // Not valid for request target.
+            // Combination of multiple rules from RFC7230 and RFC 3986. Must be
+            // ASCII, no controls plus a few additional characters excluded
+            if (IS_CONTROL[i] || i > 127 ||
+                    i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>' || i == '\\' ||
+                    i == '^' || i == '`'  || i == '{' || i == '|' || i == '}') {
+                IS_NOT_REQUEST_TARGET[i] = true;
+            }
+
+            // Not valid for HTTP protocol
+            // "HTTP/" DIGIT "." DIGIT
+            if (i == 'H' || i == 'T' || i == 'P' || i == '/' || i == '.' || (i >= '0' && i <= '9')) {
+                IS_HTTP_PROTOCOL[i] = true;
+            }
         }
     }
 
@@ -99,6 +116,7 @@ public static String unquote(String input) {
         return result.toString();
     }
 
+
     public static boolean isToken(int c) {
         // Fast for correct values, slower for incorrect ones
         try {
@@ -108,15 +126,39 @@ public static boolean isToken(int c) {
         }
     }
 
+
     public static boolean isHex(int c) {
-        // Fast for correct values, slower for incorrect ones
+        // Fast for correct values, slower for some incorrect ones
         try {
             return IS_HEX[c];
         } catch (ArrayIndexOutOfBoundsException ex) {
             return false;
         }
     }
 
+
+    public static boolean isNotRequestTarget(int c) {
+        // Fast for valid request target characters, slower for some incorrect
+        // ones
+        try {
+            return IS_NOT_REQUEST_TARGET[c];
+        } catch (ArrayIndexOutOfBoundsException ex) {
+            return true;
+        }
+    }
+
+
+    public static boolean isHttpProtocol(int c) {
+        // Fast for valid HTTP protocol characters, slower for some incorrect
+        // ones
+        try {
+            return IS_HTTP_PROTOCOL[c];
+        } catch (ArrayIndexOutOfBoundsException ex) {
+            return false;
+        }
+    }
+
+
     // Skip any LWS and return the next char
     static int skipLws(StringReader input, boolean withReset) throws IOException {