Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Content-Length and Transfer-Encoding problems #2518

Merged
merged 1 commit into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 62 additions & 17 deletions src/brpc/details/http_message.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@

namespace brpc {

DEFINE_bool(allow_chunked_length, false,
"Allow both Transfer-Encoding and Content-Length headers are present.");
DEFINE_bool(http_verbose, false,
"[DEBUG] Print EVERY http request/response");
DEFINE_int32(http_verbose_max_body_length, 512,
Expand Down Expand Up @@ -172,6 +174,28 @@ int HttpMessage::on_headers_complete(http_parser *parser) {
}
}

// https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3
// If a message is received with both a Transfer-Encoding and a
// Content-Length header field, the Transfer-Encoding overrides the
// Content-Length. Such a message might indicate an attempt to
// perform request smuggling (Section 9.5) or response splitting
// (Section 9.4) and ought to be handled as an error. A sender MUST
// remove the received Content-Length field prior to forwarding such
// a message.

// Reject message if both Transfer-Encoding and Content-Length headers
// are present or if allowed by gflag and 'Transfer-Encoding'
// is chunked - remove Content-Length and serve request.
if (parser->uses_transfer_encoding && parser->flags & F_CONTENTLENGTH) {
if (parser->flags & F_CHUNKED && FLAGS_allow_chunked_length) {
http_message->header().RemoveHeader("Content-Length");
} else {
LOG(ERROR) << "HTTP/1.1 protocol error: both Content-Length "
<< "and Transfer-Encoding are set.";
return -1;
}
}

// If server receives a response to a HEAD request, returns 1 and then
// the parser will interpret that as saying that this message has no body.
if (parser->type == HTTP_RESPONSE &&
Expand Down Expand Up @@ -401,6 +425,7 @@ HttpMessage::HttpMessage(bool read_body_progressively,
, _cur_value(NULL)
, _vbodylen(0) {
http_parser_init(&_parser, HTTP_BOTH);
_parser.allow_chunked_length = 1;
_parser.data = this;
}

Expand Down Expand Up @@ -489,6 +514,9 @@ static void DescribeHttpParserFlags(std::ostream& os, unsigned int flags) {
if (flags & F_SKIPBODY) {
os << "F_SKIPBODY|";
}
if (flags & F_CONTENTLENGTH) {
os << "F_CONTENTLENGTH|";
}
}

std::ostream& operator<<(std::ostream& os, const http_parser& parser) {
Expand Down Expand Up @@ -548,7 +576,13 @@ void MakeRawHttpRequest(butil::IOBuf* request,
<< h->minor_version() << BRPC_CRLF;
// Never use "Content-Length" set by user.
h->RemoveHeader("Content-Length");
if (h->method() != HTTP_METHOD_GET) {
const std::string* transfer_encoding = h->GetHeader("Transfer-Encoding");
if (h->method() == HTTP_METHOD_GET) {
h->RemoveHeader("Transfer-Encoding");
} else if (!transfer_encoding) {
// https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.2
// A sender MUST NOT send a Content-Length header field in any message
// that contains a Transfer-Encoding header field.
os << "Content-Length: " << (content ? content->length() : 0)
<< BRPC_CRLF;
}
Expand Down Expand Up @@ -638,25 +672,36 @@ void MakeRawHttpResponse(butil::IOBuf* response,
// A server MUST NOT send a Content-Length header field in any response
// with a status code of 1xx (Informational) or 204 (No Content).
h->RemoveHeader("Content-Length");
} else if (content) {
const std::string* content_length = h->GetHeader("Content-Length");
if (is_head_req) {
// Prioritize "Content-Length" set by user.
// If "Content-Length" is not set, set it to the length of content.
if (!content_length) {
os << "Content-Length: " << content->length() << BRPC_CRLF;
}
} else {
if (content_length) {
h->RemoveHeader("Content-Length");
} else {
const std::string* transfer_encoding = h->GetHeader("Transfer-Encoding");
// https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.2
// A sender MUST NOT send a Content-Length header field in any message
// that contains a Transfer-Encoding header field.
if (transfer_encoding) {
h->RemoveHeader("Content-Length");
}
if (content) {
const std::string* content_length = h->GetHeader("Content-Length");
if (is_head_req) {
if (!content_length && !transfer_encoding) {
// Prioritize "Content-Length" set by user.
// If "Content-Length" is not set, set it to the length of content.
os << "Content-Length: " << content->length() << BRPC_CRLF;
}
} else {
if (!transfer_encoding) {
if (content_length) {
h->RemoveHeader("Content-Length");
}
// Never use "Content-Length" set by user.
// Always set Content-Length since lighttpd requires the header to be
// set to 0 for empty content.
os << "Content-Length: " << content->length() << BRPC_CRLF;
}
}
// Never use "Content-Length" set by user.
// Always set Content-Length since lighttpd requires the header to be
// set to 0 for empty content.
os << "Content-Length: " << content->length() << BRPC_CRLF;
}
}
if (!h->content_type().empty()) {
if (!is_invalid_content && !h->content_type().empty()) {
os << "Content-Type: " << h->content_type()
<< BRPC_CRLF;
}
Expand Down
111 changes: 109 additions & 2 deletions src/brpc/details/http_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,10 @@ enum header_states
, h_transfer_encoding
, h_upgrade

, h_matching_transfer_encoding_token_start
, h_matching_transfer_encoding_chunked
, h_matching_transfer_encoding_token

, h_matching_connection_keep_alive
, h_matching_connection_close

Expand Down Expand Up @@ -499,6 +502,12 @@ bool is_url_char(char c) { return IS_URL_CHAR(c); }

#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)

/**
* Verify that a char is a valid visible (printable) US-ASCII
* character or %x80-FF
**/
#define IS_HEADER_CHAR(ch) \
(ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))

#if BRPC_HTTP_PARSER_STRICT
# define STRICT_CHECK(cond) \
Expand Down Expand Up @@ -691,6 +700,8 @@ size_t http_parser_execute (http_parser *parser,
const char *url_mark = 0;
const char *body_mark = 0;
const char *status_mark = 0;
const unsigned int lenient = parser->lenient_http_headers;
const unsigned int allow_chunked_length = parser->allow_chunked_length;

/* We're in an error state. Don't bother doing anything. */
if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
Expand Down Expand Up @@ -782,6 +793,7 @@ size_t http_parser_execute (http_parser *parser,
if (ch == CR || ch == LF)
break;
parser->flags = 0;
parser->uses_transfer_encoding = 0;
parser->content_length = ULLONG_MAX;

if (ch == 'H') {
Expand Down Expand Up @@ -817,6 +829,7 @@ size_t http_parser_execute (http_parser *parser,
case s_start_res:
{
parser->flags = 0;
parser->uses_transfer_encoding = 0;
parser->content_length = ULLONG_MAX;

switch (ch) {
Expand Down Expand Up @@ -1015,6 +1028,7 @@ size_t http_parser_execute (http_parser *parser,
if (ch == CR || ch == LF)
break;
parser->flags = 0;
parser->uses_transfer_encoding = 0;
parser->content_length = ULLONG_MAX;

if (!IS_ALPHA(ch)) {
Expand Down Expand Up @@ -1470,6 +1484,7 @@ size_t http_parser_execute (http_parser *parser,
parser->header_state = h_general;
} else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
parser->header_state = h_transfer_encoding;
parser->uses_transfer_encoding = 1;
}
break;

Expand Down Expand Up @@ -1544,16 +1559,26 @@ size_t http_parser_execute (http_parser *parser,
if ('c' == c) {
parser->header_state = h_matching_transfer_encoding_chunked;
} else {
parser->header_state = h_general;
parser->header_state = h_matching_transfer_encoding_token;
}
break;

/* Multi-value `Transfer-Encoding` header */
case h_matching_transfer_encoding_token_start:
break;

case h_content_length:
if (!IS_NUM(ch)) {
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
goto error;
}

if (parser->flags & F_CONTENTLENGTH) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
}

parser->flags |= F_CONTENTLENGTH;
parser->content_length = ch - '0';
break;

Expand Down Expand Up @@ -1591,6 +1616,11 @@ size_t http_parser_execute (http_parser *parser,
goto reexecute_byte;
}

if (!lenient && !IS_HEADER_CHAR(ch)) {
SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
goto error;
}

c = LOWER(ch);

switch (parser->header_state) {
Expand Down Expand Up @@ -1627,17 +1657,47 @@ size_t http_parser_execute (http_parser *parser,
break;
}

/* Transfer-Encoding: chunked */
case h_matching_transfer_encoding_token_start:
/* looking for 'Transfer-Encoding: chunked' */
if ('c' == c) {
parser->header_state = h_matching_transfer_encoding_chunked;
} else if (TOKEN(c)) {
/* NOTE(gejun): Not use strict mode for these macros since the additional
* characeters seem to be OK.
*/

/* TODO(indutny): similar code below does this, but why?
* At the very least it seems to be inconsistent given that
* h_matching_transfer_encoding_token does not check for
* `STRICT_TOKEN`
*/
parser->header_state = h_matching_transfer_encoding_token;
} else if (c == ' ' || c == '\t') {
/* Skip lws */
} else {
parser->header_state = h_general;
}
break;

/* Transfer-Encoding: chunked */
case h_matching_transfer_encoding_chunked:
parser->index++;
if (parser->index > sizeof(CHUNKED)-1
|| c != CHUNKED[parser->index]) {
parser->header_state = h_general;
parser->header_state = h_matching_transfer_encoding_token;
} else if (parser->index == sizeof(CHUNKED)-2) {
parser->header_state = h_transfer_encoding_chunked;
}
break;

case h_matching_transfer_encoding_token:
if (ch == ',') {
parser->header_state = h_matching_transfer_encoding_token_start;
parser->index = 0;
}
break;

/* looking for 'Connection: keep-alive' */
case h_matching_connection_keep_alive:
parser->index++;
Expand All @@ -1660,6 +1720,9 @@ size_t http_parser_execute (http_parser *parser,
break;

case h_transfer_encoding_chunked:
if (ch != ' ') parser->header_state = h_matching_transfer_encoding_token;
break;

case h_connection_keep_alive:
case h_connection_close:
if (ch != ' ') parser->header_state = h_general;
Expand Down Expand Up @@ -1739,6 +1802,24 @@ size_t http_parser_execute (http_parser *parser,
break;
}

/* Cannot use transfer-encoding and a content-length header together
per the HTTP specification. (RFC 7230 Section 3.3.3) */
if ((parser->uses_transfer_encoding == 1) &&
(parser->flags & F_CONTENTLENGTH)) {
/* Allow it for lenient parsing as long as `Transfer-Encoding` is
* not `chunked` or allow_length_with_encoding is set
*/
if (parser->flags & F_CHUNKED) {
if (!allow_chunked_length) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
}
} else if (!lenient) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
}
}

parser->state = s_headers_done;

/* Here we call the headers_complete callback. This is somewhat
Expand Down Expand Up @@ -1791,6 +1872,26 @@ size_t http_parser_execute (http_parser *parser,
} else if (parser->flags & F_CHUNKED) {
/* chunked encoding - ignore Content-Length header */
parser->state = s_chunk_size_start;
} else if (parser->uses_transfer_encoding == 1) {
if (parser->type == HTTP_REQUEST && !lenient) {
/* RFC 7230 3.3.3 */
/* If a Transfer-Encoding header field
* is present in a request and the chunked transfer coding is not
* the final encoding, the message body length cannot be determined
* reliably; the server MUST respond with the 400 (Bad Request)
* status code and then close the connection.
*/
SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
return (p - data); /* Error */
} else {
/* RFC 7230 3.3.3 */
/* If a Transfer-Encoding header field is present in a response and
* the chunked transfer coding is not the final encoding, the
* message body length is determined by reading the connection until
* it is closed by the server.
*/
parser->state = s_body_identity_eof;
}
} else {
if (parser->content_length == 0) {
/* Content-Length header given but zero: Content-Length: 0\r\n */
Expand Down Expand Up @@ -2037,6 +2138,12 @@ http_message_needs_eof (const http_parser *parser)
return 0;
}

/* RFC 7230 3.3.3, see `s_headers_almost_done` */
if ((parser->uses_transfer_encoding == 1) &&
(parser->flags & F_CHUNKED) == 0) {
return 1;
}

if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
return 0;
}
Expand Down
20 changes: 15 additions & 5 deletions src/brpc/details/http_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ enum http_parser_flags
, F_TRAILING = 1 << 3
, F_UPGRADE = 1 << 4
, F_SKIPBODY = 1 << 5
, F_CONTENTLENGTH = 1 << 6
};


Expand Down Expand Up @@ -178,13 +179,17 @@ enum http_parser_flags
XX(INVALID_HEADER_TOKEN, "invalid character in header") \
XX(INVALID_CONTENT_LENGTH, \
"invalid character in content-length header") \
XX(UNEXPECTED_CONTENT_LENGTH, \
"unexpected content-length header") \
XX(INVALID_CHUNK_SIZE, \
"invalid character in chunk size header") \
XX(INVALID_CONSTANT, "invalid constant string") \
XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
XX(STRICT, "strict mode assertion failed") \
XX(PAUSED, "parser is paused") \
XX(UNKNOWN, "an unknown error occurred")
XX(UNKNOWN, "an unknown error occurred") \
XX(INVALID_TRANSFER_ENCODING, \
"request has invalid transfer-encoding")


/* Define HPE_* values for each errno value above */
Expand All @@ -202,10 +207,15 @@ enum http_errno {
struct http_parser {
/** PRIVATE **/
unsigned int type : 2; /* enum http_parser_type */
unsigned int flags : 6; /* F_* values from 'flags' enum; semi-public */
unsigned int state : 8; /* enum state from http_parser.c */
unsigned int header_state : 8; /* enum header_state from http_parser.c */
unsigned int index : 8; /* index into current matcher */
unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */
unsigned int state : 7; /* enum state from http_parser.c */
unsigned int header_state : 7; /* enum header_state from http_parser.c */
unsigned int index : 5; /* index into current matcher */
unsigned int uses_transfer_encoding : 1; /* Transfer-Encoding header is present */
unsigned int allow_chunked_length : 1; /* Allow headers with both
* `Content-Length` and
* `Transfer-Encoding: chunked` set */
unsigned int lenient_http_headers : 1;

uint32_t nread; /* # bytes read in various scenarios */
uint64_t content_length; /* # bytes in body. `(uint64_t) -1` (all bits one)
Expand Down
Loading
Loading