Skip to content

Commit

Permalink
HTTP/2 HPACK (#309):
Browse files Browse the repository at this point in the history
1. Correction of comment about HTTP/2 design;
2. Some corrections in the Huffman decoding functionality.
  • Loading branch information
aleksostapenko committed Oct 4, 2019
1 parent 1f55e2c commit 9e06e66
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 45 deletions.
46 changes: 30 additions & 16 deletions tempesta_fw/hpack.c
Original file line number Diff line number Diff line change
Expand Up @@ -1314,7 +1314,7 @@ huffman_decode_tail(TfwHPack *__restrict hp, TfwHttpReq *__restrict req,
T_DBG3("%s: hp->curr=%d, hp->hctx=%hx, hp->length=%lu,"
" i=%u, shift=%d, offset=%u\n", __func__,
hp->curr, hp->hctx, hp->length, i, shift, offset);
if (likely(shift >= 0)) {
if (likely(shift > 0)) {
if (shift <= hp->curr + HT_NBITS) {
sym = (char)ht_decode[offset + i].offset;
if (tfw_hpack_huffman_write(sym, req))
Expand All @@ -1325,7 +1325,8 @@ huffman_decode_tail(TfwHPack *__restrict hp, TfwHttpReq *__restrict req,
} else {
break;
}
} else {
}
else if (shift < 0) {
/*
* Last full prefix processed here, to allow EOS
* padding detection.
Expand All @@ -1351,6 +1352,11 @@ huffman_decode_tail(TfwHPack *__restrict hp, TfwHttpReq *__restrict req,

return T_DROP;
}
else {
/* @shift must not be zero. */
WARN_ON_ONCE(1);
return T_DROP;
}
}
if (likely(offset == 0)) {
if ((i ^ (HT_EOS_HIGH >> 1)) < (1U << -hp->curr)) {
Expand Down Expand Up @@ -1378,25 +1384,21 @@ huffman_decode_tail_s(TfwHPack *__restrict hp, TfwHttpReq *__restrict req,
" shift=%d, offset=%u\n", __func__, hp->curr, hp->hctx,
hp->length, i, shift, offset);

if (likely(shift >= 0)) {
if (likely(shift > 0)) {
if (likely(shift <= hp->curr + HT_NBITS)) {
sym = (char)ht_decode[offset + i].offset;
if (tfw_hpack_huffman_write(sym, req))
return T_DROP;
hp->curr -= shift;
return huffman_decode_tail(hp, req, 0);
}
} else {
}
else {
/*
* Condition here equivalent to the
* "-shift <= hp->curr + HT_NBITS", but
* working faster.
* @shift for short tables must be greater
* than zero.
*/
if (shift >= -HT_NBITS - hp->curr) {
if (ht_decode[offset + i].offset == 0) {
return T_DROP;
}
}
WARN_ON_ONCE(1);
}

return T_DROP;
Expand Down Expand Up @@ -1442,12 +1444,13 @@ tfw_huffman_decode(TfwHPack *__restrict hp, TfwHttpReq *__restrict req,
" shift=%d, offset=%u, offset=%c\n", __func__,
hp->curr, hp->hctx, hp->length, n, last - src,
i, shift, offset, (char)offset);
if (shift >= 0) {
if (likely(shift > 0)) {
if (tfw_hpack_huffman_write((char)offset, req))
return T_DROP;
hp->curr -= shift;
offset = 0;
} else {
}
else if (shift < 0) {
hp->curr += shift;
if (offset >= HT_SMALL) {
break;
Expand All @@ -1456,6 +1459,11 @@ tfw_huffman_decode(TfwHPack *__restrict hp, TfwHttpReq *__restrict req,
goto end;
}
}
else {
/* @shift must not be zero. */
WARN_ON_ONCE(1);
goto end;
}
}
hp->curr += HT_NBITS - HT_MBITS;
/*
Expand Down Expand Up @@ -1484,11 +1492,17 @@ tfw_huffman_decode(TfwHPack *__restrict hp, TfwHttpReq *__restrict req,
" shift=%d, offset=%u, offset=%c\n", __func__,
hp->curr, hp->hctx, hp->length, n, last - src,
i, shift, offset, (char)offset);
if (likely(shift >= 0)) {
if (likely(shift > 0)) {
if (tfw_hpack_huffman_write((char)offset, req))
return T_DROP;
hp->curr -= shift;
} else {
}
else {
/*
* @shift for short tables must be greater
* than zero.
*/
WARN_ON_ONCE(1);
break;
}
}
Expand Down
88 changes: 59 additions & 29 deletions tempesta_fw/http.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,67 @@
*
* Notes about the design of HTTP/2 implementation.
*
* It could be efficient to make HTTP/2 => HTTP/1.1 transformation on
* HTTP/2 request decoding, but this approach does not scale for the case of
* HTTP/2 => HTTP/2 proxying, since during request receiving/processing
* we do not know what backend (HTTP/2 or HTTP/1.1) the request is intended
* for. Thus, until the backend determination moment, we need to leave
* HTTP/2 representation in the source skb. From the other side we need to
* perform our internal HTTP-specific work (HTTP-tables, HTTP-limits,
* Sessions module, Scheduling etc.), and the choice here could be an
* implementation of an HTTP/2-specific functionality set with custom flags in
* @TfwHttpReq.h_tbl - for this work, but this variant is very hard to apply
* to the Tempesta FW design, since we need to rewrite the whole logic of
* header analysis (mentioned above HTTP-tables, Scheduling etc.); besides,
* having HTTP/1.1 and HTTP/2 clients, we must keep for all this logic two
* versions of strings: in format of HTTP/1.1 (ASCII) and in HTTP/2 format
* (Huffman, etc).
* In order to perform internal HTTP-specific analysis (HTTP-tables,
* HTTP-limits, Sessions module, Scheduling etc.) - for both HTTP/2 and
* HTTP/1.1 processing - we need representation of HTTP-message content.
* In case of HTTP/1.1 we have such representation right from the message's
* source skb - those are ASCII strings, which don't need to be decoded
* and allow quick processing with SIMD instructions. However, content of
* HTTP/2-message (i.e. headers) could be encoded with special HPACK
* methods (static/dynamic indexing and Huffman coding), which doesn't
* allow analyzing the message content directly. To solve this issue for
* HTTP/2-requests processing, current implementation of HTTP-layer creates
* HTTP/1.1-representation of HTTP/2 headers (during HPACK-decoding and
* HTTP/2-parsing) and places that representation into the special pool
* @TfwHttpReq.pit.pool.
*
* As a result, we cannot avoid decoding of HTTP/2 format. The solution
* should be the creation of HTTP/1.1 representation of HTTP/2 headers (via
* HPACK-decoding and HTTP/2-parsing) and place that representation into
* the special pool @TfwHttpReq.pit.pool; during adjusting stage (before
* re-sending request to backend) in @tfw_http_adjust_req() the obtained
* HTTP/1.1 representation should be used for new request assembling in case
* of HTTP/2 => HTTP/1.1 transformation, and for headers re-indexing in
* case of HTTP/2 => HTTP/2 proxying.
* TODO #309: during adjusting stage (before re-sending request to backend)
* in @tfw_h2_adjust_req() the obtained @TfwHttpReq.pit.pool with
* HTTP/1.1-representation must be used for request assembling in case of
* HTTP/2 => HTTP/1.1 transformation.
*
* Actually, we don't need to decode all HTTP/2 headers: some of them could
* be not encoded (not in Huffman form - in ASCII instead) and some - could be
* just indexed (we can keep static and dynamic indexes during internal request
* analysis and convert them into ASCII strings in-place - on demand), so, we
* can save memory/processor resources and create additional HTTP/1.1
* representation only for Huffman encoded headers.
* TODO #309: actually, we don't need to create separate HTTP/1.1-representation
* for all HTTP/2 headers: some of them could be not encoded (not in Huffman
* form - in ASCII instead) and some - could be just indexed (we can keep static
* and dynamic indexes during internal request analysis and convert them into
* ASCII strings in-place - on demand), so, we can save memory/processor
* resources and create additional HTTP/1.1-representation only for Huffman
* encoded headers.
*
* Described above approach was chosen instead of immediate HTTP/2 => HTTP/1.1
* transformation on HTTP/2-message decoding, because the latter one does not
* scale for the case of HTTP/2 => HTTP/2 proxying, since during request
* receiving/processing we do not know what backend (HTTP/2 or HTTP/1.1)
* the request is intended for. Thus, until the backend determination moment,
* we need to leave HTTP/2-representation in the source skb.
* Yet another choice (instead of HTTP/1.1-representation creation for HTTP/2
* message) could be an implementation of an HTTP/2-specific functionality
* set with
* custom flags in @TfwHttpReq.h_tbl - to perform HTTP-specific analysis right
* over source HTTP/2-representation of the message. However, there are several
* problems with HTTP/2-representation analysis:
* 1. HTTP/2 still allows ASCII strings, and even the same HTTP message may
* contain the same header in ASCII and in static table index. Thus, even
* with pure HTTP/2 operation we still must care about binary headers
* representation and plain ASCII representation. That means that the whole
* headers analyzing logic (mentioned above Schedulers, HTTPTables etc.) must
* either encode/decode strings on the fly or keep the two representations
* for the headers;
* 2. Huffman decoding is extremely slow: it operates on units less than a byte,
* while we use SIMD operations for ASCII processing. The bytes-crossing
* logic doesn't allow Huffman decoding to be implemented efficiently in SIMD;
* 3. Huffman encoding, also due to byte boundary crossing, mangles string
* information representation, so efficient strings matching algorithms
* required for #496 and #732 can not be applied.
* Thus, the points specified above (and #732) leave us only one possibility:
* since we must process ASCII-headers along with Huffman-encoded, we have to
* decode Huffman on HTTP-message reception.
*
* Regarding internal HTTP/2-responses (error and cached) generation - a
* dual-logic design is used; i.e. along with the existing functionality for
* HTTP/1.1-responses creation, separate logic is implemented for HTTP/2-responses
* generation, in order to get performance benefits of creating a message right
* away in HTTP/2-format instead of transforming into HTTP/2 from already
* created HTTP/1.1-message.
*
* Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
* Copyright (C) 2015-2019 Tempesta Technologies, Inc.
Expand Down

0 comments on commit 9e06e66

Please sign in to comment.