From 39c2bbbb9db734a9b0e8f752ab42dea02a8e6790 Mon Sep 17 00:00:00 2001 From: Will Jordan Date: Wed, 6 Jul 2022 11:11:17 -0700 Subject: [PATCH 1/5] Return 429 response on HTTP max connection limit Instead of silently closing the connection, return a `429 Too Many Requests` HTTP response with a helpful error message to aid debugging when the connection limit is unintentionally reached. --- command/agent/http.go | 4 ++-- command/agent/http_test.go | 44 ++++++++------------------------------ 2 files changed, 11 insertions(+), 37 deletions(-) diff --git a/command/agent/http.go b/command/agent/http.go index c3284764bbf6..ad41c6e3aee0 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -216,7 +216,7 @@ func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) fu // Still return the connection limiter return connlimit.NewLimiter(connlimit.Config{ MaxConnsPerClientIP: connLimit, - }).HTTPConnStateFunc() + }).HTTPConnStateFuncWithDefault429Handler(0) } return nil } @@ -226,7 +226,7 @@ func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) fu // handshake timeout. connLimiter := connlimit.NewLimiter(connlimit.Config{ MaxConnsPerClientIP: connLimit, - }).HTTPConnStateFunc() + }).HTTPConnStateFuncWithDefault429Handler(0) return func(conn net.Conn, state http.ConnState) { switch state { diff --git a/command/agent/http_test.go b/command/agent/http_test.go index ef1c62f118a2..b6bfc78bee4f 100644 --- a/command/agent/http_test.go +++ b/command/agent/http_test.go @@ -1223,41 +1223,15 @@ func TestHTTPServer_Limits_OK(t *testing.T) { // Create a new connection that will go over the connection limit. limitConn, err := dial(t, addr, useTLS) - // At this point, the state of the connection + handshake are up in the - // air. - // - // 1) dial failed, handshake never started - // => Conn is nil + io.EOF - // 2) dial completed, handshake failed - // => Conn is malformed + (net.OpError OR io.EOF) - // 3) dial completed, handshake succeeded - // => Conn is not nil + no error, however using the Conn should - // result in io.EOF - // - // At no point should Nomad actually write through the limited Conn. - - if limitConn == nil || err != nil { - // Case 1 or Case 2 - returned Conn is useless and the error should - // be one of: - // "EOF" - // "closed network connection" - // "connection reset by peer" - msg := err.Error() - acceptable := strings.Contains(msg, "EOF") || - strings.Contains(msg, "closed network connection") || - strings.Contains(msg, "connection reset by peer") - require.True(t, acceptable) - } else { - // Case 3 - returned Conn is usable, but Nomad should not write - // anything before closing it. - buf := make([]byte, bufSize) - deadline := time.Now().Add(10 * time.Second) - require.NoError(t, limitConn.SetReadDeadline(deadline)) - n, err := limitConn.Read(buf) - require.Equal(t, io.EOF, err) - require.Zero(t, n) - require.NoError(t, limitConn.Close()) - } + response := "HTTP/1.1 429" + buf := make([]byte, len(response)) + deadline := time.Now().Add(10 * time.Second) + require.NoError(t, limitConn.SetReadDeadline(deadline)) + n, err := limitConn.Read(buf) + require.Equal(t, response, string(buf)) + require.Nil(t, err) + require.Equal(t, len(response), n) + require.NoError(t, limitConn.Close()) // Assert existing connections are ok require.Len(t, errCh, 0) From 2b5f72b04504e34ef9e775897cde7021978f0a10 Mon Sep 17 00:00:00 2001 From: Will Jordan Date: Wed, 6 Jul 2022 15:15:06 -0700 Subject: [PATCH 2/5] set 10-millisecond write timeout for connection-limit 429 response --- command/agent/http.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/command/agent/http.go b/command/agent/http.go index ad41c6e3aee0..c43d61b7d3ae 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -216,7 +216,7 @@ func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) fu // Still return the connection limiter return connlimit.NewLimiter(connlimit.Config{ MaxConnsPerClientIP: connLimit, - }).HTTPConnStateFuncWithDefault429Handler(0) + }).HTTPConnStateFuncWithDefault429Handler(10 * time.Millisecond) } return nil } @@ -226,7 +226,7 @@ func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) fu // handshake timeout. connLimiter := connlimit.NewLimiter(connlimit.Config{ MaxConnsPerClientIP: connLimit, - }).HTTPConnStateFuncWithDefault429Handler(0) + }).HTTPConnStateFuncWithDefault429Handler(10 * time.Millisecond) return func(conn net.Conn, state http.ConnState) { switch state { From 3372c070ffa8e2a63bd5eeab1a2a512f4f134cb1 Mon Sep 17 00:00:00 2001 From: Will Jordan Date: Fri, 15 Jul 2022 16:14:19 -0700 Subject: [PATCH 3/5] add rate-limit to connection-limiter error handler The rate-limit prevents the TLS handshake necessary to write the HTTP response from consuming too many server resources. Add "Too many concurrent connections" log warning when connection limit is reached. --- command/agent/http.go | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/command/agent/http.go b/command/agent/http.go index c43d61b7d3ae..84c65992bf65 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "golang.org/x/time/rate" "net" "net/http" "net/http/pprof" @@ -153,7 +154,7 @@ func NewHTTPServers(agent *Agent, config *Config) ([]*HTTPServer, error) { httpServer := http.Server{ Addr: srv.Addr, Handler: handlers.CompressHandler(srv.mux), - ConnState: makeConnState(config.TLSConfig.EnableHTTP, handshakeTimeout, maxConns), + ConnState: makeConnState(config.TLSConfig.EnableHTTP, handshakeTimeout, maxConns, srv.logger), ErrorLog: newHTTPServerLogger(srv.logger), } @@ -210,13 +211,12 @@ func NewHTTPServers(agent *Agent, config *Config) ([]*HTTPServer, error) { // // If limit > 0, a per-address connection limit will be enabled regardless of // TLS. If connLimit == 0 there is no connection limit. -func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) func(conn net.Conn, state http.ConnState) { +func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int, logger log.Logger) func(conn net.Conn, state http.ConnState) { + connLimiter := connLimiter(connLimit, logger) if !isTLS || handshakeTimeout == 0 { if connLimit > 0 { // Still return the connection limiter - return connlimit.NewLimiter(connlimit.Config{ - MaxConnsPerClientIP: connLimit, - }).HTTPConnStateFuncWithDefault429Handler(10 * time.Millisecond) + return connLimiter } return nil } @@ -224,9 +224,6 @@ func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) fu if connLimit > 0 { // Return conn state callback with connection limiting and a // handshake timeout. - connLimiter := connlimit.NewLimiter(connlimit.Config{ - MaxConnsPerClientIP: connLimit, - }).HTTPConnStateFuncWithDefault429Handler(10 * time.Millisecond) return func(conn net.Conn, state http.ConnState) { switch state { @@ -265,6 +262,34 @@ func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) fu } } +// connLimiter returns a connection-limiter function with a rate-limited 429-response error handler. +// The rate-limit prevents the TLS handshake necessary to write the HTTP response +// from consuming too many server resources. +func connLimiter(connLimit int, logger log.Logger) func(conn net.Conn, state http.ConnState) { + // Global rate-limit of 10 responses per second with a 100-response burst. + limiter := rate.NewLimiter(10, 100) + + tooManyConnsMsg := "Your IP is issuing too many concurrent connections, please rate limit your calls\n" + tooManyRequestsResponse := []byte(fmt.Sprintf("HTTP/1.1 429 Too Many Requests\r\n"+ + "Content-Type: text/plain\r\n"+ + "Content-Length: %d\r\n"+ + "Connection: close\r\n\r\n%s", len(tooManyConnsMsg), tooManyConnsMsg)) + return connlimit.NewLimiter(connlimit.Config{ + MaxConnsPerClientIP: connLimit, + }).HTTPConnStateFuncWithErrorHandler(func(err error, conn net.Conn) { + if err == connlimit.ErrPerClientIPLimitReached { + if n := limiter.Reserve(); n.Delay() == 0 { + logger.Warn("Too many concurrent connections", "address", conn.RemoteAddr().String(), "limit", connLimit) + conn.SetDeadline(time.Now().Add(10 * time.Millisecond)) + conn.Write(tooManyRequestsResponse) + } else { + n.Cancel() + } + } + conn.Close() + }) +} + // tcpKeepAliveListener sets TCP keep-alive timeouts on accepted // connections. It's used by NewHttpServer so // dead TCP connections eventually go away. From 9768e429ea457a5f3b39cb3d330884de5a094adb Mon Sep 17 00:00:00 2001 From: Will Jordan Date: Fri, 15 Jul 2022 16:34:55 -0700 Subject: [PATCH 4/5] add nomad.agent.http.exceeded metric Count of HTTP connections exceeding concurrency limit. --- command/agent/http.go | 2 ++ website/content/docs/operations/metrics-reference.mdx | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/command/agent/http.go b/command/agent/http.go index 84c65992bf65..00d9af28fa5c 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/armon/go-metrics" "golang.org/x/time/rate" "net" "net/http" @@ -278,6 +279,7 @@ func connLimiter(connLimit int, logger log.Logger) func(conn net.Conn, state htt MaxConnsPerClientIP: connLimit, }).HTTPConnStateFuncWithErrorHandler(func(err error, conn net.Conn) { if err == connlimit.ErrPerClientIPLimitReached { + metrics.IncrCounter([]string{"nomad", "agent", "http", "exceeded"}, 1) if n := limiter.Reserve(); n.Delay() == 0 { logger.Warn("Too many concurrent connections", "address", conn.RemoteAddr().String(), "limit", connLimit) conn.SetDeadline(time.Now().Add(10 * time.Millisecond)) diff --git a/website/content/docs/operations/metrics-reference.mdx b/website/content/docs/operations/metrics-reference.mdx index edc1286a8578..89a80a7e9f2b 100644 --- a/website/content/docs/operations/metrics-reference.mdx +++ b/website/content/docs/operations/metrics-reference.mdx @@ -479,6 +479,14 @@ Raft database metrics are emitted by the `raft-boltdb` library. | `nomad.raft.boltdb.txstats.write` | Count of total write operations | Integer | Counter | | `nomad.raft.boltdb.txstats.writeTime` | Sample of write operation times | Nanoseconds | Summary | +## Agent Metrics + +Agent metrics are emitted by all Nomad agents running in either client or server mode. + +| Metric | Description | Unit | Type | +| ----------------------------------------- | ----------------------------------------------------- | ----------- | ------- | +| `nomad.agent.http.exceeded` | Count of HTTP connections exceeding concurrency limit | Integer | Counter | + [tagged-metrics]: /docs/telemetry/metrics#tagged-metrics [s_port_plan_failure]: /s/port-plan-failure From 2b7de37e0085de8db5e66b822c847c0c7ca3735a Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 20 Jul 2022 13:41:33 -0400 Subject: [PATCH 5/5] add changelog and run goimports -d --- .changelog/13621.txt | 3 +++ command/agent/http.go | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 .changelog/13621.txt diff --git a/.changelog/13621.txt b/.changelog/13621.txt new file mode 100644 index 000000000000..acdffdf07f95 --- /dev/null +++ b/.changelog/13621.txt @@ -0,0 +1,3 @@ +```release-note:improvement +api: HTTP server now returns a 429 error code when hitting the connection limit +``` diff --git a/command/agent/http.go b/command/agent/http.go index 00d9af28fa5c..79cac970bac1 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -6,8 +6,6 @@ import ( "encoding/json" "errors" "fmt" - "github.com/armon/go-metrics" - "golang.org/x/time/rate" "net" "net/http" "net/http/pprof" @@ -16,6 +14,7 @@ import ( "strings" "time" + "github.com/armon/go-metrics" assetfs "github.com/elazarl/go-bindata-assetfs" "github.com/gorilla/handlers" "github.com/gorilla/websocket" @@ -24,6 +23,7 @@ import ( "github.com/hashicorp/go-msgpack/codec" multierror "github.com/hashicorp/go-multierror" "github.com/rs/cors" + "golang.org/x/time/rate" "github.com/hashicorp/nomad/acl" "github.com/hashicorp/nomad/helper/noxssrw"