Skip to content

Commit

Permalink
Return 429 response on HTTP max connection limit (#13621)
Browse files Browse the repository at this point in the history
Return 429 response on HTTP max connection limit. Instead of silently closing
the connection, return a `429 Too Many Requests` HTTP response with a helpful
error message to aid debugging when the connection limit is unintentionally
reached.

Set a 10-millisecond write timeout and rate limiter for connection-limit 429
response to prevent writing the HTTP response from consuming too many server
resources.

Add `nomad.agent.http.exceeded metric` counting the number of HTTP connections
exceeding concurrency limit.
  • Loading branch information
wjordan committed Jul 20, 2022
1 parent e8bfd9d commit 662a12a
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 43 deletions.
3 changes: 3 additions & 0 deletions .changelog/13621.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
api: HTTP server now returns a 429 error code when hitting the connection limit
```
43 changes: 35 additions & 8 deletions command/agent/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"strings"
"time"

"github.com/armon/go-metrics"
assetfs "github.com/elazarl/go-bindata-assetfs"
"github.com/gorilla/handlers"
"github.com/gorilla/websocket"
Expand All @@ -22,6 +23,7 @@ import (
"github.com/hashicorp/go-msgpack/codec"
multierror "github.com/hashicorp/go-multierror"
"github.com/rs/cors"
"golang.org/x/time/rate"

"github.com/hashicorp/nomad/acl"
"github.com/hashicorp/nomad/helper/noxssrw"
Expand Down Expand Up @@ -156,7 +158,7 @@ func NewHTTPServers(agent *Agent, config *Config) ([]*HTTPServer, error) {
httpServer := http.Server{
Addr: srv.Addr,
Handler: handlers.CompressHandler(srv.mux),
ConnState: makeConnState(config.TLSConfig.EnableHTTP, handshakeTimeout, maxConns),
ConnState: makeConnState(config.TLSConfig.EnableHTTP, handshakeTimeout, maxConns, srv.logger),
ErrorLog: newHTTPServerLogger(srv.logger),
}

Expand Down Expand Up @@ -213,23 +215,19 @@ func NewHTTPServers(agent *Agent, config *Config) ([]*HTTPServer, error) {
//
// If limit > 0, a per-address connection limit will be enabled regardless of
// TLS. If connLimit == 0 there is no connection limit.
func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) func(conn net.Conn, state http.ConnState) {
func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int, logger log.Logger) func(conn net.Conn, state http.ConnState) {
connLimiter := connLimiter(connLimit, logger)
if !isTLS || handshakeTimeout == 0 {
if connLimit > 0 {
// Still return the connection limiter
return connlimit.NewLimiter(connlimit.Config{
MaxConnsPerClientIP: connLimit,
}).HTTPConnStateFunc()
return connLimiter
}
return nil
}

if connLimit > 0 {
// Return conn state callback with connection limiting and a
// handshake timeout.
connLimiter := connlimit.NewLimiter(connlimit.Config{
MaxConnsPerClientIP: connLimit,
}).HTTPConnStateFunc()

return func(conn net.Conn, state http.ConnState) {
switch state {
Expand Down Expand Up @@ -268,6 +266,35 @@ func makeConnState(isTLS bool, handshakeTimeout time.Duration, connLimit int) fu
}
}

// connLimiter returns a connection-limiter function with a rate-limited 429-response error handler.
// The rate-limit prevents the TLS handshake necessary to write the HTTP response
// from consuming too many server resources.
func connLimiter(connLimit int, logger log.Logger) func(conn net.Conn, state http.ConnState) {
// Global rate-limit of 10 responses per second with a 100-response burst.
limiter := rate.NewLimiter(10, 100)

tooManyConnsMsg := "Your IP is issuing too many concurrent connections, please rate limit your calls\n"
tooManyRequestsResponse := []byte(fmt.Sprintf("HTTP/1.1 429 Too Many Requests\r\n"+
"Content-Type: text/plain\r\n"+
"Content-Length: %d\r\n"+
"Connection: close\r\n\r\n%s", len(tooManyConnsMsg), tooManyConnsMsg))
return connlimit.NewLimiter(connlimit.Config{
MaxConnsPerClientIP: connLimit,
}).HTTPConnStateFuncWithErrorHandler(func(err error, conn net.Conn) {
if err == connlimit.ErrPerClientIPLimitReached {
metrics.IncrCounter([]string{"nomad", "agent", "http", "exceeded"}, 1)
if n := limiter.Reserve(); n.Delay() == 0 {
logger.Warn("Too many concurrent connections", "address", conn.RemoteAddr().String(), "limit", connLimit)
conn.SetDeadline(time.Now().Add(10 * time.Millisecond))
conn.Write(tooManyRequestsResponse)
} else {
n.Cancel()
}
}
conn.Close()
})
}

// tcpKeepAliveListener sets TCP keep-alive timeouts on accepted
// connections. It's used by NewHttpServer so
// dead TCP connections eventually go away.
Expand Down
44 changes: 9 additions & 35 deletions command/agent/http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1254,41 +1254,15 @@ func TestHTTPServer_Limits_OK(t *testing.T) {
// Create a new connection that will go over the connection limit.
limitConn, err := dial(t, addr, useTLS)

// At this point, the state of the connection + handshake are up in the
// air.
//
// 1) dial failed, handshake never started
// => Conn is nil + io.EOF
// 2) dial completed, handshake failed
// => Conn is malformed + (net.OpError OR io.EOF)
// 3) dial completed, handshake succeeded
// => Conn is not nil + no error, however using the Conn should
// result in io.EOF
//
// At no point should Nomad actually write through the limited Conn.

if limitConn == nil || err != nil {
// Case 1 or Case 2 - returned Conn is useless and the error should
// be one of:
// "EOF"
// "closed network connection"
// "connection reset by peer"
msg := err.Error()
acceptable := strings.Contains(msg, "EOF") ||
strings.Contains(msg, "closed network connection") ||
strings.Contains(msg, "connection reset by peer")
require.True(t, acceptable)
} else {
// Case 3 - returned Conn is usable, but Nomad should not write
// anything before closing it.
buf := make([]byte, bufSize)
deadline := time.Now().Add(10 * time.Second)
require.NoError(t, limitConn.SetReadDeadline(deadline))
n, err := limitConn.Read(buf)
require.Equal(t, io.EOF, err)
require.Zero(t, n)
require.NoError(t, limitConn.Close())
}
response := "HTTP/1.1 429"
buf := make([]byte, len(response))
deadline := time.Now().Add(10 * time.Second)
require.NoError(t, limitConn.SetReadDeadline(deadline))
n, err := limitConn.Read(buf)
require.Equal(t, response, string(buf))
require.Nil(t, err)
require.Equal(t, len(response), n)
require.NoError(t, limitConn.Close())

// Assert existing connections are ok
require.Len(t, errCh, 0)
Expand Down
8 changes: 8 additions & 0 deletions website/content/docs/operations/metrics-reference.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,14 @@ Raft database metrics are emitted by the `raft-boltdb` library.
| `nomad.raft.boltdb.txstats.write` | Count of total write operations | Integer | Counter |
| `nomad.raft.boltdb.txstats.writeTime` | Sample of write operation times | Nanoseconds | Summary |

## Agent Metrics

Agent metrics are emitted by all Nomad agents running in either client or server mode.

| Metric | Description | Unit | Type |
| ----------------------------------------- | ----------------------------------------------------- | ----------- | ------- |
| `nomad.agent.http.exceeded` | Count of HTTP connections exceeding concurrency limit | Integer | Counter |

[tagged-metrics]: /docs/telemetry/metrics#tagged-metrics
[sticky]: /docs/job-specification/ephemeral_disk#sticky
[s_port_plan_failure]: /s/port-plan-failure

0 comments on commit 662a12a

Please sign in to comment.