Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

net 2898, graceful_startup #239

Merged
merged 18 commits into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
trevorLeonHC marked this conversation as resolved.
Show resolved Hide resolved
Binary file not shown.
3 changes: 3 additions & 0 deletions .changelog/239.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
Add graceful_startup endpoint and postStart hook in order to guarantee that dataplane starts up before application container.
```
9 changes: 8 additions & 1 deletion cmd/consul-dataplane/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ type EnvoyFlags struct {
GracefulShutdownPath *string `json:"gracefulShutdownPath,omitempty"`
GracefulPort *int `json:"gracefulPort,omitempty"`
DumpEnvoyConfigOnExitEnabled *bool `json:"dumpEnvoyConfigOnExitEnabled,omitempty"`

StartupGracePeriodSeconds *int `json:"startupGracePeriodSeconds,omitempty"`
GracefulStartupPath *string `json:"gracefulStartupPath,omitempty"`
trevorLeonHC marked this conversation as resolved.
Show resolved Hide resolved
}

const (
Expand Down Expand Up @@ -217,7 +220,9 @@ func buildDefaultConsulDPFlags() (DataplaneConfigFlags, error) {
"shutdownGracePeriodSeconds": 0,
"gracefulShutdownPath": "/graceful_shutdown",
"gracefulPort": 20300,
"dumpEnvoyConfigOnExitEnabled": false
"dumpEnvoyConfigOnExitEnabled": false,
"gracefulStartupPath": "/graceful_startup",
"startupGracePeriodSeconds": 0
},
"xdsServer": {
"bindAddress": "127.0.0.1",
Expand Down Expand Up @@ -295,6 +300,8 @@ func constructRuntimeConfig(cfg DataplaneConfigFlags, extraArgs []string) (*cons
DumpEnvoyConfigOnExitEnabled: boolVal(cfg.Envoy.DumpEnvoyConfigOnExitEnabled),
GracefulShutdownPath: stringVal(cfg.Envoy.GracefulShutdownPath),
GracefulPort: intVal(cfg.Envoy.GracefulPort),
StartupGracePeriodSeconds: intVal(cfg.Envoy.StartupGracePeriodSeconds),
GracefulStartupPath: stringVal(cfg.Envoy.GracefulStartupPath),
ExtraArgs: extraArgs,
},
Telemetry: &consuldp.TelemetryConfig{
Expand Down
4 changes: 4 additions & 0 deletions cmd/consul-dataplane/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ func TestConfigGeneration(t *testing.T) {
GracefulShutdownPath: "/graceful_shutdown",
EnvoyDrainTimeSeconds: 30,
GracefulPort: 20300,
GracefulStartupPath: "/graceful_startup",
},
Telemetry: &consuldp.TelemetryConfig{
UseCentralConfig: true,
Expand Down Expand Up @@ -189,6 +190,7 @@ func TestConfigGeneration(t *testing.T) {
EnvoyDrainTimeSeconds: 30,
GracefulPort: 20300,
DumpEnvoyConfigOnExitEnabled: true,
GracefulStartupPath: "/graceful_startup",
},
Telemetry: &consuldp.TelemetryConfig{
UseCentralConfig: true,
Expand Down Expand Up @@ -287,6 +289,7 @@ func TestConfigGeneration(t *testing.T) {
EnvoyDrainTimeSeconds: 30,
GracefulPort: 20300,
DumpEnvoyConfigOnExitEnabled: false,
GracefulStartupPath: "/graceful_startup",
},
Telemetry: &consuldp.TelemetryConfig{
UseCentralConfig: true,
Expand Down Expand Up @@ -410,6 +413,7 @@ func TestConfigGeneration(t *testing.T) {
EnvoyDrainTimeSeconds: 30,
GracefulPort: 20300,
DumpEnvoyConfigOnExitEnabled: false,
GracefulStartupPath: "/graceful_startup",
},
Telemetry: &consuldp.TelemetryConfig{
UseCentralConfig: true,
Expand Down
3 changes: 2 additions & 1 deletion cmd/consul-dataplane/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ func init() {
IntVar(flags, &flagOpts.dataplaneConfig.Envoy.ShutdownGracePeriodSeconds, "shutdown-grace-period-seconds", "DP_SHUTDOWN_GRACE_PERIOD_SECONDS", "Amount of time to wait after receiving a SIGTERM signal before terminating the proxy.")
StringVar(flags, &flagOpts.dataplaneConfig.Envoy.GracefulShutdownPath, "graceful-shutdown-path", "DP_GRACEFUL_SHUTDOWN_PATH", "An HTTP path to serve the graceful shutdown endpoint.")
IntVar(flags, &flagOpts.dataplaneConfig.Envoy.GracefulPort, "graceful-port", "DP_GRACEFUL_PORT", "A port to serve HTTP endpoints for graceful shutdown.")

IntVar(flags, &flagOpts.dataplaneConfig.Envoy.StartupGracePeriodSeconds, "startup-grace-period-seconds", "DP_STARTUP_GRACE_PERIOD_SECONDS", "Amount of time to wait for consul-dataplane startup.")
StringVar(flags, &flagOpts.dataplaneConfig.Envoy.GracefulStartupPath, "graceful-startup-path", "DP_GRACEFUL_STARTUP_PATH", "An HTTP path to serve the graceful startup endpoint.")
// Default is false, may be useful for debugging unexpected termination.
BoolVar(flags, &flagOpts.dataplaneConfig.Envoy.DumpEnvoyConfigOnExitEnabled, "dump-envoy-config-on-exit", "DP_DUMP_ENVOY_CONFIG_ON_EXIT", "Call the Envoy /config_dump endpoint during consul-dataplane controlled shutdown.")

Expand Down
4 changes: 4 additions & 0 deletions pkg/consuldp/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,10 @@ type EnvoyConfig struct {
ShutdownGracePeriodSeconds int
// GracefulShutdownPath is the path on which the HTTP endpoint to initiate a graceful shutdown of Envoy is served
GracefulShutdownPath string
//StartupGracePeriodSeconds is the amount of time to block application after startup for Envoy proxy to be ready.
StartupGracePeriodSeconds int
// GracefulStartupPath is the path on which the HTTP endpoint to initiate a graceful startup of Envoy is served
trevorLeonHC marked this conversation as resolved.
Show resolved Hide resolved
GracefulStartupPath string
// GracefulPort is the port on which the HTTP server for graceful shutdown endpoints will be available.
GracefulPort int
// DumpEnvoyConfigOnExitEnabled configures whether to call Envoy's /config_dump endpoint during consul-dataplane controlled shutdown.
Expand Down
70 changes: 66 additions & 4 deletions pkg/consuldp/lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const (
cdpLifecycleUrl = "http://" + cdpLifecycleBindAddr

defaultLifecycleShutdownPath = "/graceful_shutdown"
defaultLifecycleStartupPath = "/graceful_startup"
)

// lifecycleConfig handles all configuration related to managing the Envoy proxy
Expand All @@ -36,8 +37,9 @@ type lifecycleConfig struct {
shutdownGracePeriodSeconds int
gracefulPort int
gracefulShutdownPath string

dumpEnvoyConfigOnExitEnabled bool
startupGracePeriodSeconds int
gracefulStartupPath string
dumpEnvoyConfigOnExitEnabled bool

// manager for controlling the Envoy proxy process
proxy envoy.ProxyManager
Expand All @@ -58,8 +60,9 @@ func NewLifecycleConfig(cfg *Config, proxy envoy.ProxyManager) *lifecycleConfig
gracefulPort: cfg.Envoy.GracefulPort,
gracefulShutdownPath: cfg.Envoy.GracefulShutdownPath,
dumpEnvoyConfigOnExitEnabled: cfg.Envoy.DumpEnvoyConfigOnExitEnabled,

proxy: proxy,
startupGracePeriodSeconds: cfg.Envoy.StartupGracePeriodSeconds,
gracefulStartupPath: cfg.Envoy.GracefulStartupPath,
proxy: proxy,

errorExitCh: make(chan struct{}, 1),
mu: sync.Mutex{},
Expand Down Expand Up @@ -97,6 +100,8 @@ func (m *lifecycleConfig) startLifecycleManager(ctx context.Context) error {
m.logger.Info(fmt.Sprintf("setting graceful shutdown path: %s\n", cdpLifecycleShutdownPath))
mux.HandleFunc(cdpLifecycleShutdownPath, m.gracefulShutdownHandler)

mux.HandleFunc(defaultLifecycleStartupPath, m.gracefulStartupHandler)

// Determine what the proxy lifecycle management server bind port is. It can be
// set as a flag.
cdpLifecycleBindPort := defaultLifecycleBindPort
Expand Down Expand Up @@ -211,3 +216,60 @@ func (m *lifecycleConfig) gracefulShutdown() {
// Wait for context timeout to elapse
wg.Wait()
}

func (m *lifecycleConfig) gracefulStartupHandler(rw http.ResponseWriter, _ *http.Request) {

//Unlike in gracefulShutdown, we want to delay the OK response until envoy is ready
trevorLeonHC marked this conversation as resolved.
Show resolved Hide resolved
//in order to block application container.
m.gracefulStartup()
rw.WriteHeader(http.StatusOK)

}
trevorLeonHC marked this conversation as resolved.
Show resolved Hide resolved

// gracefulStartup blocks until the startup grace period has elapsed or we have confirmed that
// Envoy proxy is ready.
func (m *lifecycleConfig) gracefulStartup() {
timeout := time.Duration(m.startupGracePeriodSeconds) * time.Second
trevorLeonHC marked this conversation as resolved.
Show resolved Hide resolved
m.logger.Info(fmt.Sprintf("blocking container startup until Envoy ready or grace period of %d seconds elapsed", m.startupGracePeriodSeconds))
ctx, cancel := context.WithTimeout(context.Background(), timeout)
t-eckert marked this conversation as resolved.
Show resolved Hide resolved
defer cancel()

envoyReady := false
var wg sync.WaitGroup
wg.Add(1)
envoyStatus := make(chan bool)

go func() {
defer wg.Done()
envoyReady, _ = m.proxy.Ready()
trevorLeonHC marked this conversation as resolved.
Show resolved Hide resolved
envoyStatus <- envoyReady

//Loop until either proxy is ready or timeout expires.
loop:
for {
select {
case <-ctx.Done():
break loop
case envoyReady = <-envoyStatus:
if envoyReady {
break loop
} else {
//Check if Envoy is ready, don't block here so that timeout break can still happen.
go func() {
ready, _ := m.proxy.Ready()
envoyStatus <- ready
}()
}

}
}

}()

wg.Wait()

if !envoyReady {
m.logger.Info("startup grace period reached before envoy ready")
trevorLeonHC marked this conversation as resolved.
Show resolved Hide resolved
}

}
78 changes: 59 additions & 19 deletions pkg/consuldp/lifecycle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ var (
envoyAdminAddr = "127.0.0.1"
)

// TestLifecycleServerClosed tests that the lifecycle manager properly starts up
// and shuts down when the context passed into it is cancelled.
t-eckert marked this conversation as resolved.
Show resolved Hide resolved
func TestLifecycleServerClosed(t *testing.T) {
cfg := Config{
Envoy: &EnvoyConfig{
Expand All @@ -38,35 +40,35 @@ func TestLifecycleServerClosed(t *testing.T) {
require.Eventually(t, func() bool {
return !m.running
}, time.Second*2, time.Second)

}

func TestLifecycleServerEnabled(t *testing.T) {
func TestLifecycleServer(t *testing.T) {
trevorLeonHC marked this conversation as resolved.
Show resolved Hide resolved
t-eckert marked this conversation as resolved.
Show resolved Hide resolved
cases := map[string]struct {
shutdownDrainListenersEnabled bool
shutdownGracePeriodSeconds int
gracefulShutdownPath string
startupGracePeriodSeconds int
gracefulStartupPath string
gracefulPort int
proxyStartupDelaySeconds int
}{
// TODO: testing the actual Envoy behavior here such as how open or new
// connections are handled should happpen in integration or acceptance tests
"connection draining disabled without grace period": {
"connection draining disabled without shutdown grace period": {
// All inbound and outbound connections are terminated immediately.
},
"connection draining enabled without grace period": {
"connection draining enabled without shutdown grace period": {
// This should immediately send "Connection: close" to inbound HTTP1
// connections, GOAWAY to inbound HTTP2, and terminate connections on
// request completion. Outbound connections should start being rejected
// immediately.
shutdownDrainListenersEnabled: true,
},
"connection draining disabled with grace period": {
"connection draining disabled with shutdown grace period": {
// This should immediately terminate any open inbound connections.
// Outbound connections should be allowed until the grace period has
// elapsed.
shutdownGracePeriodSeconds: 5,
},
"connection draining enabled with grace period": {
"connection draining enabled with shutdown grace period": {
// This should immediately send "Connection: close" to inbound HTTP1
// connections, GOAWAY to inbound HTTP2, and terminate connections on
// request completion.
Expand All @@ -80,11 +82,21 @@ func TestLifecycleServerEnabled(t *testing.T) {
shutdownDrainListenersEnabled: true,
shutdownGracePeriodSeconds: 5,
gracefulShutdownPath: "/quit-nicely",
// TODO: should this be random or use freeport? logic disallows passing
// zero value explicitly
gracefulPort: 23108,
gracefulPort: 23108,
},
"startup grace period with default path, no startup time": {
t-eckert marked this conversation as resolved.
Show resolved Hide resolved
startupGracePeriodSeconds: 5,
},
"startup grace period with default path, small startup time": {
startupGracePeriodSeconds: 10,
proxyStartupDelaySeconds: 5,
},
"startup grace period with default path, large startup time": {
startupGracePeriodSeconds: 10,
proxyStartupDelaySeconds: 15,
},
}

for name, c := range cases {
c := c
log.Printf("config = %v", c)
Expand Down Expand Up @@ -145,12 +157,32 @@ func TestLifecycleServerEnabled(t *testing.T) {
if c.gracefulShutdownPath != "" {
require.Equal(t, m.gracefulShutdownPath, c.gracefulShutdownPath, "failed to set lifecycle server graceful shutdown HTTP endpoint path")
}
shutdownUrl := fmt.Sprintf("http://127.0.0.1:%d%s", port, m.gracefulShutdownPath)

// Check lifecycle server graceful shutdown path configuration
url := fmt.Sprintf("http://127.0.0.1:%d%s", port, m.gracefulShutdownPath)
log.Printf("sending request to %s\n", url)
// Check lifecycle server graceful startup path configuration
if c.gracefulStartupPath != "" {
require.Equal(t, m.gracefulStartupPath, c.gracefulStartupPath, "failed to set lifecycle server graceful startup HTTP endpoint path")
}
startupURL := fmt.Sprintf("http://127.0.0.1:%d%s", port, m.gracefulStartupPath)

resp, err := http.Get(url)
// Start the mock proxy.
go m.proxy.Run(ctx)
start := time.Now()
log.Printf("sending startup check request to %s\n", startupURL)
resp, err := http.Get(startupURL)
require.NoError(t, err)
require.True(t, resp.StatusCode == 200)
duration := time.Since(start)
var expectedTime int
if c.proxyStartupDelaySeconds < c.startupGracePeriodSeconds {
expectedTime = c.proxyStartupDelaySeconds
} else {
expectedTime = c.startupGracePeriodSeconds
}
require.True(t, duration.Seconds()-float64(time.Duration(expectedTime)) < 1)

log.Printf("sending request to %s\n", shutdownUrl)
resp, err = http.Get(shutdownUrl)

// HTTP handler is not blocking, so need to wait and check mock
// client for expected method calls to proxy manager within
Expand Down Expand Up @@ -185,14 +217,19 @@ func TestLifecycleServerEnabled(t *testing.T) {
}

type mockProxy struct {
runCalled int
drainCalled int
quitCalled int
killCalled int
runCalled int
drainCalled int
quitCalled int
killCalled int
isReady bool
startupDelaySeconds int
}

func (p *mockProxy) Run(ctx context.Context) error {
p.runCalled++
time.Sleep(time.Duration(p.startupDelaySeconds) * time.Second)
p.isReady = true

return nil
}

Expand All @@ -213,3 +250,6 @@ func (p *mockProxy) Kill() error {
func (p *mockProxy) DumpConfig() error {
return nil
}
func (p *mockProxy) Ready() (bool, error) {
return p.isReady, nil
}
29 changes: 29 additions & 0 deletions pkg/envoy/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type ProxyManager interface {
Quit() error
Kill() error
DumpConfig() error
Ready() (bool, error)
}

// Proxy manages an Envoy proxy process.
Expand Down Expand Up @@ -422,3 +423,31 @@ func removeArgAndGetValue(stringAr []string, key string) ([]string, string) {
}
return stringAr, ""
}

func (p *Proxy) Ready() (bool, error) {

switch p.getState() {
case stateExited:
// Nothing to do!
return false, nil
case stateStopped:
// Nothing to do!
return false, nil
case stateDraining:
// Nothing to do!
return false, nil
case stateRunning, stateInitial:
// Query ready endpoint to check if proxy is Ready
envoyReadyURL := fmt.Sprintf("http://%s:%v/ready", p.cfg.AdminAddr, p.cfg.AdminBindPort)
rsp, err := p.client.Get(envoyReadyURL)
defer rsp.Body.Close()
if err != nil {
p.cfg.Logger.Error("envoy: admin endpoint not available", "error", err)
return false, err
}
return rsp.StatusCode == 200, nil
default:
return false, nil
}

}