Skip to content

Commit

Permalink
connect: write envoy bootstrap debugging info
Browse files Browse the repository at this point in the history
When Consul Connect just works, it's wonderful. When it doesn't work, it
can be exceedingly difficult to debug: operators have to check task
events, Nomad logs, Consul logs, Consul APIs, and even then critical
information is missing.

Using Consul to generate a bootstrap config for Envoy is notoriously
difficult. Nomad doesn't even log stderr, so operators are left trying
to piece together what went wrong.

This patch attempts to provide *maximal* context which unfortunately
includes secrets. **Secrets are always restricted to the secrets/
directory.** This makes debugging a little harder, but allows operators
to know exactly what operation Nomad was trying to perform.

What's added:

- stderr is sent to alloc/logs/envoy_bootstrap.stderr.0
- the CLI is written to secrets/.envoy_bootstrap.cmd
- the environment is written to secrets/.envoy_bootstrap.env as JSON

Accessing this information is unfortunately awkward:
```
nomad alloc exec -task connect-proxy-count-countdash b36a cat secrets/.envoy_bootstrap.env
nomad alloc exec -task connect-proxy-count-countdash b36a cat secrets/.envoy_bootstrap.cmd
nomad alloc fs b36a alloc/logs/envoy_bootstrap.stderr.0
```

The above assumes an alloc id that starts with `b36a` and a Connect
sidecar proxy for a service named `count-countdash`.

If the alloc is unable to start successfully, the debugging files are
only accessible from the host filesystem.
  • Loading branch information
schmichael committed Feb 18, 2022
1 parent 22bd089 commit aad24ac
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 35 deletions.
3 changes: 3 additions & 0 deletions .changelog/11975.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
consul/connect: write Envoy bootstrapping information to disk for debugging
```
88 changes: 53 additions & 35 deletions client/allocrunner/taskrunner/envoy_bootstrap_hook.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
package taskrunner

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"time"

"github.com/hashicorp/go-hclog"
Expand Down Expand Up @@ -261,6 +263,11 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
// it to the secrets directory like Vault tokens.
bootstrapFilePath := filepath.Join(req.TaskDir.SecretsDir, "envoy_bootstrap.json")

// Write everything related to the command to enable debugging
bootstrapStderrPath := filepath.Join(req.TaskDir.LogDir, "envoy_bootstrap.stderr.0")
bootstrapEnvPath := filepath.Join(req.TaskDir.SecretsDir, ".envoy_bootstrap.env")
bootstrapCmdPath := filepath.Join(req.TaskDir.SecretsDir, ".envoy_bootstrap.cmd")

siToken, err := h.maybeLoadSIToken(req.Task.Name, req.TaskDir.SecretsDir)
if err != nil {
h.logger.Error("failed to generate envoy bootstrap config", "sidecar_for", service.Name)
Expand All @@ -269,16 +276,46 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
h.logger.Debug("check for SI token for task", "task", req.Task.Name, "exists", siToken != "")

bootstrap := h.newEnvoyBootstrapArgs(h.alloc.TaskGroup, service, grpcAddr, envoyAdminBind, envoyReadyBind, siToken, bootstrapFilePath)

// Create command line arguments
bootstrapArgs := bootstrap.args()

// Write args to file for debugging
argsFile, err := os.Create(bootstrapCmdPath)
if err != nil {
return errors.Wrap(err, "failed to write bootstrap command line")
}
defer argsFile.Close()
if _, err := io.WriteString(argsFile, strings.Join(bootstrapArgs, " ")+"\n"); err != nil {
return errors.Wrap(err, "failed to encode bootstrap command line")
}

// Create environment
bootstrapEnv := bootstrap.env(os.Environ())

// Write env to file for debugging
envFile, err := os.Create(bootstrapEnvPath)
if err != nil {
return errors.Wrap(err, "failed to write bootstrap environment")
}
defer envFile.Close()
envEnc := json.NewEncoder(envFile)
envEnc.SetIndent("", " ")
if err := envEnc.Encode(bootstrapEnv); err != nil {
return errors.Wrap(err, "failed to encode bootstrap environment")
}

// keep track of latest error returned from exec-ing consul envoy bootstrap
var cmdErr error

// Since Consul services are registered asynchronously with this task
// hook running, retry until timeout or success.
if backoffErr := exptime.Backoff(func() (bool, error) {

backoffOpts := exptime.BackoffOptions{
MaxSleepTime: h.envoyBootstrapWaitTime,
InitialGapSize: h.envoyBoostrapInitialGap,
MaxJitterSize: h.envoyBootstrapMaxJitter,
}
backoffErr := exptime.Backoff(func() (bool, error) {
// If hook is killed, just stop.
select {
case <-ctx.Done():
Expand All @@ -291,22 +328,24 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
cmd.Env = bootstrapEnv

// Redirect stdout to secrets/envoy_bootstrap.json.
fd, fileErr := os.Create(bootstrapFilePath)
stdout, fileErr := os.Create(bootstrapFilePath)
if fileErr != nil {
return false, fmt.Errorf("failed to create secrets/envoy_bootstrap.json for envoy: %w", fileErr)
}
cmd.Stdout = fd
defer stdout.Close()
cmd.Stdout = stdout

// Redirect stderr into a buffer for later reading.
buf := bytes.NewBuffer(nil)
cmd.Stderr = buf
// Redirect stderr into another file for later debugging.
stderr, fileErr := os.OpenFile(bootstrapStderrPath, os.O_RDWR|os.O_CREATE, 0644)
if fileErr != nil {
return false, fmt.Errorf("failed to create alloc/logs/envoy_bootstrap.stderr.0 for envoy: %w", fileErr)
}
defer stderr.Close()
cmd.Stderr = stderr

// Generate bootstrap
cmdErr = cmd.Run()

// Close bootstrap.json regardless of any command errors.
_ = fd.Close()

// Command succeeded, exit.
if cmdErr == nil {
// Bootstrap written. Mark as done and move on.
Expand All @@ -324,11 +363,9 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
_ = os.Remove(bootstrapFilePath)

return true, cmdErr
}, exptime.BackoffOptions{
MaxSleepTime: h.envoyBootstrapWaitTime,
InitialGapSize: h.envoyBoostrapInitialGap,
MaxJitterSize: h.envoyBootstrapMaxJitter,
}); backoffErr != nil {
}, backoffOpts)

if backoffErr != nil {
// Wrap the last error from Consul and set that as our status.
_, recoverable := cmdErr.(*exec.ExitError)
return structs.NewRecoverableError(
Expand Down Expand Up @@ -394,25 +431,6 @@ func (h *envoyBootstrapHook) writeConfig(filename, config string) error {
return nil
}

// execute runs cmd with its stdout and stderr captured in memory.
//
// On success it returns the captured stdout and a nil error. If running the
// command fails, it returns the captured stderr together with the error
// wrapped by structs.NewRecoverableError.
func (h *envoyBootstrapHook) execute(cmd *exec.Cmd) (string, error) {
	var outBuf, errBuf bytes.Buffer
	cmd.Stdout, cmd.Stderr = &outBuf, &errBuf

	runErr := cmd.Run()
	if runErr == nil {
		return outBuf.String(), nil
	}

	// An *exec.ExitError indicates the command was runnable but exited with
	// an unsuccessful code, so only that kind of failure is flagged as
	// recoverable.
	_, isExitErr := runErr.(*exec.ExitError)
	return errBuf.String(), structs.NewRecoverableError(runErr, isExitErr)
}

// grpcAddress determines the Consul gRPC endpoint address to use.
//
// In host networking this will default to 127.0.0.1:8502.
Expand Down

0 comments on commit aad24ac

Please sign in to comment.