Skip to content

Commit

Permalink
consul/connect: fixed a bug where restarting proxy tasks failed. (#16815
Browse files Browse the repository at this point in the history
)

The first start of a Consul Connect proxy sidecar triggers a run
of the envoy_version hook which modifies the task config image
entry. The modification takes into account a number of factors to
correctly populate this. Importantly, once the hook has run, it
marks itself as done so the taskrunner will not execute it again.

When the client receives a non-destructive update for the
allocation which the proxy sidecar is a member of, it will update
and overwrite the task definition within the taskerunner. In doing
so it overwrite the modification performed by the hook. If the
allocation is restarted, the envoy_version hook will be skipped as
it previously marked itself as done, and therefore the sidecar
config image is incorrect and causes a driver error.

The fix removes the hook in marking itself as done to the view of
the taskrunner.
  • Loading branch information
jrasell committed Apr 11, 2023
1 parent 7797680 commit 1c88881
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 57 deletions.
3 changes: 3 additions & 0 deletions .changelog/16815.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
client: Fixed a bug where restarting proxy sidecar tasks failed
```
4 changes: 1 addition & 3 deletions client/allocrunner/taskrunner/envoy_version_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func (_ *envoyVersionHook) Name() string {
return envoyVersionHookName
}

func (h *envoyVersionHook) Prestart(_ context.Context, request *ifs.TaskPrestartRequest, response *ifs.TaskPrestartResponse) error {
func (h *envoyVersionHook) Prestart(_ context.Context, request *ifs.TaskPrestartRequest, _ *ifs.TaskPrestartResponse) error {
// First interpolation of the task image. Typically this turns the default
// ${meta.connect.sidecar_task} into envoyproxy/envoy:v${NOMAD_envoy_version}
// but could be a no-op or some other value if so configured.
Expand All @@ -73,7 +73,6 @@ func (h *envoyVersionHook) Prestart(_ context.Context, request *ifs.TaskPrestart
// - task is a connect sidecar or gateway
// - task image needs ${NOMAD_envoy_version} resolved
if h.skip(request) {
response.Done = true
return nil
}

Expand All @@ -95,7 +94,6 @@ func (h *envoyVersionHook) Prestart(_ context.Context, request *ifs.TaskPrestart
// Set the resulting image.
h.logger.Trace("setting task envoy image", "image", image)
request.Task.Config["image"] = image
response.Done = true
return nil
}

Expand Down
144 changes: 90 additions & 54 deletions client/allocrunner/taskrunner/envoy_version_hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"
"github.com/shoenig/test/must"
)

var (
Expand All @@ -29,19 +29,19 @@ func TestEnvoyVersionHook_semver(t *testing.T) {

t.Run("with v", func(t *testing.T) {
result, err := semver("v1.2.3")
require.NoError(t, err)
require.Equal(t, "1.2.3", result)
must.NoError(t, err)
must.Eq(t, "1.2.3", result)
})

t.Run("without v", func(t *testing.T) {
result, err := semver("1.2.3")
require.NoError(t, err)
require.Equal(t, "1.2.3", result)
must.NoError(t, err)
must.Eq(t, "1.2.3", result)
})

t.Run("unexpected", func(t *testing.T) {
_, err := semver("foo")
require.EqualError(t, err, "unexpected envoy version format: Malformed version: foo")
must.ErrorContains(t, err, "unexpected envoy version format: Malformed version: foo")
})
}

Expand All @@ -52,21 +52,21 @@ func TestEnvoyVersionHook_taskImage(t *testing.T) {
result := (*envoyVersionHook)(nil).taskImage(map[string]interface{}{
// empty
})
require.Equal(t, envoy.ImageFormat, result)
must.Eq(t, envoy.ImageFormat, result)
})

t.Run("not a string", func(t *testing.T) {
result := (*envoyVersionHook)(nil).taskImage(map[string]interface{}{
"image": 7, // not a string
})
require.Equal(t, envoy.ImageFormat, result)
must.Eq(t, envoy.ImageFormat, result)
})

t.Run("normal", func(t *testing.T) {
result := (*envoyVersionHook)(nil).taskImage(map[string]interface{}{
"image": "custom/envoy:latest",
})
require.Equal(t, "custom/envoy:latest", result)
must.Eq(t, "custom/envoy:latest", result)
})
}

Expand All @@ -77,32 +77,32 @@ func TestEnvoyVersionHook_tweakImage(t *testing.T) {

t.Run("legacy", func(t *testing.T) {
result, err := (*envoyVersionHook)(nil).tweakImage(image, nil)
require.NoError(t, err)
require.Equal(t, envoy.FallbackImage, result)
must.NoError(t, err)
must.Eq(t, envoy.FallbackImage, result)
})

t.Run("unexpected", func(t *testing.T) {
_, err := (*envoyVersionHook)(nil).tweakImage(image, map[string][]string{
"envoy": {"foo", "bar", "baz"},
})
require.EqualError(t, err, "unexpected envoy version format: Malformed version: foo")
must.ErrorContains(t, err, "unexpected envoy version format: Malformed version: foo")
})

t.Run("standard envoy", func(t *testing.T) {
result, err := (*envoyVersionHook)(nil).tweakImage(image, map[string][]string{
"envoy": {"1.15.0", "1.14.4", "1.13.4", "1.12.6"},
})
require.NoError(t, err)
require.Equal(t, "envoyproxy/envoy:v1.15.0", result)
must.NoError(t, err)
must.Eq(t, "envoyproxy/envoy:v1.15.0", result)
})

t.Run("custom image", func(t *testing.T) {
custom := "custom-${NOMAD_envoy_version}/envoy:${NOMAD_envoy_version}"
result, err := (*envoyVersionHook)(nil).tweakImage(custom, map[string][]string{
"envoy": {"1.15.0", "1.14.4", "1.13.4", "1.12.6"},
})
require.NoError(t, err)
require.Equal(t, "custom-1.15.0/envoy:1.15.0", result)
must.NoError(t, err)
must.Eq(t, "custom-1.15.0/envoy:1.15.0", result)
})
}

Expand All @@ -116,23 +116,23 @@ func TestEnvoyVersionHook_interpolateImage(t *testing.T) {
Config: map[string]interface{}{"image": envoy.SidecarConfigVar},
}
hook.interpolateImage(task, taskEnvDefault)
require.Equal(t, envoy.ImageFormat, task.Config["image"])
must.Eq(t, envoy.ImageFormat, task.Config["image"])
})

t.Run("default gateway", func(t *testing.T) {
task := &structs.Task{
Config: map[string]interface{}{"image": envoy.GatewayConfigVar},
}
hook.interpolateImage(task, taskEnvDefault)
require.Equal(t, envoy.ImageFormat, task.Config["image"])
must.Eq(t, envoy.ImageFormat, task.Config["image"])
})

t.Run("custom static", func(t *testing.T) {
task := &structs.Task{
Config: map[string]interface{}{"image": "custom/envoy"},
}
hook.interpolateImage(task, taskEnvDefault)
require.Equal(t, "custom/envoy", task.Config["image"])
must.Eq(t, "custom/envoy", task.Config["image"])
})

t.Run("custom interpolated", func(t *testing.T) {
Expand All @@ -144,15 +144,15 @@ func TestEnvoyVersionHook_interpolateImage(t *testing.T) {
}, map[string]string{
"MY_ENVOY": "my/envoy",
}, nil, nil, "", ""))
require.Equal(t, "my/envoy", task.Config["image"])
must.Eq(t, "my/envoy", task.Config["image"])
})

t.Run("no image", func(t *testing.T) {
task := &structs.Task{
Config: map[string]interface{}{},
}
hook.interpolateImage(task, taskEnvDefault)
require.Empty(t, task.Config)
must.MapEmpty(t, task.Config)
})
}

Expand All @@ -168,7 +168,7 @@ func TestEnvoyVersionHook_skip(t *testing.T) {
Config: nil,
},
})
require.True(t, skip)
must.True(t, skip)
})

t.Run("not connect", func(t *testing.T) {
Expand All @@ -178,7 +178,7 @@ func TestEnvoyVersionHook_skip(t *testing.T) {
Kind: "",
},
})
require.True(t, skip)
must.True(t, skip)
})

t.Run("version not needed", func(t *testing.T) {
Expand All @@ -191,7 +191,7 @@ func TestEnvoyVersionHook_skip(t *testing.T) {
},
},
})
require.True(t, skip)
must.True(t, skip)
})

t.Run("version needed custom", func(t *testing.T) {
Expand All @@ -204,7 +204,7 @@ func TestEnvoyVersionHook_skip(t *testing.T) {
},
},
})
require.False(t, skip)
must.False(t, skip)
})

t.Run("version needed standard", func(t *testing.T) {
Expand All @@ -217,7 +217,7 @@ func TestEnvoyVersionHook_skip(t *testing.T) {
},
},
})
require.False(t, skip)
must.False(t, skip)
})
}

Expand Down Expand Up @@ -249,19 +249,16 @@ func TestTaskRunner_EnvoyVersionHook_Prestart_standard(t *testing.T) {
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
require.NoError(t, request.TaskDir.Build(false, nil))
must.NoError(t, request.TaskDir.Build(false, nil))

// Prepare a response
var response ifs.TaskPrestartResponse

// Run the hook
require.NoError(t, h.Prestart(context.Background(), request, &response))

// Assert the hook is Done
require.True(t, response.Done)
must.NoError(t, h.Prestart(context.Background(), request, &response))

// Assert the Task.Config[image] is concrete
require.Equal(t, "envoyproxy/envoy:v1.15.0", request.Task.Config["image"])
must.Eq(t, "envoyproxy/envoy:v1.15.0", request.Task.Config["image"])
}

func TestTaskRunner_EnvoyVersionHook_Prestart_custom(t *testing.T) {
Expand Down Expand Up @@ -293,19 +290,16 @@ func TestTaskRunner_EnvoyVersionHook_Prestart_custom(t *testing.T) {
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
require.NoError(t, request.TaskDir.Build(false, nil))
must.NoError(t, request.TaskDir.Build(false, nil))

// Prepare a response
var response ifs.TaskPrestartResponse

// Run the hook
require.NoError(t, h.Prestart(context.Background(), request, &response))

// Assert the hook is Done
require.True(t, response.Done)
must.NoError(t, h.Prestart(context.Background(), request, &response))

// Assert the Task.Config[image] is concrete
require.Equal(t, "custom-1.14.1:latest", request.Task.Config["image"])
must.Eq(t, "custom-1.14.1:latest", request.Task.Config["image"])
}

func TestTaskRunner_EnvoyVersionHook_Prestart_skip(t *testing.T) {
Expand Down Expand Up @@ -340,19 +334,16 @@ func TestTaskRunner_EnvoyVersionHook_Prestart_skip(t *testing.T) {
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
require.NoError(t, request.TaskDir.Build(false, nil))
must.NoError(t, request.TaskDir.Build(false, nil))

// Prepare a response
var response ifs.TaskPrestartResponse

// Run the hook
require.NoError(t, h.Prestart(context.Background(), request, &response))

// Assert the hook is Done
require.True(t, response.Done)
must.NoError(t, h.Prestart(context.Background(), request, &response))

// Assert the Task.Config[image] does not get set
require.Empty(t, request.Task.Config["image"])
must.MapNotContainsKey(t, request.Task.Config, "image")
}

func TestTaskRunner_EnvoyVersionHook_Prestart_fallback(t *testing.T) {
Expand Down Expand Up @@ -381,19 +372,16 @@ func TestTaskRunner_EnvoyVersionHook_Prestart_fallback(t *testing.T) {
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
require.NoError(t, request.TaskDir.Build(false, nil))
must.NoError(t, request.TaskDir.Build(false, nil))

// Prepare a response
var response ifs.TaskPrestartResponse

// Run the hook
require.NoError(t, h.Prestart(context.Background(), request, &response))

// Assert the hook is Done
require.True(t, response.Done)
must.NoError(t, h.Prestart(context.Background(), request, &response))

// Assert the Task.Config[image] is the fallback image
require.Equal(t, "envoyproxy/envoy:v1.11.2@sha256:a7769160c9c1a55bb8d07a3b71ce5d64f72b1f665f10d81aa1581bc3cf850d09", request.Task.Config["image"])
must.Eq(t, "envoyproxy/envoy:v1.11.2@sha256:a7769160c9c1a55bb8d07a3b71ce5d64f72b1f665f10d81aa1581bc3cf850d09", request.Task.Config["image"])
}

func TestTaskRunner_EnvoyVersionHook_Prestart_error(t *testing.T) {
Expand Down Expand Up @@ -422,15 +410,63 @@ func TestTaskRunner_EnvoyVersionHook_Prestart_error(t *testing.T) {
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
require.NoError(t, request.TaskDir.Build(false, nil))
must.NoError(t, request.TaskDir.Build(false, nil))

// Prepare a response
var response ifs.TaskPrestartResponse

// Run the hook, error should be recoverable
err := h.Prestart(context.Background(), request, &response)
require.EqualError(t, err, "error retrieving supported Envoy versions from Consul: some consul error")
must.ErrorContains(t, err, "error retrieving supported Envoy versions from Consul: some consul error")
}

func TestTaskRunner_EnvoyVersionHook_Prestart_restart(t *testing.T) {
ci.Parallel(t)

logger := testlog.HCLogger(t)

// Setup an Allocation
alloc := mock.ConnectAlloc()
alloc.Job.TaskGroups[0].Tasks[0] = mock.ConnectSidecarTask()
allocDir, cleanupDir := allocdir.TestAllocDir(t, logger, "EnvoyVersionHook", alloc.ID)
defer cleanupDir()

// Set up a mock for Consul API.
mockProxiesAPI := consul.MockSupportedProxiesAPI{
Value: map[string][]string{
"envoy": {"1.15.0", "1.14.4"},
},
Error: nil,
}

// Run envoy_version hook
h := newEnvoyVersionHook(newEnvoyVersionHookConfig(alloc, mockProxiesAPI, logger))

// Create a prestart request
request := &ifs.TaskPrestartRequest{
Task: alloc.Job.TaskGroups[0].Tasks[0],
TaskDir: allocDir.NewTaskDir(alloc.Job.TaskGroups[0].Tasks[0].Name),
TaskEnv: taskEnvDefault,
}
must.NoError(t, request.TaskDir.Build(false, nil))

// Prepare a response
var response ifs.TaskPrestartResponse

// Run the hook and ensure the tasks image has been modified.
must.NoError(t, h.Prestart(context.Background(), request, &response))
must.Eq(t, "envoyproxy/envoy:v1.15.0", request.Task.Config["image"])

// Overwrite the previously modified image. This is the same behaviour that
// occurs when the server sends a non-destructive allocation update.
request.Task.Config["image"] = "${meta.connect.sidecar_image}"

// Run the Prestart hook function again, and ensure the image is updated.
must.NoError(t, h.Prestart(context.Background(), request, &response))
must.Eq(t, "envoyproxy/envoy:v1.15.0", request.Task.Config["image"])

// Assert the hook is not Done
require.False(t, response.Done)
// Run the hook again, and ensure the config is still the same mimicking
// a non-user initiated restart.
must.NoError(t, h.Prestart(context.Background(), request, &response))
must.Eq(t, "envoyproxy/envoy:v1.15.0", request.Task.Config["image"])
}

0 comments on commit 1c88881

Please sign in to comment.