From a6d530ad3720bd92abb92b247a79117df5009894 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 22 Apr 2022 11:20:05 -0400 Subject: [PATCH 1/2] CSI: plugin supervisor prestart should not mark itself done The task runner hook `Prestart` response object includes a `Done` field that's intended to tell the client not to run the hook again. The plugin supervisor creates mount points for the task during prestart and saves these mounts in the hook resources. But if a client restarts the hook resources will not be populated. If the plugin task restarts at any time after the client restarts, it will fail to have the correct mounts and crash loop until restart attempts run out. Fix this by not returning `Done` in the response, just as we do for the `volume_mount_hook`. --- .../allocrunner/taskrunner/plugin_supervisor_hook.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/client/allocrunner/taskrunner/plugin_supervisor_hook.go b/client/allocrunner/taskrunner/plugin_supervisor_hook.go index 415b6e20b419..6e63dcfbee7d 100644 --- a/client/allocrunner/taskrunner/plugin_supervisor_hook.go +++ b/client/allocrunner/taskrunner/plugin_supervisor_hook.go @@ -129,10 +129,10 @@ func (*csiPluginSupervisorHook) Name() string { } // Prestart is called before the task is started including after every -// restart (but not after restore). This requires that the mount paths -// for a plugin be idempotent, despite us not knowing the name of the -// plugin ahead of time. Because of this, we use the allocid_taskname -// as the unique identifier for a plugin on the filesystem. +// restart. This requires that the mount paths for a plugin be +// idempotent, despite us not knowing the name of the plugin ahead of +// time. Because of this, we use the allocid_taskname as the unique +// identifier for a plugin on the filesystem. func (h *csiPluginSupervisorHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { @@ -191,9 +191,11 @@ func (h *csiPluginSupervisorHook) Prestart(ctx context.Context, mounts = ensureMountpointInserted(mounts, volumeStagingMounts) mounts = ensureMountpointInserted(mounts, devMount) + // we normally would set resp.Mounts here but without setting the + // hookResources before returning we can get a postrun hook that's + // missing resources. h.runner.hookResources.setMounts(mounts) - resp.Done = true return nil } From 30ece425fd5746208992a41ae7e03986d5bb87f6 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 22 Apr 2022 13:06:40 -0400 Subject: [PATCH 2/2] changelog entry --- .changelog/12752.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/12752.txt diff --git a/.changelog/12752.txt b/.changelog/12752.txt new file mode 100644 index 000000000000..5e271c950e42 --- /dev/null +++ b/.changelog/12752.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where plugins would not restart if they failed any time after a client restart +```