From df76de44198761874141bf21a6f57ebc88fdade2 Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Fri, 11 Feb 2022 14:21:47 -0500
Subject: [PATCH] csi: plugin instance manager should retry creating gRPC client

---
 client/pluginmanager/csimanager/instance.go | 24 +++++++++++++--------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/client/pluginmanager/csimanager/instance.go b/client/pluginmanager/csimanager/instance.go
index 062b73972783..f76d4a703a34 100644
--- a/client/pluginmanager/csimanager/instance.go
+++ b/client/pluginmanager/csimanager/instance.go
@@ -10,6 +10,7 @@ import (
 )
 
 const managerFingerprintInterval = 30 * time.Second
+const managerFingerprintRetryInterval = 5 * time.Second
 
 // instanceManager is used to manage the fingerprinting and supervision of a
 // single CSI Plugin.
@@ -73,15 +74,6 @@ func newInstanceManager(logger hclog.Logger, eventer TriggerNodeEvent, updater U
 }
 
 func (i *instanceManager) run() {
-	c, err := csi.NewClient(i.info.ConnectionInfo.SocketPath, i.logger)
-	if err != nil {
-		i.logger.Error("failed to setup instance manager client", "error", err)
-		close(i.shutdownCh)
-		return
-	}
-	i.client = c
-	i.fp.client = c
-
 	go i.setupVolumeManager()
 	go i.runLoop()
 }
@@ -96,6 +88,9 @@ func (i *instanceManager) setupVolumeManager() {
 	case <-i.shutdownCtx.Done():
 		return
 	case <-i.fp.hadFirstSuccessfulFingerprintCh:
+		// the runLoop goroutine populates i.client but we never get
+		// the first fingerprint until after it's been populated, so
+		// this is safe
 		i.volumeManager = newVolumeManager(i.logger, i.eventer, i.client, i.mountPoint, i.containerMountPoint, i.fp.requiresStaging)
 		i.logger.Debug("volume manager setup complete")
 		close(i.volumeManagerSetupCh)
@@ -142,6 +137,17 @@ func (i *instanceManager) runLoop() {
 			return
 
 		case <-timer.C:
+			if i.client == nil {
+				c, err := csi.NewClient(i.info.ConnectionInfo.SocketPath, i.logger)
+				if err != nil {
+					i.logger.Debug("failed to setup instance manager client", "error", err)
+					timer.Reset(managerFingerprintRetryInterval)
+					continue
+				}
+				i.client = c
+				i.fp.client = c
+			}
+
 			ctx, cancelFn := i.requestCtxWithTimeout(managerFingerprintInterval)
 			info := i.fp.fingerprint(ctx)
 			cancelFn()