diff --git a/README.md b/README.md
index 3deb53b2b4..99afdece00 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ See the [storage capacity section](#capacity-support) below for details.
 
 * `--node-deployment`: Enables deploying the external-provisioner together with a CSI driver on nodes to manage node-local volumes. Off by default.
 
-* `--node-deployment-immediate-binding`: Determines whether immediate binding is supported when deployed on each node. Enabled by default, use `--node-deployment-immediate-binding=false` if not desired.
+* `--node-deployment-immediate-binding`: Determines whether immediate binding is supported when deployed on each node. Enabled by default, use `--node-deployment-immediate-binding=false` if not desired. Disabling it may be useful, for example, when a custom controller selects nodes for PVCs.
 
 * `--node-deployment-base-delay`: Determines how long the external-provisioner sleeps initially before trying to own a PVC with immediate binding. Defaults to 20 seconds.
 
@@ -335,7 +335,10 @@ volume was probably higher, but that wasn't measured.
 
 Note that the QPS settings of kube-controller-manager and
 external-provisioner have to be increased at the moment (Kubernetes
-1.19) to provision volumes faster than around 4 volumes/second.
+1.19) to provision volumes faster than around 4 volumes/second. Those
+settings will eventually be replaced by [flow control in the API
+server
+itself](https://kubernetes.io/docs/concepts/cluster-administration/flow-control/).
 
 Beware that if *no* node has sufficient storage available, then also
 no `CreateVolume` call is attempted and thus no events are generated
@@ -365,6 +368,9 @@ back, then the local volumes can be removed manually:
 - force-delete objects: `kubectl delete pv <pv> --wait=false --grace-period=0 --force`
 - remove all finalizers: `kubectl patch pv <pv> -p '{"metadata":{"finalizers":null}}'`
 
+It may also be necessary to scrub disks before reusing them because
+the CSI driver had no chance to do that.
+
 If there still was a PVC which was bound to that PV, it then will be
 moved to phase "Lost". It has to be deleted and re-created if still
 needed because no new volume will be created for it. Editing the PVC
diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go
index 1e6e475fb7..f97abe54a5 100644
--- a/pkg/controller/controller.go
+++ b/pkg/controller/controller.go
@@ -209,12 +209,22 @@ type requiredCapabilities struct {
 // NodeDeployment contains additional parameters for running external-provisioner alongside a
 // CSI driver on one or more nodes in the cluster.
 type NodeDeployment struct {
-	NodeName         string
-	ClaimInformer    coreinformers.PersistentVolumeClaimInformer
-	NodeInfo         csi.NodeGetInfoResponse
+	// NodeName is the name of the node in Kubernetes on which the external-provisioner runs.
+	NodeName string
+	// ClaimInformer is needed to detect when some other external-provisioner
+	// became the owner of a PVC while the local one is still waiting before
+	// trying to become the owner itself.
+	ClaimInformer coreinformers.PersistentVolumeClaimInformer
+	// NodeInfo is the result of NodeGetInfo. It is needed to determine which
+	// PVs were created for the node.
+	NodeInfo csi.NodeGetInfoResponse
+	// ImmediateBinding enables support for PVCs with immediate binding.
 	ImmediateBinding bool
-	BaseDelay        time.Duration
-	MaxDelay         time.Duration
+	// BaseDelay is the initial time that the external-provisioner waits
+	// before trying to become the owner of a PVC with immediate binding.
+	BaseDelay time.Duration
+	// MaxDelay is the maximum for the initial wait time.
+	MaxDelay time.Duration
 }
 
 type internalNodeDeployment struct {
@@ -1362,7 +1372,15 @@ func (nc *internalNodeDeployment) becomeOwner(ctx context.Context, p *csiProvisi
 	klog.V(5).Infof("will try to become owner of PVC %s/%s with resource version %s in %s (attempt #%d)", claim.Namespace, claim.Name, claim.ResourceVersion, delay, requeues)
 	sleep, cancel := context.WithTimeout(ctx, delay)
 	defer cancel()
-	ticker := time.NewTicker(10 * time.Millisecond)
+	// When the base delay is high, we should also check less often.
+	// With multiple provisioners running in parallel, it becomes more
+	// likely that one of them became the owner quickly, so we don't
+	// want to check too slowly either.
+	pollInterval := nc.BaseDelay / 100
+	if pollInterval < 10*time.Millisecond {
+		pollInterval = 10 * time.Millisecond
+	}
+	ticker := time.NewTicker(pollInterval)
 	defer ticker.Stop()
 	check := func() (bool, *v1.PersistentVolumeClaim, error) {
 		current, err := nc.ClaimInformer.Lister().PersistentVolumeClaims(claim.Namespace).Get(claim.Name)
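
A note for reviewers: the new ticker logic is easy to sanity-check in isolation. Below is a minimal standalone Go sketch of the poll-interval computation added in `becomeOwner`; the `pollInterval` helper and the `main` driver are invented for illustration, the patch computes the value inline.

```go
package main

import (
	"fmt"
	"time"
)

// pollInterval mirrors the inline computation added in becomeOwner:
// check roughly 100 times during the initial wait, but never more
// often than the previous hard-coded ticker interval of 10ms.
func pollInterval(baseDelay time.Duration) time.Duration {
	interval := baseDelay / 100
	if interval < 10*time.Millisecond {
		interval = 10 * time.Millisecond
	}
	return interval
}

func main() {
	fmt.Println(pollInterval(20 * time.Second))       // 200ms for the default --node-deployment-base-delay
	fmt.Println(pollInterval(500 * time.Millisecond)) // 10ms, clamped to the floor
}
```

Dividing by 100 keeps the expected extra latency after another provisioner wins ownership at roughly 1% of `--node-deployment-base-delay`, while the 10ms floor preserves the previous polling behavior for small base delays.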