Skip to content

Commit

Permalink
WIP: different backoff
Browse files Browse the repository at this point in the history
  • Loading branch information
AndrewSirenko committed Jun 4, 2024
1 parent 1a6763c commit 5688367
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions pkg/controllers/node/termination/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,15 @@ var (
_ operatorcontroller.FinalizingTypedController[*v1.Node] = (*Controller)(nil)

volumeDetachmentsBackoff = wait.Backoff{
Duration: 2 * time.Second,
Factor: 1,
Steps: 10,
}

outOfServiceTaintBackoff = wait.Backoff{
Duration: 1 * time.Second,
Factor: 1,
Steps: 20,
Steps: 10,
}
)

Expand Down Expand Up @@ -117,7 +123,6 @@ func (c *Controller) Finalize(ctx context.Context, node *v1.Node) (reconcile.Res
// In order for stateful pods to smoothly migrate from the terminating Node, we wait for VolumeAttachments
// to be cleaned up before terminating the node and removing it from the cluster.
if err := c.waitForVolumeDetachments(ctx, node, volumeDetachmentsBackoff); err != nil {
// TODO Q Do we prefer logging a warning and resolving errors in waitForVolumeDetachments instead of publishing an event?
c.recorder.Publish(terminatorevents.NodeVolumeAttachmentsRemaining(node, err))
}
// Be careful when removing this delete call in the Node termination flow
Expand All @@ -130,10 +135,9 @@ func (c *Controller) Finalize(ctx context.Context, node *v1.Node) (reconcile.Res
if err := c.terminator.TaintOutOfService(ctx, node); err != nil {
return reconcile.Result{}, fmt.Errorf("tainting node as out-of-service, %w", err)
}
// TODO Q: We can either use a shorter backoff here, or, if we're in a rush to remove the node finalizer, just wait until all VolumeAttachments have a DeletionTimestamp. Today the Node object has to stay up long enough for the AttachDetach controller to see the out-of-service taint on the node so that it can apply a deletion timestamp to each VolumeAttachment. This usually takes a few seconds on a small cluster.
if err := c.waitForVolumeDetachments(ctx, node, volumeDetachmentsBackoff); err != nil {
// TODO Q
}
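// Wait so the AttachDetach controller has time to notice the node's out-of-service taint
// before the finalizer is removed. The returned error is deliberately discarded here.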
_ = c.waitForVolumeDetachments(ctx, node, outOfServiceTaintBackoff)

if err := c.removeFinalizer(ctx, node); err != nil {
return reconcile.Result{}, err
}
Expand Down

0 comments on commit 5688367

Please sign in to comment.