Skip to content

Commit

Permalink
Merge pull request #1287 from abbshr/startup-taint-remove-compensation
Browse files Browse the repository at this point in the history
make sure the startup taint is eventually removed once the EFS driver is ready
  • Loading branch information
k8s-ci-robot committed Jun 7, 2024
2 parents 3618623 + 4e270c4 commit 6d8c526
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 5 deletions.
8 changes: 4 additions & 4 deletions pkg/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"net"
"strings"
"time"

"github.com/container-storage-interface/spec/lib/go/csi"
"google.golang.org/grpc"
Expand Down Expand Up @@ -129,10 +130,9 @@ func (d *Driver) Run() error {

// Remove taint from node to indicate driver startup success
// This is done at the last possible moment to prevent race conditions or false positive removals
err = removeNotReadyTaint(cloud.DefaultKubernetesAPIClient)
if err != nil {
klog.ErrorS(err, "Unexpected failure when attempting to remove node taint(s)")
}
go tryRemoveNotReadyTaintUntilSucceed(time.Second, func() error {
return removeNotReadyTaint(cloud.DefaultKubernetesAPIClient)
})

klog.Infof("Listening for connections on address: %#v", listener.Addr())
return d.srv.Serve(listener)
Expand Down
16 changes: 15 additions & 1 deletion pkg/driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"

"github.com/container-storage-interface/spec/lib/go/csi"
"github.com/kubernetes-sigs/aws-efs-csi-driver/pkg/cloud"
Expand Down Expand Up @@ -464,7 +465,7 @@ type JSONPatch struct {
Value interface{} `json:"value"`
}

// removeNotReadyTaint removes the taint ebs.csi.aws.com/agent-not-ready from the local node
// removeNotReadyTaint removes the taint efs.csi.aws.com/agent-not-ready from the local node
// This taint can be optionally applied by users to prevent startup race conditions such as
// https://github.com/kubernetes/kubernetes/issues/95911
func removeNotReadyTaint(k8sClient cloud.KubernetesAPIClient) error {
Expand Down Expand Up @@ -524,3 +525,16 @@ func removeNotReadyTaint(k8sClient cloud.KubernetesAPIClient) error {
klog.InfoS("Removed taint(s) from local node", "node", nodeName)
return nil
}

// tryRemoveNotReadyTaintUntilSucceed invokes removeFn repeatedly until it
// returns nil. Every failed attempt is logged and followed by a pause of
// interval before the next try, so the call blocks until removeFn succeeds.
func tryRemoveNotReadyTaintUntilSucceed(interval time.Duration, removeFn func() error) {
	// The loop header performs the attempt; the body only runs after a failure.
	for err := removeFn(); err != nil; err = removeFn() {
		klog.ErrorS(err, "Unexpected failure when attempting to remove node taint(s)")
		time.Sleep(interval)
	}
}
30 changes: 30 additions & 0 deletions pkg/driver/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package driver

import (
"context"
"errors"
"fmt"
"os"
"reflect"
Expand Down Expand Up @@ -1012,3 +1013,32 @@ func getNodeMock(mockCtl *gomock.Controller, nodeName string, returnNode *corev1

return mockClient, mockNode
}

// TestTryRemoveNotReadyTaintUntilSucceed checks that the retry helper keeps
// calling the supplied function until it returns nil, and stops right after.
func TestTryRemoveNotReadyTaintUntilSucceed(t *testing.T) {
	// Only the number of attempts is under test, not the delay between them,
	// so a tiny interval keeps the test from sleeping for whole seconds.
	const interval = time.Millisecond

	testCases := []struct {
		name      string
		failures  int // number of leading calls that return an error
		wantCalls int // total calls expected before the helper returns
	}{
		{name: "retries until success", failures: 2, wantCalls: 3},
		{name: "succeeds on first attempt", failures: 0, wantCalls: 1},
	}

	for _, tc := range testCases {
		tc := tc // per-iteration copy for toolchains older than Go 1.22
		t.Run(tc.name, func(t *testing.T) {
			calls := 0
			tryRemoveNotReadyTaintUntilSucceed(interval, func() error {
				calls++
				if calls <= tc.failures {
					return errors.New("test")
				}

				return nil
			})

			if calls != tc.wantCalls {
				t.Fatalf("removeFn called %d times, want %d", calls, tc.wantCalls)
			}
		})
	}
}

0 comments on commit 6d8c526

Please sign in to comment.