Skip to content

Commit

Permalink
Merge pull request #9027 from sbueringer/pr-1.4-fix-cct-informer-dead…
Browse files Browse the repository at this point in the history
…lock

[release-1.4] 🌱 ClusterCacheTracker: ensure Get/List calls are not getting stuck when apiserver is unreachable
  • Loading branch information
k8s-ci-robot committed Jul 21, 2023
2 parents 3785302 + 1f11902 commit 0d4a1ff
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions controllers/remote/cluster_cache_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,13 @@ func (t *ClusterCacheTracker) newClusterAccessor(ctx context.Context, cluster cl
return nil, err
}

// Wrap the client with a client that sets timeouts on all Get and List calls.
// If we don't set timeouts here, Get and List calls can get stuck if they lazily create a new informer
// and the informer then doesn't sync because the workload cluster apiserver is not reachable.
// An alternative would be to set timeouts in the contexts we pass into all Get and List calls.
// It should be reasonable to have Get and List calls timeout within the duration configured in the restConfig.
delegatingClient = newClientWithTimeout(delegatingClient, config.Timeout)

// Generating a new private key to be used for generating temporary certificates to connect to
// etcd on the target cluster.
// NOTE: Generating a private key is an expensive operation, so we store it in the cluster accessor.
Expand Down Expand Up @@ -603,3 +610,33 @@ func (t *ClusterCacheTracker) healthCheckCluster(ctx context.Context, in *health
t.deleteAccessor(ctx, in.cluster)
}
}

// newClientWithTimeout returns a client that wraps the given client and applies the specified
// timeout to all Get and List calls.
// If we don't set timeouts here, Get and List calls can get stuck if they lazily create a new informer
// and the informer then doesn't sync because the workload cluster apiserver is not reachable.
// An alternative would be to set timeouts in the contexts we pass into all Get and List calls.
//
// A timeout <= 0 is treated as "no timeout" and the given client is returned unwrapped.
// context.WithTimeout with a non-positive duration produces an already-expired context, so
// wrapping in that case would make every Get and List call fail immediately with
// context.DeadlineExceeded instead of running without a deadline.
func newClientWithTimeout(client client.Client, timeout time.Duration) client.Client {
	// NOTE: inside this function body "client" refers to the parameter, not the package.
	if timeout <= 0 {
		return client
	}
	return clientWithTimeout{
		Client:  client,
		timeout: timeout,
	}
}

// clientWithTimeout decorates a client.Client: every method is promoted from the
// embedded client unchanged, except Get and List, which are overridden to run
// with a context bounded by timeout.
type clientWithTimeout struct {
	// Client is the wrapped client all calls are delegated to.
	client.Client

	// timeout is the deadline applied to each individual Get and List call.
	timeout time.Duration
}

// Compile-time check that clientWithTimeout (as a value, which is what
// newClientWithTimeout returns) satisfies client.Client.
var _ client.Client = clientWithTimeout{}

// Get delegates to the wrapped client's Get using a context derived from ctx
// that is cancelled after c.timeout. The derived context is released when the
// call returns.
func (c clientWithTimeout) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
	timeoutCtx, cancel := context.WithTimeout(ctx, c.timeout)
	defer cancel()

	return c.Client.Get(timeoutCtx, key, obj, opts...)
}

// List delegates to the wrapped client's List using a context derived from ctx
// that is cancelled after c.timeout. The derived context is released when the
// call returns.
func (c clientWithTimeout) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
	timeoutCtx, cancel := context.WithTimeout(ctx, c.timeout)
	defer cancel()

	return c.Client.List(timeoutCtx, list, opts...)
}

0 comments on commit 0d4a1ff

Please sign in to comment.