Skip to content

Commit

Permalink
Merge pull request #9028 from sbueringer/pr-fix-cct-informer-deadlock
Browse files Browse the repository at this point in the history
🌱 ClusterCacheTracker: ensure Get/List calls are not getting stuck when apiserver is unreachable
  • Loading branch information
k8s-ci-robot committed Jul 21, 2023
2 parents 7399f5f + 8183c39 commit 20f9b5f
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions controllers/remote/cluster_cache_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,13 @@ func (t *ClusterCacheTracker) createClient(ctx context.Context, config *rest.Con
return nil, nil, nil, fmt.Errorf("failed waiting for cache for remote cluster %v to sync: %w", cluster, cacheCtx.Err())
}

// Wrap the cached client with a client that sets timeouts on all Get and List calls
// If we don't set timeouts here Get and List calls can get stuck if they lazily create a new informer
// and the informer then doesn't sync because the workload cluster apiserver is not reachable.
// An alternative would be to set timeouts in the contexts we pass into all Get and List calls.
// It should be reasonable to have Get and List calls timeout within the duration configured in the restConfig.
cachedClient = newClientWithTimeout(cachedClient, config.Timeout)

// Start cluster healthcheck.
go t.healthCheckCluster(cacheCtx, &healthCheckInput{
cluster: cluster,
Expand Down Expand Up @@ -656,3 +663,33 @@ func (t *ClusterCacheTracker) healthCheckCluster(ctx context.Context, in *health
t.deleteAccessor(ctx, in.cluster)
}
}

// newClientWithTimeout wraps the given client so that every Get and List call
// is issued with a context bounded by timeout.
//
// Without such a bound, a Get or List call that lazily creates a new informer
// can get stuck when that informer then never syncs because the workload
// cluster apiserver is not reachable. The alternative would be to set a
// timeout on the context passed into each individual Get and List call.
func newClientWithTimeout(client client.Client, timeout time.Duration) client.Client {
	var wrapped clientWithTimeout
	wrapped.Client = client
	wrapped.timeout = timeout
	return wrapped
}

// clientWithTimeout decorates a client.Client: its Get and List methods bound
// the call's context with timeout, while every other method is promoted
// unchanged from the embedded client.
type clientWithTimeout struct {
	// Client is the wrapped client that all calls are delegated to.
	client.Client
	// timeout is the duration used to bound the context of Get and List calls.
	timeout time.Duration
}

// Compile-time check that *clientWithTimeout implements client.Client.
var _ client.Client = (*clientWithTimeout)(nil)

// Get delegates to the wrapped client's Get, bounding the call with c.timeout
// so it cannot block indefinitely (for example while a lazily created informer
// waits for a sync that never happens).
//
// A non-positive timeout is treated as "no timeout" and ctx is passed through
// unchanged. The timeout originates from a rest.Config, where zero means no
// timeout; handing a zero duration to context.WithTimeout would instead yield
// an already-expired context and make every call fail.
//
// It returns whatever error the wrapped client returns, including a context
// deadline error when the timeout elapses.
func (c clientWithTimeout) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
	if c.timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, c.timeout)
		// Release the timer as soon as the call returns.
		defer cancel()
	}
	return c.Client.Get(ctx, key, obj, opts...)
}

// List delegates to the wrapped client's List, bounding the call with
// c.timeout so it cannot block indefinitely (for example while a lazily
// created informer waits for a sync that never happens).
//
// A non-positive timeout is treated as "no timeout" and ctx is passed through
// unchanged. The timeout originates from a rest.Config, where zero means no
// timeout; handing a zero duration to context.WithTimeout would instead yield
// an already-expired context and make every call fail.
//
// It returns whatever error the wrapped client returns, including a context
// deadline error when the timeout elapses.
func (c clientWithTimeout) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
	if c.timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, c.timeout)
		// Release the timer as soon as the call returns.
		defer cancel()
	}
	return c.Client.List(ctx, list, opts...)
}

0 comments on commit 20f9b5f

Please sign in to comment.