Skip to content

Commit

Permalink
cluster/topology: use cached Cluster get in Reconcile
Browse files Browse the repository at this point in the history
Signed-off-by: Stefan Büringer buringerst@vmware.com
  • Loading branch information
sbueringer committed Jun 29, 2023
1 parent 4920e6e commit d2bd005
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
6 changes: 1 addition & 5 deletions internal/controllers/topology/cluster/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re

// Fetch the Cluster instance.
cluster := &clusterv1.Cluster{}
// Use the live client here so that we do not reconcile a stale cluster object.
// Example: If 2 reconcile loops are triggered in quick succession (one from the cluster and the other from the clusterclass)
// the first reconcile loop could update the cluster object (set the infrastructure cluster ref and control plane ref). If we
// do not use the live client the second reconcile loop could potentially pick up the stale cluster object from the cache.
if err := r.APIReader.Get(ctx, req.NamespacedName, cluster); err != nil {
if err := r.Client.Get(ctx, req.NamespacedName, cluster); err != nil {
if apierrors.IsNotFound(err) {
return ctrl.Result{}, nil
}
Expand Down
20 changes: 20 additions & 0 deletions internal/controllers/topology/cluster/reconcile_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@ import (
"context"
"fmt"
"strings"
"time"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/storage/names"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -438,6 +440,24 @@ func (r *Reconciler) reconcileCluster(ctx context.Context, s *scope.Scope) error
return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: s.Current.Cluster})
}
r.recorder.Eventf(s.Current.Cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q", tlog.KObj{Obj: s.Current.Cluster})

// Wait until Cluster is updated in the cache.
// Note: We have to do this because otherwise using a cached client in the Reconcile func could
// return a stale state of the Cluster we just patched (because the cache might be stale).
// Note: It is good enough to check that the resource version changed. Other controllers might have updated the
// Cluster as well, but the combination of the patch call above without a conflict and a changed resource
// version here guarantees that we see the changes of our own update.
err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
key := client.ObjectKey{Namespace: s.Current.Cluster.GetNamespace(), Name: s.Current.Cluster.GetName()}
cachedCluster := &clusterv1.Cluster{}
if err := r.Client.Get(ctx, key, cachedCluster); err != nil {
return false, err
}
return s.Current.Cluster.GetResourceVersion() != cachedCluster.GetResourceVersion(), nil
})
if err != nil {
return errors.Wrapf(err, "failed to patch %s: failed waiting for Cluster to be updated in cache", tlog.KObj{Obj: s.Current.Cluster})
}
return nil
}

Expand Down

0 comments on commit d2bd005

Please sign in to comment.