From 97957893190e6f18348504df24571f96ccaf5264 Mon Sep 17 00:00:00 2001
From: Jont828
Date: Wed, 25 Oct 2023 21:13:33 -0400
Subject: [PATCH] Fix bug where MachinePool Machine ownerRefs weren't updating

---
 .../controllers/machinepool_controller.go      |   4 +
 .../machinepool_controller_phases.go           | 102 ++++++------------
 .../machinepool_controller_phases_test.go      |   9 +-
 .../controllers/machine/machine_controller.go  |   7 +-
 internal/webhooks/machine.go                   |   6 ++
 test/framework/ownerreference_helpers.go       |   4 +-
 6 files changed, 57 insertions(+), 75 deletions(-)

diff --git a/exp/internal/controllers/machinepool_controller.go b/exp/internal/controllers/machinepool_controller.go
index a8db7b93540b..6c11686e5d12 100644
--- a/exp/internal/controllers/machinepool_controller.go
+++ b/exp/internal/controllers/machinepool_controller.go
@@ -41,6 +41,7 @@ import (
 	"sigs.k8s.io/cluster-api/controllers/external"
 	"sigs.k8s.io/cluster-api/controllers/remote"
 	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
+	"sigs.k8s.io/cluster-api/internal/util/ssa"
 	"sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/cluster-api/util/annotations"
 	"sigs.k8s.io/cluster-api/util/conditions"
@@ -69,6 +70,7 @@ type MachinePoolReconciler struct {
 	WatchFilterValue string
 
 	controller      controller.Controller
+	ssaCache        ssa.Cache
 	recorder        record.EventRecorder
 	externalTracker external.ObjectTracker
 }
@@ -105,6 +107,8 @@ func (r *MachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.M
 		Controller: c,
 		Cache:      mgr.GetCache(),
 	}
 
+	r.ssaCache = ssa.NewCache()
+
 	return nil
 }
diff --git a/exp/internal/controllers/machinepool_controller_phases.go b/exp/internal/controllers/machinepool_controller_phases.go
index 9449b692dc7a..0eed26251cf3 100644
--- a/exp/internal/controllers/machinepool_controller_phases.go
+++ b/exp/internal/controllers/machinepool_controller_phases.go
@@ -28,7 +28,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	kerrors "k8s.io/apimachinery/pkg/util/errors"
-	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apiserver/pkg/storage/names"
 	"k8s.io/klog/v2"
@@ -43,6 +42,7 @@ import (
 	capierrors "sigs.k8s.io/cluster-api/errors"
 	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
 	utilexp "sigs.k8s.io/cluster-api/exp/util"
+	"sigs.k8s.io/cluster-api/internal/util/ssa"
 	"sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/cluster-api/util/annotations"
 	"sigs.k8s.io/cluster-api/util/conditions"
@@ -376,100 +376,63 @@ func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, mp *expv1
 		return err
 	}
 
-	updatedMachines, err := r.createMachinesIfNotExists(ctx, mp, machineList.Items, infraMachineList.Items)
-	if err != nil {
-		return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace)
-	}
-
-	if err := r.ensureInfraMachineOnwerRefs(ctx, updatedMachines, infraMachineList.Items); err != nil {
+	if err := r.createOrUpdateMachines(ctx, mp, machineList.Items, infraMachineList.Items); err != nil {
 		return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace)
 	}
 
 	return nil
 }
 
-// createMachinesIfNotExists creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef.
-func (r *MachinePoolReconciler) createMachinesIfNotExists(ctx context.Context, mp *expv1.MachinePool, machines []clusterv1.Machine, infraMachines []unstructured.Unstructured) ([]clusterv1.Machine, error) {
+// createOrUpdateMachines creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef; if a Machine already exists, it is updated instead.
+func (r *MachinePoolReconciler) createOrUpdateMachines(ctx context.Context, mp *expv1.MachinePool, machines []clusterv1.Machine, infraMachines []unstructured.Unstructured) error {
 	log := ctrl.LoggerFrom(ctx)
 
 	// Construct a set of names of infraMachines that already have a Machine.
-	infraRefNames := sets.Set[string]{}
+	infraMachineToMachine := map[string]clusterv1.Machine{}
 	for _, machine := range machines {
 		infraRef := machine.Spec.InfrastructureRef
-		infraRefNames.Insert(infraRef.Name)
+		infraMachineToMachine[infraRef.Name] = machine
 	}
 
 	createdMachines := []clusterv1.Machine{}
 	var errs []error
 	for i := range infraMachines {
 		infraMachine := &infraMachines[i]
-		// If infraMachine already has a Machine, skip it.
-		if infraRefNames.Has(infraMachine.GetName()) {
-			log.V(4).Info("Machine already exists for infraMachine", "infraMachine", klog.KObj(infraMachine))
-			continue
-		}
-		// Otherwise create a new Machine for the infraMachine.
-		log.Info("Creating new Machine for infraMachine", infraMachine.GroupVersionKind().Kind, klog.KObj(infraMachine))
-		machine := getNewMachine(mp, infraMachine)
-		if err := r.Client.Create(ctx, machine); err != nil {
-			errs = append(errs, errors.Wrapf(err, "failed to create new Machine for infraMachine %q in namespace %q", infraMachine.GetName(), infraMachine.GetNamespace()))
-			continue
+		// If infraMachine already has a Machine, update it if needed.
+		if existingMachine, ok := infraMachineToMachine[infraMachine.GetName()]; ok {
+			log.V(2).Info("Patching existing Machine for infraMachine", "infraMachine", klog.KObj(infraMachine), "machine", klog.KObj(&existingMachine))
+
+			desiredMachine := computeDesiredMachine(mp, infraMachine, &existingMachine)
+			if err := ssa.Patch(ctx, r.Client, MachinePoolControllerName, desiredMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: &existingMachine}); err != nil {
+				log.Error(err, "failed to update Machine", "Machine", klog.KObj(desiredMachine))
+				errs = append(errs, errors.Wrapf(err, "failed to update Machine %q", klog.KObj(desiredMachine)))
+			}
+		} else {
+			// Otherwise create a new Machine for the infraMachine.
+			log.Info("Creating new Machine for infraMachine", "infraMachine", klog.KObj(infraMachine))
+			machine := computeDesiredMachine(mp, infraMachine, nil)
+
+			if err := r.Client.Create(ctx, machine); err != nil {
+				errs = append(errs, errors.Wrapf(err, "failed to create new Machine for infraMachine %q in namespace %q", infraMachine.GetName(), infraMachine.GetNamespace()))
+				continue
+			}
+
+			createdMachines = append(createdMachines, *machine)
 		}
-		createdMachines = append(createdMachines, *machine)
 	}
-	machines = append(machines, createdMachines...)
 	if err := r.waitForMachineCreation(ctx, createdMachines); err != nil {
 		errs = append(errs, errors.Wrapf(err, "failed to wait for machines to be created"))
 	}
 	if len(errs) > 0 {
-		return nil, kerrors.NewAggregate(errs)
-	}
-
-	return machines, nil
-}
-
-// ensureInfraMachineOnwerRefs sets the ownerReferences on the each infraMachine to its associated MachinePool Machine if it hasn't already been done.
-func (r *MachinePoolReconciler) ensureInfraMachineOnwerRefs(ctx context.Context, updatedMachines []clusterv1.Machine, infraMachines []unstructured.Unstructured) error {
-	log := ctrl.LoggerFrom(ctx)
-	infraMachineNameToMachine := make(map[string]clusterv1.Machine)
-	for _, machine := range updatedMachines {
-		infraRef := machine.Spec.InfrastructureRef
-		infraMachineNameToMachine[infraRef.Name] = machine
-	}
-
-	for i := range infraMachines {
-		infraMachine := &infraMachines[i]
-		ownerRefs := infraMachine.GetOwnerReferences()
-
-		machine, ok := infraMachineNameToMachine[infraMachine.GetName()]
-		if !ok {
-			return errors.Errorf("failed to patch ownerRef for infraMachine %q because no Machine has an infraRef pointing to it", infraMachine.GetName())
-		}
-		machineRef := metav1.NewControllerRef(&machine, machine.GroupVersionKind())
-		if !util.HasOwnerRef(ownerRefs, *machineRef) {
-			log.V(2).Info("Setting ownerRef on infraMachine", "infraMachine", infraMachine.GetName(), "namespace", infraMachine.GetNamespace(), "machine", machine.GetName())
-
-			patchHelper, err := patch.NewHelper(infraMachine, r.Client)
-			if err != nil {
-				return errors.Wrapf(err, "failed to create patch helper for %s", klog.KObj(infraMachine))
-			}
-
-			ownerRefs = util.EnsureOwnerRef(ownerRefs, *machineRef)
-			infraMachine.SetOwnerReferences(ownerRefs)
-
-			if err := patchHelper.Patch(ctx, infraMachine); err != nil {
-				return errors.Wrapf(err, "failed to patch %s", klog.KObj(infraMachine))
-			}
-
-			log.V(4).Info("Successfully set ownerRef on infraMachine", "infraMachine", infraMachine.GetName(), "namespace", infraMachine.GetNamespace(), "machine", machine.GetName())
-		}
+		return kerrors.NewAggregate(errs)
 	}
 
 	return nil
 }
 
-// getNewMachine creates a new Machine object.
-func getNewMachine(mp *expv1.MachinePool, infraMachine *unstructured.Unstructured) *clusterv1.Machine {
+// computeDesiredMachine constructs the desired Machine for an infraMachine.
+// If the Machine already exists, it ensures that the Machine is always owned by the MachinePool.
+func computeDesiredMachine(mp *expv1.MachinePool, infraMachine *unstructured.Unstructured, existingMachine *clusterv1.Machine) *clusterv1.Machine {
 	infraRef := corev1.ObjectReference{
 		APIVersion: infraMachine.GetAPIVersion(),
 		Kind:       infraMachine.GetKind(),
@@ -492,6 +455,11 @@ func getNewMachine(mp *expv1.MachinePool, infraMachine *unstructured.Unstructure
 		},
 	}
 
+	if existingMachine != nil {
+		machine.SetName(existingMachine.Name)
+		machine.SetUID(existingMachine.UID)
+	}
+
 	for k, v := range mp.Spec.Template.Annotations {
 		machine.Annotations[k] = v
 	}
diff --git a/exp/internal/controllers/machinepool_controller_phases_test.go b/exp/internal/controllers/machinepool_controller_phases_test.go
index 2c4642795913..febc0c372837 100644
--- a/exp/internal/controllers/machinepool_controller_phases_test.go
+++ b/exp/internal/controllers/machinepool_controller_phases_test.go
@@ -39,7 +39,7 @@ import (
 	"sigs.k8s.io/cluster-api/controllers/remote"
 	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
 	"sigs.k8s.io/cluster-api/internal/test/builder"
-	"sigs.k8s.io/cluster-api/util"
+	"sigs.k8s.io/cluster-api/internal/util/ssa"
 	"sigs.k8s.io/cluster-api/util/kubeconfig"
 	"sigs.k8s.io/cluster-api/util/labels/format"
 )
@@ -1312,7 +1312,8 @@ func TestReconcileMachinePoolMachines(t *testing.T) {
 			}
 
 			r := &MachinePoolReconciler{
-				Client: fake.NewClientBuilder().WithObjects(objs...).Build(),
+				Client:   fake.NewClientBuilder().WithObjects(objs...).Build(),
+				ssaCache: ssa.NewCache(),
 			}
 
 			err := r.reconcileMachines(ctx, tc.machinepool, infraConfig)
@@ -1335,10 +1336,8 @@ func TestReconcileMachinePoolMachines(t *testing.T) {
 				g.Expect(machineList.Items).To(HaveLen(len(tc.infraMachines)))
 				for i := range machineList.Items {
 					machine := &machineList.Items[i]
-					infraMachine, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef, machine.Namespace)
+					_, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef, machine.Namespace)
 					g.Expect(err).ToNot(HaveOccurred())
-
-					g.Expect(util.IsControlledBy(infraMachine, machine)).To(BeTrue())
 				}
 			} else {
 				g.Expect(machineList.Items).To(BeEmpty())
diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go
index 772b371bddba..31663a5de504 100644
--- a/internal/controllers/machine/machine_controller.go
+++ b/internal/controllers/machine/machine_controller.go
@@ -797,13 +797,18 @@ func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
 	}
 
 	// Note: following checks are required because after restore from a backup both the Machine controller and the
-	// MachineSet/ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
+	// MachineSet, MachinePool, or ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
 
 	// If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine.
 	if _, ok := m.Labels[clusterv1.MachineSetNameLabel]; ok {
 		return false
 	}
 
+	// If the Machine is originated by a MachinePool object, it should not be adopted directly by the Cluster as a stand-alone Machine.
+	if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok {
+		return false
+	}
+
 	// If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine.
 	if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok {
 		return false
diff --git a/internal/webhooks/machine.go b/internal/webhooks/machine.go
index 6c76eb0cf849..f36cee1f352e 100644
--- a/internal/webhooks/machine.go
+++ b/internal/webhooks/machine.go
@@ -173,6 +173,12 @@ func (webhook *Machine) validate(oldM, newM *clusterv1.Machine) error {
 }
 
 func isMachinePoolMachine(m *clusterv1.Machine) bool {
+	if m.Labels != nil {
+		if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok {
+			return true
+		}
+	}
+
 	for _, owner := range m.OwnerReferences {
 		if owner.Kind == "MachinePool" {
 			return true
diff --git a/test/framework/ownerreference_helpers.go b/test/framework/ownerreference_helpers.go
index 0d91c40fedb5..5b5d78861819 100644
--- a/test/framework/ownerreference_helpers.go
+++ b/test/framework/ownerreference_helpers.go
@@ -171,8 +171,8 @@ var CoreOwnerReferenceAssertion = map[string]func([]metav1.OwnerReference) error
 		return HasExactOwners(owners, machineDeploymentController)
 	},
 	machineKind: func(owners []metav1.OwnerReference) error {
-		// Machines must be owned and controlled by a MachineSet or a KubeadmControlPlane, depending on if this Machine is part of a ControlPlane or not.
-		return HasOneOfExactOwners(owners, []metav1.OwnerReference{machineSetController}, []metav1.OwnerReference{kubeadmControlPlaneController})
+		// Machines must be owned and controlled by a MachineSet, a MachinePool, or a KubeadmControlPlane, depending on whether this Machine is part of a MachineDeployment, a MachinePool, or a ControlPlane.
+		return HasOneOfExactOwners(owners, []metav1.OwnerReference{machineSetController}, []metav1.OwnerReference{machinePoolController}, []metav1.OwnerReference{kubeadmControlPlaneController})
 	},
 	machineHealthCheckKind: func(owners []metav1.OwnerReference) error {
 		// MachineHealthChecks must be owned by the Cluster.
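
---
For context, the create-or-update flow introduced above can be sketched with controller-runtime's
server-side apply directly; the internal ssa.Patch helper used in the patch wraps the same mechanism
and adds request caching. The helper name applyMachine and the field-manager string below are
illustrative assumptions, not part of this patch.

package sketch

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// applyMachine server-side applies a desired MachinePool Machine so that fields managed by the
// MachinePool controller (such as the controller ownerRef) are re-asserted on every reconcile
// instead of only at creation time.
func applyMachine(ctx context.Context, c client.Client, mp *expv1.MachinePool, desired *clusterv1.Machine) error {
	// The MachinePool is the controller owner; re-applying this ownerRef each reconcile keeps it
	// from drifting, e.g. after a restore from backup or a manual edit.
	desired.OwnerReferences = []metav1.OwnerReference{
		*metav1.NewControllerRef(mp, expv1.GroupVersion.WithKind("MachinePool")),
	}

	// Server-side apply requires apiVersion and kind to be set on the applied object.
	desired.SetGroupVersionKind(clusterv1.GroupVersion.WithKind("Machine"))

	// "machinepool-controller" is an assumed field-manager name; the real controller passes its own
	// manager name (and an SSA cache) to ssa.Patch.
	return c.Patch(ctx, desired, client.Apply,
		client.FieldOwner("machinepool-controller"),
		client.ForceOwnership,
	)
}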