Skip to content

Commit

Permalink
Fix bug where MachinePool Machine ownerRefs weren't updating
Browse files Browse the repository at this point in the history
  • Loading branch information
Jont828 committed Nov 1, 2023
1 parent a02a993 commit 43a5091
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 75 deletions.
4 changes: 4 additions & 0 deletions exp/internal/controllers/machinepool_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"sigs.k8s.io/cluster-api/controllers/external"
"sigs.k8s.io/cluster-api/controllers/remote"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
Expand Down Expand Up @@ -69,6 +70,7 @@ type MachinePoolReconciler struct {
WatchFilterValue string

controller controller.Controller
ssaCache ssa.Cache
recorder record.EventRecorder
externalTracker external.ObjectTracker
}
Expand Down Expand Up @@ -105,6 +107,8 @@ func (r *MachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.M
Controller: c,
Cache: mgr.GetCache(),
}
r.ssaCache = ssa.NewCache()

return nil
}

Expand Down
102 changes: 35 additions & 67 deletions exp/internal/controllers/machinepool_controller_phases.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/storage/names"
"k8s.io/klog/v2"
Expand All @@ -43,6 +42,7 @@ import (
capierrors "sigs.k8s.io/cluster-api/errors"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
utilexp "sigs.k8s.io/cluster-api/exp/util"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
Expand Down Expand Up @@ -376,100 +376,63 @@ func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, mp *expv1
return err
}

updatedMachines, err := r.createMachinesIfNotExists(ctx, mp, machineList.Items, infraMachineList.Items)
if err != nil {
return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace)
}

if err := r.ensureInfraMachineOnwerRefs(ctx, updatedMachines, infraMachineList.Items); err != nil {
if err := r.createOrUpdateMachines(ctx, mp, machineList.Items, infraMachineList.Items); err != nil {
return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace)
}

return nil
}

// createMachinesIfNotExists creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef.
func (r *MachinePoolReconciler) createMachinesIfNotExists(ctx context.Context, mp *expv1.MachinePool, machines []clusterv1.Machine, infraMachines []unstructured.Unstructured) ([]clusterv1.Machine, error) {
// createOrUpdateMachines creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef.
func (r *MachinePoolReconciler) createOrUpdateMachines(ctx context.Context, mp *expv1.MachinePool, machines []clusterv1.Machine, infraMachines []unstructured.Unstructured) error {
log := ctrl.LoggerFrom(ctx)

// Construct a set of names of infraMachines that already have a Machine.
infraRefNames := sets.Set[string]{}
infraMachineToMachine := map[string]clusterv1.Machine{}
for _, machine := range machines {
infraRef := machine.Spec.InfrastructureRef
infraRefNames.Insert(infraRef.Name)
infraMachineToMachine[infraRef.Name] = machine
}

createdMachines := []clusterv1.Machine{}
var errs []error
for i := range infraMachines {
infraMachine := &infraMachines[i]
// If infraMachine already has a Machine, skip it.
if infraRefNames.Has(infraMachine.GetName()) {
log.V(4).Info("Machine already exists for infraMachine", "infraMachine", klog.KObj(infraMachine))
continue
}
// Otherwise create a new Machine for the infraMachine.
log.Info("Creating new Machine for infraMachine", infraMachine.GroupVersionKind().Kind, klog.KObj(infraMachine))
machine := getNewMachine(mp, infraMachine)
if err := r.Client.Create(ctx, machine); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to create new Machine for infraMachine %q in namespace %q", infraMachine.GetName(), infraMachine.GetNamespace()))
continue
// If infraMachine already has a Machine, update it if needed.
if existingMachine, ok := infraMachineToMachine[infraMachine.GetName()]; ok {
log.V(2).Info("Patching existing Machine for infraMachine", "infraMachine", klog.KObj(infraMachine), "machine", klog.KObj(&existingMachine))

desiredMachine := computeDesiredMachine(mp, infraMachine, &existingMachine)
if err := ssa.Patch(ctx, r.Client, MachinePoolControllerName, desiredMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: &existingMachine}); err != nil {
log.Error(err, "failed to update Machine", "Machine", klog.KObj(desiredMachine))
errs = append(errs, errors.Wrapf(err, "failed to update Machine %q", klog.KObj(desiredMachine)))
}
} else {
// Otherwise create a new Machine for the infraMachine.
log.Info("Creating new Machine for infraMachine", "infraMachine", klog.KObj(infraMachine))
machine := computeDesiredMachine(mp, infraMachine, nil)

if err := r.Client.Create(ctx, machine); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to create new Machine for infraMachine %q in namespace %q", infraMachine.GetName(), infraMachine.GetNamespace()))
continue
}

createdMachines = append(createdMachines, *machine)
}
createdMachines = append(createdMachines, *machine)
}
machines = append(machines, createdMachines...)
if err := r.waitForMachineCreation(ctx, createdMachines); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to wait for machines to be created"))
}
if len(errs) > 0 {
return nil, kerrors.NewAggregate(errs)
}

return machines, nil
}

// ensureInfraMachineOnwerRefs sets the ownerReferences on the each infraMachine to its associated MachinePool Machine if it hasn't already been done.
func (r *MachinePoolReconciler) ensureInfraMachineOnwerRefs(ctx context.Context, updatedMachines []clusterv1.Machine, infraMachines []unstructured.Unstructured) error {
log := ctrl.LoggerFrom(ctx)
infraMachineNameToMachine := make(map[string]clusterv1.Machine)
for _, machine := range updatedMachines {
infraRef := machine.Spec.InfrastructureRef
infraMachineNameToMachine[infraRef.Name] = machine
}

for i := range infraMachines {
infraMachine := &infraMachines[i]
ownerRefs := infraMachine.GetOwnerReferences()

machine, ok := infraMachineNameToMachine[infraMachine.GetName()]
if !ok {
return errors.Errorf("failed to patch ownerRef for infraMachine %q because no Machine has an infraRef pointing to it", infraMachine.GetName())
}
machineRef := metav1.NewControllerRef(&machine, machine.GroupVersionKind())
if !util.HasOwnerRef(ownerRefs, *machineRef) {
log.V(2).Info("Setting ownerRef on infraMachine", "infraMachine", infraMachine.GetName(), "namespace", infraMachine.GetNamespace(), "machine", machine.GetName())

patchHelper, err := patch.NewHelper(infraMachine, r.Client)
if err != nil {
return errors.Wrapf(err, "failed to create patch helper for %s", klog.KObj(infraMachine))
}

ownerRefs = util.EnsureOwnerRef(ownerRefs, *machineRef)
infraMachine.SetOwnerReferences(ownerRefs)

if err := patchHelper.Patch(ctx, infraMachine); err != nil {
return errors.Wrapf(err, "failed to patch %s", klog.KObj(infraMachine))
}

log.V(4).Info("Successfully set ownerRef on infraMachine", "infraMachine", infraMachine.GetName(), "namespace", infraMachine.GetNamespace(), "machine", machine.GetName())
}
return kerrors.NewAggregate(errs)
}

return nil
}

// getNewMachine creates a new Machine object.
func getNewMachine(mp *expv1.MachinePool, infraMachine *unstructured.Unstructured) *clusterv1.Machine {
// computeDesiredMachine constructs the desired Machine for an infraMachine.
// If the Machine exists, it ensures the Machine always owned by the MachinePool.
func computeDesiredMachine(mp *expv1.MachinePool, infraMachine *unstructured.Unstructured, existingMachine *clusterv1.Machine) *clusterv1.Machine {
infraRef := corev1.ObjectReference{
APIVersion: infraMachine.GetAPIVersion(),
Kind: infraMachine.GetKind(),
Expand All @@ -492,6 +455,11 @@ func getNewMachine(mp *expv1.MachinePool, infraMachine *unstructured.Unstructure
},
}

if existingMachine != nil {
machine.SetName(existingMachine.Name)
machine.SetUID(existingMachine.UID)
}

for k, v := range mp.Spec.Template.Annotations {
machine.Annotations[k] = v
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ import (
"sigs.k8s.io/cluster-api/controllers/remote"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/internal/test/builder"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/util/kubeconfig"
"sigs.k8s.io/cluster-api/util/labels/format"
)
Expand Down Expand Up @@ -1312,7 +1312,8 @@ func TestReconcileMachinePoolMachines(t *testing.T) {
}

r := &MachinePoolReconciler{
Client: fake.NewClientBuilder().WithObjects(objs...).Build(),
Client: fake.NewClientBuilder().WithObjects(objs...).Build(),
ssaCache: ssa.NewCache(),
}

err := r.reconcileMachines(ctx, tc.machinepool, infraConfig)
Expand All @@ -1335,10 +1336,8 @@ func TestReconcileMachinePoolMachines(t *testing.T) {
g.Expect(machineList.Items).To(HaveLen(len(tc.infraMachines)))
for i := range machineList.Items {
machine := &machineList.Items[i]
infraMachine, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef, machine.Namespace)
_, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef, machine.Namespace)
g.Expect(err).ToNot(HaveOccurred())

g.Expect(util.IsControlledBy(infraMachine, machine)).To(BeTrue())
}
} else {
g.Expect(machineList.Items).To(BeEmpty())
Expand Down
7 changes: 6 additions & 1 deletion internal/controllers/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -797,13 +797,18 @@ func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
}

// Note: following checks are required because after restore from a backup both the Machine controller and the
// MachineSet/ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
// MachineSet, MachinePool, or ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529

// If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine.
if _, ok := m.Labels[clusterv1.MachineSetNameLabel]; ok {
return false
}

// If the Machine is originated by a MachinePool object, it should not be adopted directly by the Cluster as a stand-alone Machine.
if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok {
return false
}

// If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine.
if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok {
return false
Expand Down
6 changes: 6 additions & 0 deletions internal/webhooks/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ func (webhook *Machine) validate(oldM, newM *clusterv1.Machine) error {
}

func isMachinePoolMachine(m *clusterv1.Machine) bool {
if m.Labels != nil {
if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok {
return true
}
}

for _, owner := range m.OwnerReferences {
if owner.Kind == "MachinePool" {
return true
Expand Down
4 changes: 2 additions & 2 deletions test/framework/ownerreference_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ var CoreOwnerReferenceAssertion = map[string]func([]metav1.OwnerReference) error
return HasExactOwners(owners, machineDeploymentController)
},
machineKind: func(owners []metav1.OwnerReference) error {
// Machines must be owned and controlled by a MachineSet or a KubeadmControlPlane, depending on if this Machine is part of a ControlPlane or not.
return HasOneOfExactOwners(owners, []metav1.OwnerReference{machineSetController}, []metav1.OwnerReference{kubeadmControlPlaneController})
// Machines must be owned and controlled by a MachineSet, MachinePool, or a KubeadmControlPlane, depending on if this Machine is part of a Machine Deployment, MachinePool, or ControlPlane.
return HasOneOfExactOwners(owners, []metav1.OwnerReference{machineSetController}, []metav1.OwnerReference{machinePoolController}, []metav1.OwnerReference{kubeadmControlPlaneController})
},
machineHealthCheckKind: func(owners []metav1.OwnerReference) error {
// MachineHealthChecks must be owned by the Cluster.
Expand Down

0 comments on commit 43a5091

Please sign in to comment.