Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Fix bug where MachinePool Machine ownerRefs weren't updating #9619

Merged
merged 1 commit into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions exp/internal/controllers/machinepool_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"sigs.k8s.io/cluster-api/controllers/external"
"sigs.k8s.io/cluster-api/controllers/remote"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
Expand Down Expand Up @@ -69,6 +70,7 @@ type MachinePoolReconciler struct {
WatchFilterValue string

controller controller.Controller
ssaCache ssa.Cache
recorder record.EventRecorder
externalTracker external.ObjectTracker
}
Expand Down Expand Up @@ -105,6 +107,8 @@ func (r *MachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.M
Controller: c,
Cache: mgr.GetCache(),
}
r.ssaCache = ssa.NewCache()

return nil
}

Expand Down
102 changes: 35 additions & 67 deletions exp/internal/controllers/machinepool_controller_phases.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/storage/names"
"k8s.io/klog/v2"
Expand All @@ -43,6 +42,7 @@ import (
capierrors "sigs.k8s.io/cluster-api/errors"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
utilexp "sigs.k8s.io/cluster-api/exp/util"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
Expand Down Expand Up @@ -376,100 +376,63 @@ func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, mp *expv1
return err
}

updatedMachines, err := r.createMachinesIfNotExists(ctx, mp, machineList.Items, infraMachineList.Items)
if err != nil {
return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace)
}

if err := r.ensureInfraMachineOnwerRefs(ctx, updatedMachines, infraMachineList.Items); err != nil {
if err := r.createOrUpdateMachines(ctx, mp, machineList.Items, infraMachineList.Items); err != nil {
return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace)
}

return nil
}

// createMachinesIfNotExists creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef.
func (r *MachinePoolReconciler) createMachinesIfNotExists(ctx context.Context, mp *expv1.MachinePool, machines []clusterv1.Machine, infraMachines []unstructured.Unstructured) ([]clusterv1.Machine, error) {
// createOrUpdateMachines creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef.
func (r *MachinePoolReconciler) createOrUpdateMachines(ctx context.Context, mp *expv1.MachinePool, machines []clusterv1.Machine, infraMachines []unstructured.Unstructured) error {
log := ctrl.LoggerFrom(ctx)

// Construct a set of names of infraMachines that already have a Machine.
infraRefNames := sets.Set[string]{}
infraMachineToMachine := map[string]clusterv1.Machine{}
for _, machine := range machines {
infraRef := machine.Spec.InfrastructureRef
infraRefNames.Insert(infraRef.Name)
infraMachineToMachine[infraRef.Name] = machine
}

createdMachines := []clusterv1.Machine{}
var errs []error
for i := range infraMachines {
infraMachine := &infraMachines[i]
// If infraMachine already has a Machine, skip it.
if infraRefNames.Has(infraMachine.GetName()) {
log.V(4).Info("Machine already exists for infraMachine", "infraMachine", klog.KObj(infraMachine))
continue
}
// Otherwise create a new Machine for the infraMachine.
log.Info("Creating new Machine for infraMachine", infraMachine.GroupVersionKind().Kind, klog.KObj(infraMachine))
machine := getNewMachine(mp, infraMachine)
if err := r.Client.Create(ctx, machine); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to create new Machine for infraMachine %q in namespace %q", infraMachine.GetName(), infraMachine.GetNamespace()))
continue
// If infraMachine already has a Machine, update it if needed.
if existingMachine, ok := infraMachineToMachine[infraMachine.GetName()]; ok {
log.V(2).Info("Patching existing Machine for infraMachine", "infraMachine", klog.KObj(infraMachine), "machine", klog.KObj(&existingMachine))

desiredMachine := computeDesiredMachine(mp, infraMachine, &existingMachine)
if err := ssa.Patch(ctx, r.Client, MachinePoolControllerName, desiredMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: &existingMachine}); err != nil {
log.Error(err, "failed to update Machine", "Machine", klog.KObj(desiredMachine))
errs = append(errs, errors.Wrapf(err, "failed to update Machine %q", klog.KObj(desiredMachine)))
}
} else {
// Otherwise create a new Machine for the infraMachine.
log.Info("Creating new Machine for infraMachine", "infraMachine", klog.KObj(infraMachine))
machine := computeDesiredMachine(mp, infraMachine, nil)

if err := r.Client.Create(ctx, machine); err != nil {
Jont828 marked this conversation as resolved.
Show resolved Hide resolved
errs = append(errs, errors.Wrapf(err, "failed to create new Machine for infraMachine %q in namespace %q", infraMachine.GetName(), infraMachine.GetNamespace()))
continue
}

createdMachines = append(createdMachines, *machine)
}
createdMachines = append(createdMachines, *machine)
}
machines = append(machines, createdMachines...)
if err := r.waitForMachineCreation(ctx, createdMachines); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to wait for machines to be created"))
}
if len(errs) > 0 {
return nil, kerrors.NewAggregate(errs)
}

return machines, nil
}

// ensureInfraMachineOnwerRefs sets the ownerReferences on the each infraMachine to its associated MachinePool Machine if it hasn't already been done.
func (r *MachinePoolReconciler) ensureInfraMachineOnwerRefs(ctx context.Context, updatedMachines []clusterv1.Machine, infraMachines []unstructured.Unstructured) error {
Jont828 marked this conversation as resolved.
Show resolved Hide resolved
log := ctrl.LoggerFrom(ctx)
infraMachineNameToMachine := make(map[string]clusterv1.Machine)
for _, machine := range updatedMachines {
infraRef := machine.Spec.InfrastructureRef
infraMachineNameToMachine[infraRef.Name] = machine
}

for i := range infraMachines {
infraMachine := &infraMachines[i]
ownerRefs := infraMachine.GetOwnerReferences()

machine, ok := infraMachineNameToMachine[infraMachine.GetName()]
if !ok {
return errors.Errorf("failed to patch ownerRef for infraMachine %q because no Machine has an infraRef pointing to it", infraMachine.GetName())
}
machineRef := metav1.NewControllerRef(&machine, machine.GroupVersionKind())
if !util.HasOwnerRef(ownerRefs, *machineRef) {
log.V(2).Info("Setting ownerRef on infraMachine", "infraMachine", infraMachine.GetName(), "namespace", infraMachine.GetNamespace(), "machine", machine.GetName())

patchHelper, err := patch.NewHelper(infraMachine, r.Client)
if err != nil {
return errors.Wrapf(err, "failed to create patch helper for %s", klog.KObj(infraMachine))
}

ownerRefs = util.EnsureOwnerRef(ownerRefs, *machineRef)
infraMachine.SetOwnerReferences(ownerRefs)

if err := patchHelper.Patch(ctx, infraMachine); err != nil {
return errors.Wrapf(err, "failed to patch %s", klog.KObj(infraMachine))
}

log.V(4).Info("Successfully set ownerRef on infraMachine", "infraMachine", infraMachine.GetName(), "namespace", infraMachine.GetNamespace(), "machine", machine.GetName())
}
return kerrors.NewAggregate(errs)
}

return nil
}

// getNewMachine creates a new Machine object.
func getNewMachine(mp *expv1.MachinePool, infraMachine *unstructured.Unstructured) *clusterv1.Machine {
// computeDesiredMachine constructs the desired Machine for an infraMachine.
// If the Machine exists, it ensures the Machine always owned by the MachinePool.
func computeDesiredMachine(mp *expv1.MachinePool, infraMachine *unstructured.Unstructured, existingMachine *clusterv1.Machine) *clusterv1.Machine {
infraRef := corev1.ObjectReference{
APIVersion: infraMachine.GetAPIVersion(),
Kind: infraMachine.GetKind(),
Expand All @@ -492,6 +455,11 @@ func getNewMachine(mp *expv1.MachinePool, infraMachine *unstructured.Unstructure
},
}

if existingMachine != nil {
machine.SetName(existingMachine.Name)
machine.SetUID(existingMachine.UID)
}

for k, v := range mp.Spec.Template.Annotations {
machine.Annotations[k] = v
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ import (
"sigs.k8s.io/cluster-api/controllers/remote"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/internal/test/builder"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/util/kubeconfig"
"sigs.k8s.io/cluster-api/util/labels/format"
)
Expand Down Expand Up @@ -1312,7 +1312,8 @@ func TestReconcileMachinePoolMachines(t *testing.T) {
}

r := &MachinePoolReconciler{
Client: fake.NewClientBuilder().WithObjects(objs...).Build(),
Client: fake.NewClientBuilder().WithObjects(objs...).Build(),
ssaCache: ssa.NewCache(),
}

err := r.reconcileMachines(ctx, tc.machinepool, infraConfig)
Expand All @@ -1335,10 +1336,8 @@ func TestReconcileMachinePoolMachines(t *testing.T) {
g.Expect(machineList.Items).To(HaveLen(len(tc.infraMachines)))
for i := range machineList.Items {
machine := &machineList.Items[i]
infraMachine, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef, machine.Namespace)
_, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef, machine.Namespace)
g.Expect(err).ToNot(HaveOccurred())

g.Expect(util.IsControlledBy(infraMachine, machine)).To(BeTrue())
Jont828 marked this conversation as resolved.
Show resolved Hide resolved
}
} else {
g.Expect(machineList.Items).To(BeEmpty())
Expand Down
7 changes: 6 additions & 1 deletion internal/controllers/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -797,13 +797,18 @@ func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
}

// Note: following checks are required because after restore from a backup both the Machine controller and the
// MachineSet/ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
// MachineSet, MachinePool, or ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529

// If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine.
if _, ok := m.Labels[clusterv1.MachineSetNameLabel]; ok {
return false
}

// If the Machine is originated by a MachinePool object, it should not be adopted directly by the Cluster as a stand-alone Machine.
if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok {
return false
}

// If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine.
if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok {
return false
Expand Down
6 changes: 6 additions & 0 deletions internal/webhooks/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ func (webhook *Machine) validate(oldM, newM *clusterv1.Machine) error {
}

func isMachinePoolMachine(m *clusterv1.Machine) bool {
if m.Labels != nil {
if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok {
return true
Jont828 marked this conversation as resolved.
Show resolved Hide resolved
}
}

for _, owner := range m.OwnerReferences {
if owner.Kind == "MachinePool" {
return true
Expand Down
4 changes: 2 additions & 2 deletions test/framework/ownerreference_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ var CoreOwnerReferenceAssertion = map[string]func([]metav1.OwnerReference) error
return HasExactOwners(owners, machineDeploymentController)
},
machineKind: func(owners []metav1.OwnerReference) error {
// Machines must be owned and controlled by a MachineSet or a KubeadmControlPlane, depending on if this Machine is part of a ControlPlane or not.
return HasOneOfExactOwners(owners, []metav1.OwnerReference{machineSetController}, []metav1.OwnerReference{kubeadmControlPlaneController})
// Machines must be owned and controlled by a MachineSet, MachinePool, or a KubeadmControlPlane, depending on if this Machine is part of a Machine Deployment, MachinePool, or ControlPlane.
return HasOneOfExactOwners(owners, []metav1.OwnerReference{machineSetController}, []metav1.OwnerReference{machinePoolController}, []metav1.OwnerReference{kubeadmControlPlaneController})
Jont828 marked this conversation as resolved.
Show resolved Hide resolved
},
machineHealthCheckKind: func(owners []metav1.OwnerReference) error {
// MachineHealthChecks must be owned by the Cluster.
Expand Down