Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⚠️ in-place propagation from MS to Machines #8111

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 131 additions & 67 deletions internal/controllers/machineset/machineset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apiserver/pkg/storage/names"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -42,6 +43,7 @@ import (
"sigs.k8s.io/cluster-api/controllers/remote"
"sigs.k8s.io/cluster-api/internal/controllers/machine"
capilabels "sigs.k8s.io/cluster-api/internal/labels"
"sigs.k8s.io/cluster-api/internal/util/ssa"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/collections"
Expand All @@ -64,6 +66,8 @@ var (
stateConfirmationInterval = 100 * time.Millisecond
)

const machineSetManagerName = "capi-machineset"

// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -290,39 +294,6 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
filteredMachines = append(filteredMachines, machine)
}

// If not already present, add a label specifying the MachineSet name to Machines.
// Ensure all required labels exist on the controlled Machines.
// This logic is needed to add the `cluster.x-k8s.io/set-name` label to Machines
// which were created before the `cluster.x-k8s.io/set-name` label was added to
// all Machines created by a MachineSet or if a user manually removed the label.
for _, machine := range filteredMachines {
mdNameOnMachineSet, mdNameSetOnMachineSet := machineSet.Labels[clusterv1.MachineDeploymentNameLabel]
mdNameOnMachine := machine.Labels[clusterv1.MachineDeploymentNameLabel]

// Note: MustEqualValue is used here as the value of this label will be a hash if the MachineSet name is longer than 63 characters.
if msNameLabelValue, ok := machine.Labels[clusterv1.MachineSetNameLabel]; ok && capilabels.MustEqualValue(machineSet.Name, msNameLabelValue) &&
(!mdNameSetOnMachineSet || mdNameOnMachineSet == mdNameOnMachine) {
// Continue if the MachineSet name label is already set correctly and
// either the MachineDeployment name label is not set on the MachineSet or
// the MachineDeployment name label is set correctly on the Machine.
continue
}

helper, err := patch.NewHelper(machine, r.Client)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to Machine %q", clusterv1.MachineSetNameLabel, machine.Name)
}
// Note: MustFormatValue is used here as the value of this label will be a hash if the MachineSet name is longer than 63 characters.
machine.Labels[clusterv1.MachineSetNameLabel] = capilabels.MustFormatValue(machineSet.Name)
// Propagate the MachineDeploymentNameLabel from MachineSet to Machine if it is set on the MachineSet.
if mdNameSetOnMachineSet {
machine.Labels[clusterv1.MachineDeploymentNameLabel] = mdNameOnMachineSet
}
if err := helper.Patch(ctx, machine); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to apply %s label to Machine %q", clusterv1.MachineSetNameLabel, machine.Name)
}
}

// Remediate failed Machines by deleting them.
var errs []error
for _, machine := range filteredMachines {
Expand Down Expand Up @@ -352,6 +323,10 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
return ctrl.Result{}, errors.Wrap(err, "failed to remediate machines")
}

if err := r.syncMachines(ctx, machineSet, filteredMachines); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to update Machines")
}

syncErr := r.syncReplicas(ctx, machineSet, filteredMachines)

// Always updates status as machines come up or die.
Expand Down Expand Up @@ -389,6 +364,43 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
return ctrl.Result{}, nil
}

// syncMachines updates Machines to propagate in-place mutable fields from the MachineSet.
// Note: It also cleans up managed fields of all Machines so that Machines that were created/patched before (< v1.4.0)
// the controller adopted Server-Side-Apply (SSA) can also work with SSA. Otherwise fields would be co-owned by
// our "old" "manager" and "capi-machineset" and then we would not be able to e.g. drop labels and annotations.
// TODO: update the labels and annotations to the corresponding infra machines and the boostrap configs of the filtered machines.
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
func (r *Reconciler) syncMachines(ctx context.Context, machineSet *clusterv1.MachineSet, machines []*clusterv1.Machine) error {
log := ctrl.LoggerFrom(ctx)
for i := range machines {
m := machines[i]
// If the machine is already being deleted, we don't need to update it.
if !m.DeletionTimestamp.IsZero() {
continue
}

// Cleanup managed fields of all Machines.
// We do this so that Machines that were created/patched before the controller adopted Server-Side-Apply (SSA)
// (< v1.4.0) can also work with SSA. Otherwise, fields would be co-owned by our "old" "manager" and
// "capi-machineset" and then we would not be able to e.g. drop labels and annotations.
if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, m, machineSetManagerName, r.Client); err != nil {
return errors.Wrapf(err, "failed to update machine: failed to adjust the managedFields of the Machine %q", m.Name)
}

// Update Machine to propagate in-place mutable fields from the MachineSet.
updatedMachine := r.computeDesiredMachine(machineSet, m)
patchOptions := []client.PatchOption{
client.ForceOwnership,
client.FieldOwner(machineSetManagerName),
}
if err := r.Client.Patch(ctx, updatedMachine, client.Apply, patchOptions...); err != nil {
log.Error(err, "failed to update Machine", "Machine", klog.KObj(updatedMachine))
ykakarap marked this conversation as resolved.
Show resolved Hide resolved
return errors.Wrapf(err, "failed to update Machine %q", klog.KObj(updatedMachine))
}
machines[i] = updatedMachine
}
return nil
}

// syncReplicas scales Machine resources up or down.
func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine) error {
log := ctrl.LoggerFrom(ctx)
Expand All @@ -414,18 +426,18 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
for i := 0; i < diff; i++ {
// Create a new logger so the global logger is not modified.
log := log
machine := r.getNewMachine(ms)

machine := r.computeDesiredMachine(ms, nil)
// Clone and set the infrastructure and bootstrap references.
var (
infraRef, bootstrapRef *corev1.ObjectReference
err error
)

if machine.Spec.Bootstrap.ConfigRef != nil {
// Create the BootstrapConfig if necessary.
if ms.Spec.Template.Spec.Bootstrap.ConfigRef != nil {
bootstrapRef, err = external.CreateFromTemplate(ctx, &external.CreateFromTemplateInput{
Client: r.Client,
TemplateRef: machine.Spec.Bootstrap.ConfigRef,
TemplateRef: ms.Spec.Template.Spec.Bootstrap.ConfigRef,
Namespace: machine.Namespace,
ClusterName: machine.Spec.ClusterName,
Labels: machine.Labels,
Expand All @@ -439,15 +451,18 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
})
if err != nil {
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.BootstrapTemplateCloningFailedReason, clusterv1.ConditionSeverityError, err.Error())
return errors.Wrapf(err, "failed to clone bootstrap configuration from %s %s while creating a machine", machine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(machine.Spec.Bootstrap.ConfigRef.Namespace, machine.Spec.Bootstrap.ConfigRef.Name))
return errors.Wrapf(err, "failed to clone bootstrap configuration from %s %s while creating a machine",
ms.Spec.Template.Spec.Bootstrap.ConfigRef.Kind,
klog.KRef(ms.Spec.Template.Spec.Bootstrap.ConfigRef.Namespace, ms.Spec.Template.Spec.Bootstrap.ConfigRef.Name))
}
machine.Spec.Bootstrap.ConfigRef = bootstrapRef
log = log.WithValues(bootstrapRef.Kind, klog.KRef(bootstrapRef.Namespace, bootstrapRef.Name))
}

// Create the InfraMachine.
infraRef, err = external.CreateFromTemplate(ctx, &external.CreateFromTemplateInput{
Client: r.Client,
TemplateRef: &machine.Spec.InfrastructureRef,
TemplateRef: &ms.Spec.Template.Spec.InfrastructureRef,
Namespace: machine.Namespace,
ClusterName: machine.Spec.ClusterName,
Labels: machine.Labels,
Expand All @@ -461,12 +476,19 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
})
if err != nil {
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.InfrastructureTemplateCloningFailedReason, clusterv1.ConditionSeverityError, err.Error())
return errors.Wrapf(err, "failed to clone infrastructure machine from %s %s while creating a machine", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Spec.InfrastructureRef.Namespace, machine.Spec.InfrastructureRef.Name))
return errors.Wrapf(err, "failed to clone infrastructure machine from %s %s while creating a machine",
ms.Spec.Template.Spec.InfrastructureRef.Kind,
klog.KRef(ms.Spec.Template.Spec.InfrastructureRef.Namespace, ms.Spec.Template.Spec.InfrastructureRef.Name))
}
log = log.WithValues(infraRef.Kind, klog.KRef(infraRef.Namespace, infraRef.Name))
machine.Spec.InfrastructureRef = *infraRef

if err := r.Client.Create(ctx, machine); err != nil {
// Create the Machine.
patchOptions := []client.PatchOption{
client.ForceOwnership,
client.FieldOwner(machineSetManagerName),
}
if err := r.Client.Patch(ctx, machine, client.Apply, patchOptions...); err != nil {
log.Error(err, "Error while creating a machine")
r.recorder.Eventf(ms, corev1.EventTypeWarning, "FailedCreate", "Failed to create machine: %v", err)
errs = append(errs, err)
Expand Down Expand Up @@ -529,45 +551,87 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
return nil
}

// getNewMachine creates a new Machine object. The name of the newly created resource is going
// to be created by the API server, we set the generateName field.
func (r *Reconciler) getNewMachine(machineSet *clusterv1.MachineSet) *clusterv1.Machine {
gv := clusterv1.GroupVersion
machine := &clusterv1.Machine{
// computeDesiredMachine computes the desired Machine.
// This Machine will be used during reconciliation to:
// * create a Machine
// * update an existing Machine
// Because we are using Server-Side-Apply we always have to calculate the full object.
// There are small differences in how we calculate the Machine depending on if it
// is a create or update. Example: for a new Machine we have to calculate a new name,
// while for an existing Machine we have to use the name of the existing Machine.
func (r *Reconciler) computeDesiredMachine(machineSet *clusterv1.MachineSet, existingMachine *clusterv1.Machine) *clusterv1.Machine {
desiredMachine := &clusterv1.Machine{
TypeMeta: metav1.TypeMeta{
APIVersion: clusterv1.GroupVersion.String(),
Kind: "Machine",
},
ObjectMeta: metav1.ObjectMeta{
GenerateName: fmt.Sprintf("%s-", machineSet.Name),
// Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
Name: names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-", machineSet.Name)),
Namespace: machineSet.Namespace,
// Note: By setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(machineSet, machineSetKind)},
Namespace: machineSet.Namespace,
Labels: make(map[string]string),
Annotations: machineSet.Spec.Template.Annotations,
},
TypeMeta: metav1.TypeMeta{
Kind: gv.WithKind("Machine").Kind,
APIVersion: gv.String(),
Labels: map[string]string{},
Annotations: map[string]string{},
},
Spec: machineSet.Spec.Template.Spec,
}
machine.Spec.ClusterName = machineSet.Spec.ClusterName

// Set the labels from machineSet.Spec.Template.Labels as labels for the new Machine.
Spec: *machineSet.Spec.Template.Spec.DeepCopy(),
}
// Set ClusterName.
desiredMachine.Spec.ClusterName = machineSet.Spec.ClusterName

// Clean up the refs to the incorrect objects.
// The InfrastructureRef and the Bootstrap.ConfigRef in Machine should point to the InfrastructureMachine
// and the BootstrapConfig objects. In the MachineSet these values point to InfrastructureMachineTemplate
// BootstrapConfigTemplate. Drop the values that were copied over from MachineSet during DeepCopy
// to make sure to not point to incorrect refs.
// Note: During Machine creation, these refs will be updated with the correct values after the corresponding
// objects are created.
desiredMachine.Spec.InfrastructureRef = corev1.ObjectReference{}
desiredMachine.Spec.Bootstrap.ConfigRef = nil

// If we are updating an existing Machine reuse the name, uid, infrastructureRef and bootstrap.configRef
// from the existingMachine.
// Note: we use UID to force SSA to update the existing Machine and to not accidentally create a new Machine.
// infrastructureRef and bootstrap.configRef remain the same for an existing Machine.
if existingMachine != nil {
desiredMachine.SetName(existingMachine.Name)
desiredMachine.SetUID(existingMachine.UID)
desiredMachine.Spec.Bootstrap.ConfigRef = existingMachine.Spec.Bootstrap.ConfigRef
desiredMachine.Spec.InfrastructureRef = existingMachine.Spec.InfrastructureRef
}

// Set the in-place mutable fields.
// When we create a new Machine we will just create the Machine with those fields.
// When we update an existing Machine will we update the fields on the existing Machine (in-place mutate).

// Set Labels
// Note: We can't just set `machineSet.Spec.Template.Labels` directly and thus "share" the labels
// map between Machine and machineSet.Spec.Template.Labels. This would mean that adding the
// MachineSetNameLabel and MachineDeploymentNameLabel later on the Machine would also add the labels
// to machineSet.Spec.Template.Labels and thus modify the labels of the MachineSet.
for k, v := range machineSet.Spec.Template.Labels {
machine.Labels[k] = v
desiredMachine.Labels[k] = v
}

// Enforce that the MachineSetNameLabel label is set
// Note: the MachineSetNameLabel is added by the default webhook to MachineSet.spec.template.labels if a spec.selector is empty.
machine.Labels[clusterv1.MachineSetNameLabel] = capilabels.MustFormatValue(machineSet.Name)
// Always set the MachineSetNameLabel.
// Note: If a client tries to create a MachineSet without a selector, the MachineSet webhook
// will add this label automatically. But we want this label to always be present even if the MachineSet
// has a selector which doesn't include it. Therefore, we have to set it here explicitly.
desiredMachine.Labels[clusterv1.MachineSetNameLabel] = capilabels.MustFormatValue(machineSet.Name)
// Propagate the MachineDeploymentNameLabel from MachineSet to Machine if it exists.
if mdName, ok := machineSet.Labels[clusterv1.MachineDeploymentNameLabel]; ok {
machine.Labels[clusterv1.MachineDeploymentNameLabel] = mdName
desiredMachine.Labels[clusterv1.MachineDeploymentNameLabel] = mdName
}

// Set Annotations
for k, v := range machineSet.Spec.Template.Annotations {
desiredMachine.Annotations[k] = v
}

return machine
// Set all other in-place mutable fields.
desiredMachine.Spec.NodeDrainTimeout = machineSet.Spec.Template.Spec.NodeDrainTimeout
desiredMachine.Spec.NodeDeletionTimeout = machineSet.Spec.Template.Spec.NodeDeletionTimeout
desiredMachine.Spec.NodeVolumeDetachTimeout = machineSet.Spec.Template.Spec.NodeVolumeDetachTimeout

return desiredMachine
}

// shouldExcludeMachine returns true if the machine should be filtered out, false otherwise.
Expand Down
Loading