✨ Add MachinePool workers support in ClusterClass #9016

Merged: 1 commit, merged on Aug 25, 2023
13 changes: 13 additions & 0 deletions api/v1beta1/condition_consts.go
@@ -299,6 +299,19 @@ const (
// not yet completed because the upgrade for at least one of the MachineDeployments has been deferred.
TopologyReconciledMachineDeploymentsUpgradeDeferredReason = "MachineDeploymentsUpgradeDeferred"

// TopologyReconciledMachinePoolsUpgradePendingReason (Severity=Info) documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachinePools is not yet updated to match the desired topology spec.
TopologyReconciledMachinePoolsUpgradePendingReason = "MachinePoolsUpgradePending"

// TopologyReconciledMachinePoolsCreatePendingReason (Severity=Info) documents reconciliation of a Cluster topology
// not yet completed because at least one of the MachinePools is yet to be created.
// This generally happens because new MachinePool creations are held off while the ControlPlane is not stable.
TopologyReconciledMachinePoolsCreatePendingReason = "MachinePoolsCreatePending"

// TopologyReconciledMachinePoolsUpgradeDeferredReason (Severity=Info) documents reconciliation of a Cluster topology
// not yet completed because the upgrade for at least one of the MachinePools has been deferred.
TopologyReconciledMachinePoolsUpgradeDeferredReason = "MachinePoolsUpgradeDeferred"

// TopologyReconciledHookBlockingReason (Severity=Info) documents reconciliation of a Cluster topology
// not yet completed because at least one of the lifecycle hooks is blocking.
TopologyReconciledHookBlockingReason = "LifecycleHookBlocking"
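
Downstream code can branch on these new reasons via the conditions util package. A minimal sketch, assuming an already-fetched Cluster object; this helper is illustrative and not part of the PR:

package example

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// hasDeferredMachinePoolUpgrade reports whether topology reconciliation is
// currently on hold because a MachinePool upgrade was deferred.
func hasDeferredMachinePoolUpgrade(cluster *clusterv1.Cluster) bool {
	return conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) ==
		clusterv1.TopologyReconciledMachinePoolsUpgradeDeferredReason
}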
11 changes: 11 additions & 0 deletions cmd/clusterctl/client/cluster/topology.go
@@ -802,6 +802,17 @@ func clusterClassUsesTemplate(cc *clusterv1.ClusterClass, templateRef *corev1.Ob
}
}

for _, mpClass := range cc.Spec.Workers.MachinePools {
// Check the bootstrap ref.
if equalRef(mpClass.Template.Bootstrap.Ref, templateRef) {
return true
}
// Check the infrastructure ref.
if equalRef(mpClass.Template.Infrastructure.Ref, templateRef) {
return true
}
}

return false
}
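
The equalRef helper is unexported and defined elsewhere in topology.go, so it is not visible in this diff. A rough sketch of the comparison it presumably performs; the real implementation may normalize fields differently (assumes corev1 is "k8s.io/api/core/v1"):

// equalRefSketch is a hypothetical stand-in for the unexported equalRef.
func equalRefSketch(a, b *corev1.ObjectReference) bool {
	if a == nil || b == nil {
		return a == b
	}
	return a.APIVersion == b.APIVersion &&
		a.Kind == b.Kind &&
		a.Namespace == b.Namespace &&
		a.Name == b.Name
}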

12 changes: 12 additions & 0 deletions config/rbac/role.yaml
@@ -187,6 +187,18 @@ rules:
- patch
- update
- watch
- apiGroups:
- cluster.x-k8s.io
resources:
- machinepools
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- cluster.x-k8s.io
resources:
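
This rule is not hand-written: config/rbac/role.yaml is generated by controller-gen, and the verbs above correspond exactly to the new kubebuilder RBAC marker added to the topology cluster controller later in this PR:

// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools,verbs=get;list;watch;create;update;patch;delete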
4 changes: 3 additions & 1 deletion controllers/alias.go
@@ -143,7 +143,8 @@ func (r *MachineHealthCheckReconciler) SetupWithManager(ctx context.Context, mgr

// ClusterTopologyReconciler reconciles a managed topology for a Cluster object.
type ClusterTopologyReconciler struct {
Client client.Client
Tracker *remote.ClusterCacheTracker
// APIReader is used to list MachineSets directly via the API server to avoid
// race conditions caused by an outdated cache.
APIReader client.Reader
@@ -162,6 +163,7 @@ func (r *ClusterTopologyReconciler) SetupWithManager(ctx context.Context, mgr ct
return (&clustertopologycontroller.Reconciler{
Client: r.Client,
APIReader: r.APIReader,
Tracker: r.Tracker,
RuntimeClient: r.RuntimeClient,
UnstructuredCachingClient: r.UnstructuredCachingClient,
WatchFilterValue: r.WatchFilterValue,
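
With the exported alias now exposing a Tracker field, callers registering this controller must supply a ClusterCacheTracker. A minimal wiring sketch; the setup shown is an assumption based on how other CAPI controllers are wired, not code from this PR, and the reconciler's other fields are omitted:

// Hypothetical snippet from a manager setup function.
tracker, err := remote.NewClusterCacheTracker(mgr, remote.ClusterCacheTrackerOptions{})
if err != nil {
	return errors.Wrap(err, "unable to create cluster cache tracker")
}
if err := (&controllers.ClusterTopologyReconciler{
	Client:    mgr.GetClient(),
	APIReader: mgr.GetAPIReader(),
	Tracker:   tracker,
	// RuntimeClient, UnstructuredCachingClient, etc. omitted for brevity.
}).SetupWithManager(ctx, mgr, controller.Options{}); err != nil {
	return errors.Wrap(err, "unable to create cluster topology controller")
}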
11 changes: 10 additions & 1 deletion internal/controllers/clusterclass/clusterclass_controller.go
@@ -169,11 +169,20 @@ func (r *Reconciler) reconcileExternalReferences(ctx context.Context, clusterCla
}
}

for _, mpClass := range clusterClass.Spec.Workers.MachinePools {
if mpClass.Template.Bootstrap.Ref != nil {
refs = append(refs, mpClass.Template.Bootstrap.Ref)
}
if mpClass.Template.Infrastructure.Ref != nil {
refs = append(refs, mpClass.Template.Infrastructure.Ref)
}
}

// Ensure all referenced objects are owned by the ClusterClass.
// Nb. Some external objects can be referenced multiple times in the ClusterClass,
// but we only want to set the owner reference once per unique external object.
// For example the same KubeadmConfigTemplate could be referenced in multiple MachineDeployment
// or MachinePool classes.
errs := []error{}
reconciledRefs := sets.Set[string]{}
outdatedRefs := map[*corev1.ObjectReference]*corev1.ObjectReference{}
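
The loop that consumes refs is collapsed in this view. Roughly, it follows the dedupe pattern the comment above describes; the key construction below is a guess at the real implementation:

for _, ref := range refs {
	// Build a unique key per external object (illustrative; the real code
	// may derive the key differently).
	key := ref.GroupVersionKind().String() + "/" + ref.Namespace + "/" + ref.Name
	if reconciledRefs.Has(key) {
		continue // owner reference already ensured for this object
	}
	reconciledRefs.Insert(key)
	// ... ensure the ClusterClass owner reference on the referenced template,
	// appending any failure to errs ...
}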
28 changes: 27 additions & 1 deletion internal/controllers/topology/cluster/blueprint.go
@@ -34,6 +34,7 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste
Topology: cluster.Spec.Topology,
ClusterClass: clusterClass,
MachineDeployments: map[string]*scope.MachineDeploymentBlueprint{},
MachinePools: map[string]*scope.MachinePoolBlueprint{},
}

var err error
@@ -82,7 +83,7 @@
// Get the bootstrap machine template.
machineDeploymentBlueprint.BootstrapTemplate, err = r.getReference(ctx, machineDeploymentClass.Template.Bootstrap.Ref)
if err != nil {
return nil, errors.Wrapf(err, "failed to get bootstrap machine template for %s, MachineDeployment class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machineDeploymentClass.Class)
return nil, errors.Wrapf(err, "failed to get bootstrap config template for %s, MachineDeployment class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machineDeploymentClass.Class)
}

// If the machineDeploymentClass defines a MachineHealthCheck add it to the blueprint.
@@ -92,5 +93,30 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste
blueprint.MachineDeployments[machineDeploymentClass.Class] = machineDeploymentBlueprint
}

// Loop over the machine pool classes in ClusterClass
// and fetch the related templates.
for _, machinePoolClass := range blueprint.ClusterClass.Spec.Workers.MachinePools {
machinePoolBlueprint := &scope.MachinePoolBlueprint{}

// Make sure to copy the metadata from the blueprint, which is later layered
// with the additional metadata defined in the Cluster's topology section
// for the MachinePool that is created or updated.
machinePoolClass.Template.Metadata.DeepCopyInto(&machinePoolBlueprint.Metadata)

// Get the InfrastructureMachinePoolTemplate.
machinePoolBlueprint.InfrastructureMachinePoolTemplate, err = r.getReference(ctx, machinePoolClass.Template.Infrastructure.Ref)
if err != nil {
return nil, errors.Wrapf(err, "failed to get InfrastructureMachinePoolTemplate for %s, MachinePool class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machinePoolClass.Class)
}

// Get the bootstrap config template.
machinePoolBlueprint.BootstrapTemplate, err = r.getReference(ctx, machinePoolClass.Template.Bootstrap.Ref)
if err != nil {
return nil, errors.Wrapf(err, "failed to get bootstrap config for %s, MachinePool class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machinePoolClass.Class)
}

blueprint.MachinePools[machinePoolClass.Class] = machinePoolBlueprint
}

return blueprint, nil
}
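
The scope.MachinePoolBlueprint type is added elsewhere in this PR and is not visible here. Inferred from its usage above, and assuming getReference returns *unstructured.Unstructured as it does for the MachineDeployment blueprint fields, it presumably carries at least:

type MachinePoolBlueprint struct {
	// Metadata holds the template metadata from the MachinePool class.
	Metadata clusterv1.ObjectMeta
	// BootstrapTemplate is the bootstrap config template referenced by the class.
	BootstrapTemplate *unstructured.Unstructured
	// InfrastructureMachinePoolTemplate is the referenced infrastructure machine pool template.
	InfrastructureMachinePoolTemplate *unstructured.Unstructured
}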
42 changes: 40 additions & 2 deletions internal/controllers/topology/cluster/cluster_controller.go
@@ -36,6 +36,8 @@ import (
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/api/v1beta1/index"
"sigs.k8s.io/cluster-api/controllers/external"
"sigs.k8s.io/cluster-api/controllers/remote"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog"
runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
"sigs.k8s.io/cluster-api/feature"
@@ -57,13 +59,15 @@
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusterclasses,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinehealthchecks,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;create;delete

// Reconciler reconciles a managed topology for a Cluster object.
type Reconciler struct {
Client client.Client
Tracker *remote.ClusterCacheTracker
// APIReader is used to list MachineSets directly via the API server to avoid
// race conditions caused by an outdated cache.
APIReader client.Reader
@@ -103,6 +107,12 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
// Only trigger Cluster reconciliation if the MachineDeployment is topology owned.
builder.WithPredicates(predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))),
).
Watches(
&expv1.MachinePool{},
handler.EnqueueRequestsFromMapFunc(r.machinePoolToCluster),
// Only trigger Cluster reconciliation if the MachinePool is topology owned.
builder.WithPredicates(predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))),
).
WithOptions(options).
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
Build(r)
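
Both worker watches are filtered by predicates.ResourceIsTopologyOwned so that only topology-managed objects trigger a Cluster reconcile. The check roughly amounts to a label lookup like the sketch below; the real predicate in sigs.k8s.io/cluster-api/util/predicates also handles logging and the different event types:

// isTopologyOwned is a simplified sketch of the predicate's core check.
func isTopologyOwned(o client.Object) bool {
	_, ok := o.GetLabels()[clusterv1.ClusterTopologyOwnedLabel]
	return ok
}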
@@ -193,7 +203,16 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
}

// Handle normal reconciliation loop.
result, err := r.reconcile(ctx, s)
if err != nil {
// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
// the current cluster because of concurrent access.
if errors.Is(err, remote.ErrClusterLocked) {
log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
return ctrl.Result{Requeue: true}, nil
}
}
return result, err
}

// reconcile handles cluster reconciliation.
@@ -360,6 +379,25 @@ func (r *Reconciler) machineDeploymentToCluster(_ context.Context, o client.Obje
}}
}

// machinePoolToCluster is a handler.ToRequestsFunc to be used to enqueue requests for reconciliation
// for Cluster to update when one of its own MachinePools gets updated.
func (r *Reconciler) machinePoolToCluster(_ context.Context, o client.Object) []ctrl.Request {
mp, ok := o.(*expv1.MachinePool)
if !ok {
panic(fmt.Sprintf("Expected a MachinePool but got a %T", o))
}
if mp.Spec.ClusterName == "" {
return nil
}

return []ctrl.Request{{
NamespacedName: types.NamespacedName{
Namespace: mp.Namespace,
Name: mp.Spec.ClusterName,
},
}}
}

func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) {
// Call the BeforeClusterDelete hook if the 'ok-to-delete' annotation is not set
// and add the annotation to the cluster after receiving a successful non-blocking response.
17 changes: 17 additions & 0 deletions internal/controllers/topology/cluster/conditions.go
@@ -143,6 +143,23 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason
case s.UpgradeTracker.MachinePools.IsAnyPendingUpgrade():
fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s on hold.",
computeNameList(s.UpgradeTracker.MachinePools.PendingUpgradeNames()),
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason
case s.UpgradeTracker.MachinePools.IsAnyPendingCreate():
fmt.Fprintf(msgBuilder, "MachinePool(s) for Topologies %s creation on hold.",
computeNameList(s.UpgradeTracker.MachinePools.PendingCreateTopologyNames()),
)
reason = clusterv1.TopologyReconciledMachinePoolsCreatePendingReason
case s.UpgradeTracker.MachinePools.DeferredUpgrade():
fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s deferred.",
computeNameList(s.UpgradeTracker.MachinePools.DeferredUpgradeNames()),
s.Blueprint.Topology.Version,
)
reason = clusterv1.TopologyReconciledMachinePoolsUpgradeDeferredReason
}

switch {