From 17da6e31daf7de6205dd8077ec99ed28e8a56dc3 Mon Sep 17 00:00:00 2001
From: Seth Pellegrino
Date: Wed, 19 Feb 2020 14:03:18 -0800
Subject: [PATCH] :sparkles: KCP adopts existing machines

The KCP controller identifies Machines that belong to the control plane of an existing cluster and adopts them, including finding PKI materials that may be owned by the machine's bootstrap config and pivoting their ownership to the KCP as well.

Prior to adopting machines (a step which, if unsuccessful, will block the KCP from taking any management actions), it runs a number of safety checks, including:

- Ensuring the KCP has not been deleted (to prevent re-adoption of orphans, though this process races with the garbage collector)
- Checking that the machine's bootstrap provider was KubeadmConfig
- Verifying that the Machine is no more than one minor version away from the KCP's spec

Additionally, we set a "best guess" value for the kubeadm.controlplane.cluster.x-k8s.io/hash label on the adopted machine, as if it had been generated by a KCP in the past. The intent is that a KCP will adopt machines matching its "spec" (to the best of its ability) without modification, which in practice works well for adopting machines with the same spec'd version.

Co-authored-by: mnguyen
---
 controlplane/kubeadm/config/rbac/role.yaml | 1 +
 .../kubeadm/controllers/fakes_test.go | 4 +-
 .../kubeadm_control_plane_controller.go | 185 ++++++++++++--
 .../kubeadm_control_plane_controller_test.go | 198 +++++++++++++++
 controlplane/kubeadm/internal/cluster.go | 16 +-
 .../kubeadm/internal/cluster_labels.go | 16 +-
 controlplane/kubeadm/internal/cluster_test.go | 14 +-
 .../kubeadm/internal/machine_filters.go | 31 ++-
 test/framework/control_plane.go | 71 +-----
 test/framework/convenience.go | 11 +-
 test/framework/machines.go | 134 +++++++++++
 test/framework/management_cluster.go | 2 +-
 .../docker/e2e/custom_assertions.go | 37 +++
 .../docker/e2e/docker_suite_test.go | 4 +
 test/infrastructure/docker/e2e/docker_test.go | 226 +++++++++++++++++-
 util/util.go | 20 +-
 16 files changed, 854 insertions(+), 116 deletions(-)
 create mode 100644 test/framework/machines.go

diff --git a/controlplane/kubeadm/config/rbac/role.yaml b/controlplane/kubeadm/config/rbac/role.yaml
index 481c8a77e27f..59d4d19fdde2 100644
--- a/controlplane/kubeadm/config/rbac/role.yaml
+++ b/controlplane/kubeadm/config/rbac/role.yaml
@@ -61,6 +61,7 @@ rules: - get - list - patch + - update - watch ---
diff --git a/controlplane/kubeadm/controllers/fakes_test.go b/controlplane/kubeadm/controllers/fakes_test.go
index 8a6ff6f431a6..21d3b20077c7 100644
--- a/controlplane/kubeadm/controllers/fakes_test.go
+++ b/controlplane/kubeadm/controllers/fakes_test.go
@@ -45,14 +45,14 @@ func (f *fakeManagementCluster) GetMachinesForCluster(c context.Context, n clien return f.Machines, nil } -func (f *fakeManagementCluster) TargetClusterControlPlaneIsHealthy(_ context.Context, _ client.ObjectKey, _ string) error { +func (f *fakeManagementCluster) TargetClusterControlPlaneIsHealthy(_ context.Context, _ client.ObjectKey) error { if !f.ControlPlaneHealthy { return errors.New("control plane is not healthy") } return nil } -func (f *fakeManagementCluster) TargetClusterEtcdIsHealthy(_ context.Context, _ client.ObjectKey, _ string) error { +func (f *fakeManagementCluster) TargetClusterEtcdIsHealthy(_ context.Context, _ client.ObjectKey) error { if !f.EtcdHealthy { return errors.New("etcd is not healthy") }
diff --git a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go
b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go index 0780360ef70f..1f4aa3ca06a4 100644 --- a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go +++ b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go @@ -33,6 +33,7 @@ import ( kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apiserver/pkg/storage/names" "k8s.io/client-go/tools/record" + "k8s.io/utils/pointer" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -69,7 +70,7 @@ const ( ) // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch -// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;patch +// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch // +kubebuilder:rbac:groups=core,resources=configmaps,namespace=kube-system,verbs=get;list;watch;create // +kubebuilder:rbac:groups=rbac,resources=roles,namespace=kube-system,verbs=get;list;watch;create // +kubebuilder:rbac:groups=rbac,resources=rolebindings,namespace=kube-system,verbs=get;list;watch;create @@ -86,6 +87,8 @@ type KubeadmControlPlaneReconciler struct { recorder record.EventRecorder managementCluster internal.ManagementCluster + + uncachedClient client.Reader } func (r *KubeadmControlPlaneReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error { @@ -110,6 +113,7 @@ func (r *KubeadmControlPlaneReconciler) SetupWithManager(mgr ctrl.Manager, optio if r.managementCluster == nil { r.managementCluster = &internal.Management{Client: r.Client} } + r.uncachedClient = mgr.GetAPIReader() return nil } @@ -227,13 +231,25 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * return ctrl.Result{}, err } - // TODO: handle proper adoption of Machines - ownedMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster), internal.OwnedControlPlaneMachines(kcp.Name)) + controlPlaneMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster), internal.ControlPlaneMachines(cluster.Name)) if err != nil { logger.Error(err, "failed to retrieve control plane machines for cluster") return ctrl.Result{}, err } + adoptableMachines := controlPlaneMachines.Filter(internal.AdoptableControlPlaneMachines(cluster.Name)) + if len(adoptableMachines) > 0 { + // We adopt the Machines and then wait for the update event for the ownership reference to re-queue them so the cache is up-to-date + err = r.AdoptMachines(ctx, kcp, adoptableMachines) + return ctrl.Result{}, err + } + + ownedMachines := controlPlaneMachines.Filter(internal.OwnedControlPlaneMachines(kcp)) + if len(ownedMachines) != len(controlPlaneMachines) { + logger.Info("Not all control plane machines are owned by this KubeadmControlPlane, refusing to operate in mixed management mode") + return ctrl.Result{}, nil + } + now := metav1.Now() var requireUpgrade internal.FilterableMachineCollection if kcp.Spec.UpgradeAfter != nil && kcp.Spec.UpgradeAfter.Before(&now) { @@ -276,17 +292,12 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * } func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, cluster *clusterv1.Cluster) error { - labelSelector := internal.ControlPlaneSelectorForCluster(cluster.Name) - selector, err := metav1.LabelSelectorAsSelector(labelSelector) - if err != nil { - // Since we are building up the LabelSelector 
above, this should not fail - return errors.Wrap(err, "failed to parse label selector") - } + selector := internal.ControlPlaneSelectorForCluster(cluster.Name) // Copy label selector to its status counterpart in string format. // This is necessary for CRDs including scale subresources. kcp.Status.Selector = selector.String() - ownedMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster), internal.OwnedControlPlaneMachines(kcp.Name)) + ownedMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster), internal.OwnedControlPlaneMachines(kcp)) if err != nil { return errors.Wrap(err, "failed to get list of owned machines") } @@ -444,13 +455,13 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, machines internal.FilterableMachineCollection) (ctrl.Result, error) { logger := r.Log.WithValues("namespace", kcp.Namespace, "kubeadmControlPlane", kcp.Name, "cluster", cluster.Name) - if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil { + if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster)); err != nil { logger.Error(err, "waiting for control plane to pass control plane health check before adding an additional control plane machine") r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass control plane health check before adding additional control plane machine: %v", err) return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: HealthCheckFailedRequeueAfter} } - if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil { + if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster)); err != nil { logger.Error(err, "waiting for control plane to pass etcd health check before adding an additional control plane machine") r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass etcd health check before adding additional control plane machine: %v", err) return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: HealthCheckFailedRequeueAfter} @@ -511,7 +522,7 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(ctx context.Contex if !internal.HasAnnotationKey(controlplanev1.ScaleDownEtcdMemberRemovedAnnotation)(machineToDelete) { // Ensure etcd is healthy prior to attempting to remove the member - if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil { + if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster)); err != nil { logger.Error(err, "waiting for control plane to pass etcd health check before removing a control plane machine") r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass etcd health check before removing a control plane machine: %v", err) return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: HealthCheckFailedRequeueAfter} @@ -526,7 +537,7 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(ctx context.Contex } if !internal.HasAnnotationKey(controlplanev1.ScaleDownConfigMapEntryRemovedAnnotation)(machineToDelete) { - if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, 
util.ObjectKey(cluster), kcp.Name); err != nil { + if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster)); err != nil { logger.Error(err, "waiting for control plane to pass control plane health check before removing a control plane machine") r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass control plane health check before removing a control plane machine: %v", err) return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: HealthCheckFailedRequeueAfter} @@ -542,7 +553,7 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(ctx context.Contex } // Do a final health check of the Control Plane components prior to actually deleting the machine - if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster), kcp.Name); err != nil { + if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster)); err != nil { logger.Error(err, "waiting for control plane to pass control plane health check before removing a control plane machine") r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass control plane health check before removing a control plane machine: %v", err) return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: HealthCheckFailedRequeueAfter} @@ -724,7 +735,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, clu logger.Error(err, "failed to retrieve machines for cluster") return ctrl.Result{}, err } - ownedMachines := allMachines.Filter(internal.OwnedControlPlaneMachines(kcp.Name)) + ownedMachines := allMachines.Filter(internal.OwnedControlPlaneMachines(kcp)) // If no control plane machines remain, remove the finalizer if len(ownedMachines) == 0 { @@ -834,3 +845,145 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M return nil } + +func (r *KubeadmControlPlaneReconciler) AdoptMachines(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, machines internal.FilterableMachineCollection) error { + // We do an uncached full quorum read against the KCP to avoid re-adopting Machines the garbage collector just intentionally orphaned + // See https://github.com/kubernetes/kubernetes/issues/42639 + uncached := controlplanev1.KubeadmControlPlane{} + err := r.uncachedClient.Get(ctx, client.ObjectKey{Namespace: kcp.Namespace, Name: kcp.Name}, &uncached) + if err != nil { + return fmt.Errorf("can't recheck DeletionTimestamp: %v", err) + } + if !uncached.DeletionTimestamp.IsZero() { + return fmt.Errorf("%v/%v has just been deleted at %v", kcp.GetNamespace(), kcp.GetName(), kcp.GetDeletionTimestamp()) + } + + kcpVersion, err := semver.ParseTolerant(kcp.Spec.Version) + if err != nil { + return errors.Wrapf(err, "failed to parse kubernetes version %q", kcp.Spec.Version) + } + + for _, m := range machines { + ref := m.Spec.Bootstrap.ConfigRef + + // TODO instead of returning error here, we should instead Event and add a watch on potentially adoptable Machines + if ref == nil || ref.Kind != "KubeadmConfig" { + return fmt.Errorf("unable to adopt Machine %v/%v: expected a ConfigRef of kind KubeadmConfig but instead found %v", m.Namespace, m.Name, ref) + } + + // TODO instead of returning error here, we should instead Event and add a watch on potentially adoptable Machines + if ref.Namespace != "" && ref.Namespace != kcp.Namespace { + return fmt.Errorf("could not adopt resources from KubeadmConfig %v/%v: 
cannot adopt across namespaces", ref.Namespace, ref.Name) + } + + if m.Spec.Version == nil { + // if the machine's version is not immediately apparent, assume the operator knows what they're doing + continue + } + + machineVersion, err := semver.ParseTolerant(*m.Spec.Version) + if err != nil { + return errors.Wrapf(err, "failed to parse kubernetes version %q", *m.Spec.Version) + } + + dist := func(a, b uint64) uint64 { + if a > b { + return a - b + } + return b - a + } + if kcpVersion.Major != machineVersion.Major || dist(kcpVersion.Minor, machineVersion.Minor) > 1 { + r.recorder.Eventf(kcp, corev1.EventTypeWarning, "AdoptionFailed", "Could not adopt Machine %s/%s: its version (%q) is outside supported +/- one minor version skew from KCP's (%q)", m.Namespace, m.Name, *m.Spec.Version, kcp.Spec.Version) + // avoid returning an error here so we don't cause the KCP controller to spin until the operator clarifies their intent + return nil + } + } + + for _, m := range machines { + ref := m.Spec.Bootstrap.ConfigRef + obj := &bootstrapv1.KubeadmConfig{} + err := r.Client.Get(ctx, client.ObjectKey{Name: ref.Name, Namespace: kcp.Namespace}, obj) + if err != nil { + return err + } + + err = r.AdoptOwnedSecrets(ctx, kcp, obj) + if err != nil { + return err + } + + patchHelper, err := patch.NewHelper(m, r.Client) + if err != nil { + return err + } + + m.SetOwnerReferences(util.EnsureOwnerRef(m.GetOwnerReferences(), metav1.OwnerReference{ + APIVersion: controlplanev1.GroupVersion.String(), + Kind: "KubeadmControlPlane", + Name: kcp.Name, + UID: kcp.UID, + Controller: pointer.BoolPtr(true), + BlockOwnerDeletion: pointer.BoolPtr(true), + })) + + // 0. get machine.Spec.Version - the easy answer + machineKubernetesVersion := "" + if m.Spec.Version != nil { + machineKubernetesVersion = *m.Spec.Version + } + + // 1. hash the version (kubernetes version) and kubeadm_controlplane's Spec.infrastructureTemplate + newSpec := controlplanev1.KubeadmControlPlaneSpec{ + Version: machineKubernetesVersion, + InfrastructureTemplate: kcp.Spec.InfrastructureTemplate, + } + newConfigurationHash := hash.Compute(&newSpec) + // 2. add kubeadm.controlplane.cluster.x-k8s.io/hash as a label in each machine + m.Labels["kubeadm.controlplane.cluster.x-k8s.io/hash"] = newConfigurationHash + + // Note that ValidateOwnerReferences() will reject this patch if another + // OwnerReference exists with controller=true. 
+ if err := patchHelper.Patch(ctx, m); err != nil { + return err + } + } + return nil +} + +func (r *KubeadmControlPlaneReconciler) AdoptOwnedSecrets(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, currentOwner metav1.Object) error { + secrets := corev1.SecretList{} + err := r.Client.List(ctx, &secrets, client.InNamespace(kcp.Namespace)) + + if err != nil { + return errors.Wrap(err, "error finding secrets for adoption") + } + + for _, s := range secrets.Items { + if !util.PointsTo(s.GetOwnerReferences(), currentOwner) { + continue + } + // avoid taking ownership of the bootstrap data secret + if s.Name == currentOwner.GetName() { + continue + } + + ss := corev1.Secret{} + s.DeepCopyInto(&ss) + + ss.SetOwnerReferences(util.ReconcileOwnerRef(ss.GetOwnerReferences(), metav1.OwnerReference{ + APIVersion: controlplanev1.GroupVersion.String(), + Kind: "KubeadmControlPlane", + Name: kcp.Name, + UID: kcp.UID, + Controller: pointer.BoolPtr(true), + BlockOwnerDeletion: pointer.BoolPtr(true), + }, currentOwner)) + + err := r.Client.Update(ctx, &ss) + if err != nil { + return errors.Wrapf(err, "error changing secret %v ownership from KubeadmConfig/%v to KubeadmControlPlane/%v", s.Name, currentOwner.GetName(), kcp.Name) + } + } + + return nil +} diff --git a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go index f9ce865c1a55..d687d9235369 100644 --- a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go +++ b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go @@ -464,6 +464,204 @@ func TestReconcileClusterNoEndpoints(t *testing.T) { g.Expect(machineList.Items).To(BeEmpty()) } +func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { + t.Run("adopts existing Machines", func(t *testing.T) { + g := NewWithT(t) + + cluster, kcp, tmpl := createClusterWithControlPlane() + cluster.Spec.ControlPlaneEndpoint.Host = "bar" + kcp.Spec.Version = "v2.0.0" + + fmc := &fakeManagementCluster{ + Machines: internal.FilterableMachineCollection{}, + ControlPlaneHealthy: true, + EtcdHealthy: true, + } + objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} + for i := 0; i < 3; i++ { + name := fmt.Sprintf("test-%d", i) + m := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: name, + Labels: internal.ControlPlaneLabelsForCluster(cluster.Name), + }, + Spec: clusterv1.MachineSpec{ + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: bootstrapv1.GroupVersion.String(), + Kind: "KubeadmConfig", + Name: name, + }, + }, + Version: utilpointer.StringPtr("v2.0.0"), + }, + } + cfg := &bootstrapv1.KubeadmConfig{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: name, + }, + } + objs = append(objs, m, cfg) + fmc.Machines.Insert(m) + } + + fakeClient := newFakeClient(g, objs...) 
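// Aside: the adoption path above stamps a controller reference (controller=true,
// blockOwnerDeletion=true) onto each adopted Machine and each re-parented Secret.
// A minimal, self-contained sketch of that reference built with metav1.NewControllerRef
// instead of a hand-assembled struct; the names and UID below are illustrative, not
// values from this patch.
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
)

func main() {
	// Stand-in for the KubeadmControlPlane doing the adopting.
	kcp := metav1.ObjectMeta{Name: "my-control-plane", Namespace: "default", UID: types.UID("1234")}

	gvk := schema.GroupVersionKind{
		Group:   "controlplane.cluster.x-k8s.io",
		Version: "v1alpha3",
		Kind:    "KubeadmControlPlane",
	}

	// NewControllerRef sets Controller and BlockOwnerDeletion to true, which is
	// what AdoptMachines and AdoptOwnedSecrets build by hand above.
	ref := metav1.NewControllerRef(&kcp, gvk)
	fmt.Printf("%s %s/%s controller=%t\n", ref.APIVersion, ref.Kind, ref.Name, *ref.Controller)
}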
+ + log.SetLogger(klogr.New()) + r := &KubeadmControlPlaneReconciler{ + Client: fakeClient, + Log: log.Log, + managementCluster: fmc, + + uncachedClient: fakeClient, + } + + g.Expect(r.reconcile(context.Background(), cluster, kcp)).To(Equal(ctrl.Result{})) + + machineList := &clusterv1.MachineList{} + g.Expect(fakeClient.List(context.Background(), machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) + g.Expect(machineList.Items).NotTo(BeEmpty()) + g.Expect(machineList.Items).To(HaveLen(3)) + for _, machine := range machineList.Items { + g.Expect(machine.OwnerReferences).To(HaveLen(1)) + g.Expect(machine.OwnerReferences).To(ContainElement(*metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")))) + g.Expect(machine.Labels).To(Equal(internal.ControlPlaneLabelsForClusterWithHash(cluster.Name, hash.Compute(&kcp.Spec)))) + } + }) + + t.Run("Deleted KubeadmControlPlanes don't adopt machines", func(t *testing.T) { + // Usually we won't get into the inner reconcile with a deleted control plane, but it's possible when deleting with "oprhanDependents": + // 1. The deletion timestamp is set in the API server, but our cache has not yet updated + // 2. The garbage collector removes our ownership reference from a Machine, triggering a re-reconcile (or we get unlucky with the periodic reconciliation) + // 3. We get into the inner reconcile function and re-adopt the Machine + // 4. The update to our cache for our deletion timestamp arrives + g := NewWithT(t) + + cluster, kcp, tmpl := createClusterWithControlPlane() + cluster.Spec.ControlPlaneEndpoint.Host = "foo" + kcp.Spec.Version = "v2.0.0" + + now := metav1.Now() + kcp.DeletionTimestamp = &now + + fmc := &fakeManagementCluster{ + Machines: internal.FilterableMachineCollection{}, + ControlPlaneHealthy: true, + EtcdHealthy: true, + } + objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} + for i := 0; i < 3; i++ { + name := fmt.Sprintf("test-%d", i) + m := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: name, + Labels: internal.ControlPlaneLabelsForCluster(cluster.Name), + }, + Spec: clusterv1.MachineSpec{ + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: bootstrapv1.GroupVersion.String(), + Kind: "KubeadmConfig", + Name: name, + }, + }, + Version: utilpointer.StringPtr("v2.0.0"), + }, + } + cfg := &bootstrapv1.KubeadmConfig{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: name, + }, + } + objs = append(objs, m, cfg) + fmc.Machines.Insert(m) + } + fakeClient := newFakeClient(g, objs...) 
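// Aside: the surrounding test exercises the race that AdoptMachines guards against:
// the controller's cache may not yet show a deletion timestamp when the garbage
// collector orphans a Machine, so adoption first does a live (uncached) read.
// A dependency-free sketch of that guard; the reader interface and function names
// are illustrative stand-ins for the manager's APIReader, not part of this patch.
package main

import (
	"context"
	"fmt"
	"time"
)

// liveReader stands in for an uncached client that reads straight from the API
// server (a full quorum read) rather than from a possibly stale watch cache.
type liveReader interface {
	GetDeletionTimestamp(ctx context.Context, namespace, name string) (*time.Time, error)
}

// guardAdoption refuses to adopt anything once a live read shows the owner has
// been marked for deletion, even if the local cache still disagrees.
func guardAdoption(ctx context.Context, r liveReader, namespace, name string) error {
	ts, err := r.GetDeletionTimestamp(ctx, namespace, name)
	if err != nil {
		return fmt.Errorf("can't recheck DeletionTimestamp: %w", err)
	}
	if ts != nil {
		return fmt.Errorf("%s/%s has just been deleted at %v", namespace, name, ts)
	}
	return nil
}

type fakeReader struct{ deletedAt *time.Time }

func (f fakeReader) GetDeletionTimestamp(context.Context, string, string) (*time.Time, error) {
	return f.deletedAt, nil
}

func main() {
	now := time.Now()
	fmt.Println(guardAdoption(context.Background(), fakeReader{}, "default", "kcp"))                // <nil>: safe to adopt
	fmt.Println(guardAdoption(context.Background(), fakeReader{deletedAt: &now}, "default", "kcp")) // error: skip adoption
}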
+ + log.SetLogger(klogr.New()) + r := &KubeadmControlPlaneReconciler{ + Client: fakeClient, + Log: log.Log, + managementCluster: fmc, + + uncachedClient: fakeClient, + } + + result, err := r.reconcile(context.Background(), cluster, kcp) + g.Expect(result).To(Equal(ctrl.Result{})) + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring("has just been deleted")) + + machineList := &clusterv1.MachineList{} + g.Expect(fakeClient.List(context.Background(), machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) + g.Expect(machineList.Items).NotTo(BeEmpty()) + g.Expect(machineList.Items).To(HaveLen(3)) + for _, machine := range machineList.Items { + g.Expect(machine.OwnerReferences).To(BeEmpty()) + } + }) + + t.Run("refuses to adopt Machines that are more than one version old", func(t *testing.T) { + g := NewWithT(t) + + cluster, kcp, tmpl := createClusterWithControlPlane() + cluster.Spec.ControlPlaneEndpoint.Host = "foo" + kcp.Spec.Version = "v1.17.0" + + fmc := &fakeManagementCluster{ + Machines: internal.FilterableMachineCollection{ + "test0": &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: cluster.Namespace, + Name: fmt.Sprintf("test0"), + Labels: internal.ControlPlaneLabelsForCluster(cluster.Name), + }, + Spec: clusterv1.MachineSpec{ + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: bootstrapv1.GroupVersion.String(), + Kind: "KubeadmConfig", + }, + }, + Version: utilpointer.StringPtr("v1.15.0"), + }, + }, + }, + ControlPlaneHealthy: true, + EtcdHealthy: true, + } + + fakeClient := newFakeClient(g, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy(), fmc.Machines["test0"].DeepCopy()) + + log.SetLogger(klogr.New()) + recorder := record.NewFakeRecorder(32) + r := &KubeadmControlPlaneReconciler{ + Client: fakeClient, + Log: log.Log, + managementCluster: fmc, + recorder: recorder, + + uncachedClient: fakeClient, + } + + g.Expect(r.reconcile(context.Background(), cluster, kcp)).To(Equal(ctrl.Result{})) + // Message: Warning AdoptionFailed Could not adopt Machine test/test0: its version ("v1.15.0") is outside supported +/- one minor version skew from KCP's ("v1.17.0") + g.Expect(recorder.Events).To(Receive(ContainSubstring("minor version"))) + + machineList := &clusterv1.MachineList{} + g.Expect(fakeClient.List(context.Background(), machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) + g.Expect(machineList.Items).NotTo(BeEmpty()) + g.Expect(machineList.Items).To(HaveLen(1)) + for _, machine := range machineList.Items { + g.Expect(machine.OwnerReferences).To(BeEmpty()) + } + }) +} + func TestReconcileInitializeControlPlane(t *testing.T) { g := NewWithT(t) diff --git a/controlplane/kubeadm/internal/cluster.go b/controlplane/kubeadm/internal/cluster.go index dee688081c71..c0edad4e9dce 100644 --- a/controlplane/kubeadm/internal/cluster.go +++ b/controlplane/kubeadm/internal/cluster.go @@ -37,8 +37,8 @@ import ( // ManagementCluster defines all behaviors necessary for something to function as a management cluster. 
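// Aside: the "refuses to adopt Machines that are more than one version old" test
// earlier in this file exercises the skew rule enforced by AdoptMachines: the
// Machine must share the KCP's major version and be at most one minor version
// away in either direction. A dependency-free sketch of that rule; the tiny
// parser below is only a stand-in for semver.ParseTolerant.
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseMajorMinor pulls major/minor out of strings like "v1.17.0" or "1.17".
func parseMajorMinor(v string) (uint64, uint64, error) {
	parts := strings.Split(strings.TrimPrefix(v, "v"), ".")
	if len(parts) < 2 {
		return 0, 0, fmt.Errorf("not a semver-ish version: %q", v)
	}
	major, err := strconv.ParseUint(parts[0], 10, 64)
	if err != nil {
		return 0, 0, err
	}
	minor, err := strconv.ParseUint(parts[1], 10, 64)
	if err != nil {
		return 0, 0, err
	}
	return major, minor, nil
}

// withinSkew mirrors the dist() check in AdoptMachines: same major version and
// minors no more than one apart, in either direction.
func withinSkew(kcpVersion, machineVersion string) (bool, error) {
	kMaj, kMin, err := parseMajorMinor(kcpVersion)
	if err != nil {
		return false, err
	}
	mMaj, mMin, err := parseMajorMinor(machineVersion)
	if err != nil {
		return false, err
	}
	dist := func(a, b uint64) uint64 {
		if a > b {
			return a - b
		}
		return b - a
	}
	return kMaj == mMaj && dist(kMin, mMin) <= 1, nil
}

func main() {
	fmt.Println(withinSkew("v1.17.0", "v1.16.3")) // true <nil>: adoptable
	fmt.Println(withinSkew("v1.17.0", "v1.15.0")) // false <nil>: the case the test rejects
}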
type ManagementCluster interface { GetMachinesForCluster(ctx context.Context, cluster client.ObjectKey, filters ...MachineFilter) (FilterableMachineCollection, error) - TargetClusterEtcdIsHealthy(ctx context.Context, clusterKey client.ObjectKey, controlPlaneName string) error - TargetClusterControlPlaneIsHealthy(ctx context.Context, clusterKey client.ObjectKey, controlPlaneName string) error + TargetClusterEtcdIsHealthy(ctx context.Context, clusterKey client.ObjectKey) error + TargetClusterControlPlaneIsHealthy(ctx context.Context, clusterKey client.ObjectKey) error GetWorkloadCluster(ctx context.Context, clusterKey client.ObjectKey) (WorkloadCluster, error) } @@ -118,7 +118,7 @@ type healthCheck func(context.Context) (HealthCheckResult, error) // HealthCheck will run a generic health check function and report any errors discovered. // In addition to the health check, it also ensures there is a 1;1 match between nodes and machines. -func (m *Management) healthCheck(ctx context.Context, check healthCheck, clusterKey client.ObjectKey, controlPlaneName string) error { +func (m *Management) healthCheck(ctx context.Context, check healthCheck, clusterKey client.ObjectKey) error { var errorList []error nodeChecks, err := check(ctx) if err != nil { @@ -134,7 +134,7 @@ func (m *Management) healthCheck(ctx context.Context, check healthCheck, cluster } // Make sure Cluster API is aware of all the nodes. - machines, err := m.GetMachinesForCluster(ctx, clusterKey, OwnedControlPlaneMachines(controlPlaneName)) + machines, err := m.GetMachinesForCluster(ctx, clusterKey, ControlPlaneMachines(clusterKey.Name)) if err != nil { return err } @@ -156,21 +156,21 @@ func (m *Management) healthCheck(ctx context.Context, check healthCheck, cluster } // TargetClusterControlPlaneIsHealthy checks every node for control plane health. -func (m *Management) TargetClusterControlPlaneIsHealthy(ctx context.Context, clusterKey client.ObjectKey, controlPlaneName string) error { +func (m *Management) TargetClusterControlPlaneIsHealthy(ctx context.Context, clusterKey client.ObjectKey) error { // TODO: add checks for expected taints/labels cluster, err := m.GetWorkloadCluster(ctx, clusterKey) if err != nil { return err } - return m.healthCheck(ctx, cluster.ControlPlaneIsHealthy, clusterKey, controlPlaneName) + return m.healthCheck(ctx, cluster.ControlPlaneIsHealthy, clusterKey) } // TargetClusterEtcdIsHealthy runs a series of checks over a target cluster's etcd cluster. // In addition, it verifies that there are the same number of etcd members as control plane Machines. -func (m *Management) TargetClusterEtcdIsHealthy(ctx context.Context, clusterKey client.ObjectKey, controlPlaneName string) error { +func (m *Management) TargetClusterEtcdIsHealthy(ctx context.Context, clusterKey client.ObjectKey) error { cluster, err := m.GetWorkloadCluster(ctx, clusterKey) if err != nil { return err } - return m.healthCheck(ctx, cluster.EtcdIsHealthy, clusterKey, controlPlaneName) + return m.healthCheck(ctx, cluster.EtcdIsHealthy, clusterKey) } diff --git a/controlplane/kubeadm/internal/cluster_labels.go b/controlplane/kubeadm/internal/cluster_labels.go index 165da0817c09..1a3ae8b5a6fa 100644 --- a/controlplane/kubeadm/internal/cluster_labels.go +++ b/controlplane/kubeadm/internal/cluster_labels.go @@ -17,7 +17,8 @@ limitations under the License. 
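// Aside: healthCheck above now cross-checks the per-node results from the workload
// cluster against every control plane Machine for the cluster (ControlPlaneMachines),
// not just the KCP-owned ones. A stdlib-only sketch of that node-to-Machine matching;
// the types are simplified stand-ins for clusterv1.Machine and HealthCheckResult.
package main

import (
	"errors"
	"fmt"
	"strings"
)

// nodeResults maps node name -> health error (nil means healthy), mirroring the
// HealthCheckResult returned by the workload cluster checks.
type nodeResults map[string]error

type machine struct {
	Name    string
	NodeRef string // empty if the Machine has no node yet
}

func crossCheck(results nodeResults, machines []machine) error {
	attached := map[string]bool{}
	for _, m := range machines {
		if m.NodeRef != "" {
			attached[m.NodeRef] = true
		}
	}
	var problems []string
	for node, err := range results {
		if err != nil {
			problems = append(problems, fmt.Sprintf("node %q failed its health check: %v", node, err))
		}
		if !attached[node] {
			problems = append(problems, fmt.Sprintf("node %q has no matching Machine", node))
		}
	}
	if len(problems) > 0 {
		return errors.New(strings.Join(problems, "; "))
	}
	return nil
}

func main() {
	results := nodeResults{"cp-0": nil, "cp-1": nil}
	machines := []machine{{Name: "m0", NodeRef: "cp-0"}} // cp-1 is unaccounted for
	fmt.Println(crossCheck(results, machines))
}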
package internal import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/selection" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3" ) @@ -39,8 +40,15 @@ func ControlPlaneLabelsForCluster(clusterName string) map[string]string { } // ControlPlaneSelectorForCluster returns the label selector necessary to get control plane machines for a given cluster. -func ControlPlaneSelectorForCluster(clusterName string) *metav1.LabelSelector { - return &metav1.LabelSelector{ - MatchLabels: ControlPlaneLabelsForCluster(clusterName), +func ControlPlaneSelectorForCluster(clusterName string) labels.Selector { + must := func(r *labels.Requirement, err error) *labels.Requirement { + if err != nil { + panic(err) + } + return r } + return labels.NewSelector().Add( + *must(labels.NewRequirement(clusterv1.ClusterLabelName, selection.Equals, []string{clusterName})), + *must(labels.NewRequirement(clusterv1.MachineControlPlaneLabelName, selection.Exists, []string{})), + ) } diff --git a/controlplane/kubeadm/internal/cluster_test.go b/controlplane/kubeadm/internal/cluster_test.go index d03dcbd84a74..0c6079881a76 100644 --- a/controlplane/kubeadm/internal/cluster_test.go +++ b/controlplane/kubeadm/internal/cluster_test.go @@ -163,8 +163,8 @@ func TestGetMachinesForCluster(t *testing.T) { t.Fatalf("expected 3 machines but found %d", len(machines)) } - // Test the OwnedControlPlaneMachines works - machines, err = m.GetMachinesForCluster(context.Background(), clusterKey, OwnedControlPlaneMachines("my-control-plane")) + // Test the ControlPlaneMachines works + machines, err = m.GetMachinesForCluster(context.Background(), clusterKey, ControlPlaneMachines("my-cluster")) if err != nil { t.Fatal(err) } @@ -176,7 +176,7 @@ func TestGetMachinesForCluster(t *testing.T) { nameFilter := func(cluster *clusterv1.Machine) bool { return cluster.Name == "first-machine" } - machines, err = m.GetMachinesForCluster(context.Background(), clusterKey, OwnedControlPlaneMachines("my-control-plane"), nameFilter) + machines, err = m.GetMachinesForCluster(context.Background(), clusterKey, ControlPlaneMachines("my-cluster"), nameFilter) if err != nil { t.Fatal(err) } @@ -290,8 +290,7 @@ func TestManagementCluster_healthCheck_NoError(t *testing.T) { "three": nil, }, nil }, - clusterKey: client.ObjectKey{Namespace: "default", Name: "cluster-name"}, - controlPlaneName: "control-plane-name", + clusterKey: client.ObjectKey{Namespace: "default", Name: "cluster-name"}, }, } for _, tt := range tests { @@ -300,7 +299,7 @@ func TestManagementCluster_healthCheck_NoError(t *testing.T) { m := &Management{ Client: &fakeClient{list: tt.machineList}, } - if err := m.healthCheck(ctx, tt.check, tt.clusterKey, tt.controlPlaneName); err != nil { + if err := m.healthCheck(ctx, tt.check, tt.clusterKey); err != nil { t.Fatal("did not expect an error?") } }) @@ -406,12 +405,11 @@ func TestManagementCluster_healthCheck_Errors(t *testing.T) { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() clusterKey := client.ObjectKey{Namespace: "default", Name: "cluster-name"} - controlPlaneName := "control-plane-name" m := &Management{ Client: &fakeClient{list: tt.machineList}, } - err := m.healthCheck(ctx, tt.check, clusterKey, controlPlaneName) + err := m.healthCheck(ctx, tt.check, clusterKey) if err == nil { t.Fatal("Expected an error") } diff --git a/controlplane/kubeadm/internal/machine_filters.go 
b/controlplane/kubeadm/internal/machine_filters.go index da9c299fd4ba..b43a650c5a4c 100644 --- a/controlplane/kubeadm/internal/machine_filters.go +++ b/controlplane/kubeadm/internal/machine_filters.go @@ -18,6 +18,7 @@ package internal import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3" @@ -83,8 +84,8 @@ func InFailureDomains(failureDomains ...*string) MachineFilter { } // OwnedControlPlaneMachines returns a MachineFilter function to find all owned control plane machines. -// Usage: managementCluster.GetMachinesForCluster(ctx, cluster, OwnedControlPlaneMachines(controlPlane.Name)) -func OwnedControlPlaneMachines(controlPlaneName string) MachineFilter { +// Usage: managementCluster.GetMachinesForCluster(ctx, cluster, OwnedControlPlaneMachines(controlPlane)) +func OwnedControlPlaneMachines(owner metav1.Object) func(machine *clusterv1.Machine) bool { return func(machine *clusterv1.Machine) bool { if machine == nil { return false @@ -93,10 +94,34 @@ func OwnedControlPlaneMachines(controlPlaneName string) MachineFilter { if controllerRef == nil { return false } - return controllerRef.Kind == "KubeadmControlPlane" && controllerRef.Name == controlPlaneName + return controllerRef.Kind == "KubeadmControlPlane" && controllerRef.Name == owner.GetName() && controllerRef.UID == owner.GetUID() } } +func ControlPlaneMachines(clusterName string) func(machine *clusterv1.Machine) bool { + selector := ControlPlaneSelectorForCluster(clusterName) + return func(machine *clusterv1.Machine) bool { + if machine == nil { + return false + } + return selector.Matches(labels.Set(machine.Labels)) + } +} + +// AdoptableControlPlaneMachines returns a MachineFilter function to find all un-controlled control plane machines. +// Usage: managementCluster.GetMachinesForCluster(ctx, cluster, AdoptableControlPlaneMachines(cluster.Name, controlPlane)) +func AdoptableControlPlaneMachines(clusterName string) func(machine *clusterv1.Machine) bool { + return And( + func(machine *clusterv1.Machine) bool { + return machine != nil + }, + ControlPlaneMachines(clusterName), + func(machine *clusterv1.Machine) bool { + return metav1.GetControllerOf(machine) == nil + }, + ) +} + // HasDeletionTimestamp is a MachineFilter to find all machines // that have a deletion timestamp. func HasDeletionTimestamp(machine *clusterv1.Machine) bool { diff --git a/test/framework/control_plane.go b/test/framework/control_plane.go index d355a817c6bb..c71bd8b99bef 100644 --- a/test/framework/control_plane.go +++ b/test/framework/control_plane.go @@ -22,10 +22,8 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" - "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" @@ -129,67 +127,6 @@ func CreateKubeadmControlPlane(ctx context.Context, input CreateKubeadmControlPl }, intervals...).Should(Succeed()) } -// CreateMachineDeploymentInput is the input for CreateMachineDeployment. -type CreateMachineDeploymentInput struct { - Creator Creator - MachineDeployment *clusterv1.MachineDeployment - BootstrapConfigTemplate runtime.Object - InfraMachineTemplate runtime.Object -} - -// CreateMachineDeployment creates the machine deployment and dependencies. 
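// Aside: ControlPlaneSelectorForCluster above was switched from a metav1.LabelSelector
// to a labels.Selector built from requirements, and ControlPlaneMachines /
// AdoptableControlPlaneMachines are just predicates layered on top of it. A small
// sketch of the same requirement-based selector plus predicate composition; the label
// keys mirror cluster-api's ClusterLabelName and MachineControlPlaneLabelName
// constants, but treat the literal strings (and everything else here) as illustrative.
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/selection"
)

// controlPlaneSelector: the cluster-name label must equal the cluster's name,
// and the control-plane label only has to exist.
func controlPlaneSelector(clusterName string) labels.Selector {
	must := func(r *labels.Requirement, err error) labels.Requirement {
		if err != nil {
			panic(err)
		}
		return *r
	}
	return labels.NewSelector().Add(
		must(labels.NewRequirement("cluster.x-k8s.io/cluster-name", selection.Equals, []string{clusterName})),
		// Exists (rather than Equals "") matters for adoption: hand-created control
		// plane machines may carry any value, e.g. "true", for this label.
		must(labels.NewRequirement("cluster.x-k8s.io/control-plane", selection.Exists, nil)),
	)
}

// and composes predicates the same way the machine filters above compose with And().
func and(preds ...func(map[string]string) bool) func(map[string]string) bool {
	return func(l map[string]string) bool {
		for _, p := range preds {
			if !p(l) {
				return false
			}
		}
		return true
	}
}

func main() {
	sel := controlPlaneSelector("my-cluster")
	adoptable := and(
		func(l map[string]string) bool { return sel.Matches(labels.Set(l)) },
		// Stand-in for "has no controller reference yet".
		func(l map[string]string) bool { _, owned := l["owned-for-demo"]; return !owned },
	)
	fmt.Println(adoptable(map[string]string{
		"cluster.x-k8s.io/cluster-name":  "my-cluster",
		"cluster.x-k8s.io/control-plane": "true",
	})) // true
}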
-func CreateMachineDeployment(ctx context.Context, input CreateMachineDeploymentInput) { - By("creating a core MachineDeployment resource") - Expect(input.Creator.Create(ctx, input.MachineDeployment)).To(Succeed()) - - By("creating a BootstrapConfigTemplate resource") - Expect(input.Creator.Create(ctx, input.BootstrapConfigTemplate)).To(Succeed()) - - By("creating an InfrastructureMachineTemplate resource") - Expect(input.Creator.Create(ctx, input.InfraMachineTemplate)).To(Succeed()) -} - -// WaitForMachineDeploymentNodesToExistInput is the input for WaitForMachineDeploymentNodesToExist. -type WaitForMachineDeploymentNodesToExistInput struct { - Lister Lister - Cluster *clusterv1.Cluster - MachineDeployment *clusterv1.MachineDeployment -} - -// WaitForMachineDeploymentNodesToExist waits until all nodes associated with a machine deployment exist. -func WaitForMachineDeploymentNodesToExist(ctx context.Context, input WaitForMachineDeploymentNodesToExistInput, intervals ...interface{}) { - By("waiting for the workload nodes to exist") - Eventually(func() (int, error) { - selectorMap, err := metav1.LabelSelectorAsMap(&input.MachineDeployment.Spec.Selector) - if err != nil { - return 0, err - } - ms := &clusterv1.MachineSetList{} - if err := input.Lister.List(ctx, ms, client.InNamespace(input.Cluster.Namespace), client.MatchingLabels(selectorMap)); err != nil { - return 0, err - } - if len(ms.Items) == 0 { - return 0, errors.New("no machinesets were found") - } - machineSet := ms.Items[0] - selectorMap, err = metav1.LabelSelectorAsMap(&machineSet.Spec.Selector) - if err != nil { - return 0, err - } - machines := &clusterv1.MachineList{} - if err := input.Lister.List(ctx, machines, client.InNamespace(machineSet.Namespace), client.MatchingLabels(selectorMap)); err != nil { - return 0, err - } - count := 0 - for _, machine := range machines.Items { - if machine.Status.NodeRef != nil { - count++ - } - } - return count, nil - }, intervals...).Should(Equal(int(*input.MachineDeployment.Spec.Replicas))) -} - // WaitForClusterToProvisionInput is the input for WaitForClusterToProvision. 
type WaitForClusterToProvisionInput struct { Getter Getter @@ -224,14 +161,16 @@ func WaitForKubeadmControlPlaneMachinesToExist(ctx context.Context, input WaitFo By("waiting for all control plane nodes to exist") inClustersNamespaceListOption := client.InNamespace(input.Cluster.Namespace) // ControlPlane labels + matchControlPlaneListOption := client.HasLabels{ + clusterv1.MachineControlPlaneLabelName, + } matchClusterListOption := client.MatchingLabels{ - clusterv1.MachineControlPlaneLabelName: "", - clusterv1.ClusterLabelName: input.Cluster.Name, + clusterv1.ClusterLabelName: input.Cluster.Name, } Eventually(func() (int, error) { machineList := &clusterv1.MachineList{} - if err := input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption); err != nil { + if err := input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchControlPlaneListOption, matchClusterListOption); err != nil { fmt.Println(err) return 0, err } diff --git a/test/framework/convenience.go b/test/framework/convenience.go index e36b68b1a2fa..f3c2e21a26c9 100644 --- a/test/framework/convenience.go +++ b/test/framework/convenience.go @@ -56,12 +56,13 @@ func WaitForAPIServiceAvailable(ctx context.Context, mgmt Waiter, serviceName st Expect(err).NotTo(HaveOccurred(), "stack: %+v", err) } -// WaitForPodsReadyInNamespace will wait for all pods to be Ready in the +// WaitForDeploymentsInNamespace will wait for all deployments to be Available in the // specified namespace. -// For example, kubectl wait --for=condition=Ready --timeout=300s --namespace capi-system pods --all -func WaitForPodsReadyInNamespace(ctx context.Context, cluster Waiter, namespace string) { - By(fmt.Sprintf("waiting for pods to be ready in namespace %q", namespace)) - err := cluster.Wait(ctx, "--for", "condition=Ready", "--timeout", "300s", "--namespace", namespace, "pods", "--all") +// For example, kubectl wait --for=condition=Available --timeout=300s --namespace capi-system deployments --all +func WaitForDeploymentsInNamespace(ctx context.Context, cluster Waiter, namespace string) { + By(fmt.Sprintf("waiting for deployments to be available in namespace %q", namespace)) + + err := cluster.Wait(ctx, "--for", "condition=Available", "--timeout", "300s", "--namespace", namespace, "deployments", "--all") Expect(err).NotTo(HaveOccurred(), "stack: %+v", err) } diff --git a/test/framework/machines.go b/test/framework/machines.go new file mode 100644 index 000000000000..3ba1959c0851 --- /dev/null +++ b/test/framework/machines.go @@ -0,0 +1,134 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package framework + +import ( + "context" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + "github.com/pkg/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// CreateMachineInput is the input for CreateMachine. 
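// Aside: the machines.go helpers added below (CreateMachine, WaitForMachineNodesToExist)
// poll the management cluster until every created Machine reports a NodeRef. A
// dependency-free sketch of that wait loop, with a plain getter function standing in
// for the controller-runtime client and Gomega's Eventually; names are illustrative.
package main

import (
	"context"
	"fmt"
	"time"
)

// getNodeRef stands in for fetching a Machine and reading .status.nodeRef.
type getNodeRef func(ctx context.Context, name string) (string, error)

// waitForNodeRefs returns nil once every named machine has a node, or an error
// if the context's deadline expires first.
func waitForNodeRefs(ctx context.Context, get getNodeRef, names []string, interval time.Duration) error {
	for {
		ready := 0
		for _, n := range names {
			ref, err := get(ctx, n)
			if err == nil && ref != "" {
				ready++
			}
		}
		if ready == len(names) {
			return nil
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("timed out: %d/%d machines have nodes", ready, len(names))
		case <-time.After(interval):
		}
	}
}

func main() {
	start := time.Now()
	get := func(_ context.Context, name string) (string, error) {
		if time.Since(start) > 30*time.Millisecond {
			return "node-for-" + name, nil // pretend the node eventually appears
		}
		return "", nil
	}
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	fmt.Println(waitForNodeRefs(ctx, get, []string{"controlplane-0"}, 10*time.Millisecond))
}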
+type CreateMachineInput struct { + Creator Creator + Machine *clusterv1.Machine + BootstrapConfig runtime.Object + InfraMachine runtime.Object +} + +// CreateMachine creates the machine and dependencies. +func CreateMachine(ctx context.Context, input CreateMachineInput) { + By("creating a core Machine resource") + Expect(input.Creator.Create(ctx, input.Machine)).To(Succeed()) + + By("creating a BootstrapConfigTemplate resource") + Expect(input.Creator.Create(ctx, input.BootstrapConfig)).To(Succeed()) + + By("creating an InfrastructureMachineTemplate resource") + Expect(input.Creator.Create(ctx, input.InfraMachine)).To(Succeed()) +} + +// WaitForMachineNodesToExistInput is the input for WaitForMachineNodesToExist. +type WaitForMachineNodesToExistInput struct { + Getter Getter + Machines []*clusterv1.Machine +} + +// WaitForMachineDeploymentNodesToExist waits until all nodes associated with a machine deployment exist. +func WaitForMachineNodesToExist(ctx context.Context, input WaitForMachineNodesToExistInput, intervals ...interface{}) { + By("waiting for the machines' nodes to exist") + Eventually(func() (count int, err error) { + for _, m := range input.Machines { + machine := &clusterv1.Machine{} + err = input.Getter.Get(ctx, client.ObjectKey{Namespace: m.Namespace, Name: m.Name}, machine) + if err != nil { + return + } + if machine.Status.NodeRef != nil { + count++ + } + } + return + }, intervals...).Should(Equal(len(input.Machines))) +} + +// CreateMachineDeploymentInput is the input for CreateMachineDeployment. +type CreateMachineDeploymentInput struct { + Creator Creator + MachineDeployment *clusterv1.MachineDeployment + BootstrapConfigTemplate runtime.Object + InfraMachineTemplate runtime.Object +} + +// CreateMachineDeployment creates the machine deployment and dependencies. +func CreateMachineDeployment(ctx context.Context, input CreateMachineDeploymentInput) { + By("creating a core MachineDeployment resource") + Expect(input.Creator.Create(ctx, input.MachineDeployment)).To(Succeed()) + + By("creating a BootstrapConfigTemplate resource") + Expect(input.Creator.Create(ctx, input.BootstrapConfigTemplate)).To(Succeed()) + + By("creating an InfrastructureMachineTemplate resource") + Expect(input.Creator.Create(ctx, input.InfraMachineTemplate)).To(Succeed()) +} + +// WaitForMachineDeploymentNodesToExistInput is the input for WaitForMachineDeploymentNodesToExist. +type WaitForMachineDeploymentNodesToExistInput struct { + Lister Lister + Cluster *clusterv1.Cluster + MachineDeployment *clusterv1.MachineDeployment +} + +// WaitForMachineDeploymentNodesToExist waits until all nodes associated with a machine deployment exist. 
+func WaitForMachineDeploymentNodesToExist(ctx context.Context, input WaitForMachineDeploymentNodesToExistInput, intervals ...interface{}) { + By("waiting for the workload nodes to exist") + Eventually(func() (int, error) { + selectorMap, err := metav1.LabelSelectorAsMap(&input.MachineDeployment.Spec.Selector) + if err != nil { + return 0, err + } + ms := &clusterv1.MachineSetList{} + if err := input.Lister.List(ctx, ms, client.InNamespace(input.Cluster.Namespace), client.MatchingLabels(selectorMap)); err != nil { + return 0, err + } + if len(ms.Items) == 0 { + return 0, errors.New("no machinesets were found") + } + machineSet := ms.Items[0] + selectorMap, err = metav1.LabelSelectorAsMap(&machineSet.Spec.Selector) + if err != nil { + return 0, err + } + machines := &clusterv1.MachineList{} + if err := input.Lister.List(ctx, machines, client.InNamespace(machineSet.Namespace), client.MatchingLabels(selectorMap)); err != nil { + return 0, err + } + count := 0 + for _, machine := range machines.Items { + if machine.Status.NodeRef != nil { + count++ + } + } + return count, nil + }, intervals...).Should(Equal(int(*input.MachineDeployment.Spec.Replicas))) +} diff --git a/test/framework/management_cluster.go b/test/framework/management_cluster.go index 754161de5fe8..fd93063ee667 100644 --- a/test/framework/management_cluster.go +++ b/test/framework/management_cluster.go @@ -115,7 +115,7 @@ func InitManagementCluster(ctx context.Context, input *InitManagementClusterInpu for _, waiter := range component.Waiters { switch waiter.Type { case PodsWaiter: - WaitForPodsReadyInNamespace(ctx, managementCluster, waiter.Value) + WaitForDeploymentsInNamespace(ctx, managementCluster, waiter.Value) case ServiceWaiter: WaitForAPIServiceAvailable(ctx, managementCluster, waiter.Value) } diff --git a/test/infrastructure/docker/e2e/custom_assertions.go b/test/infrastructure/docker/e2e/custom_assertions.go index da460f2dede0..f7d9b243d0e6 100644 --- a/test/infrastructure/docker/e2e/custom_assertions.go +++ b/test/infrastructure/docker/e2e/custom_assertions.go @@ -24,6 +24,9 @@ import ( . "github.com/onsi/ginkgo" . 
"github.com/onsi/gomega" + types "github.com/onsi/gomega/types" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" @@ -64,3 +67,37 @@ func ensureDockerArtifactsDeleted(input ensureDockerArtifactsDeletedInput) { Expect(dmtl.Items).To(HaveLen(0)) By("Succeeding in deleting all docker artifacts") } + +type controllerMatch struct { + kind string + owner metav1.Object +} + +func (m *controllerMatch) Match(actual interface{}) (success bool, err error) { + actualMeta, err := meta.Accessor(actual) + if err != nil { + return false, fmt.Errorf("unable to read meta for %T: %v", actual, err) + } + + owner := metav1.GetControllerOf(actualMeta) + if owner == nil { + return false, fmt.Errorf("no controller found (owner ref with controller = true) for object %#v", actual) + } + + match := (owner.Kind == m.kind && + owner.Name == m.owner.GetName() && owner.UID == m.owner.GetUID()) + + return match, nil +} + +func (m *controllerMatch) FailureMessage(actual interface{}) string { + return fmt.Sprintf("Expected\n\t%#vto have a controller reference pointing to %s/%s (%v)", actual, m.kind, m.owner.GetName(), m.owner.GetUID()) +} + +func (m *controllerMatch) NegatedFailureMessage(actual interface{}) string { + return fmt.Sprintf("Expected\n\t%#vto not have a controller reference pointing to %s/%s (%v)", actual, m.kind, m.owner.GetName(), m.owner.GetUID()) +} + +func HaveControllerRef(kind string, owner metav1.Object) types.GomegaMatcher { + return &controllerMatch{kind, owner} +} diff --git a/test/infrastructure/docker/e2e/docker_suite_test.go b/test/infrastructure/docker/e2e/docker_suite_test.go index a31d4d46d302..a40968f773af 100644 --- a/test/infrastructure/docker/e2e/docker_suite_test.go +++ b/test/infrastructure/docker/e2e/docker_suite_test.go @@ -132,6 +132,10 @@ var _ = AfterSuite(func() { }) func writeLogs(mgmt *CAPDCluster, namespace, deploymentName, logDir string) error { + if mgmt == nil { + return nil + } + c, err := mgmt.GetClient() if err != nil { return err diff --git a/test/infrastructure/docker/e2e/docker_test.go b/test/infrastructure/docker/e2e/docker_test.go index 7a4361c790e3..f63d068539ee 100644 --- a/test/infrastructure/docker/e2e/docker_test.go +++ b/test/infrastructure/docker/e2e/docker_test.go @@ -22,6 +22,7 @@ import ( "errors" "fmt" "net/http" + "strings" "time" . 
"github.com/onsi/ginkgo" @@ -56,6 +57,7 @@ var _ = Describe("Docker", func() { }) AfterEach(func() { + By("cleaning up the test cluster") deleteClusterInput := framework.DeleteClusterInput{ Deleter: client, Cluster: cluster, @@ -137,7 +139,7 @@ var _ = Describe("Docker", func() { } framework.WaitForOneKubeadmControlPlaneMachineToExist(ctx, waitForOneKubeadmControlPlaneMachineToExistInput) - // Insatll a networking solution on the workload cluster + // Install a networking solution on the workload cluster workloadClient, err := mgmt.GetWorkloadClient(ctx, cluster.Namespace, cluster.Name) Expect(err).ToNot(HaveOccurred()) applyYAMLURLInput := framework.ApplyYAMLURLInput{ @@ -227,6 +229,168 @@ var _ = Describe("Docker", func() { }, "10m", "30s").Should(Equal(int(*controlPlane.Spec.Replicas))) }) }) + + Describe("Controlplane Adoption", func() { + Specify("KubeadmControlPlane adopts up-to-date control plane Machines without modification", func() { + var ( + controlPlane *controlplanev1.KubeadmControlPlane + infraCluster *infrav1.DockerCluster + template *infrav1.DockerMachineTemplate + err error + ) + replicas := 1 /* TODO: can't seem to get CAPD to bootstrap a cluster with more than one control plane machine */ + cluster, infraCluster, controlPlane, template = clusterGen.GenerateCluster(namespace, int32(replicas)) + controlPlaneRef := cluster.Spec.ControlPlaneRef + cluster.Spec.ControlPlaneRef = nil + + // Set up the client to the management cluster + client, err = mgmt.GetClient() + Expect(err).NotTo(HaveOccurred()) + + // Set up the cluster object + createClusterInput := framework.CreateClusterInput{ + Creator: client, + Cluster: cluster, + InfraCluster: infraCluster, + } + framework.CreateCluster(ctx, createClusterInput) + + version := "1.16.3" + + // Wait for the cluster to provision. 
+ assertClusterProvisionsInput := framework.WaitForClusterToProvisionInput{ + Getter: client, + Cluster: cluster, + } + framework.WaitForClusterToProvision(ctx, assertClusterProvisionsInput) + + initMachines, bootstrap, infra := generateControlPlaneMachines(cluster, namespace, version, replicas) + for i := 0; i < len(initMachines); i++ { + // we have to go one at a time, otherwise weird things start to happen + By("initializing control plane machines") + createMachineInput := framework.CreateMachineInput{ + Creator: client, + BootstrapConfig: bootstrap[i], + InfraMachine: infra[i], + Machine: initMachines[i], + } + framework.CreateMachine(ctx, createMachineInput) + + // Wait for the first control plane machine to boot + assertMachinesProvisionInput := framework.WaitForMachineNodesToExistInput{ + Getter: client, + Machines: initMachines[i : i+1], + } + framework.WaitForMachineNodesToExist(ctx, assertMachinesProvisionInput) + + if i == 0 { + // Install a networking solution on the workload cluster + workloadClient, err := mgmt.GetWorkloadClient(ctx, cluster.Namespace, cluster.Name) + Expect(err).ToNot(HaveOccurred()) + applyYAMLURLInput := framework.ApplyYAMLURLInput{ + Client: workloadClient, + HTTPGetter: http.DefaultClient, + NetworkingURL: "https://docs.projectcalico.org/manifests/calico.yaml", + Scheme: mgmt.Scheme, + } + framework.ApplyYAMLURL(ctx, applyYAMLURLInput) + } + } + + // Set up the KubeadmControlPlane + createKubeadmControlPlaneInput := framework.CreateKubeadmControlPlaneInput{ + Creator: client, + ControlPlane: controlPlane, + MachineTemplate: template, + } + framework.CreateKubeadmControlPlane(ctx, createKubeadmControlPlaneInput) + + // We have to set the control plane ref on the cluster as well + cl := &clusterv1.Cluster{} + client.Get(ctx, ctrlclient.ObjectKey{Namespace: cluster.Namespace, Name: cluster.Name}, cl) + cl.Spec.ControlPlaneRef = controlPlaneRef + Expect(client.Update(ctx, cl)).To(Succeed()) + + // Wait for the control plane to be ready + waitForControlPlaneToBeReadyInput := framework.WaitForControlPlaneToBeReadyInput{ + Getter: client, + ControlPlane: controlPlane, + } + framework.WaitForControlPlaneToBeReady(ctx, waitForControlPlaneToBeReadyInput) + + // Wait for the controlplane nodes to exist + assertKubeadmControlPlaneNodesExistInput := framework.WaitForKubeadmControlPlaneMachinesToExistInput{ + Lister: client, + Cluster: cluster, + ControlPlane: controlPlane, + } + framework.WaitForKubeadmControlPlaneMachinesToExist(ctx, assertKubeadmControlPlaneNodesExistInput, "10m", "10s") + + machines := clusterv1.MachineList{} + Expect(client.List(ctx, &machines, + ctrlclient.InNamespace(namespace), + ctrlclient.HasLabels{ + clusterv1.MachineControlPlaneLabelName, + })).To(Succeed()) + + By("taking stable ownership of the Machines") + for _, m := range machines.Items { + Expect(&m).To(HaveControllerRef(framework.TypeToKind(controlPlane), controlPlane)) + Expect(m.CreationTimestamp.Time).To(BeTemporally("<", controlPlane.CreationTimestamp.Time)) + } + Expect(machines.Items).To(HaveLen(1)) + + By("taking ownership of the cluster's PKI material") + secrets := corev1.SecretList{} + Expect(client.List(ctx, &secrets, ctrlclient.InNamespace(namespace), ctrlclient.MatchingLabels{ + clusterv1.ClusterLabelName: cluster.Name, + })).To(Succeed()) + + for _, s := range secrets.Items { + // We don't check the data, and removing it from the object makes assertions much easier to read + s.Data = nil + + // The bootstrap secret should still be owned by the bootstrap config so its 
cleaned up properly, + // but the cluster PKI materials should have their ownership transferred. + switch { + case strings.HasSuffix(s.Name, "-kubeconfig"): + // Do nothing + case strings.HasPrefix(s.Name, "bootstrap-"): + fi := -1 + for i, b := range bootstrap { + if s.Name == b.Name { + fi = i + } + } + Expect(fi).To(BeNumerically(">=", 0), "could not find matching bootstrap object for Secret %s", s.Name) + Expect(&s).To(HaveControllerRef(framework.TypeToKind(bootstrap[fi]), bootstrap[fi])) + default: + Expect(&s).To(HaveControllerRef(framework.TypeToKind(controlPlane), controlPlane)) + } + } + Expect(secrets.Items).To(HaveLen(4 /* pki */ + 1 /* kubeconfig */ + int(replicas))) + + By("ensuring we can still join machines after the adoption") + md, infraTemplate, bootstrapTemplate := GenerateMachineDeployment(cluster, 1) + + // Create the workload nodes + createMachineDeploymentinput := framework.CreateMachineDeploymentInput{ + Creator: client, + MachineDeployment: md, + BootstrapConfigTemplate: bootstrapTemplate, + InfraMachineTemplate: infraTemplate, + } + framework.CreateMachineDeployment(ctx, createMachineDeploymentinput) + + // Wait for the workload nodes to exist + waitForMachineDeploymentNodesToExistInput := framework.WaitForMachineDeploymentNodesToExistInput{ + Lister: client, + Cluster: cluster, + MachineDeployment: md, + } + framework.WaitForMachineDeploymentNodesToExist(ctx, waitForMachineDeploymentNodesToExistInput) + }) + }) }) }) @@ -371,3 +535,63 @@ func (c *ClusterGenerator) GenerateCluster(namespace string, replicas int32) (*c } return cluster, infraCluster, kcp, template } + +func generateControlPlaneMachines(cluster *clusterv1.Cluster, namespace, version string, replicas int) ([]*clusterv1.Machine, []*bootstrapv1.KubeadmConfig, []*infrav1.DockerMachine) { + machines := make([]*clusterv1.Machine, 0, replicas) + bootstrap := make([]*bootstrapv1.KubeadmConfig, 0, replicas) + infra := make([]*infrav1.DockerMachine, 0, replicas) + for i := 0; i < replicas; i++ { + bootstrap = append(bootstrap, &bootstrapv1.KubeadmConfig{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: fmt.Sprintf("bootstrap-controlplane-%d", i), + }, + Spec: bootstrapv1.KubeadmConfigSpec{ + ClusterConfiguration: &v1beta1.ClusterConfiguration{ + APIServer: v1beta1.APIServer{ + // Darwin support + CertSANs: []string{"127.0.0.1"}, + }, + }, + }, + }) + + infra = append(infra, &infrav1.DockerMachine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: fmt.Sprintf("controlplane-%d-infra", i), + }, + Spec: infrav1.DockerMachineSpec{}, + }) + + machines = append(machines, &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: fmt.Sprintf("controlplane-%d", i), + Labels: map[string]string{ + clusterv1.MachineControlPlaneLabelName: "true", + }, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: cluster.GetName(), + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: bootstrapv1.GroupVersion.String(), + Kind: framework.TypeToKind(bootstrap[i]), + Namespace: bootstrap[i].GetNamespace(), + Name: bootstrap[i].GetName(), + }, + }, + InfrastructureRef: corev1.ObjectReference{ + APIVersion: infrav1.GroupVersion.String(), + Kind: framework.TypeToKind(infra[i]), + Namespace: infra[i].GetNamespace(), + Name: infra[i].GetName(), + }, + Version: &version, + }, + }) + } + + return machines, bootstrap, infra +} diff --git a/util/util.go b/util/util.go index ef1d150ce2c7..52b51f584439 100644 --- a/util/util.go +++ b/util/util.go @@ -268,6 
+268,22 @@ func EnsureOwnerRef(ownerReferences []metav1.OwnerReference, ref metav1.OwnerRef return ownerReferences } +// ReconcileOwnerRef re-parents an object from one OwnerReference to another +func ReconcileOwnerRef(ownerReferences []metav1.OwnerReference, target metav1.OwnerReference, source metav1.Object) []metav1.OwnerReference { + fi := -1 + for index, r := range ownerReferences { + if r.UID == source.GetUID() { + fi = index + ownerReferences[index] = target + break + } + } + if fi < 0 { + ownerReferences = append(ownerReferences, target) + } + return ownerReferences +} + // indexOwnerRef returns the index of the owner reference in the slice if found, or -1. func indexOwnerRef(ownerReferences []metav1.OwnerReference, ref metav1.OwnerReference) int { for index, r := range ownerReferences { @@ -294,9 +310,9 @@ func referSameObject(a, b metav1.OwnerReference) bool { } // PointsTo returns true if any of the owner references point to the given target -func PointsTo(refs []metav1.OwnerReference, target *metav1.ObjectMeta) bool { +func PointsTo(refs []metav1.OwnerReference, target metav1.Object) bool { for _, ref := range refs { - if ref.UID == target.UID { + if ref.UID == target.GetUID() { return true } }
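// Aside: ReconcileOwnerRef above re-parents an object by swapping the owner reference
// whose UID matches the old owner for the new one, appending it if nothing matched.
// A small usage sketch; the local reconcileOwnerRef copy, names and UIDs are
// illustrative stand-ins so the example stays self-contained.
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

// reconcileOwnerRef mirrors util.ReconcileOwnerRef for the purposes of this sketch.
func reconcileOwnerRef(refs []metav1.OwnerReference, target metav1.OwnerReference, source metav1.Object) []metav1.OwnerReference {
	for i, r := range refs {
		if r.UID == source.GetUID() {
			refs[i] = target
			return refs
		}
	}
	return append(refs, target)
}

func main() {
	// A Secret currently owned by a KubeadmConfig...
	oldOwner := metav1.ObjectMeta{Name: "bootstrap-controlplane-0", UID: types.UID("cfg-uid")}
	refs := []metav1.OwnerReference{{
		APIVersion: "bootstrap.cluster.x-k8s.io/v1alpha3",
		Kind:       "KubeadmConfig",
		Name:       oldOwner.Name,
		UID:        oldOwner.UID,
	}}

	// ...gets re-parented to the KubeadmControlPlane during adoption.
	t := true
	target := metav1.OwnerReference{
		APIVersion:         "controlplane.cluster.x-k8s.io/v1alpha3",
		Kind:               "KubeadmControlPlane",
		Name:               "my-control-plane",
		UID:                types.UID("kcp-uid"),
		Controller:         &t,
		BlockOwnerDeletion: &t,
	}

	refs = reconcileOwnerRef(refs, target, &oldOwner)
	fmt.Printf("%s/%s now owns the secret (controller=%t)\n", refs[0].Kind, refs[0].Name, *refs[0].Controller)
}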