diff --git a/test/e2e/clusterctl_upgrade.go b/test/e2e/clusterctl_upgrade.go
index af4f8475d821..f9d1fa576794 100644
--- a/test/e2e/clusterctl_upgrade.go
+++ b/test/e2e/clusterctl_upgrade.go
@@ -18,6 +18,7 @@ package e2e
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -45,6 +46,8 @@ import (
 
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	"sigs.k8s.io/cluster-api/cmd/clusterctl/client/config"
+	"sigs.k8s.io/cluster-api/controllers/external"
+	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
 	"sigs.k8s.io/cluster-api/test/e2e/internal/log"
 	"sigs.k8s.io/cluster-api/test/framework"
 	"sigs.k8s.io/cluster-api/test/framework/bootstrap"
@@ -127,6 +130,9 @@ type ClusterctlUpgradeSpecInput struct {
 	// If not set, the test will upgrade once to the v1beta1 contract.
 	// For some examples see clusterctl_upgrade_test.go
 	Upgrades []ClusterctlUpgradeSpecInputUpgrade
+
+	// ControlPlaneMachineCount specifies the number of control plane machines to create in the workload cluster.
+	ControlPlaneMachineCount *int64
 }
 
 // ClusterctlUpgradeSpecInputUpgrade defines an upgrade.
@@ -385,6 +391,9 @@ func ClusterctlUpgradeSpec(ctx context.Context, inputGetter func() ClusterctlUpg
 			kubernetesVersion = input.E2EConfig.GetVariable(KubernetesVersion)
 		}
 		controlPlaneMachineCount := ptr.To[int64](1)
+		if input.ControlPlaneMachineCount != nil {
+			controlPlaneMachineCount = input.ControlPlaneMachineCount
+		}
 		workerMachineCount := ptr.To[int64](1)
 
 		log.Logf("Creating the workload cluster with name %q using the %q template (Kubernetes %s, %d control-plane machines, %d worker machines)",
@@ -428,8 +437,15 @@ func ClusterctlUpgradeSpec(ctx context.Context, inputGetter func() ClusterctlUpg
 			Name:      workloadClusterName,
 		}, input.E2EConfig.GetIntervals(specName, "wait-cluster")...)
 
-		expectedMachineCount := *controlPlaneMachineCount + calculateExpectedWorkerCount(ctx, managementClusterProxy.GetClient(), workloadClusterUnstructured, coreCAPIStorageVersion)
+		By("Calculating expected MachineDeployment and MachinePool Machine and Node counts")
+		expectedMachineDeploymentMachineCount := calculateExpectedMachineDeploymentMachineCount(ctx, managementClusterProxy.GetClient(), workloadClusterUnstructured, coreCAPIStorageVersion)
+		expectedMachinePoolNodeCount := calculateExpectedMachinePoolNodeCount(ctx, managementClusterProxy.GetClient(), workloadClusterUnstructured, coreCAPIStorageVersion)
+		expectedMachinePoolMachineCount, err := calculateExpectedMachinePoolMachineCount(ctx, managementClusterProxy.GetClient(), workloadClusterNamespace, workloadClusterName)
+		Expect(err).ToNot(HaveOccurred())
+		expectedMachineCount := *controlPlaneMachineCount + expectedMachineDeploymentMachineCount + expectedMachinePoolMachineCount
+
+		Byf("Expect %d Machines and %d MachinePool replicas to exist", expectedMachineCount, expectedMachinePoolNodeCount)
 
 		By("Waiting for the machines to exist")
 		Eventually(func() (int64, error) {
 			var n int64
@@ -455,6 +471,26 @@ func ClusterctlUpgradeSpec(ctx context.Context, inputGetter func() ClusterctlUpg
 			return n, nil
 		}, input.E2EConfig.GetIntervals(specName, "wait-worker-nodes")...).Should(Equal(expectedMachineCount), "Timed out waiting for all Machines to exist")
 
+		By("Waiting for MachinePool to be ready with correct number of replicas")
+		Eventually(func() (int64, error) {
+			var n int64
+			machinePoolList := &expv1.MachinePoolList{}
+			if err := managementClusterProxy.GetClient().List(
+				ctx,
+				machinePoolList,
+				client.InNamespace(workloadClusterNamespace),
+				client.MatchingLabels{clusterv1.ClusterNameLabel: workloadClusterName},
+			); err == nil {
+				for _, mp := range machinePoolList.Items {
+					if mp.Status.Phase == string(expv1.MachinePoolPhaseRunning) {
+						n += int64(mp.Status.ReadyReplicas)
+					}
+				}
+			}
+
+			return n, nil
+		}, input.E2EConfig.GetIntervals(specName, "wait-worker-nodes")...).Should(Equal(expectedMachinePoolNodeCount), "Timed out waiting for all MachinePool replicas to be ready")
+
 		By("THE MANAGEMENT CLUSTER WITH OLDER VERSION OF PROVIDERS WORKS!")
 
 		for i, upgrade := range input.Upgrades {
@@ -609,20 +645,24 @@ func ClusterctlUpgradeSpec(ctx context.Context, inputGetter func() ClusterctlUpg
 				ClusterName: workloadClusterName,
 				Namespace:   workloadClusterNamespace,
 			})
-			framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{
-				ClusterProxy:              managementClusterProxy,
-				Cluster:                   workloadCluster,
-				MachineDeployment:         testMachineDeployments[0],
-				Replicas:                  2,
-				WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
-			})
-			framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{
-				ClusterProxy:              managementClusterProxy,
-				Cluster:                   workloadCluster,
-				MachineDeployment:         testMachineDeployments[0],
-				Replicas:                  1,
-				WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
-			})
+			if len(testMachineDeployments) > 0 {
+				framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{
+					ClusterProxy:              managementClusterProxy,
+					Cluster:                   workloadCluster,
+					MachineDeployment:         testMachineDeployments[0],
+					Replicas:                  2,
+					WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
+				})
+				framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{
+					ClusterProxy:              managementClusterProxy,
+					Cluster:                   workloadCluster,
+					MachineDeployment:         testMachineDeployments[0],
+					Replicas:                  1,
+					WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
+				})
+			} else {
+				Byf("[%d] No MachineDeployments found to scale", i)
+			}
 		}
 
 		Byf("[%d] Verify client-side SSA still works", i)
@@ -763,8 +803,8 @@ func discoveryAndWaitForCluster(ctx context.Context, input discoveryAndWaitForCl
 	return cluster
 }
 
-func calculateExpectedWorkerCount(ctx context.Context, c client.Client, unstructuredCluster *unstructured.Unstructured, coreCAPIStorageVersion string) int64 {
-	var expectedWorkerCount int64
+func calculateExpectedMachineDeploymentMachineCount(ctx context.Context, c client.Client, unstructuredCluster *unstructured.Unstructured, coreCAPIStorageVersion string) int64 {
+	var expectedMachineDeploymentWorkerCount int64
 
 	// Convert v1beta1 unstructured Cluster to clusterv1.Cluster
 	// Only v1beta1 Cluster support ClusterClass (i.e. have cluster.spec.topology).
@@ -778,16 +818,10 @@ func calculateExpectedWorkerCount(ctx context.Context, c client.Client, unstruct
 				if md.Replicas == nil {
 					continue
 				}
-				expectedWorkerCount += int64(*md.Replicas)
-			}
-			for _, mp := range cluster.Spec.Topology.Workers.MachinePools {
-				if mp.Replicas == nil {
-					continue
-				}
-				expectedWorkerCount += int64(*mp.Replicas)
+				expectedMachineDeploymentWorkerCount += int64(*md.Replicas)
 			}
 		}
-		return expectedWorkerCount
+		return expectedMachineDeploymentWorkerCount
 	}
 }
 
@@ -811,7 +845,67 @@ func calculateExpectedWorkerCount(ctx context.Context, c client.Client, unstruct
 		if !ok {
 			continue
 		}
-		expectedWorkerCount += replicas
+		expectedMachineDeploymentWorkerCount += replicas
+	}
+
+	return expectedMachineDeploymentWorkerCount
+}
+
+func calculateExpectedMachinePoolMachineCount(ctx context.Context, c client.Client, workloadClusterNamespace, workloadClusterName string) (int64, error) {
+	expectedMachinePoolMachineCount := int64(0)
+
+	machinePoolList := &expv1.MachinePoolList{}
+	if err := c.List(
+		ctx,
+		machinePoolList,
+		client.InNamespace(workloadClusterNamespace),
+		client.MatchingLabels{clusterv1.ClusterNameLabel: workloadClusterName},
+	); err == nil {
+		for _, mp := range machinePoolList.Items {
+			mp := mp
+			infraMachinePool, err := external.Get(ctx, c, &mp.Spec.Template.Spec.InfrastructureRef, workloadClusterNamespace)
+			if err != nil {
+				return 0, err
+			}
+			// Check if the InfraMachinePool has an infrastructureMachineKind field. If it does not, we should skip checking for MachinePool machines.
+			err = util.UnstructuredUnmarshalField(infraMachinePool, ptr.To(""), "status", "infrastructureMachineKind")
+			if err != nil && !errors.Is(err, util.ErrUnstructuredFieldNotFound) {
+				return 0, err
+			}
+			if err == nil {
+				expectedMachinePoolMachineCount += int64(*mp.Spec.Replicas)
+			}
+		}
+	}
+
+	return expectedMachinePoolMachineCount, nil
+}
+
+func calculateExpectedMachinePoolNodeCount(ctx context.Context, c client.Client, unstructuredCluster *unstructured.Unstructured, coreCAPIStorageVersion string) int64 {
+	var expectedMachinePoolWorkerCount int64
+
+	// Convert v1beta1 unstructured Cluster to clusterv1.Cluster
+	// Only v1beta1 Cluster support ClusterClass (i.e. have cluster.spec.topology).
+	if unstructuredCluster.GroupVersionKind().Version == clusterv1.GroupVersion.Version {
+		cluster := &clusterv1.Cluster{}
+		Expect(apiruntime.DefaultUnstructuredConverter.FromUnstructured(unstructuredCluster.Object, cluster)).To(Succeed())
+
+		if cluster.Spec.Topology != nil {
+			if cluster.Spec.Topology.Workers != nil {
+				for _, mp := range cluster.Spec.Topology.Workers.MachinePools {
+					if mp.Replicas == nil {
+						continue
+					}
+					expectedMachinePoolWorkerCount += int64(*mp.Replicas)
+				}
+			}
+			return expectedMachinePoolWorkerCount
+		}
+	}
+
+	byClusterOptions := []client.ListOption{
+		client.InNamespace(unstructuredCluster.GetNamespace()),
+		client.MatchingLabels{clusterv1.ClusterNameLabel: unstructuredCluster.GetName()},
 	}
 
 	machinePoolList := &unstructured.UnstructuredList{}
@@ -827,16 +921,16 @@ func calculateExpectedWorkerCount(ctx context.Context, c client.Client, unstruct
 	Eventually(func() error {
 		return c.List(ctx, machinePoolList, byClusterOptions...)
 	}, 3*time.Minute, 3*time.Second).Should(Succeed(), "Failed to list MachinePool object for Cluster %s", klog.KObj(unstructuredCluster))
-	for _, md := range machinePoolList.Items {
-		replicas, ok, err := unstructured.NestedInt64(md.Object, "spec", "replicas")
+	for _, mp := range machinePoolList.Items {
+		replicas, ok, err := unstructured.NestedInt64(mp.Object, "spec", "replicas")
 		Expect(err).ToNot(HaveOccurred())
 		if !ok {
 			continue
 		}
-		expectedWorkerCount += replicas
+		expectedMachinePoolWorkerCount += replicas
 	}
 
-	return expectedWorkerCount
+	return expectedMachinePoolWorkerCount
 }
 
 // deleteAllClustersAndWaitInput is the input type for deleteAllClustersAndWait.
diff --git a/test/framework/clusterctl/client.go b/test/framework/clusterctl/client.go
index 813301a66be0..b1dcb25171e4 100644
--- a/test/framework/clusterctl/client.go
+++ b/test/framework/clusterctl/client.go
@@ -374,49 +374,29 @@ func ConfigClusterWithBinary(_ context.Context, clusterctlBinaryPath string, inp
 	Expect(err).ToNot(HaveOccurred())
 
 	clusterctlSupportsGenerateCluster := version.GTE(semver.MustParse("1.0.0"))
 
-	var cmd *exec.Cmd
+	var command string
 	if clusterctlSupportsGenerateCluster {
-		log.Logf("clusterctl generate cluster %s --infrastructure %s --kubernetes-version %s --control-plane-machine-count %d --worker-machine-count %d --flavor %s",
-			input.ClusterName,
-			valueOrDefault(input.InfrastructureProvider),
-			input.KubernetesVersion,
-			*input.ControlPlaneMachineCount,
-			*input.WorkerMachineCount,
-			valueOrDefault(input.Flavor),
-		)
-		cmd = exec.Command(clusterctlBinaryPath, "generate", "cluster", //nolint:gosec // We don't care about command injection here.
-			input.ClusterName,
-			"--infrastructure", input.InfrastructureProvider,
-			"--kubernetes-version", input.KubernetesVersion,
-			"--control-plane-machine-count", fmt.Sprint(*input.ControlPlaneMachineCount),
-			"--worker-machine-count", fmt.Sprint(*input.WorkerMachineCount),
-			"--flavor", input.Flavor,
-			"--target-namespace", input.Namespace,
-			"--config", input.ClusterctlConfigPath,
-			"--kubeconfig", input.KubeconfigPath,
-		)
+		command = "generate"
 	} else {
-		log.Logf("clusterctl config cluster %s --infrastructure %s --kubernetes-version %s --control-plane-machine-count %d --worker-machine-count %d --flavor %s",
-			input.ClusterName,
-			valueOrDefault(input.InfrastructureProvider),
-			input.KubernetesVersion,
-			*input.ControlPlaneMachineCount,
-			*input.WorkerMachineCount,
-			valueOrDefault(input.Flavor),
-		)
-		cmd = exec.Command(clusterctlBinaryPath, "config", "cluster", //nolint:gosec // We don't care about command injection here.
-			input.ClusterName,
-			"--infrastructure", input.InfrastructureProvider,
-			"--kubernetes-version", input.KubernetesVersion,
-			"--control-plane-machine-count", fmt.Sprint(*input.ControlPlaneMachineCount),
-			"--worker-machine-count", fmt.Sprint(*input.WorkerMachineCount),
-			"--flavor", input.Flavor,
-			"--target-namespace", input.Namespace,
-			"--config", input.ClusterctlConfigPath,
-			"--kubeconfig", input.KubeconfigPath,
-		)
+		command = "config"
 	}
+	args := []string{command, "cluster",
+		input.ClusterName,
+		"--infrastructure", input.InfrastructureProvider,
+		"--kubernetes-version", input.KubernetesVersion,
+		"--worker-machine-count", fmt.Sprint(*input.WorkerMachineCount),
+		"--flavor", input.Flavor,
+		"--target-namespace", input.Namespace,
+		"--config", input.ClusterctlConfigPath,
+		"--kubeconfig", input.KubeconfigPath,
+	}
+	if input.ControlPlaneMachineCount != nil && *input.ControlPlaneMachineCount > 0 {
+		args = append(args, "--control-plane-machine-count", fmt.Sprint(*input.ControlPlaneMachineCount))
+	}
+	log.Logf("clusterctl %s", strings.Join(args, " "))
+
+	cmd := exec.Command(clusterctlBinaryPath, args...) //nolint:gosec // We don't care about command injection here.
 	out, err := cmd.Output()
 	_ = os.WriteFile(filepath.Join(input.LogFolder, fmt.Sprintf("%s-cluster-template.yaml", input.ClusterName)), out, 0644) //nolint:gosec // this is a log file to be shared via prow artifacts
 	var stdErr string