diff --git a/test/e2e/cluster_upgrade_runtimesdk.go b/test/e2e/cluster_upgrade_runtimesdk.go
index 6e2fa9535f12..2edcb47252a9 100644
--- a/test/e2e/cluster_upgrade_runtimesdk.go
+++ b/test/e2e/cluster_upgrade_runtimesdk.go
@@ -35,7 +35,6 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
-	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	runtimev1 "sigs.k8s.io/cluster-api/exp/runtime/api/v1alpha1"
 	"sigs.k8s.io/cluster-api/test/e2e/internal/log"
 	"sigs.k8s.io/cluster-api/test/framework"
@@ -44,8 +43,6 @@ import (
 	"sigs.k8s.io/cluster-api/util/conditions"
 )
 
-var hookFailedMessage = "hook failed"
-
 // clusterUpgradeWithRuntimeSDKSpecInput is the input for clusterUpgradeWithRuntimeSDKSpec.
 type clusterUpgradeWithRuntimeSDKSpecInput struct {
 	E2EConfig *clusterctl.E2EConfig
@@ -83,7 +80,6 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
 	var (
 		input         clusterUpgradeWithRuntimeSDKSpecInput
 		namespace     *corev1.Namespace
-		ext           *runtimev1.ExtensionConfig
 		cancelWatches context.CancelFunc
 
 		controlPlaneMachineCount int64
@@ -91,6 +87,7 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
 
 		clusterResources  *clusterctl.ApplyClusterTemplateAndWaitResult
 		testExtensionPath string
+		clusterName       string
 	)
 
 	BeforeEach(func() {
@@ -123,11 +120,12 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
 		// Set up a Namespace where to host objects for this spec and create a watcher for the Namespace events.
 		namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder)
 
+		clusterName = fmt.Sprintf("%s-%s", specName, util.RandomString(6))
+
 		clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
 	})
 
 	It("Should create, upgrade and delete a workload cluster", func() {
-		clusterName := fmt.Sprintf("%s-%s", specName, util.RandomString(6))
 		By("Deploy Test Extension")
 		testExtensionDeploymentTemplate, err := os.ReadFile(testExtensionPath) //nolint:gosec
 		Expect(err).ToNot(HaveOccurred(), "Failed to read the extension deployment manifest file")
@@ -141,12 +139,14 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
 		Expect(input.BootstrapClusterProxy.Apply(ctx, []byte(testExtensionDeployment), "--namespace", namespace.Name)).To(Succeed())
 
 		By("Deploy Test Extension ExtensionConfig and ConfigMap")
-		ext = extensionConfig(specName, namespace)
-		err = input.BootstrapClusterProxy.GetClient().Create(ctx, ext)
-		Expect(err).ToNot(HaveOccurred(), "Failed to create the extension config")
-		responses := responsesConfigMap(clusterName, namespace)
-		err = input.BootstrapClusterProxy.GetClient().Create(ctx, responses)
-		Expect(err).ToNot(HaveOccurred(), "Failed to create the responses configmap")
+
+		Expect(input.BootstrapClusterProxy.GetClient().Create(ctx,
+			extensionConfig(specName, namespace))).
+			To(Succeed(), "Failed to create the extension config")
+
+		Expect(input.BootstrapClusterProxy.GetClient().Create(ctx,
+			responsesConfigMap(clusterName, namespace))).
+			To(Succeed())
 
 		By("Creating a workload cluster")
@@ -195,6 +195,7 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
 					input.BootstrapClusterProxy.GetClient(),
 					namespace.Name,
 					clusterName,
+					input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo),
 					input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"))
 			},
 			PreWaitForMachineDeploymentToBeUpgraded: func() {
@@ -236,26 +237,31 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
 
 		By("Checking all lifecycle hooks have been called")
 		// Assert that each hook has been called and returned "Success" during the test.
-		err = checkLifecycleHookResponses(ctx, input.BootstrapClusterProxy.GetClient(), namespace.Name, clusterName, map[string]string{
-			"BeforeClusterCreate":          "Success",
-			"BeforeClusterUpgrade":         "Success",
-			"BeforeClusterDelete":          "Success",
+		Expect(checkLifecycleHookResponses(ctx, input.BootstrapClusterProxy.GetClient(), namespace.Name, clusterName, map[string]string{
+			"BeforeClusterCreate":          "Status: Success, RetryAfterSeconds: 0",
+			"BeforeClusterUpgrade":         "Status: Success, RetryAfterSeconds: 0",
+			"BeforeClusterDelete":          "Status: Success, RetryAfterSeconds: 0",
+			"AfterControlPlaneUpgrade":     "Status: Success, RetryAfterSeconds: 0",
 			"AfterControlPlaneInitialized": "Success",
-			"AfterControlPlaneUpgrade":     "Success",
 			"AfterClusterUpgrade":          "Success",
-		})
-		Expect(err).ToNot(HaveOccurred(), "Lifecycle hook calls were not as expected")
+		})).To(Succeed(), "Lifecycle hook calls were not as expected")
 
 		By("PASSED!")
 	})
 
 	AfterEach(func() {
+		// Delete the extensionConfig first to ensure the BeforeClusterDelete hook doesn't block deletion.
+		Eventually(func() error {
+			return input.BootstrapClusterProxy.GetClient().Delete(ctx, extensionConfig(specName, namespace))
+		}, 10*time.Second, 1*time.Second).Should(Succeed(), "delete extensionConfig failed")
+
+		Eventually(func() error {
+			return input.BootstrapClusterProxy.GetClient().Delete(ctx, responsesConfigMap(clusterName, namespace))
+		}, 10*time.Second, 1*time.Second).Should(Succeed(), "delete responses configmap failed")
+
 		// Dumps all the resources in the spec Namespace, then cleanups the cluster object and the spec Namespace itself.
 		dumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup)
-
-		Eventually(func() error {
-			return input.BootstrapClusterProxy.GetClient().Delete(ctx, ext)
-		}, 10*time.Second, 1*time.Second).Should(Succeed())
 	})
 }
 
@@ -303,11 +309,11 @@ func responsesConfigMap(name string, namespace *corev1.Namespace) *corev1.Config
 		},
 		// Set the initial preloadedResponses for each of the tested hooks.
 		Data: map[string]string{
-			// Blocking hooks are set to Status:Failure initially. These will be changed during the test.
-			"BeforeClusterCreate-preloadedResponse":      fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
-			"BeforeClusterUpgrade-preloadedResponse":     fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
-			"AfterControlPlaneUpgrade-preloadedResponse": fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
-			"BeforeClusterDelete-preloadedResponse":      fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
+			// Blocking hooks are set to return RetryAfterSeconds initially. These will be changed during the test.
+ "BeforeClusterCreate-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`, + "BeforeClusterUpgrade-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`, + "AfterControlPlaneUpgrade-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`, + "BeforeClusterDelete-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`, // Non-blocking hooks are set to Status:Success. "AfterControlPlaneInitialized-preloadedResponse": `{"Status": "Success"}`, @@ -364,10 +370,6 @@ func beforeClusterCreateTestHandler(ctx context.Context, c client.Client, namesp return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster) }).Should(Succeed()) - // Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage. - if !clusterConditionShowsHookFailed(cluster, hookName) { - blocked = false - } if cluster.Status.Phase == string(clusterv1.ClusterPhaseProvisioned) { blocked = false } @@ -376,25 +378,21 @@ func beforeClusterCreateTestHandler(ctx context.Context, c client.Client, namesp } // beforeClusterUpgradeTestHandler calls runtimeHookTestHandler with a blocking function which returns false if the -// Cluster has controlplanev1.RollingUpdateInProgressReason in its ReadyCondition. -func beforeClusterUpgradeTestHandler(ctx context.Context, c client.Client, namespace, clusterName string, intervals []interface{}) { +// Cluster has RollingUpdateInProgressReason in its ReadyCondition. +func beforeClusterUpgradeTestHandler(ctx context.Context, c client.Client, namespace, clusterName, toVersion string, intervals []interface{}) { hookName := "BeforeClusterUpgrade" - runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, true, func() bool { + runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, false, func() bool { var blocked = true - cluster := &clusterv1.Cluster{} - Eventually(func() error { - return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster) - }).Should(Succeed()) - - // Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage. - if !clusterConditionShowsHookFailed(cluster, hookName) { - blocked = false - } - // Check if the Cluster is showing the RollingUpdateInProgress condition reason. If it has the update process is unblocked. - if conditions.IsFalse(cluster, clusterv1.ReadyCondition) && - conditions.GetReason(cluster, clusterv1.ReadyCondition) == controlplanev1.RollingUpdateInProgressReason { - blocked = false + controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{ + Lister: c, + ClusterName: clusterName, + Namespace: namespace, + }) + for _, machine := range controlPlaneMachines { + if *machine.Spec.Version == toVersion { + blocked = false + } } return blocked }, intervals) @@ -411,11 +409,6 @@ func afterControlPlaneUpgradeTestHandler(ctx context.Context, c client.Client, n return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster) }).Should(Succeed()) - // Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage. 
-		if !clusterConditionShowsHookFailed(cluster, hookName) {
-			blocked = false
-		}
-
 		mds := &clusterv1.MachineDeploymentList{}
 		Eventually(func() error {
 			return c.List(ctx, mds, client.MatchingLabels{
@@ -464,23 +457,24 @@ func runtimeHookTestHandler(ctx context.Context, c client.Client, namespace, clu
 		if err := checkLifecycleHooksCalledAtLeastOnce(ctx, c, namespace, clusterName, []string{hookName}); err != nil {
 			return err
 		}
-		cluster := &clusterv1.Cluster{}
-		if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster); err != nil {
-			return err
-		}
 
 		// Check for the existence of the condition if withTopologyReconciledCondition is true.
-		if withTopologyReconciledCondition &&
-			(conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) != clusterv1.TopologyReconcileFailedReason) {
-			return errors.New("Condition not found on Cluster object")
+		if withTopologyReconciledCondition {
+			cluster := &clusterv1.Cluster{}
+			if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster); err != nil {
+				return err
+			}
+			if !clusterConditionShowsHookBlocking(cluster, hookName) {
+				return errors.Errorf("Blocking condition for %s not found on Cluster object", hookName)
+			}
 		}
 		return nil
-	}, 60*time.Second).Should(Succeed(), "%s has not been called", hookName)
+	}, 120*time.Second).Should(Succeed(), "%s has not been called", hookName)
 
 	// blockingCondition should consistently be true as the Runtime hook is returning "Failure".
 	Consistently(func() bool {
 		return blockingCondition()
-	}, 30*time.Second).Should(BeTrue(),
+	}, 60*time.Second).Should(BeTrue(),
 		fmt.Sprintf("Cluster Topology reconciliation continued unexpectedly: hook %s not blocking", hookName))
 
 	// Patch the ConfigMap to set the hook response to "Success".
@@ -503,10 +497,9 @@ func runtimeHookTestHandler(ctx context.Context, c client.Client, namespace, clu
 		fmt.Sprintf("ClusterTopology reconcile did not unblock after updating hook response: hook %s still blocking", hookName))
 }
 
-// clusterConditionShowsHookFailed checks if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
-func clusterConditionShowsHookFailed(cluster *clusterv1.Cluster, hookName string) bool {
-	return conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) == clusterv1.TopologyReconcileFailedReason &&
-		strings.Contains(conditions.GetMessage(cluster, clusterv1.TopologyReconciledCondition), hookFailedMessage) &&
+// clusterConditionShowsHookBlocking checks if the TopologyReconciled condition reason is TopologyReconciledHookBlockingReason and its message contains the hook name.
+func clusterConditionShowsHookBlocking(cluster *clusterv1.Cluster, hookName string) bool {
+	return conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) == clusterv1.TopologyReconciledHookBlockingReason &&
 		strings.Contains(conditions.GetMessage(cluster, clusterv1.TopologyReconciledCondition), hookName)
 }
 
diff --git a/test/extension/handlers/lifecycle/handlers.go b/test/extension/handlers/lifecycle/handlers.go
index 354190a7eae1..bdc4fa0106ea 100644
--- a/test/extension/handlers/lifecycle/handlers.go
+++ b/test/extension/handlers/lifecycle/handlers.go
@@ -153,6 +153,11 @@ func (h *Handler) readResponseFromConfigMap(ctx context.Context, name, namespace
 	if err := yaml.Unmarshal([]byte(configMap.Data[hookName+"-preloadedResponse"]), response); err != nil {
 		return errors.Wrapf(err, "failed to read %q response information from ConfigMap", hook)
 	}
+	if r, ok := response.(runtimehooksv1.RetryResponseObject); ok {
+		log := ctrl.LoggerFrom(ctx)
+		log.Info(fmt.Sprintf("%s response is %s. retry: %v", hookName, r.GetStatus(), r.GetRetryAfterSeconds()))
+		response = r
+	}
 	return nil
 }
 
@@ -164,9 +169,16 @@ func (h *Handler) recordCallInConfigMap(ctx context.Context, name, namespace str
 		return errors.Wrapf(err, "failed to read the ConfigMap %s/%s", namespace, configMapName)
 	}
 
-	// Patch the actualResponseStatus with the returned value
-	patch := client.RawPatch(types.MergePatchType,
-		[]byte(fmt.Sprintf(`{"data":{"%s-actualResponseStatus":"%s"}}`, hookName, response.GetStatus()))) //nolint:gocritic
+	var patch client.Patch
+	if r, ok := response.(runtimehooksv1.RetryResponseObject); ok {
+		patch = client.RawPatch(types.MergePatchType,
+			[]byte(fmt.Sprintf(`{"data":{"%s-actualResponseStatus": "Status: %s, RetryAfterSeconds: %v"}}`, hookName, r.GetStatus(), r.GetRetryAfterSeconds()))) //nolint:gocritic
+
+	} else {
+		// Patch the actualResponseStatus with the returned value
+		patch = client.RawPatch(types.MergePatchType,
+			[]byte(fmt.Sprintf(`{"data":{"%s-actualResponseStatus":"%s"}}`, hookName, response.GetStatus()))) //nolint:gocritic
+	}
 	if err := h.Client.Patch(ctx, configMap, patch); err != nil {
 		return errors.Wrapf(err, "failed to update the ConfigMap %s/%s", namespace, configMapName)
 	}
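For context on the blocking mechanism this change relies on: the test extension serves whatever JSON is stored under the `<hook>-preloadedResponse` ConfigMap keys, and `runtimeHookTestHandler` unblocks a hook by merge-patching that key back to a response with `RetryAfterSeconds: 0`. A minimal sketch of that patch payload follows, assuming the same key convention and `client.RawPatch`/`types.MergePatchType` pattern used above; the helper name `unblockHookPayload` and the standalone `main` wrapper are illustrative, not code from this change.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// unblockHookPayload returns the merge-patch body that rewrites the preloaded
// response for hookName to "Success" with RetryAfterSeconds: 0, i.e. the hook
// stops asking the topology reconciler to retry and so stops blocking.
func unblockHookPayload(hookName string) []byte {
	return []byte(fmt.Sprintf(
		`{"data":{"%s-preloadedResponse":"{\"Status\": \"Success\", \"RetryAfterSeconds\": 0}"}}`,
		hookName))
}

func main() {
	// For example, the payload that would unblock the BeforeClusterUpgrade hook.
	payload := unblockHookPayload("BeforeClusterUpgrade")
	fmt.Println(string(payload))

	// The test would then apply this to the responses ConfigMap with a
	// controller-runtime client, wrapping it the same way as above:
	//   c.Patch(ctx, responsesConfigMap, client.RawPatch(types.MergePatchType, payload))
	_ = client.RawPatch(types.MergePatchType, payload)
}
```

Keeping the blocking state in the responses ConfigMap rather than in the extension binary is what lets the e2e test flip a hook from blocking to non-blocking at runtime without redeploying the test extension.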