Refactor e2e tests to use RetryAfterSeconds
killianmuldoon committed Jul 5, 2022
1 parent f9c7e14 commit 277402a
Showing 2 changed files with 72 additions and 67 deletions.
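In short: the blocking lifecycle hooks in this e2e test no longer block by returning Status: Failure with a "hook failed" message; they block by returning Status: Success with a non-zero RetryAfterSeconds, and the test extension now records both the status and the retry value. A minimal standalone sketch contrasting the response payloads (the JSON strings are taken from the diff below; the wrapper program is only illustrative):

package main

import "fmt"

func main() {
	// Pre-refactor: a blocking hook was simulated with a failing response.
	oldBlocking := `{"Status": "Failure", "Message": "hook failed"}`
	// Post-refactor: a blocking hook returns Success plus a non-zero RetryAfterSeconds,
	// asking the topology controller to retry; patching the retry away unblocks the hook.
	newBlocking := `{"Status": "Success", "RetryAfterSeconds": 5}`
	unblocked := `{"Status": "Success"}`
	fmt.Println(oldBlocking, newBlocking, unblocked)
}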
121 changes: 57 additions & 64 deletions test/e2e/cluster_upgrade_runtimesdk.go
@@ -35,7 +35,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
runtimev1 "sigs.k8s.io/cluster-api/exp/runtime/api/v1alpha1"
"sigs.k8s.io/cluster-api/test/e2e/internal/log"
"sigs.k8s.io/cluster-api/test/framework"
@@ -44,8 +43,6 @@ import (
"sigs.k8s.io/cluster-api/util/conditions"
)

var hookFailedMessage = "hook failed"

// clusterUpgradeWithRuntimeSDKSpecInput is the input for clusterUpgradeWithRuntimeSDKSpec.
type clusterUpgradeWithRuntimeSDKSpecInput struct {
E2EConfig *clusterctl.E2EConfig
@@ -83,14 +80,14 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
var (
input clusterUpgradeWithRuntimeSDKSpecInput
namespace *corev1.Namespace
ext *runtimev1.ExtensionConfig
cancelWatches context.CancelFunc

controlPlaneMachineCount int64
workerMachineCount int64

clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
testExtensionPath string
clusterName string
)

BeforeEach(func() {
@@ -123,11 +120,12 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl

// Set up a Namespace to host objects for this spec and create a watcher for the Namespace events.
namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder)
clusterName = fmt.Sprintf("%s-%s", specName, util.RandomString(6))

clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
})

It("Should create, upgrade and delete a workload cluster", func() {
clusterName := fmt.Sprintf("%s-%s", specName, util.RandomString(6))
By("Deploy Test Extension")
testExtensionDeploymentTemplate, err := os.ReadFile(testExtensionPath) //nolint:gosec
Expect(err).ToNot(HaveOccurred(), "Failed to read the extension deployment manifest file")
@@ -141,12 +139,14 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
Expect(input.BootstrapClusterProxy.Apply(ctx, []byte(testExtensionDeployment), "--namespace", namespace.Name)).To(Succeed())

By("Deploy Test Extension ExtensionConfig and ConfigMap")
ext = extensionConfig(specName, namespace)
err = input.BootstrapClusterProxy.GetClient().Create(ctx, ext)
Expect(err).ToNot(HaveOccurred(), "Failed to create the extension config")
responses := responsesConfigMap(clusterName, namespace)
err = input.BootstrapClusterProxy.GetClient().Create(ctx, responses)
Expect(err).ToNot(HaveOccurred(), "Failed to create the responses configmap")

Expect(input.BootstrapClusterProxy.GetClient().Create(ctx,
extensionConfig(specName, namespace))).
To(Succeed(), "Failed to create the extension config")

Expect(input.BootstrapClusterProxy.GetClient().Create(ctx,
responsesConfigMap(clusterName, namespace))).
To(Succeed())

By("Creating a workload cluster")

@@ -195,6 +195,7 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
input.BootstrapClusterProxy.GetClient(),
namespace.Name,
clusterName,
input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo),
input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"))
},
PreWaitForMachineDeploymentToBeUpgraded: func() {
@@ -236,26 +237,31 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl

By("Checking all lifecycle hooks have been called")
// Assert that each hook has been called and returned "Success" during the test.
err = checkLifecycleHookResponses(ctx, input.BootstrapClusterProxy.GetClient(), namespace.Name, clusterName, map[string]string{
"BeforeClusterCreate": "Success",
"BeforeClusterUpgrade": "Success",
"BeforeClusterDelete": "Success",
Expect(checkLifecycleHookResponses(ctx, input.BootstrapClusterProxy.GetClient(), namespace.Name, clusterName, map[string]string{
"BeforeClusterCreate": "Status: Success, RetryAfterSeconds: 0",
"BeforeClusterUpgrade": "Status: Success, RetryAfterSeconds: 0",
"BeforeClusterDelete": "Status: Success, RetryAfterSeconds: 0",
"AfterControlPlaneUpgrade": "Status: Success, RetryAfterSeconds: 0",
"AfterControlPlaneInitialized": "Success",
"AfterControlPlaneUpgrade": "Success",
"AfterClusterUpgrade": "Success",
})
Expect(err).ToNot(HaveOccurred(), "Lifecycle hook calls were not as expected")
})).To(Succeed(), "Lifecycle hook calls were not as expected")

By("PASSED!")
})

AfterEach(func() {
// Delete the extensionConfig first to ensure the BeforeClusterDelete hook doesn't block deletion.
Eventually(func() error {
return input.BootstrapClusterProxy.GetClient().Delete(ctx, extensionConfig(specName, namespace))
}, 10*time.Second, 1*time.Second).Should(Succeed(), "delete extensionConfig failed")

Eventually(func() error {
return input.BootstrapClusterProxy.GetClient().Delete(ctx, responsesConfigMap(clusterName, namespace))
}, 10*time.Second, 1*time.Second).Should(Succeed(), "delete responses configmap failed")

// Dumps all the resources in the spec Namespace, then cleans up the cluster object and the spec Namespace itself.
dumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup)

Eventually(func() error {
return input.BootstrapClusterProxy.GetClient().Delete(ctx, ext)
}, 10*time.Second, 1*time.Second).Should(Succeed())
})
}

@@ -303,11 +309,11 @@ func responsesConfigMap(name string, namespace *corev1.Namespace) *corev1.Config
},
// Set the initial preloadedResponses for each of the tested hooks.
Data: map[string]string{
// Blocking hooks are set to Status:Failure initially. These will be changed during the test.
"BeforeClusterCreate-preloadedResponse": fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
"BeforeClusterUpgrade-preloadedResponse": fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
"AfterControlPlaneUpgrade-preloadedResponse": fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
"BeforeClusterDelete-preloadedResponse": fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
// Blocking hooks are set to return RetryAfterSeconds initially. These will be changed during the test.
"BeforeClusterCreate-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`,
"BeforeClusterUpgrade-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`,
"AfterControlPlaneUpgrade-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`,
"BeforeClusterDelete-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`,

// Non-blocking hooks are set to Status:Success.
"AfterControlPlaneInitialized-preloadedResponse": `{"Status": "Success"}`,
@@ -364,10 +370,6 @@ func beforeClusterCreateTestHandler(ctx context.Context, c client.Client, namesp
return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
}).Should(Succeed())

// Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
if !clusterConditionShowsHookFailed(cluster, hookName) {
blocked = false
}
if cluster.Status.Phase == string(clusterv1.ClusterPhaseProvisioned) {
blocked = false
}
@@ -376,25 +378,21 @@
}

// beforeClusterUpgradeTestHandler calls runtimeHookTestHandler with a blocking function which returns false if the
// Cluster has controlplanev1.RollingUpdateInProgressReason in its ReadyCondition.
func beforeClusterUpgradeTestHandler(ctx context.Context, c client.Client, namespace, clusterName string, intervals []interface{}) {
// Cluster has at least one control plane machine at the target Kubernetes version.
func beforeClusterUpgradeTestHandler(ctx context.Context, c client.Client, namespace, clusterName, toVersion string, intervals []interface{}) {
hookName := "BeforeClusterUpgrade"
runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, true, func() bool {
runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, false, func() bool {
var blocked = true

cluster := &clusterv1.Cluster{}
Eventually(func() error {
return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
}).Should(Succeed())

// Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
if !clusterConditionShowsHookFailed(cluster, hookName) {
blocked = false
}
// Check if the Cluster is showing the RollingUpdateInProgress condition reason. If it has the update process is unblocked.
if conditions.IsFalse(cluster, clusterv1.ReadyCondition) &&
conditions.GetReason(cluster, clusterv1.ReadyCondition) == controlplanev1.RollingUpdateInProgressReason {
blocked = false
controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{
Lister: c,
ClusterName: clusterName,
Namespace: namespace,
})
for _, machine := range controlPlaneMachines {
if *machine.Spec.Version == toVersion {
blocked = false
}
}
return blocked
}, intervals)
@@ -411,11 +409,6 @@ func afterControlPlaneUpgradeTestHandler(ctx context.Context, c client.Client, n
return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
}).Should(Succeed())

// Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
if !clusterConditionShowsHookFailed(cluster, hookName) {
blocked = false
}

mds := &clusterv1.MachineDeploymentList{}
Eventually(func() error {
return c.List(ctx, mds, client.MatchingLabels{
@@ -464,23 +457,24 @@ func runtimeHookTestHandler(ctx context.Context, c client.Client, namespace, clu
if err := checkLifecycleHooksCalledAtLeastOnce(ctx, c, namespace, clusterName, []string{hookName}); err != nil {
return err
}
cluster := &clusterv1.Cluster{}
if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster); err != nil {
return err
}

// Check for the existence of the condition if withTopologyReconciledCondition is true.
if withTopologyReconciledCondition &&
(conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) != clusterv1.TopologyReconcileFailedReason) {
return errors.New("Condition not found on Cluster object")
if withTopologyReconciledCondition {
cluster := &clusterv1.Cluster{}
if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster); err != nil {
return err
}
if !clusterConditionShowsHookBlocking(cluster, hookName) {
return errors.Errorf("Blocking condition for %s not found on Cluster object", hookName)
}
}
return nil
}, 60*time.Second).Should(Succeed(), "%s has not been called", hookName)
}, 120*time.Second).Should(Succeed(), "%s has not been called", hookName)

// blockingCondition should consistently be true while the Runtime hook is returning a blocking response (a non-zero RetryAfterSeconds).
Consistently(func() bool {
return blockingCondition()
}, 30*time.Second).Should(BeTrue(),
}, 60*time.Second).Should(BeTrue(),
fmt.Sprintf("Cluster Topology reconciliation continued unexpectedly: hook %s not blocking", hookName))

// Patch the ConfigMap to set the hook response to "Success".
@@ -503,10 +497,9 @@ func runtimeHookTestHandler(ctx context.Context, c client.Client, namespace, clu
fmt.Sprintf("ClusterTopology reconcile did not unblock after updating hook response: hook %s still blocking", hookName))
}

// clusterConditionShowsHookFailed checks if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
func clusterConditionShowsHookFailed(cluster *clusterv1.Cluster, hookName string) bool {
return conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) == clusterv1.TopologyReconcileFailedReason &&
strings.Contains(conditions.GetMessage(cluster, clusterv1.TopologyReconciledCondition), hookFailedMessage) &&
// clusterConditionShowsHookBlocking checks if the TopologyReconciled condition reason is TopologyReconciledHookBlockingReason and its message contains the hook name.
func clusterConditionShowsHookBlocking(cluster *clusterv1.Cluster, hookName string) bool {
return conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) == clusterv1.TopologyReconciledHookBlockingReason &&
strings.Contains(conditions.GetMessage(cluster, clusterv1.TopologyReconciledCondition), hookName)
}

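For orientation, the unblocking step referenced by the "Patch the ConfigMap to set the hook response to "Success"" comment above amounts to a merge patch on the responses ConfigMap. A minimal sketch, assuming the ConfigMap is named "<clusterName>-hookresponses" (its construction in responsesConfigMap is elided from this diff); the "<hook>-preloadedResponse" key matches the diff above, and the helper itself is illustrative, not part of this commit:

package sketch // illustrative package, not part of the commit

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// unblockHook flips a blocking hook's preloaded response to a plain Success,
// so the next hook call carries no RetryAfterSeconds and reconciliation proceeds.
func unblockHook(ctx context.Context, c client.Client, namespace, clusterName, hookName string) error {
	configMap := &corev1.ConfigMap{}
	configMap.Namespace = namespace
	configMap.Name = clusterName + "-hookresponses" // assumed naming convention
	patch := client.RawPatch(types.MergePatchType,
		[]byte(fmt.Sprintf(`{"data":{"%s-preloadedResponse":%q}}`, hookName, `{"Status": "Success"}`)))
	return c.Patch(ctx, configMap, patch)
}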
18 changes: 15 additions & 3 deletions test/extension/handlers/lifecycle/handlers.go
@@ -153,6 +153,11 @@ func (h *Handler) readResponseFromConfigMap(ctx context.Context, name, namespace
if err := yaml.Unmarshal([]byte(configMap.Data[hookName+"-preloadedResponse"]), response); err != nil {
return errors.Wrapf(err, "failed to read %q response information from ConfigMap", hook)
}
if r, ok := response.(runtimehooksv1.RetryResponseObject); ok {
log := ctrl.LoggerFrom(ctx)
log.Info(fmt.Sprintf("%s response is %s. retry: %v", hookName, r.GetStatus(), r.GetRetryAfterSeconds()))
response = r
}
return nil
}

@@ -164,9 +169,16 @@ func (h *Handler) recordCallInConfigMap(ctx context.Context, name, namespace str
return errors.Wrapf(err, "failed to read the ConfigMap %s/%s", namespace, configMapName)
}

// Patch the actualResponseStatus with the returned value
patch := client.RawPatch(types.MergePatchType,
[]byte(fmt.Sprintf(`{"data":{"%s-actualResponseStatus":"%s"}}`, hookName, response.GetStatus()))) //nolint:gocritic
var patch client.Patch
if r, ok := response.(runtimehooksv1.RetryResponseObject); ok {
patch = client.RawPatch(types.MergePatchType,
[]byte(fmt.Sprintf(`{"data":{"%s-actualResponseStatus": "Status: %s, RetryAfterSeconds: %v"}}`, hookName, r.GetStatus(), r.GetRetryAfterSeconds()))) //nolint:gocritic

} else {
// Patch the actualResponseStatus with the returned value
patch = client.RawPatch(types.MergePatchType,
[]byte(fmt.Sprintf(`{"data":{"%s-actualResponseStatus":"%s"}}`, hookName, response.GetStatus()))) //nolint:gocritic
}
if err := h.Client.Patch(ctx, configMap, patch); err != nil {
return errors.Wrapf(err, "failed to update the ConfigMap %s/%s", namespace, configMapName)
}
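As a usage note, the new RetryResponseObject branch above is what produces the "Status: ..., RetryAfterSeconds: ..." strings that checkLifecycleHookResponses now expects on the e2e side. A small standalone sketch of the resulting merge-patch payload (hook name and values are illustrative):

package main

import "fmt"

func main() {
	hookName := "BeforeClusterCreate"
	status := "Success"
	retryAfterSeconds := int32(0)
	// Mirrors the RetryResponseObject branch in recordCallInConfigMap; the resulting
	// ConfigMap value is what the test compares against, e.g. "Status: Success, RetryAfterSeconds: 0".
	patch := fmt.Sprintf(`{"data":{"%s-actualResponseStatus": "Status: %s, RetryAfterSeconds: %v"}}`,
		hookName, status, retryAfterSeconds)
	fmt.Println(patch)
}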
