Skip to content

Commit

Permalink
Refactor e2e tests to use RetryAfterSeconds
Browse files Browse the repository at this point in the history
  • Loading branch information
killianmuldoon committed Jul 8, 2022
1 parent 03f62b1 commit 62b6f55
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 98 deletions.
152 changes: 59 additions & 93 deletions test/e2e/cluster_upgrade_runtimesdk.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
runtimev1 "sigs.k8s.io/cluster-api/exp/runtime/api/v1alpha1"
"sigs.k8s.io/cluster-api/test/e2e/internal/log"
"sigs.k8s.io/cluster-api/test/framework"
Expand All @@ -44,8 +43,6 @@ import (
"sigs.k8s.io/cluster-api/util/conditions"
)

var hookFailedMessage = "hook failed"

// clusterUpgradeWithRuntimeSDKSpecInput is the input for clusterUpgradeWithRuntimeSDKSpec.
type clusterUpgradeWithRuntimeSDKSpecInput struct {
E2EConfig *clusterctl.E2EConfig
Expand Down Expand Up @@ -83,14 +80,14 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
var (
input clusterUpgradeWithRuntimeSDKSpecInput
namespace *corev1.Namespace
ext *runtimev1.ExtensionConfig
cancelWatches context.CancelFunc

controlPlaneMachineCount int64
workerMachineCount int64

clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
testExtensionPath string
clusterName string
)

BeforeEach(func() {
Expand Down Expand Up @@ -123,11 +120,12 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl

// Set up a Namespace where to host objects for this spec and create a watcher for the Namespace events.
namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder)
clusterName = fmt.Sprintf("%s-%s", specName, util.RandomString(6))

clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
})

It("Should create, upgrade and delete a workload cluster", func() {
clusterName := fmt.Sprintf("%s-%s", specName, util.RandomString(6))
By("Deploy Test Extension")
testExtensionDeploymentTemplate, err := os.ReadFile(testExtensionPath) //nolint:gosec
Expect(err).ToNot(HaveOccurred(), "Failed to read the extension deployment manifest file")
Expand All @@ -141,12 +139,14 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
Expect(input.BootstrapClusterProxy.Apply(ctx, []byte(testExtensionDeployment), "--namespace", namespace.Name)).To(Succeed())

By("Deploy Test Extension ExtensionConfig and ConfigMap")
ext = extensionConfig(specName, namespace)
err = input.BootstrapClusterProxy.GetClient().Create(ctx, ext)
Expect(err).ToNot(HaveOccurred(), "Failed to create the extension config")
responses := responsesConfigMap(clusterName, namespace)
err = input.BootstrapClusterProxy.GetClient().Create(ctx, responses)
Expect(err).ToNot(HaveOccurred(), "Failed to create the responses configmap")

Expect(input.BootstrapClusterProxy.GetClient().Create(ctx,
extensionConfig(specName, namespace))).
To(Succeed(), "Failed to create the extension config")

Expect(input.BootstrapClusterProxy.GetClient().Create(ctx,
responsesConfigMap(clusterName, namespace))).
To(Succeed())

By("Creating a workload cluster")

Expand Down Expand Up @@ -195,6 +195,7 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl
input.BootstrapClusterProxy.GetClient(),
namespace.Name,
clusterName,
input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo),
input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"))
},
PreWaitForMachineDeploymentToBeUpgraded: func() {
Expand Down Expand Up @@ -236,26 +237,30 @@ func clusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() cl

By("Checking all lifecycle hooks have been called")
// Assert that each hook has been called and returned "Success" during the test.
err = checkLifecycleHookResponses(ctx, input.BootstrapClusterProxy.GetClient(), namespace.Name, clusterName, map[string]string{
"BeforeClusterCreate": "Success",
"BeforeClusterUpgrade": "Success",
"BeforeClusterDelete": "Success",
Expect(checkLifecycleHookResponses(ctx, input.BootstrapClusterProxy.GetClient(), namespace.Name, clusterName, map[string]string{
"BeforeClusterCreate": "Status: Success, RetryAfterSeconds: 0",
"BeforeClusterUpgrade": "Status: Success, RetryAfterSeconds: 0",
"BeforeClusterDelete": "Status: Success, RetryAfterSeconds: 0",
"AfterControlPlaneUpgrade": "Status: Success, RetryAfterSeconds: 0",
"AfterControlPlaneInitialized": "Success",
"AfterControlPlaneUpgrade": "Success",
"AfterClusterUpgrade": "Success",
})
Expect(err).ToNot(HaveOccurred(), "Lifecycle hook calls were not as expected")
})).To(Succeed(), "Lifecycle hook calls were not as expected")

By("PASSED!")
})

AfterEach(func() {
// Dumps all the resources in the spec Namespace, then cleanups the cluster object and the spec Namespace itself.
dumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup)
// Delete the extensionConfig first to ensure the BeforeDeleteCluster hook doesn't block deletion.
Eventually(func() error {
return input.BootstrapClusterProxy.GetClient().Delete(ctx, extensionConfig(specName, namespace))
}, 10*time.Second, 1*time.Second).Should(Succeed(), "delete extensionConfig failed")

Eventually(func() error {
return input.BootstrapClusterProxy.GetClient().Delete(ctx, ext)
}, 10*time.Second, 1*time.Second).Should(Succeed())
return input.BootstrapClusterProxy.GetClient().Delete(ctx, responsesConfigMap(clusterName, namespace))
}, 10*time.Second, 1*time.Second).Should(Succeed(), "delete responses configmap failed")

// Dumps all the resources in the spec Namespace, then cleanups the cluster object and the spec Namespace itself.
dumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup)
})
}

Expand Down Expand Up @@ -303,11 +308,11 @@ func responsesConfigMap(name string, namespace *corev1.Namespace) *corev1.Config
},
// Set the initial preloadedResponses for each of the tested hooks.
Data: map[string]string{
// Blocking hooks are set to Status:Failure initially. These will be changed during the test.
"BeforeClusterCreate-preloadedResponse": fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
"BeforeClusterUpgrade-preloadedResponse": fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
"AfterControlPlaneUpgrade-preloadedResponse": fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
"BeforeClusterDelete-preloadedResponse": fmt.Sprintf(`{"Status": "Failure", "Message": %q}`, hookFailedMessage),
// Blocking hooks are set to return RetryAfterSeconds initially. These will be changed during the test.
"BeforeClusterCreate-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`,
"BeforeClusterUpgrade-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`,
"AfterControlPlaneUpgrade-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`,
"BeforeClusterDelete-preloadedResponse": `{"Status": "Success", "RetryAfterSeconds": 5}`,

// Non-blocking hooks are set to Status:Success.
"AfterControlPlaneInitialized-preloadedResponse": `{"Status": "Success"}`,
Expand Down Expand Up @@ -359,42 +364,27 @@ func beforeClusterCreateTestHandler(ctx context.Context, c client.Client, namesp
runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, true, func() bool {
blocked := true
// This hook should block the Cluster from entering the "Provisioned" state.
cluster := &clusterv1.Cluster{}
Eventually(func() error {
return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
}).Should(Succeed())
cluster := framework.GetClusterByName(ctx, framework.GetClusterByNameInput{Name: clusterName, Namespace: namespace, Getter: c})

// Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
if !clusterConditionShowsHookFailed(cluster, hookName) {
blocked = false
}
if cluster.Status.Phase == string(clusterv1.ClusterPhaseProvisioned) {
blocked = false
}
return blocked
}, intervals)
}

// beforeClusterUpgradeTestHandler calls runtimeHookTestHandler with a blocking function which returns false if the
// Cluster has controlplanev1.RollingUpdateInProgressReason in its ReadyCondition.
func beforeClusterUpgradeTestHandler(ctx context.Context, c client.Client, namespace, clusterName string, intervals []interface{}) {
// beforeClusterUpgradeTestHandler calls runtimeHookTestHandler with a blocking function which returns false if
// any of the machines in the control plane has been updated to the target Kubernetes version.
func beforeClusterUpgradeTestHandler(ctx context.Context, c client.Client, namespace, clusterName, toVersion string, intervals []interface{}) {
hookName := "BeforeClusterUpgrade"
runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, true, func() bool {
runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, false, func() bool {
var blocked = true

cluster := &clusterv1.Cluster{}
Eventually(func() error {
return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
}).Should(Succeed())

// Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
if !clusterConditionShowsHookFailed(cluster, hookName) {
blocked = false
}
// Check if the Cluster is showing the RollingUpdateInProgress condition reason. If it has the update process is unblocked.
if conditions.IsFalse(cluster, clusterv1.ReadyCondition) &&
conditions.GetReason(cluster, clusterv1.ReadyCondition) == controlplanev1.RollingUpdateInProgressReason {
blocked = false
controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{Lister: c, ClusterName: clusterName, Namespace: namespace})
for _, machine := range controlPlaneMachines {
if *machine.Spec.Version == toVersion {
blocked = false
}
}
return blocked
}, intervals)
Expand All @@ -406,26 +396,10 @@ func afterControlPlaneUpgradeTestHandler(ctx context.Context, c client.Client, n
hookName := "AfterControlPlaneUpgrade"
runtimeHookTestHandler(ctx, c, namespace, clusterName, hookName, true, func() bool {
var blocked = true
cluster := &clusterv1.Cluster{}
Eventually(func() error {
return c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
}).Should(Succeed())

// Check if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
if !clusterConditionShowsHookFailed(cluster, hookName) {
blocked = false
}

mds := &clusterv1.MachineDeploymentList{}
Eventually(func() error {
return c.List(ctx, mds, client.MatchingLabels{
clusterv1.ClusterLabelName: clusterName,
clusterv1.ClusterTopologyOwnedLabel: "",
})
}).Should(Succeed())

mds := framework.GetMachineDeploymentsByCluster(ctx, framework.GetMachineDeploymentsByClusterInput{ClusterName: clusterName, Namespace: namespace, Lister: c})
// If any of the MachineDeployments have the target Kubernetes Version, the hook is unblocked.
for _, md := range mds.Items {
for _, md := range mds {
if *md.Spec.Template.Spec.Version == version {
blocked = false
}
Expand Down Expand Up @@ -464,23 +438,22 @@ func runtimeHookTestHandler(ctx context.Context, c client.Client, namespace, clu
if err := checkLifecycleHooksCalledAtLeastOnce(ctx, c, namespace, clusterName, []string{hookName}); err != nil {
return err
}
cluster := &clusterv1.Cluster{}
if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster); err != nil {
return err
}

// Check for the existence of the condition if withTopologyReconciledCondition is true.
if withTopologyReconciledCondition &&
(conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) != clusterv1.TopologyReconcileFailedReason) {
return errors.New("Condition not found on Cluster object")
if withTopologyReconciledCondition {
cluster := framework.GetClusterByName(ctx, framework.GetClusterByNameInput{Name: clusterName, Namespace: namespace, Getter: c})

if !clusterConditionShowsHookBlocking(cluster, hookName) {
return errors.Errorf("Blocking condition for %s not found on Cluster object", hookName)
}
}
return nil
}, 60*time.Second).Should(Succeed(), "%s has not been called", hookName)
}, 30*time.Second).Should(Succeed(), "%s has not been called", hookName)

// blockingCondition should consistently be true as the Runtime hook is returning "Failure".
Consistently(func() bool {
return blockingCondition()
}, 30*time.Second).Should(BeTrue(),
}, 60*time.Second).Should(BeTrue(),
fmt.Sprintf("Cluster Topology reconciliation continued unexpectedly: hook %s not blocking", hookName))

// Patch the ConfigMap to set the hook response to "Success".
Expand All @@ -500,32 +473,25 @@ func runtimeHookTestHandler(ctx context.Context, c client.Client, namespace, clu
Eventually(func() bool {
return blockingCondition()
}, intervals...).Should(BeFalse(),
fmt.Sprintf("ClusterTopology reconcile did not unblock after updating hook response: hook %s still blocking", hookName))
fmt.Sprintf("ClusterTopology reconcile did proceed as expected when calling %s", hookName))
}

// clusterConditionShowsHookFailed checks if the TopologyReconciled condition message contains both the hook name and hookFailedMessage.
func clusterConditionShowsHookFailed(cluster *clusterv1.Cluster, hookName string) bool {
return conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) == clusterv1.TopologyReconcileFailedReason &&
strings.Contains(conditions.GetMessage(cluster, clusterv1.TopologyReconciledCondition), hookFailedMessage) &&
// clusterConditionShowsHookBlocking returns true if the Cluster's TopologyReconciled
// condition reports that the given lifecycle hook is blocking reconciliation, i.e. the
// condition reason is TopologyReconciledHookBlockingReason and the condition message
// mentions the hook by name. Used by the e2e handlers to assert that a blocking hook
// response (Status: Success with RetryAfterSeconds > 0) is actually holding up topology
// reconciliation.
func clusterConditionShowsHookBlocking(cluster *clusterv1.Cluster, hookName string) bool {
return conditions.GetReason(cluster, clusterv1.TopologyReconciledCondition) == clusterv1.TopologyReconciledHookBlockingReason &&
strings.Contains(conditions.GetMessage(cluster, clusterv1.TopologyReconciledCondition), hookName)
}

func dumpAndDeleteCluster(ctx context.Context, proxy framework.ClusterProxy, namespace, clusterName, artifactFolder string) {
By("Deleting the workload cluster")
cluster := &clusterv1.Cluster{}
Eventually(func() error {
return proxy.GetClient().Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
}).Should(Succeed())

cluster := framework.GetClusterByName(ctx, framework.GetClusterByNameInput{Name: clusterName, Namespace: namespace, Getter: proxy.GetClient()})

// Dump all the logs from the workload cluster before deleting them.
proxy.CollectWorkloadClusterLogs(ctx, cluster.Namespace, cluster.Name, filepath.Join(artifactFolder, "clusters-beforeClusterDelete", cluster.Name))

// Dump all Cluster API related resources to artifacts before deleting them.
framework.DumpAllResources(ctx, framework.DumpAllResourcesInput{
Lister: proxy.GetClient(),
Namespace: namespace,
LogPath: filepath.Join(artifactFolder, "clusters-beforeClusterDelete", proxy.GetName(), "resources"),
})
framework.DumpAllResources(ctx, framework.DumpAllResourcesInput{Lister: proxy.GetClient(), Namespace: namespace, LogPath: filepath.Join(artifactFolder, "clusters-beforeClusterDelete", proxy.GetName(), "resources")})

By("Deleting the workload cluster")
framework.DeleteCluster(ctx, framework.DeleteClusterInput{
Expand Down
17 changes: 13 additions & 4 deletions test/extension/handlers/lifecycle/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ func (h *Handler) readResponseFromConfigMap(ctx context.Context, name, namespace
if err := yaml.Unmarshal([]byte(configMap.Data[hookName+"-preloadedResponse"]), response); err != nil {
return errors.Wrapf(err, "failed to read %q response information from ConfigMap", hook)
}
if r, ok := response.(runtimehooksv1.RetryResponseObject); ok {
log := ctrl.LoggerFrom(ctx)
log.Info(fmt.Sprintf("%s response is %s. retry: %v", hookName, r.GetStatus(), r.GetRetryAfterSeconds()))
}
return nil
}

Expand All @@ -163,10 +167,15 @@ func (h *Handler) recordCallInConfigMap(ctx context.Context, name, namespace str
if err := h.Client.Get(ctx, client.ObjectKey{Namespace: namespace, Name: configMapName}, configMap); err != nil {
return errors.Wrapf(err, "failed to read the ConfigMap %s/%s", namespace, configMapName)
}

// Patch the actualResponseStatus with the returned value
patch := client.RawPatch(types.MergePatchType,
[]byte(fmt.Sprintf(`{"data":{"%s-actualResponseStatus":"%s"}}`, hookName, response.GetStatus()))) //nolint:gocritic
var patch client.Patch
if r, ok := response.(runtimehooksv1.RetryResponseObject); ok {
patch = client.RawPatch(types.MergePatchType,
[]byte(fmt.Sprintf(`{"data":{"%s-actualResponseStatus": "Status: %s, RetryAfterSeconds: %v"}}`, hookName, r.GetStatus(), r.GetRetryAfterSeconds())))
} else {
// Patch the actualResponseStatus with the returned value
patch = client.RawPatch(types.MergePatchType,
[]byte(fmt.Sprintf(`{"data":{"%s-actualResponseStatus":"%s"}}`, hookName, response.GetStatus()))) //nolint:gocritic
}
if err := h.Client.Patch(ctx, configMap, patch); err != nil {
return errors.Wrapf(err, "failed to update the ConfigMap %s/%s", namespace, configMapName)
}
Expand Down
2 changes: 1 addition & 1 deletion test/framework/cluster_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ type DeleteClusterInput struct {
Cluster *clusterv1.Cluster
}

// DeleteCluster deletes the cluster and waits for everything the cluster owned to actually be gone.
// DeleteCluster deletes the cluster.
func DeleteCluster(ctx context.Context, input DeleteClusterInput) {
Byf("Deleting cluster %s", input.Cluster.GetName())
Expect(input.Deleter.Delete(ctx, input.Cluster)).To(Succeed())
Expand Down

0 comments on commit 62b6f55

Please sign in to comment.