ensure finalizers are resilient on reconciliation
adityabhatia committed Oct 19, 2023
1 parent 5786b77 commit 7d805d1
Showing 4 changed files with 212 additions and 4 deletions.
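For context, the property this commit tests is the standard controller-runtime pattern of re-adding a missing finalizer on every reconcile pass. Below is a minimal sketch of that pattern, assuming a typical controller-runtime reconciler; the demoReconciler name is illustrative only and this is not the commit's (or Cluster API's) actual controller code:

package demo

import (
	"context"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// demoReconciler is a hypothetical reconciler used only to illustrate the pattern;
// the real Cluster API controllers are considerably more involved.
type demoReconciler struct {
	client.Client
}

func (r *demoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	cluster := &clusterv1.Cluster{}
	if err := r.Get(ctx, req.NamespacedName, cluster); err != nil {
		return ctrl.Result{}, client.IgnoreNotFound(err)
	}

	// Re-add the finalizer whenever it is missing on a live object; this is why a
	// finalizer removed by an external actor is expected to reappear once the
	// Cluster is unpaused and reconciliation resumes.
	if cluster.DeletionTimestamp.IsZero() && !controllerutil.ContainsFinalizer(cluster, clusterv1.ClusterFinalizer) {
		controllerutil.AddFinalizer(cluster, clusterv1.ClusterFinalizer)
		if err := r.Update(ctx, cluster); err != nil {
			return ctrl.Result{}, err
		}
	}
	return ctrl.Result{}, nil
}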
4 changes: 2 additions & 2 deletions cmd/clusterctl/client/cluster/ownergraph.go
@@ -56,7 +56,7 @@ func GetOwnerGraph(ctx context.Context, namespace, kubeconfigPath string) (Owner
 	}
 
 	// graph.Discovery can not be used here as it will use the latest APIVersion for ownerReferences - not those
-	// present in the object 'metadata.ownerReferences`.
+	// present in the object `metadata.ownerReferences`.
 	owners, err := discoverOwnerGraph(ctx, namespace, graph)
 	if err != nil {
 		return OwnerGraph{}, errors.Wrap(err, "failed to discovery ownerGraph types")
@@ -107,7 +107,7 @@ func discoverOwnerGraph(ctx context.Context, namespace string, o *objectGraph) (
 			continue
 		}
 		// Exclude the default service account from the owner graph.
-		// This Secret is not longer generated by default in Kubernetes 1.24+.
+		// This Secret is no longer generated by default in Kubernetes 1.24+.
 		// This is not a CAPI related Secret, so it can be ignored.
 		if obj.GetKind() == "Secret" && strings.Contains(obj.GetName(), "default-token") {
 			continue
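GetOwnerGraph is the entry point the new finalizer helpers build on (see finalizers_helpers.go below). For orientation, a minimal consumer is sketched here; the node fields are inferred from the usage in this commit rather than from package documentation, and the function name is hypothetical:

package demo

import (
	"context"
	"fmt"

	clusterctlcluster "sigs.k8s.io/cluster-api/cmd/clusterctl/client/cluster"
)

// listOwnerGraph prints every object discovered in the namespace's owner graph.
func listOwnerGraph(ctx context.Context, namespace, kubeconfigPath string) error {
	graph, err := clusterctlcluster.GetOwnerGraph(ctx, namespace, kubeconfigPath)
	if err != nil {
		return err
	}
	for _, node := range graph {
		// Each node carries an ObjectReference-style descriptor of one discovered object.
		fmt.Printf("%s %s/%s\n", node.Object.Kind, node.Object.Namespace, node.Object.Name)
	}
	return nil
}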
35 changes: 35 additions & 0 deletions test/e2e/quick_start_test.go
@@ -181,3 +181,38 @@ var _ = Describe("When following the Cluster API quick-start with dualstack and
		}
	})
})

var _ = Describe("When following the Cluster API quick-start check finalizers resilience after deletion", func() {
	QuickStartSpec(ctx, func() QuickStartSpecInput {
		return QuickStartSpecInput{
			E2EConfig:              e2eConfig,
			ClusterctlConfigPath:   clusterctlConfigPath,
			BootstrapClusterProxy:  bootstrapClusterProxy,
			ArtifactFolder:         artifactFolder,
			SkipCleanup:            skipCleanup,
			InfrastructureProvider: pointer.String("docker"),
			PostMachinesProvisioned: func(proxy framework.ClusterProxy, namespace, clusterName string) {
				// This check ensures that finalizers are resilient - i.e. correctly re-reconciled - when removed.
				framework.ValidateFinalizersResilience(ctx, proxy, namespace, clusterName)
			},
		}
	})
})

var _ = Describe("When following the Cluster API quick-start with ClusterClass check finalizers resilience after deletion [ClusterClass]", func() {
	QuickStartSpec(ctx, func() QuickStartSpecInput {
		return QuickStartSpecInput{
			E2EConfig:              e2eConfig,
			ClusterctlConfigPath:   clusterctlConfigPath,
			BootstrapClusterProxy:  bootstrapClusterProxy,
			ArtifactFolder:         artifactFolder,
			SkipCleanup:            skipCleanup,
			Flavor:                 pointer.String("topology"),
			InfrastructureProvider: pointer.String("docker"),
			PostMachinesProvisioned: func(proxy framework.ClusterProxy, namespace, clusterName string) {
				// This check ensures that finalizers are resilient - i.e. correctly re-reconciled - when removed.
				framework.ValidateFinalizersResilience(ctx, proxy, namespace, clusterName)
			},
		}
	})
})
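These two specs run as part of the e2e suite. Assuming the repository's usual entry point and Ginkgo focus filtering (an assumption about the surrounding tooling, not something this commit adds), a focused run would look something like:

GINKGO_FOCUS="check finalizers resilience" make test-e2e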
170 changes: 170 additions & 0 deletions test/framework/finalizers_helpers.go
@@ -0,0 +1,170 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package framework

import (
	"context"
	"fmt"
	"reflect"
	"time"

	. "github.com/onsi/gomega"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/types"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	clusterctlcluster "sigs.k8s.io/cluster-api/cmd/clusterctl/client/cluster"
	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
	addonsv1 "sigs.k8s.io/cluster-api/exp/addons/api/v1beta1"
	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/patch"
)

// finalizerAssertion contains the list of expected Finalizers corresponding to each resource Kind.
var finalizerAssertion = map[string][]string{
	"Cluster":             {clusterv1.ClusterFinalizer},
	"Machine":             {clusterv1.MachineFinalizer},
	"MachineSet":          {clusterv1.MachineSetTopologyFinalizer},
	"MachineDeployment":   {clusterv1.MachineDeploymentTopologyFinalizer},
	"ClusterResourceSet":  {addonsv1.ClusterResourceSetFinalizer},
	"DockerMachine":       {infrav1.MachineFinalizer},
	"DockerCluster":       {infrav1.ClusterFinalizer},
	"KubeadmControlPlane": {controlplanev1.KubeadmControlPlaneFinalizer},
}

// ValidateFinalizersResilience checks that expected finalizers are in place, deletes them, and verifies that they are properly added back on reconciliation.
func ValidateFinalizersResilience(ctx context.Context, proxy ClusterProxy, namespace, clusterName string) {
	clusterKey := client.ObjectKey{Namespace: namespace, Name: clusterName}

	// Collect all objects where finalizers were initially set.
	objectsWithFinalizers := getObjectsWithFinalizers(ctx, proxy, namespace)

	// Setting the paused property on the Cluster resource pauses reconciliations, so removing Finalizers below has no
	// immediate effect. This also makes debugging easier.
	setClusterPause(ctx, proxy.GetClient(), clusterKey, true)

	// We are testing the worst-case scenario, i.e. all finalizers are deleted.
	// Once all Clusters are paused, remove all the Finalizers from all objects in the graph.
	// The reconciliation loop should be able to recover from this by adding the required Finalizers back.
	removeFinalizers(ctx, proxy, namespace)

	// Unpause the cluster.
	setClusterPause(ctx, proxy.GetClient(), clusterKey, false)

	// Annotate the ClusterClass, if one is in use, to speed up reconciliation. This ensures ClusterClass Finalizers
	// are re-reconciled before asserting the owner reference graph.
	forceClusterClassReconcile(ctx, proxy.GetClient(), clusterKey)

	// Annotate the MachineDeployment to speed up reconciliation. This ensures MachineDeployment topology Finalizers are re-reconciled.
	// TODO: Remove this as part of https://github.com/kubernetes-sigs/cluster-api/issues/9532
	forceMachineDeploymentTopologyReconcile(ctx, proxy.GetClient(), clusterKey)

	// Check that the Finalizers are as expected after further reconciliations.
	assertFinalizersExist(ctx, proxy, namespace, objectsWithFinalizers)
}

// removeFinalizers removes all Finalizers from objects in the owner graph.
func removeFinalizers(ctx context.Context, proxy ClusterProxy, namespace string) {
	graph, err := clusterctlcluster.GetOwnerGraph(ctx, namespace, proxy.GetKubeconfigPath())
	Expect(err).ToNot(HaveOccurred())
	for _, object := range graph {
		ref := object.Object
		obj := new(unstructured.Unstructured)
		obj.SetAPIVersion(ref.APIVersion)
		obj.SetKind(ref.Kind)
		obj.SetName(ref.Name)

		Expect(proxy.GetClient().Get(ctx, client.ObjectKey{Namespace: namespace, Name: ref.Name}, obj)).To(Succeed())
		helper, err := patch.NewHelper(obj, proxy.GetClient())
		Expect(err).ToNot(HaveOccurred())
		obj.SetFinalizers([]string{})
		Expect(helper.Patch(ctx, obj)).To(Succeed())
	}
}

// getObjectsWithFinalizers returns all objects in the owner graph that currently have at least one Finalizer set, keyed by namespaced name.
func getObjectsWithFinalizers(ctx context.Context, proxy ClusterProxy, namespace string) map[string]*unstructured.Unstructured {
	graph, err := clusterctlcluster.GetOwnerGraph(ctx, namespace, proxy.GetKubeconfigPath())
	Expect(err).ToNot(HaveOccurred())

	objsWithFinalizers := map[string]*unstructured.Unstructured{}

	for _, node := range graph {
		nodeNamespacedName := client.ObjectKey{Namespace: node.Object.Namespace, Name: node.Object.Name}
		obj := &unstructured.Unstructured{}
		obj.SetAPIVersion(node.Object.APIVersion)
		obj.SetKind(node.Object.Kind)
		err = proxy.GetClient().Get(ctx, nodeNamespacedName, obj)
		Expect(err).ToNot(HaveOccurred())

		if setFinalizers := obj.GetFinalizers(); len(setFinalizers) > 0 {
			objsWithFinalizers[nodeNamespacedName.String()] = obj
		}
	}

	return objsWithFinalizers
}

// assertFinalizersExist ensures that the current Finalizers match those recorded in initialObjsWithFinalizers.
func assertFinalizersExist(ctx context.Context, proxy ClusterProxy, namespace string, initialObjsWithFinalizers map[string]*unstructured.Unstructured) {
	Eventually(func() error {
		var allErrs []error
		finalObjsWithFinalizers := getObjectsWithFinalizers(ctx, proxy, namespace)

		for objNamespacedName, obj := range initialObjsWithFinalizers {
			// Verify that finalizers for this resource were set on reconcile.
			if _, valid := finalObjsWithFinalizers[objNamespacedName]; !valid {
				allErrs = append(allErrs, fmt.Errorf("no finalizers set for %s/%s",
					obj.GetKind(), objNamespacedName))
				continue
			}

			// Verify that this resource has the appropriate Finalizers set.
			expectedFinalizers, assert := finalizerAssertion[obj.GetKind()]
			if !assert {
				continue
			}

			setFinalizers := finalObjsWithFinalizers[objNamespacedName].GetFinalizers()
			if !reflect.DeepEqual(expectedFinalizers, setFinalizers) {
				allErrs = append(allErrs, fmt.Errorf("expected finalizers do not exist for %s/%s: expected: %v, found: %v",
					obj.GetKind(), objNamespacedName, expectedFinalizers, setFinalizers))
			}
		}

		return kerrors.NewAggregate(allErrs)
	}).WithTimeout(1 * time.Minute).WithPolling(2 * time.Second).Should(Succeed())
}

// forceMachineDeploymentTopologyReconcile forces reconciliation of the topology-owned MachineDeployments.
func forceMachineDeploymentTopologyReconcile(ctx context.Context, cli client.Client, clusterKey types.NamespacedName) {
	mdList := &clusterv1.MachineDeploymentList{}
	clientOptions := (&client.ListOptions{}).ApplyOptions([]client.ListOption{
		client.MatchingLabels{clusterv1.ClusterNameLabel: clusterKey.Name},
	})
	Expect(cli.List(ctx, mdList, clientOptions)).To(Succeed())

	for i := range mdList.Items {
		if _, ok := mdList.Items[i].GetLabels()[clusterv1.ClusterTopologyOwnedLabel]; ok {
			// Touching an annotation is enough to trigger a reconciliation of the MachineDeployment.
			annotationPatch := client.RawPatch(types.MergePatchType, []byte(fmt.Sprintf("{\"metadata\":{\"annotations\":{\"cluster.x-k8s.io/modifiedAt\":\"%v\"}}}", time.Now().Format(time.RFC3339))))
			Expect(cli.Patch(ctx, &mdList.Items[i], annotationPatch)).To(Succeed())
		}
	}
}
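Note that setClusterPause and forceClusterClassReconcile are reused from the existing test/framework/ownerreference_helpers.go rather than defined in this new file. For orientation, here is a rough sketch of what the pause helper does, assuming Cluster API's spec.paused field, a JSON merge patch, and the same imports as the file above; this is an approximation, not the commit's code:

// setClusterPauseSketch approximates the existing helper; the real implementation
// lives in test/framework/ownerreference_helpers.go and may differ in detail.
func setClusterPauseSketch(ctx context.Context, cli client.Client, clusterKey types.NamespacedName, value bool) {
	cluster := &clusterv1.Cluster{}
	Expect(cli.Get(ctx, clusterKey, cluster)).To(Succeed())

	// spec.paused stops all Cluster API reconcilers for this Cluster, which is what
	// keeps the removed Finalizers from being restored until the Cluster is unpaused.
	pausePatch := client.RawPatch(types.MergePatchType, []byte(fmt.Sprintf("{\"spec\":{\"paused\":%t}}", value)))
	Expect(cli.Patch(ctx, cluster, pausePatch)).To(Succeed())
}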
7 changes: 5 additions & 2 deletions test/framework/ownerreference_helpers.go
@@ -80,6 +80,9 @@ func ValidateOwnerReferencesResilience(ctx context.Context, proxy ClusterProxy,
 	// edge case where an external system intentionally nukes all the owner references in a single operation.
 	// The assumption is that if the system can recover from this edge case, it can also handle use cases where an owner
 	// reference is deleted by mistake.
+
+	// Setting the paused property on the Cluster resource pauses reconciliations, so removing OwnerReferences below has no
+	// immediate effect. This also makes debugging easier.
 	setClusterPause(ctx, proxy.GetClient(), clusterKey, true)
 
 	// Once all Clusters are paused remove the OwnerReference from all objects in the graph.
@@ -88,7 +91,7 @@ func ValidateOwnerReferencesResilience(ctx context.Context, proxy ClusterProxy,
 	// Unpause the cluster.
 	setClusterPause(ctx, proxy.GetClient(), clusterKey, false)
 
-	// Annotate the clusterClass, if one is in use, to speed up reconciliation. This ensures ClusterClass ownerReferences
+	// Annotate the ClusterClass, if one is in use, to speed up reconciliation. This ensures ClusterClass ownerReferences
 	// are re-reconciled before asserting the owner reference graph.
 	forceClusterClassReconcile(ctx, proxy.GetClient(), clusterKey)
 
@@ -104,7 +107,7 @@ func AssertOwnerReferences(namespace, kubeconfigPath string, assertFuncs ...map[
 		}
 	}
 	Eventually(func() error {
-		allErrs := []error{}
+		var allErrs []error
 		ctx := context.Background()
 
 		graph, err := clusterctlcluster.GetOwnerGraph(ctx, namespace, kubeconfigPath)
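The change from allErrs := []error{} to var allErrs []error is purely stylistic: a nil slice appends fine, and kerrors.NewAggregate returns nil for an empty list either way, so declaring a nil slice avoids allocating an empty one. A standalone illustration of that behaviour (not part of the commit):

package main

import (
	"errors"
	"fmt"

	kerrors "k8s.io/apimachinery/pkg/util/errors"
)

func main() {
	// Idiomatic Go: declare a nil slice instead of allocating an empty one.
	var allErrs []error
	fmt.Println(kerrors.NewAggregate(allErrs)) // <nil>: no errors, no aggregate

	allErrs = append(allErrs, errors.New("finalizer missing"))
	fmt.Println(kerrors.NewAggregate(allErrs)) // finalizer missing
}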
