From de8a503f5f5627eec8003cf307b0f042f49bf2ce Mon Sep 17 00:00:00 2001 From: Guillermo Gaston Date: Mon, 22 Jan 2024 23:06:20 +0000 Subject: [PATCH] Handle race condition when upgrading capi CRDs --- .../kubernetes-sigs/cluster-api/CHECKSUMS | 20 +- ...er-if-RESTMapping-outdated-cache-is-.patch | 205 ++++++++++++++++++ 2 files changed, 215 insertions(+), 10 deletions(-) create mode 100644 projects/kubernetes-sigs/cluster-api/patches/0037-Restart-controller-if-RESTMapping-outdated-cache-is-.patch diff --git a/projects/kubernetes-sigs/cluster-api/CHECKSUMS b/projects/kubernetes-sigs/cluster-api/CHECKSUMS index 5e8e95eb32..f5b146c935 100644 --- a/projects/kubernetes-sigs/cluster-api/CHECKSUMS +++ b/projects/kubernetes-sigs/cluster-api/CHECKSUMS @@ -1,10 +1,10 @@ -8a009d153d56a6d81e06428711aeecc217d9f4707471851d9fcff0820b601c47 _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager -853efbf32c9a44edcf4816e24a5ba18c3b4c291b3c13546e9c2a028b19f472ad _output/bin/cluster-api/linux-amd64/clusterctl -07189d12d166319ae0aa36a279e2bc6af913a6ceca2621c019fc83cdb3bd271c _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager -3d5f6f72d4b7c74c0097f2854066cc55b7be31841a1cad35fa6738883aa16b68 _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager -c16ae332488a063969e17d03e404d7add1b5e00d67119a06e97f483a3f0d4bb0 _output/bin/cluster-api/linux-amd64/manager -254b2fa5972428e3eac45b76f75523019ae289f2307af18a3903bf4480ee078a _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager -68f0ca12b8cd80cdc52995236770530ad70491004f64e1a3ad67abe8cf592e3c _output/bin/cluster-api/linux-arm64/clusterctl -2816ba9ad224d286d1aaf877d3dc3cd66d41c9c35f7c16aca0d8b66c723c5b16 _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager -cf72aae2400e25f2bbe60ebc058bbc308e7a00837048fa10df18dfb1c9f888c3 _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager -3565095fe0d684d6289bc53688928500ada8cf014bc4a4742e8bc6ba8ff8c0b7 _output/bin/cluster-api/linux-arm64/manager +67d6cb389867539d0aed1952ce33e78fca6a7d2ee0a425a3147e157362ba9717 _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager +6fb0798347409c36d891d8f1cd3fdff362f9bce682343ede19e41d62970db210 _output/bin/cluster-api/linux-amd64/clusterctl +70694c97f4b0c7d7111f300bd654e91604c2af9b6f8a660b3e6fe29a658f1fad _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager +8867c7afcb0f2f0b861833e59c6f0daf13dbf73bccc9ae1b281b3812f57befe1 _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager +51fe66e2d8f6a069dd03e6b1db27510bc4797a22ded974a288ce0cddb3ff32b1 _output/bin/cluster-api/linux-amd64/manager +caf5156a8b3292f9ab95601362d7867e0dec1ceeb28c0ccb119e3adb85fa4d40 _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager +3ce6d80c627f8c7c43445cf75c75a06307c7f54876db732f1f2d0ee5a868db85 _output/bin/cluster-api/linux-arm64/clusterctl +58b7909f8ec479b15552f13f587402c1f42298aec3ade5a6f570b0a08c086eb0 _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager +d30f76f517d62b753f3bf3022a618a64d3e8192658e5b2e93473e3c03807fd73 _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager +8ca71c26f2ae0c3afcd1aaf2aa00840a39197cfaf418dbafae9e1db6ddc89d3b _output/bin/cluster-api/linux-arm64/manager diff --git a/projects/kubernetes-sigs/cluster-api/patches/0037-Restart-controller-if-RESTMapping-outdated-cache-is-.patch b/projects/kubernetes-sigs/cluster-api/patches/0037-Restart-controller-if-RESTMapping-outdated-cache-is-.patch new file mode 100644 index 0000000000..840ad182d3 --- /dev/null +++ b/projects/kubernetes-sigs/cluster-api/patches/0037-Restart-controller-if-RESTMapping-outdated-cache-is-.patch @@ -0,0 +1,205 @@ +From 146972a1d190a1317627a80e8b2fdf2418bbc231 Mon Sep 17 00:00:00 2001 +From: Guillermo Gaston +Date: Sat, 20 Jan 2024 22:05:04 +0000 +Subject: [PATCH] Restart controller if RESTMapping outdated cache is detected + when reconciling external object + +--- + controllers/external/util.go | 28 +++++++- + controllers/external/util_test.go | 104 +++++++++++++++++++++++++++++- + 2 files changed, 128 insertions(+), 4 deletions(-) + +diff --git a/controllers/external/util.go b/controllers/external/util.go +index 5b6443c78..a8b8caa11 100644 +--- a/controllers/external/util.go ++++ b/controllers/external/util.go +@@ -19,13 +19,17 @@ package external + import ( + "context" + "strings" ++ "syscall" + + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" ++ "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apiserver/pkg/storage/names" + "sigs.k8s.io/controller-runtime/pkg/client" ++ "sigs.k8s.io/controller-runtime/pkg/client/apiutil" ++ "sigs.k8s.io/controller-runtime/pkg/log" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + ) +@@ -40,12 +44,34 @@ func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, name + obj.SetKind(ref.Kind) + obj.SetName(ref.Name) + key := client.ObjectKey{Name: obj.GetName(), Namespace: namespace} +- if err := c.Get(ctx, key, obj); err != nil { ++ err := c.Get(ctx, key, obj) ++ if isV1alpha4NotFoundFromDiscoveryError(err) { ++ logErrorAndGracefulShutdown( ++ ctx, ++ err, ++ "Client RESTMapper returned an error from an invalid cache referencing infrastructure.cluster.x-k8s.io/v1alpha4, exiting the program to force a new cache to be built", ++ ) ++ } ++ if err != nil { + return nil, errors.Wrapf(err, "failed to retrieve %s external object %q/%q", obj.GetKind(), key.Namespace, key.Name) + } + return obj, nil + } + ++func isV1alpha4NotFoundFromDiscoveryError(err error) bool { ++ discoverFailedErr := &apiutil.ErrResourceDiscoveryFailed{} ++ noResourceMatchErr := &meta.NoResourceMatchError{} ++ return errors.As(err, &discoverFailedErr) && ++ errors.As(err, &noResourceMatchErr) && // This is the error that ErrResourceDiscoveryFailed will unwrap when the original error is NotFound. ++ strings.Contains(err.Error(), "infrastructure.cluster.x-k8s.io/v1alpha4") ++} ++ ++func logErrorAndGracefulShutdown(ctx context.Context, err error, msg string) { ++ logger := log.FromContext(ctx) ++ logger.Error(err, msg) ++ syscall.Kill(syscall.Getpid(), syscall.SIGINT) ++} ++ + // Delete uses the client and reference to delete an external, unstructured object. + func Delete(ctx context.Context, c client.Writer, ref *corev1.ObjectReference) error { + obj := new(unstructured.Unstructured) +diff --git a/controllers/external/util_test.go b/controllers/external/util_test.go +index 012445478..13e57306a 100644 +--- a/controllers/external/util_test.go ++++ b/controllers/external/util_test.go +@@ -17,6 +17,7 @@ limitations under the License. + package external + + import ( ++ "fmt" + "testing" + + . "github.com/onsi/gomega" +@@ -25,16 +26,16 @@ import ( + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ++ "k8s.io/apimachinery/pkg/runtime/schema" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" ++ "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + ) + +-var ( +- ctx = ctrl.SetupSignalHandler() +-) ++var ctx = ctrl.SetupSignalHandler() + + const ( + testClusterName = "test-cluster" +@@ -323,3 +324,100 @@ func TestCloneTemplateMissingSpecTemplate(t *testing.T) { + }) + g.Expect(err).To(HaveOccurred()) + } ++ ++func TestIsV1alpha4NotFoundFromDiscoveryError(t *testing.T) { ++ tests := []struct { ++ name string ++ err error ++ want bool ++ }{ ++ { ++ name: "the error we are looking for", ++ err: &apiutil.ErrResourceDiscoveryFailed{ ++ schema.GroupVersion{ ++ Group: "infrastructure.cluster.x-k8s.io", ++ Version: "v1alpha4", ++ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"), ++ }, ++ want: true, ++ }, ++ { ++ name: "the error we are looking for but wrapped", ++ err: fmt.Errorf("failed to get restmapping: %w", ++ &apiutil.ErrResourceDiscoveryFailed{ ++ schema.GroupVersion{ ++ Group: "infrastructure.cluster.x-k8s.io", ++ Version: "v1alpha4", ++ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"), ++ }, ++ ), ++ want: true, ++ }, ++ { ++ name: "v1alpha4 not found with different group", ++ err: &apiutil.ErrResourceDiscoveryFailed{ ++ schema.GroupVersion{ ++ Group: "different.group", ++ Version: "v1alpha4", ++ }: apierrors.NewNotFound(schema.GroupResource{}, "different.group/v1alpha4"), ++ }, ++ want: false, ++ }, ++ { ++ name: "infrastructure.cluster.x-k8s.io not found error with different version", ++ err: &apiutil.ErrResourceDiscoveryFailed{ ++ schema.GroupVersion{ ++ Group: "infrastructure.cluster.x-k8s.io", ++ Version: "differentkind", ++ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/differentkind"), ++ }, ++ want: false, ++ }, ++ { ++ name: "infrastructure.cluster.x-k8s.io/v1alpha4 but different error that is not NotFound", ++ err: &apiutil.ErrResourceDiscoveryFailed{ ++ schema.GroupVersion{ ++ Group: "infrastructure.cluster.x-k8s.io", ++ Version: "v1alpha4", ++ }: errors.New("some other error"), ++ }, ++ want: false, ++ }, ++ { ++ name: "plain not found error", ++ err: &apierrors.StatusError{ ++ ErrStatus: metav1.Status{ ++ Reason: metav1.StatusReasonNotFound, ++ }, ++ }, ++ want: false, ++ }, ++ { ++ name: "infrastructure.cluster.x-k8s.io/v1alpha4 not found error", ++ err: &apierrors.StatusError{ ++ ErrStatus: metav1.Status{ ++ Reason: metav1.StatusReasonNotFound, ++ Message: "infrastructure.cluster.x-k8s.io/v1alpha4", ++ }, ++ }, ++ want: false, ++ }, ++ { ++ name: "not error", ++ err: nil, ++ want: false, ++ }, ++ { ++ name: "other error", ++ err: errors.New("some other error"), ++ want: false, ++ }, ++ } ++ ++ for _, test := range tests { ++ t.Run(test.name, func(t *testing.T) { ++ g := NewWithT(t) ++ g.Expect(isV1alpha4NotFoundFromDiscoveryError(test.err)).To(Equal(test.want)) ++ }) ++ } ++} +-- +2.34.1 +