Skip to content

Commit

Permalink
Handle race condition when upgrading capi CRDs
Browse files Browse the repository at this point in the history
  • Loading branch information
g-gaston committed Jan 22, 2024
1 parent 94b51dc commit de8a503
Show file tree
Hide file tree
Showing 2 changed files with 215 additions and 10 deletions.
20 changes: 10 additions & 10 deletions projects/kubernetes-sigs/cluster-api/CHECKSUMS
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
8a009d153d56a6d81e06428711aeecc217d9f4707471851d9fcff0820b601c47 _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager
853efbf32c9a44edcf4816e24a5ba18c3b4c291b3c13546e9c2a028b19f472ad _output/bin/cluster-api/linux-amd64/clusterctl
07189d12d166319ae0aa36a279e2bc6af913a6ceca2621c019fc83cdb3bd271c _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager
3d5f6f72d4b7c74c0097f2854066cc55b7be31841a1cad35fa6738883aa16b68 _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager
c16ae332488a063969e17d03e404d7add1b5e00d67119a06e97f483a3f0d4bb0 _output/bin/cluster-api/linux-amd64/manager
254b2fa5972428e3eac45b76f75523019ae289f2307af18a3903bf4480ee078a _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager
68f0ca12b8cd80cdc52995236770530ad70491004f64e1a3ad67abe8cf592e3c _output/bin/cluster-api/linux-arm64/clusterctl
2816ba9ad224d286d1aaf877d3dc3cd66d41c9c35f7c16aca0d8b66c723c5b16 _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager
cf72aae2400e25f2bbe60ebc058bbc308e7a00837048fa10df18dfb1c9f888c3 _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager
3565095fe0d684d6289bc53688928500ada8cf014bc4a4742e8bc6ba8ff8c0b7 _output/bin/cluster-api/linux-arm64/manager
67d6cb389867539d0aed1952ce33e78fca6a7d2ee0a425a3147e157362ba9717 _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager
6fb0798347409c36d891d8f1cd3fdff362f9bce682343ede19e41d62970db210 _output/bin/cluster-api/linux-amd64/clusterctl
70694c97f4b0c7d7111f300bd654e91604c2af9b6f8a660b3e6fe29a658f1fad _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager
8867c7afcb0f2f0b861833e59c6f0daf13dbf73bccc9ae1b281b3812f57befe1 _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager
51fe66e2d8f6a069dd03e6b1db27510bc4797a22ded974a288ce0cddb3ff32b1 _output/bin/cluster-api/linux-amd64/manager
caf5156a8b3292f9ab95601362d7867e0dec1ceeb28c0ccb119e3adb85fa4d40 _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager
3ce6d80c627f8c7c43445cf75c75a06307c7f54876db732f1f2d0ee5a868db85 _output/bin/cluster-api/linux-arm64/clusterctl
58b7909f8ec479b15552f13f587402c1f42298aec3ade5a6f570b0a08c086eb0 _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager
d30f76f517d62b753f3bf3022a618a64d3e8192658e5b2e93473e3c03807fd73 _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager
8ca71c26f2ae0c3afcd1aaf2aa00840a39197cfaf418dbafae9e1db6ddc89d3b _output/bin/cluster-api/linux-arm64/manager
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
From 146972a1d190a1317627a80e8b2fdf2418bbc231 Mon Sep 17 00:00:00 2001
From: Guillermo Gaston <gaslor@amazon.com>
Date: Sat, 20 Jan 2024 22:05:04 +0000
Subject: [PATCH] Restart controller if RESTMapping outdated cache is detected
when reconciling external object

---
controllers/external/util.go | 28 +++++++-
controllers/external/util_test.go | 104 +++++++++++++++++++++++++++++-
2 files changed, 128 insertions(+), 4 deletions(-)

diff --git a/controllers/external/util.go b/controllers/external/util.go
index 5b6443c78..a8b8caa11 100644
--- a/controllers/external/util.go
+++ b/controllers/external/util.go
@@ -19,13 +19,17 @@ package external
import (
"context"
"strings"
+ "syscall"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apiserver/pkg/storage/names"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
+ "sigs.k8s.io/controller-runtime/pkg/log"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)
@@ -40,12 +44,34 @@ func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, name
obj.SetKind(ref.Kind)
obj.SetName(ref.Name)
key := client.ObjectKey{Name: obj.GetName(), Namespace: namespace}
- if err := c.Get(ctx, key, obj); err != nil {
+ err := c.Get(ctx, key, obj)
+ if isV1alpha4NotFoundFromDiscoveryError(err) {
+ logErrorAndGracefulShutdown(
+ ctx,
+ err,
+ "Client RESTMapper returned an error from an invalid cache referencing infrastructure.cluster.x-k8s.io/v1alpha4, exiting the program to force a new cache to be built",
+ )
+ }
+ if err != nil {
return nil, errors.Wrapf(err, "failed to retrieve %s external object %q/%q", obj.GetKind(), key.Namespace, key.Name)
}
return obj, nil
}

+func isV1alpha4NotFoundFromDiscoveryError(err error) bool {
+ discoverFailedErr := &apiutil.ErrResourceDiscoveryFailed{}
+ noResourceMatchErr := &meta.NoResourceMatchError{}
+ return errors.As(err, &discoverFailedErr) &&
+ errors.As(err, &noResourceMatchErr) && // This is the error that ErrResourceDiscoveryFailed will unwrap when the original error is NotFound.
+ strings.Contains(err.Error(), "infrastructure.cluster.x-k8s.io/v1alpha4")
+}
+
+// logErrorAndGracefulShutdown logs err with msg through the logger carried by
+// ctx and then sends SIGINT to the current process.
+//
+// The function returns right after sending the signal; it neither blocks nor
+// exits the process itself, so callers keep running until the signal is acted
+// upon.
+// NOTE(review): presumably the SIGINT is picked up by the manager's signal
+// handler, which triggers the graceful shutdown — confirm against the caller's
+// setup.
+func logErrorAndGracefulShutdown(ctx context.Context, err error, msg string) {
+	logger := log.FromContext(ctx)
+	logger.Error(err, msg)
+
+	// Surface a failed signal delivery instead of dropping it: without the
+	// signal the process would keep running and the failure would go unnoticed.
+	if killErr := syscall.Kill(syscall.Getpid(), syscall.SIGINT); killErr != nil {
+		logger.Error(killErr, "Failed to send SIGINT to the current process")
+	}
+}
+
// Delete uses the client and reference to delete an external, unstructured object.
func Delete(ctx context.Context, c client.Writer, ref *corev1.ObjectReference) error {
obj := new(unstructured.Unstructured)
diff --git a/controllers/external/util_test.go b/controllers/external/util_test.go
index 012445478..13e57306a 100644
--- a/controllers/external/util_test.go
+++ b/controllers/external/util_test.go
@@ -17,6 +17,7 @@ limitations under the License.
package external

import (
+ "fmt"
"testing"

. "github.com/onsi/gomega"
@@ -25,16 +26,16 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+ "k8s.io/apimachinery/pkg/runtime/schema"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

-var (
- ctx = ctrl.SetupSignalHandler()
-)
+var ctx = ctrl.SetupSignalHandler()

const (
testClusterName = "test-cluster"
@@ -323,3 +324,100 @@ func TestCloneTemplateMissingSpecTemplate(t *testing.T) {
})
g.Expect(err).To(HaveOccurred())
}
+
+// TestIsV1alpha4NotFoundFromDiscoveryError runs isV1alpha4NotFoundFromDiscoveryError
+// against a table of errors: the targeted discovery failure (bare and wrapped),
+// discovery failures for another group or version, a discovery failure whose
+// cause is not NotFound, plain NotFound status errors, nil and an unrelated error.
+func TestIsV1alpha4NotFoundFromDiscoveryError(t *testing.T) {
+	// discoveryFailed builds a discovery failure with a single group/version entry.
+	discoveryFailed := func(group, version string, cause error) error {
+		return &apiutil.ErrResourceDiscoveryFailed{
+			schema.GroupVersion{Group: group, Version: version}: cause,
+		}
+	}
+	// notFound builds a NotFound API error for an empty GroupResource.
+	notFound := func(name string) error {
+		return apierrors.NewNotFound(schema.GroupResource{}, name)
+	}
+	// notFoundStatus builds a bare NotFound StatusError carrying only a message.
+	notFoundStatus := func(message string) error {
+		return &apierrors.StatusError{
+			ErrStatus: metav1.Status{
+				Reason:  metav1.StatusReasonNotFound,
+				Message: message,
+			},
+		}
+	}
+
+	cases := []struct {
+		name string
+		err  error
+		want bool
+	}{
+		{
+			name: "the error we are looking for",
+			err: discoveryFailed(
+				"infrastructure.cluster.x-k8s.io", "v1alpha4",
+				notFound("infrastructure.cluster.x-k8s.io/v1alpha4"),
+			),
+			want: true,
+		},
+		{
+			name: "the error we are looking for but wrapped",
+			err: fmt.Errorf("failed to get restmapping: %w",
+				discoveryFailed(
+					"infrastructure.cluster.x-k8s.io", "v1alpha4",
+					notFound("infrastructure.cluster.x-k8s.io/v1alpha4"),
+				),
+			),
+			want: true,
+		},
+		{
+			name: "v1alpha4 not found with different group",
+			err: discoveryFailed(
+				"different.group", "v1alpha4",
+				notFound("different.group/v1alpha4"),
+			),
+			want: false,
+		},
+		{
+			name: "infrastructure.cluster.x-k8s.io not found error with different version",
+			err: discoveryFailed(
+				"infrastructure.cluster.x-k8s.io", "differentkind",
+				notFound("infrastructure.cluster.x-k8s.io/differentkind"),
+			),
+			want: false,
+		},
+		{
+			name: "infrastructure.cluster.x-k8s.io/v1alpha4 but different error that is not NotFound",
+			err: discoveryFailed(
+				"infrastructure.cluster.x-k8s.io", "v1alpha4",
+				errors.New("some other error"),
+			),
+			want: false,
+		},
+		{
+			name: "plain not found error",
+			err:  notFoundStatus(""),
+			want: false,
+		},
+		{
+			name: "infrastructure.cluster.x-k8s.io/v1alpha4 not found error",
+			err:  notFoundStatus("infrastructure.cluster.x-k8s.io/v1alpha4"),
+			want: false,
+		},
+		{
+			name: "not error",
+			err:  nil,
+			want: false,
+		},
+		{
+			name: "other error",
+			err:  errors.New("some other error"),
+			want: false,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := isV1alpha4NotFoundFromDiscoveryError(tc.err)
+			NewWithT(t).Expect(got).To(Equal(tc.want))
+		})
+	}
+}
--
2.34.1

0 comments on commit de8a503

Please sign in to comment.