Skip to content

Commit

Permalink
Handle race condition when upgrading capi CRDs (#2841)
Browse files Browse the repository at this point in the history
  • Loading branch information
g-gaston committed Jan 23, 2024
1 parent 68d9da8 commit 39b26ff
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 12 deletions.
2 changes: 1 addition & 1 deletion projects/kubernetes-sigs/cluster-api/ATTRIBUTION.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,7 @@ https://github.com/ProtonMail/go-crypto
** github.com/sagikazarmark/slog-shim; version v0.1.0 --
https://github.com/sagikazarmark/slog-shim

** golang.org/go; version go1.20.12 --
** golang.org/go; version go1.20.13 --
https://github.com/golang/go

** golang.org/x/crypto; version v0.15.0 --
Expand Down
2 changes: 1 addition & 1 deletion projects/kubernetes-sigs/cluster-api/CAPD_ATTRIBUTION.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

------

** golang.org/go; version go1.20.12 --
** golang.org/go; version go1.20.13 --
https://github.com/golang/go

** golang.org/x/crypto; version v0.15.0 --
Expand Down
20 changes: 10 additions & 10 deletions projects/kubernetes-sigs/cluster-api/CHECKSUMS
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
8d75c41829c86e1340443eb4c48a604b2dc680530300a20dda3e2b3c290c9047 _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager
0a5b6099f80f82ef0d4dfae0ce2dcffe96819163373cf2ff41c598672357139f _output/bin/cluster-api/linux-amd64/clusterctl
e263f8ac74622367487a1e49f3822caea21b814d5f321cae4d4ac8089dc0189f _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager
8c2acaafc4d212202a1cca6ff2c5b0985f1db0e2729e00c6bf0da6aabc14590d _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager
bf65de26330f9b7df377f824eedece8a156c034fd90a378b38c961547a9cdb38 _output/bin/cluster-api/linux-amd64/manager
5bae0734b01e5b614028c9552df20ac95547f669475ae7995240f497def1c377 _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager
2c11737adb858e53159798aeb7090a0abc9e1e056fbc587d2fe58bebc3799932 _output/bin/cluster-api/linux-arm64/clusterctl
47af4e933fa23d90a6510d76fff9d6aa25c9799ac6a4f96882832058da33b77e _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager
40a8e36f663ef3a25261513442fe3e9acd9c35bc62258292d20fad7bd8cc6ddc _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager
3b0d7261b88b3c0e0110738fb14f896d414b42dc4f86612f60426d42fb1ddea9 _output/bin/cluster-api/linux-arm64/manager
9e409df3cac9ddc5226b5a1f738e68f2933d5d7fbee33499b65626427baf369a _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager
f2cbf46f03eff0b97656d379c1db3a29df4ddcbc6b71deea8e042265ae5ec2de _output/bin/cluster-api/linux-amd64/clusterctl
43e444a9f0d887b57401229f9037e2d2b920dad5d4895da9b42c2f380c0e3102 _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager
2e7c6e330af51cd4bef2f6053e078dc40e5e9b6416180c15541a66b28ce5c2bb _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager
cc0492b82ce60da6f5639439faf7b10cab24feb4b777e965471e5505f5e7461a _output/bin/cluster-api/linux-amd64/manager
b51fab0cb01d664e0f05f6ff9229bbb07a0df5364c8e6c59dcecf9d51ccd75fa _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager
6564d579e98c114d22b8121cc48459f0122763e5a0bcd87d853a47f2574399d5 _output/bin/cluster-api/linux-arm64/clusterctl
efb9113830891c935fc6d6f99f151b284d6d685a4236820baa705be2c9e9901b _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager
92f81c147b8b0ca2a9d4648b07582f05c8eba4c4f783438acef125cca696ba6e _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager
38d69ed88660d1ff6ecac3fe902628eac3908eb4daa05e8eac9903e85efe3194 _output/bin/cluster-api/linux-arm64/manager
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
From 146972a1d190a1317627a80e8b2fdf2418bbc231 Mon Sep 17 00:00:00 2001
From: Guillermo Gaston <gaslor@amazon.com>
Date: Sat, 20 Jan 2024 22:05:04 +0000
Subject: [PATCH] Restart controller if RESTMapping outdated cache is detected
when reconciling external object

---
controllers/external/util.go | 28 +++++++-
controllers/external/util_test.go | 104 +++++++++++++++++++++++++++++-
2 files changed, 128 insertions(+), 4 deletions(-)

diff --git a/controllers/external/util.go b/controllers/external/util.go
index 5b6443c78..a8b8caa11 100644
--- a/controllers/external/util.go
+++ b/controllers/external/util.go
@@ -19,13 +19,17 @@ package external
import (
"context"
"strings"
+ "syscall"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apiserver/pkg/storage/names"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
+ "sigs.k8s.io/controller-runtime/pkg/log"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)
@@ -40,12 +44,34 @@ func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, name
obj.SetKind(ref.Kind)
obj.SetName(ref.Name)
key := client.ObjectKey{Name: obj.GetName(), Namespace: namespace}
- if err := c.Get(ctx, key, obj); err != nil {
+ err := c.Get(ctx, key, obj)
+ if isV1alpha4NotFoundFromDiscoveryError(err) {
+ logErrorAndGracefulShutdown(
+ ctx,
+ err,
+ "Client RESTMapper returned an error from an invalid cache referencing infrastructure.cluster.x-k8s.io/v1alpha4, exiting the program to force a new cache to be built",
+ )
+ }
+ if err != nil {
return nil, errors.Wrapf(err, "failed to retrieve %s external object %q/%q", obj.GetKind(), key.Namespace, key.Name)
}
return obj, nil
}

+func isV1alpha4NotFoundFromDiscoveryError(err error) bool {
+ discoverFailedErr := &apiutil.ErrResourceDiscoveryFailed{}
+ noResourceMatchErr := &meta.NoResourceMatchError{}
+ return errors.As(err, &discoverFailedErr) &&
+ errors.As(err, &noResourceMatchErr) && // This is the error that ErrResourceDiscoveryFailed will unwrap when the original error is NotFound.
+ strings.Contains(err.Error(), "infrastructure.cluster.x-k8s.io/v1alpha4")
+}
+
+func logErrorAndGracefulShutdown(ctx context.Context, err error, msg string) {
+ logger := log.FromContext(ctx)
+ logger.Error(err, msg)
+ syscall.Kill(syscall.Getpid(), syscall.SIGINT)
+}
+
// Delete uses the client and reference to delete an external, unstructured object.
func Delete(ctx context.Context, c client.Writer, ref *corev1.ObjectReference) error {
obj := new(unstructured.Unstructured)
diff --git a/controllers/external/util_test.go b/controllers/external/util_test.go
index 012445478..13e57306a 100644
--- a/controllers/external/util_test.go
+++ b/controllers/external/util_test.go
@@ -17,6 +17,7 @@ limitations under the License.
package external

import (
+ "fmt"
"testing"

. "github.com/onsi/gomega"
@@ -25,16 +26,16 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+ "k8s.io/apimachinery/pkg/runtime/schema"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

-var (
- ctx = ctrl.SetupSignalHandler()
-)
+var ctx = ctrl.SetupSignalHandler()

const (
testClusterName = "test-cluster"
@@ -323,3 +324,100 @@ func TestCloneTemplateMissingSpecTemplate(t *testing.T) {
})
g.Expect(err).To(HaveOccurred())
}
+
+func TestIsV1alpha4NotFoundFromDiscoveryError(t *testing.T) {
+ tests := []struct {
+ name string
+ err error
+ want bool
+ }{
+ {
+ name: "the error we are looking for",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"),
+ },
+ want: true,
+ },
+ {
+ name: "the error we are looking for but wrapped",
+ err: fmt.Errorf("failed to get restmapping: %w",
+ &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"),
+ },
+ ),
+ want: true,
+ },
+ {
+ name: "v1alpha4 not found with different group",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "different.group",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "different.group/v1alpha4"),
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io not found error with different version",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "differentkind",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/differentkind"),
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io/v1alpha4 but different error that is not NotFound",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: errors.New("some other error"),
+ },
+ want: false,
+ },
+ {
+ name: "plain not found error",
+ err: &apierrors.StatusError{
+ ErrStatus: metav1.Status{
+ Reason: metav1.StatusReasonNotFound,
+ },
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io/v1alpha4 not found error",
+ err: &apierrors.StatusError{
+ ErrStatus: metav1.Status{
+ Reason: metav1.StatusReasonNotFound,
+ Message: "infrastructure.cluster.x-k8s.io/v1alpha4",
+ },
+ },
+ want: false,
+ },
+ {
+ name: "not error",
+ err: nil,
+ want: false,
+ },
+ {
+ name: "other error",
+ err: errors.New("some other error"),
+ want: false,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ g := NewWithT(t)
+ g.Expect(isV1alpha4NotFoundFromDiscoveryError(test.err)).To(Equal(test.want))
+ })
+ }
+}
--
2.34.1

0 comments on commit 39b26ff

Please sign in to comment.