Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle race condition when upgrading capi CRDs #2841

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion projects/kubernetes-sigs/cluster-api/ATTRIBUTION.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,7 @@ https://github.com/ProtonMail/go-crypto
** github.com/sagikazarmark/slog-shim; version v0.1.0 --
https://github.com/sagikazarmark/slog-shim

** golang.org/go; version go1.20.12 --
** golang.org/go; version go1.20.13 --
https://github.com/golang/go

** golang.org/x/crypto; version v0.15.0 --
Expand Down
2 changes: 1 addition & 1 deletion projects/kubernetes-sigs/cluster-api/CAPD_ATTRIBUTION.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

------

** golang.org/go; version go1.20.12 --
** golang.org/go; version go1.20.13 --
https://github.com/golang/go

** golang.org/x/crypto; version v0.15.0 --
Expand Down
20 changes: 10 additions & 10 deletions projects/kubernetes-sigs/cluster-api/CHECKSUMS
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
8d75c41829c86e1340443eb4c48a604b2dc680530300a20dda3e2b3c290c9047 _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager
0a5b6099f80f82ef0d4dfae0ce2dcffe96819163373cf2ff41c598672357139f _output/bin/cluster-api/linux-amd64/clusterctl
e263f8ac74622367487a1e49f3822caea21b814d5f321cae4d4ac8089dc0189f _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager
8c2acaafc4d212202a1cca6ff2c5b0985f1db0e2729e00c6bf0da6aabc14590d _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager
bf65de26330f9b7df377f824eedece8a156c034fd90a378b38c961547a9cdb38 _output/bin/cluster-api/linux-amd64/manager
5bae0734b01e5b614028c9552df20ac95547f669475ae7995240f497def1c377 _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager
2c11737adb858e53159798aeb7090a0abc9e1e056fbc587d2fe58bebc3799932 _output/bin/cluster-api/linux-arm64/clusterctl
47af4e933fa23d90a6510d76fff9d6aa25c9799ac6a4f96882832058da33b77e _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager
40a8e36f663ef3a25261513442fe3e9acd9c35bc62258292d20fad7bd8cc6ddc _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager
3b0d7261b88b3c0e0110738fb14f896d414b42dc4f86612f60426d42fb1ddea9 _output/bin/cluster-api/linux-arm64/manager
9e409df3cac9ddc5226b5a1f738e68f2933d5d7fbee33499b65626427baf369a _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager
f2cbf46f03eff0b97656d379c1db3a29df4ddcbc6b71deea8e042265ae5ec2de _output/bin/cluster-api/linux-amd64/clusterctl
43e444a9f0d887b57401229f9037e2d2b920dad5d4895da9b42c2f380c0e3102 _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager
2e7c6e330af51cd4bef2f6053e078dc40e5e9b6416180c15541a66b28ce5c2bb _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager
cc0492b82ce60da6f5639439faf7b10cab24feb4b777e965471e5505f5e7461a _output/bin/cluster-api/linux-amd64/manager
b51fab0cb01d664e0f05f6ff9229bbb07a0df5364c8e6c59dcecf9d51ccd75fa _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager
6564d579e98c114d22b8121cc48459f0122763e5a0bcd87d853a47f2574399d5 _output/bin/cluster-api/linux-arm64/clusterctl
efb9113830891c935fc6d6f99f151b284d6d685a4236820baa705be2c9e9901b _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager
92f81c147b8b0ca2a9d4648b07582f05c8eba4c4f783438acef125cca696ba6e _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager
38d69ed88660d1ff6ecac3fe902628eac3908eb4daa05e8eac9903e85efe3194 _output/bin/cluster-api/linux-arm64/manager
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
From 146972a1d190a1317627a80e8b2fdf2418bbc231 Mon Sep 17 00:00:00 2001
From: Guillermo Gaston <gaslor@amazon.com>
Date: Sat, 20 Jan 2024 22:05:04 +0000
Subject: [PATCH] Restart controller if RESTMapping outdated cache is detected
when reconciling external object

---
controllers/external/util.go | 28 +++++++-
controllers/external/util_test.go | 104 +++++++++++++++++++++++++++++-
2 files changed, 128 insertions(+), 4 deletions(-)

diff --git a/controllers/external/util.go b/controllers/external/util.go
index 5b6443c78..a8b8caa11 100644
--- a/controllers/external/util.go
+++ b/controllers/external/util.go
@@ -19,13 +19,17 @@ package external
import (
"context"
"strings"
+ "syscall"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apiserver/pkg/storage/names"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
+ "sigs.k8s.io/controller-runtime/pkg/log"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)
@@ -40,12 +44,34 @@ func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, name
obj.SetKind(ref.Kind)
obj.SetName(ref.Name)
key := client.ObjectKey{Name: obj.GetName(), Namespace: namespace}
- if err := c.Get(ctx, key, obj); err != nil {
+ err := c.Get(ctx, key, obj)
+ if isV1alpha4NotFoundFromDiscoveryError(err) {
+ logErrorAndGracefulShutdown(
+ ctx,
+ err,
+ "Client RESTMapper returned an error from an invalid cache referencing infrastructure.cluster.x-k8s.io/v1alpha4, exiting the program to force a new cache to be built",
+ )
+ }
+ if err != nil {
return nil, errors.Wrapf(err, "failed to retrieve %s external object %q/%q", obj.GetKind(), key.Namespace, key.Name)
}
return obj, nil
}

+func isV1alpha4NotFoundFromDiscoveryError(err error) bool {
+ discoverFailedErr := &apiutil.ErrResourceDiscoveryFailed{}
+ noResourceMatchErr := &meta.NoResourceMatchError{}
+ return errors.As(err, &discoverFailedErr) &&
+ errors.As(err, &noResourceMatchErr) && // This is the error that ErrResourceDiscoveryFailed will unwrap when the original error is NotFound.
+ strings.Contains(err.Error(), "infrastructure.cluster.x-k8s.io/v1alpha4")
+}
+
+func logErrorAndGracefulShutdown(ctx context.Context, err error, msg string) {
+ logger := log.FromContext(ctx)
+ logger.Error(err, msg)
+ syscall.Kill(syscall.Getpid(), syscall.SIGINT)
+}
+
// Delete uses the client and reference to delete an external, unstructured object.
func Delete(ctx context.Context, c client.Writer, ref *corev1.ObjectReference) error {
obj := new(unstructured.Unstructured)
diff --git a/controllers/external/util_test.go b/controllers/external/util_test.go
index 012445478..13e57306a 100644
--- a/controllers/external/util_test.go
+++ b/controllers/external/util_test.go
@@ -17,6 +17,7 @@ limitations under the License.
package external

import (
+ "fmt"
"testing"

. "github.com/onsi/gomega"
@@ -25,16 +26,16 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+ "k8s.io/apimachinery/pkg/runtime/schema"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

-var (
- ctx = ctrl.SetupSignalHandler()
-)
+var ctx = ctrl.SetupSignalHandler()

const (
testClusterName = "test-cluster"
@@ -323,3 +324,100 @@ func TestCloneTemplateMissingSpecTemplate(t *testing.T) {
})
g.Expect(err).To(HaveOccurred())
}
+
+func TestIsV1alpha4NotFoundFromDiscoveryError(t *testing.T) {
+ tests := []struct {
+ name string
+ err error
+ want bool
+ }{
+ {
+ name: "the error we are looking for",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"),
+ },
+ want: true,
+ },
+ {
+ name: "the error we are looking for but wrapped",
+ err: fmt.Errorf("failed to get restmapping: %w",
+ &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"),
+ },
+ ),
+ want: true,
+ },
+ {
+ name: "v1alpha4 not found with different group",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "different.group",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "different.group/v1alpha4"),
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io not found error with different version",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "differentkind",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/differentkind"),
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io/v1alpha4 but different error that is not NotFound",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: errors.New("some other error"),
+ },
+ want: false,
+ },
+ {
+ name: "plain not found error",
+ err: &apierrors.StatusError{
+ ErrStatus: metav1.Status{
+ Reason: metav1.StatusReasonNotFound,
+ },
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io/v1alpha4 not found error",
+ err: &apierrors.StatusError{
+ ErrStatus: metav1.Status{
+ Reason: metav1.StatusReasonNotFound,
+ Message: "infrastructure.cluster.x-k8s.io/v1alpha4",
+ },
+ },
+ want: false,
+ },
+ {
+ name: "not error",
+ err: nil,
+ want: false,
+ },
+ {
+ name: "other error",
+ err: errors.New("some other error"),
+ want: false,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ g := NewWithT(t)
+ g.Expect(isV1alpha4NotFoundFromDiscoveryError(test.err)).To(Equal(test.want))
+ })
+ }
+}
--
2.34.1