Skip to content

Commit

Permalink
Handle race condition when upgrading capi CRDs
Browse files Browse the repository at this point in the history
  • Loading branch information
g-gaston committed Jan 23, 2024
1 parent 94b51dc commit dfff637
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 12 deletions.
2 changes: 1 addition & 1 deletion projects/kubernetes-sigs/cluster-api/ATTRIBUTION.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,7 @@ https://github.com/ProtonMail/go-crypto
** github.com/sagikazarmark/slog-shim; version v0.1.0 --
https://github.com/sagikazarmark/slog-shim

** golang.org/go; version go1.20.12 --
** golang.org/go; version go1.20.13 --
https://github.com/golang/go

** golang.org/x/crypto; version v0.15.0 --
Expand Down
2 changes: 1 addition & 1 deletion projects/kubernetes-sigs/cluster-api/CAPD_ATTRIBUTION.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

------

** golang.org/go; version go1.20.12 --
** golang.org/go; version go1.20.13 --
https://github.com/golang/go

** golang.org/x/crypto; version v0.15.0 --
Expand Down
20 changes: 10 additions & 10 deletions projects/kubernetes-sigs/cluster-api/CHECKSUMS
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
8a009d153d56a6d81e06428711aeecc217d9f4707471851d9fcff0820b601c47 _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager
853efbf32c9a44edcf4816e24a5ba18c3b4c291b3c13546e9c2a028b19f472ad _output/bin/cluster-api/linux-amd64/clusterctl
07189d12d166319ae0aa36a279e2bc6af913a6ceca2621c019fc83cdb3bd271c _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager
3d5f6f72d4b7c74c0097f2854066cc55b7be31841a1cad35fa6738883aa16b68 _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager
c16ae332488a063969e17d03e404d7add1b5e00d67119a06e97f483a3f0d4bb0 _output/bin/cluster-api/linux-amd64/manager
254b2fa5972428e3eac45b76f75523019ae289f2307af18a3903bf4480ee078a _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager
68f0ca12b8cd80cdc52995236770530ad70491004f64e1a3ad67abe8cf592e3c _output/bin/cluster-api/linux-arm64/clusterctl
2816ba9ad224d286d1aaf877d3dc3cd66d41c9c35f7c16aca0d8b66c723c5b16 _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager
cf72aae2400e25f2bbe60ebc058bbc308e7a00837048fa10df18dfb1c9f888c3 _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager
3565095fe0d684d6289bc53688928500ada8cf014bc4a4742e8bc6ba8ff8c0b7 _output/bin/cluster-api/linux-arm64/manager
f93a5508363ea7a5041cee33a20241e915cdb7f8df69feb3948936f16e9d2b9b _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager
da39536808348a24bbc5d0e37cb5eed82d10e65e25b3b6007ec336175bb422a4 _output/bin/cluster-api/linux-amd64/clusterctl
345101d69ea5f5978ecfe49cb5b3e2d5931b91f8bc86cec6e016a1ea591be6f1 _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager
964e6b4ef49c03141dc1cb6269aef8b5991c049f2edcd6a0f70b0a9f6e3a7c21 _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager
72d7da9728218f835686d0816cb9449068fab28d93e1555913c63751e062d087 _output/bin/cluster-api/linux-amd64/manager
9b41eb5adfa0aff627078923c9ef450f72aeda28b0e9721374d78a97954f1456 _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager
cfeadb8385cab64a745fbb429f4dfa15f79d8af3b285c410677b6978c3c30b5b _output/bin/cluster-api/linux-arm64/clusterctl
a1346ae65c21898b88ccf90678980bb09db6b165de80a4c0c2e7119de89e2219 _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager
d587d38b24af33871c8df564cc8490061b1c28559325258cd5d564d874c6fbf1 _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager
c28f6483e49e8fc83f340a3ead19dd6b7c121f5fc269d22d853defb704386244 _output/bin/cluster-api/linux-arm64/manager
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
From 146972a1d190a1317627a80e8b2fdf2418bbc231 Mon Sep 17 00:00:00 2001
From: Guillermo Gaston <gaslor@amazon.com>
Date: Sat, 20 Jan 2024 22:05:04 +0000
Subject: [PATCH] Restart controller if RESTMapping outdated cache is detected
when reconciling external object

---
controllers/external/util.go | 28 +++++++-
controllers/external/util_test.go | 104 +++++++++++++++++++++++++++++-
2 files changed, 128 insertions(+), 4 deletions(-)

diff --git a/controllers/external/util.go b/controllers/external/util.go
index 5b6443c78..a8b8caa11 100644
--- a/controllers/external/util.go
+++ b/controllers/external/util.go
@@ -19,13 +19,17 @@ package external
import (
"context"
"strings"
+ "syscall"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apiserver/pkg/storage/names"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
+ "sigs.k8s.io/controller-runtime/pkg/log"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)
@@ -40,12 +44,34 @@ func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, name
obj.SetKind(ref.Kind)
obj.SetName(ref.Name)
key := client.ObjectKey{Name: obj.GetName(), Namespace: namespace}
- if err := c.Get(ctx, key, obj); err != nil {
+ err := c.Get(ctx, key, obj)
+ if isV1alpha4NotFoundFromDiscoveryError(err) {
+ logErrorAndGracefulShutdown(
+ ctx,
+ err,
+ "Client RESTMapper returned an error from an invalid cache referencing infrastructure.cluster.x-k8s.io/v1alpha4, exiting the program to force a new cache to be built",
+ )
+ }
+ if err != nil {
return nil, errors.Wrapf(err, "failed to retrieve %s external object %q/%q", obj.GetKind(), key.Namespace, key.Name)
}
return obj, nil
}

+func isV1alpha4NotFoundFromDiscoveryError(err error) bool {
+ discoverFailedErr := &apiutil.ErrResourceDiscoveryFailed{}
+ noResourceMatchErr := &meta.NoResourceMatchError{}
+ return errors.As(err, &discoverFailedErr) &&
+ errors.As(err, &noResourceMatchErr) && // This is the error that ErrResourceDiscoveryFailed will unwrap when the original error is NotFound.
+ strings.Contains(err.Error(), "infrastructure.cluster.x-k8s.io/v1alpha4")
+}
+
+func logErrorAndGracefulShutdown(ctx context.Context, err error, msg string) {
+ logger := log.FromContext(ctx)
+ logger.Error(err, msg)
+ syscall.Kill(syscall.Getpid(), syscall.SIGINT)
+}
+
// Delete uses the client and reference to delete an external, unstructured object.
func Delete(ctx context.Context, c client.Writer, ref *corev1.ObjectReference) error {
obj := new(unstructured.Unstructured)
diff --git a/controllers/external/util_test.go b/controllers/external/util_test.go
index 012445478..13e57306a 100644
--- a/controllers/external/util_test.go
+++ b/controllers/external/util_test.go
@@ -17,6 +17,7 @@ limitations under the License.
package external

import (
+ "fmt"
"testing"

. "github.com/onsi/gomega"
@@ -25,16 +26,16 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+ "k8s.io/apimachinery/pkg/runtime/schema"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

-var (
- ctx = ctrl.SetupSignalHandler()
-)
+var ctx = ctrl.SetupSignalHandler()

const (
testClusterName = "test-cluster"
@@ -323,3 +324,100 @@ func TestCloneTemplateMissingSpecTemplate(t *testing.T) {
})
g.Expect(err).To(HaveOccurred())
}
+
+func TestIsV1alpha4NotFoundFromDiscoveryError(t *testing.T) {
+ tests := []struct {
+ name string
+ err error
+ want bool
+ }{
+ {
+ name: "the error we are looking for",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"),
+ },
+ want: true,
+ },
+ {
+ name: "the error we are looking for but wrapped",
+ err: fmt.Errorf("failed to get restmapping: %w",
+ &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"),
+ },
+ ),
+ want: true,
+ },
+ {
+ name: "v1alpha4 not found with different group",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "different.group",
+ Version: "v1alpha4",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "different.group/v1alpha4"),
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io not found error with different version",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "differentkind",
+ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/differentkind"),
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io/v1alpha4 but different error that is not NotFound",
+ err: &apiutil.ErrResourceDiscoveryFailed{
+ schema.GroupVersion{
+ Group: "infrastructure.cluster.x-k8s.io",
+ Version: "v1alpha4",
+ }: errors.New("some other error"),
+ },
+ want: false,
+ },
+ {
+ name: "plain not found error",
+ err: &apierrors.StatusError{
+ ErrStatus: metav1.Status{
+ Reason: metav1.StatusReasonNotFound,
+ },
+ },
+ want: false,
+ },
+ {
+ name: "infrastructure.cluster.x-k8s.io/v1alpha4 not found error",
+ err: &apierrors.StatusError{
+ ErrStatus: metav1.Status{
+ Reason: metav1.StatusReasonNotFound,
+ Message: "infrastructure.cluster.x-k8s.io/v1alpha4",
+ },
+ },
+ want: false,
+ },
+ {
+ name: "not error",
+ err: nil,
+ want: false,
+ },
+ {
+ name: "other error",
+ err: errors.New("some other error"),
+ want: false,
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ g := NewWithT(t)
+ g.Expect(isV1alpha4NotFoundFromDiscoveryError(test.err)).To(Equal(test.want))
+ })
+ }
+}
--
2.34.1

0 comments on commit dfff637

Please sign in to comment.