From dfff637b611ea16810f248b9378b863ebac3336c Mon Sep 17 00:00:00 2001 From: Guillermo Gaston Date: Mon, 22 Jan 2024 23:06:20 +0000 Subject: [PATCH] Handle race condition when upgrading capi CRDs --- .../cluster-api/ATTRIBUTION.txt | 2 +- .../cluster-api/CAPD_ATTRIBUTION.txt | 2 +- .../kubernetes-sigs/cluster-api/CHECKSUMS | 20 +- ...er-if-RESTMapping-outdated-cache-is-.patch | 205 ++++++++++++++++++ 4 files changed, 217 insertions(+), 12 deletions(-) create mode 100644 projects/kubernetes-sigs/cluster-api/patches/0037-Restart-controller-if-RESTMapping-outdated-cache-is-.patch diff --git a/projects/kubernetes-sigs/cluster-api/ATTRIBUTION.txt b/projects/kubernetes-sigs/cluster-api/ATTRIBUTION.txt index 8f1435f5b6..68011770e1 100644 --- a/projects/kubernetes-sigs/cluster-api/ATTRIBUTION.txt +++ b/projects/kubernetes-sigs/cluster-api/ATTRIBUTION.txt @@ -1201,7 +1201,7 @@ https://github.com/ProtonMail/go-crypto ** github.com/sagikazarmark/slog-shim; version v0.1.0 -- https://github.com/sagikazarmark/slog-shim -** golang.org/go; version go1.20.12 -- +** golang.org/go; version go1.20.13 -- https://github.com/golang/go ** golang.org/x/crypto; version v0.15.0 -- diff --git a/projects/kubernetes-sigs/cluster-api/CAPD_ATTRIBUTION.txt b/projects/kubernetes-sigs/cluster-api/CAPD_ATTRIBUTION.txt index 8f747381dd..f0d1c44e7a 100644 --- a/projects/kubernetes-sigs/cluster-api/CAPD_ATTRIBUTION.txt +++ b/projects/kubernetes-sigs/cluster-api/CAPD_ATTRIBUTION.txt @@ -1006,7 +1006,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ------ -** golang.org/go; version go1.20.12 -- +** golang.org/go; version go1.20.13 -- https://github.com/golang/go ** golang.org/x/crypto; version v0.15.0 -- diff --git a/projects/kubernetes-sigs/cluster-api/CHECKSUMS b/projects/kubernetes-sigs/cluster-api/CHECKSUMS index 5e8e95eb32..5f5d6aac26 100644 --- a/projects/kubernetes-sigs/cluster-api/CHECKSUMS +++ b/projects/kubernetes-sigs/cluster-api/CHECKSUMS @@ -1,10 +1,10 @@ -8a009d153d56a6d81e06428711aeecc217d9f4707471851d9fcff0820b601c47 _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager -853efbf32c9a44edcf4816e24a5ba18c3b4c291b3c13546e9c2a028b19f472ad _output/bin/cluster-api/linux-amd64/clusterctl -07189d12d166319ae0aa36a279e2bc6af913a6ceca2621c019fc83cdb3bd271c _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager -3d5f6f72d4b7c74c0097f2854066cc55b7be31841a1cad35fa6738883aa16b68 _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager -c16ae332488a063969e17d03e404d7add1b5e00d67119a06e97f483a3f0d4bb0 _output/bin/cluster-api/linux-amd64/manager -254b2fa5972428e3eac45b76f75523019ae289f2307af18a3903bf4480ee078a _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager -68f0ca12b8cd80cdc52995236770530ad70491004f64e1a3ad67abe8cf592e3c _output/bin/cluster-api/linux-arm64/clusterctl -2816ba9ad224d286d1aaf877d3dc3cd66d41c9c35f7c16aca0d8b66c723c5b16 _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager -cf72aae2400e25f2bbe60ebc058bbc308e7a00837048fa10df18dfb1c9f888c3 _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager -3565095fe0d684d6289bc53688928500ada8cf014bc4a4742e8bc6ba8ff8c0b7 _output/bin/cluster-api/linux-arm64/manager +f93a5508363ea7a5041cee33a20241e915cdb7f8df69feb3948936f16e9d2b9b _output/bin/cluster-api/linux-amd64/cluster-api-provider-docker-manager +da39536808348a24bbc5d0e37cb5eed82d10e65e25b3b6007ec336175bb422a4 _output/bin/cluster-api/linux-amd64/clusterctl +345101d69ea5f5978ecfe49cb5b3e2d5931b91f8bc86cec6e016a1ea591be6f1 _output/bin/cluster-api/linux-amd64/kubeadm-bootstrap-manager +964e6b4ef49c03141dc1cb6269aef8b5991c049f2edcd6a0f70b0a9f6e3a7c21 _output/bin/cluster-api/linux-amd64/kubeadm-control-plane-manager +72d7da9728218f835686d0816cb9449068fab28d93e1555913c63751e062d087 _output/bin/cluster-api/linux-amd64/manager +9b41eb5adfa0aff627078923c9ef450f72aeda28b0e9721374d78a97954f1456 _output/bin/cluster-api/linux-arm64/cluster-api-provider-docker-manager +cfeadb8385cab64a745fbb429f4dfa15f79d8af3b285c410677b6978c3c30b5b _output/bin/cluster-api/linux-arm64/clusterctl +a1346ae65c21898b88ccf90678980bb09db6b165de80a4c0c2e7119de89e2219 _output/bin/cluster-api/linux-arm64/kubeadm-bootstrap-manager +d587d38b24af33871c8df564cc8490061b1c28559325258cd5d564d874c6fbf1 _output/bin/cluster-api/linux-arm64/kubeadm-control-plane-manager +c28f6483e49e8fc83f340a3ead19dd6b7c121f5fc269d22d853defb704386244 _output/bin/cluster-api/linux-arm64/manager diff --git a/projects/kubernetes-sigs/cluster-api/patches/0037-Restart-controller-if-RESTMapping-outdated-cache-is-.patch b/projects/kubernetes-sigs/cluster-api/patches/0037-Restart-controller-if-RESTMapping-outdated-cache-is-.patch new file mode 100644 index 0000000000..840ad182d3 --- /dev/null +++ b/projects/kubernetes-sigs/cluster-api/patches/0037-Restart-controller-if-RESTMapping-outdated-cache-is-.patch @@ -0,0 +1,205 @@ +From 146972a1d190a1317627a80e8b2fdf2418bbc231 Mon Sep 17 00:00:00 2001 +From: Guillermo Gaston +Date: Sat, 20 Jan 2024 22:05:04 +0000 +Subject: [PATCH] Restart controller if RESTMapping outdated cache is detected + when reconciling external object + +--- + controllers/external/util.go | 28 +++++++- + controllers/external/util_test.go | 104 +++++++++++++++++++++++++++++- + 2 files changed, 128 insertions(+), 4 deletions(-) + +diff --git a/controllers/external/util.go b/controllers/external/util.go +index 5b6443c78..a8b8caa11 100644 +--- a/controllers/external/util.go ++++ b/controllers/external/util.go +@@ -19,13 +19,17 @@ package external + import ( + "context" + "strings" ++ "syscall" + + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" ++ "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apiserver/pkg/storage/names" + "sigs.k8s.io/controller-runtime/pkg/client" ++ "sigs.k8s.io/controller-runtime/pkg/client/apiutil" ++ "sigs.k8s.io/controller-runtime/pkg/log" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + ) +@@ -40,12 +44,34 @@ func Get(ctx context.Context, c client.Reader, ref *corev1.ObjectReference, name + obj.SetKind(ref.Kind) + obj.SetName(ref.Name) + key := client.ObjectKey{Name: obj.GetName(), Namespace: namespace} +- if err := c.Get(ctx, key, obj); err != nil { ++ err := c.Get(ctx, key, obj) ++ if isV1alpha4NotFoundFromDiscoveryError(err) { ++ logErrorAndGracefulShutdown( ++ ctx, ++ err, ++ "Client RESTMapper returned an error from an invalid cache referencing infrastructure.cluster.x-k8s.io/v1alpha4, exiting the program to force a new cache to be built", ++ ) ++ } ++ if err != nil { + return nil, errors.Wrapf(err, "failed to retrieve %s external object %q/%q", obj.GetKind(), key.Namespace, key.Name) + } + return obj, nil + } + ++func isV1alpha4NotFoundFromDiscoveryError(err error) bool { ++ discoverFailedErr := &apiutil.ErrResourceDiscoveryFailed{} ++ noResourceMatchErr := &meta.NoResourceMatchError{} ++ return errors.As(err, &discoverFailedErr) && ++ errors.As(err, &noResourceMatchErr) && // This is the error that ErrResourceDiscoveryFailed will unwrap when the original error is NotFound. ++ strings.Contains(err.Error(), "infrastructure.cluster.x-k8s.io/v1alpha4") ++} ++ ++func logErrorAndGracefulShutdown(ctx context.Context, err error, msg string) { ++ logger := log.FromContext(ctx) ++ logger.Error(err, msg) ++ syscall.Kill(syscall.Getpid(), syscall.SIGINT) ++} ++ + // Delete uses the client and reference to delete an external, unstructured object. + func Delete(ctx context.Context, c client.Writer, ref *corev1.ObjectReference) error { + obj := new(unstructured.Unstructured) +diff --git a/controllers/external/util_test.go b/controllers/external/util_test.go +index 012445478..13e57306a 100644 +--- a/controllers/external/util_test.go ++++ b/controllers/external/util_test.go +@@ -17,6 +17,7 @@ limitations under the License. + package external + + import ( ++ "fmt" + "testing" + + . "github.com/onsi/gomega" +@@ -25,16 +26,16 @@ import ( + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ++ "k8s.io/apimachinery/pkg/runtime/schema" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" ++ "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + ) + +-var ( +- ctx = ctrl.SetupSignalHandler() +-) ++var ctx = ctrl.SetupSignalHandler() + + const ( + testClusterName = "test-cluster" +@@ -323,3 +324,100 @@ func TestCloneTemplateMissingSpecTemplate(t *testing.T) { + }) + g.Expect(err).To(HaveOccurred()) + } ++ ++func TestIsV1alpha4NotFoundFromDiscoveryError(t *testing.T) { ++ tests := []struct { ++ name string ++ err error ++ want bool ++ }{ ++ { ++ name: "the error we are looking for", ++ err: &apiutil.ErrResourceDiscoveryFailed{ ++ schema.GroupVersion{ ++ Group: "infrastructure.cluster.x-k8s.io", ++ Version: "v1alpha4", ++ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"), ++ }, ++ want: true, ++ }, ++ { ++ name: "the error we are looking for but wrapped", ++ err: fmt.Errorf("failed to get restmapping: %w", ++ &apiutil.ErrResourceDiscoveryFailed{ ++ schema.GroupVersion{ ++ Group: "infrastructure.cluster.x-k8s.io", ++ Version: "v1alpha4", ++ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/v1alpha4"), ++ }, ++ ), ++ want: true, ++ }, ++ { ++ name: "v1alpha4 not found with different group", ++ err: &apiutil.ErrResourceDiscoveryFailed{ ++ schema.GroupVersion{ ++ Group: "different.group", ++ Version: "v1alpha4", ++ }: apierrors.NewNotFound(schema.GroupResource{}, "different.group/v1alpha4"), ++ }, ++ want: false, ++ }, ++ { ++ name: "infrastructure.cluster.x-k8s.io not found error with different version", ++ err: &apiutil.ErrResourceDiscoveryFailed{ ++ schema.GroupVersion{ ++ Group: "infrastructure.cluster.x-k8s.io", ++ Version: "differentkind", ++ }: apierrors.NewNotFound(schema.GroupResource{}, "infrastructure.cluster.x-k8s.io/differentkind"), ++ }, ++ want: false, ++ }, ++ { ++ name: "infrastructure.cluster.x-k8s.io/v1alpha4 but different error that is not NotFound", ++ err: &apiutil.ErrResourceDiscoveryFailed{ ++ schema.GroupVersion{ ++ Group: "infrastructure.cluster.x-k8s.io", ++ Version: "v1alpha4", ++ }: errors.New("some other error"), ++ }, ++ want: false, ++ }, ++ { ++ name: "plain not found error", ++ err: &apierrors.StatusError{ ++ ErrStatus: metav1.Status{ ++ Reason: metav1.StatusReasonNotFound, ++ }, ++ }, ++ want: false, ++ }, ++ { ++ name: "infrastructure.cluster.x-k8s.io/v1alpha4 not found error", ++ err: &apierrors.StatusError{ ++ ErrStatus: metav1.Status{ ++ Reason: metav1.StatusReasonNotFound, ++ Message: "infrastructure.cluster.x-k8s.io/v1alpha4", ++ }, ++ }, ++ want: false, ++ }, ++ { ++ name: "not error", ++ err: nil, ++ want: false, ++ }, ++ { ++ name: "other error", ++ err: errors.New("some other error"), ++ want: false, ++ }, ++ } ++ ++ for _, test := range tests { ++ t.Run(test.name, func(t *testing.T) { ++ g := NewWithT(t) ++ g.Expect(isV1alpha4NotFoundFromDiscoveryError(test.err)).To(Equal(test.want)) ++ }) ++ } ++} +-- +2.34.1 +