From c918c21df2c9239871d24c9b2f674079cbc6c40a Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Mon, 5 Jun 2023 14:24:27 -0500 Subject: [PATCH] :sparkles: block move with annotation --- cmd/clusterctl/api/v1alpha3/annotations.go | 6 ++ cmd/clusterctl/client/cluster/mover.go | 58 +++++++++++++++++ cmd/clusterctl/client/cluster/mover_test.go | 69 +++++++++++++++++++++ docs/book/src/clusterctl/commands/move.md | 2 + 4 files changed, 135 insertions(+) diff --git a/cmd/clusterctl/api/v1alpha3/annotations.go b/cmd/clusterctl/api/v1alpha3/annotations.go index eb818ee181fb..d2cfa9ded86f 100644 --- a/cmd/clusterctl/api/v1alpha3/annotations.go +++ b/cmd/clusterctl/api/v1alpha3/annotations.go @@ -31,4 +31,10 @@ const ( // // It will help any validation webhook to take decision based on it. DeleteForMoveAnnotation = "clusterctl.cluster.x-k8s.io/delete-for-move" + + // MoveBlockedAnnotation blocks a move when defined with any value on any + // object to be moved. Providers are expected to set the annotation on + // resources that cannot be instantaneously paused, and remove the + // annotation when the resource has been paused. + MoveBlockedAnnotation = "clusterctl.cluster.x-k8s.io/move-blocked" ) diff --git a/cmd/clusterctl/client/cluster/mover.go b/cmd/clusterctl/client/cluster/mover.go index 433651d7d4bd..0d3baad26c31 100644 --- a/cmd/clusterctl/client/cluster/mover.go +++ b/cmd/clusterctl/client/cluster/mover.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "path/filepath" + "time" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -32,6 +33,7 @@ import ( kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/version" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" @@ -333,6 +335,19 @@ func (o *objectMover) move(graph *objectGraph, toProxy Proxy, mutators ...Resour return errors.Wrap(err, "error pausing ClusterClasses") } + log.V(2).Info("Waiting for all resources to be ready to move") + // exponential backoff configuration which returns durations for a total time of ~2m. + // Example: 0, 5s, 8s, 11s, 17s, 26s, 38s, 57s, 86s, 128s + waitForMoveUnblockedBackoff := wait.Backoff{ + Duration: 5 * time.Second, + Factor: 1.5, + Steps: 10, + Jitter: 0.1, + } + if err := waitReadyForMove(o.fromProxy, graph.getMoveNodes(), o.dryRun, waitForMoveUnblockedBackoff); err != nil { + return errors.Wrap(err, "error waiting for resources to be ready to move") + } + // Nb. DO NOT call ensureNamespaces at this point because: // - namespace will be ensured to exist before creating the resource. // - If it's done here, we might create a namespace that can end up unused on target cluster (due to mutators). @@ -595,6 +610,49 @@ func setClusterClassPause(proxy Proxy, clusterclasses []*node, pause bool, dryRu return nil } +func waitReadyForMove(proxy Proxy, nodes []*node, dryRun bool, backoff wait.Backoff) error { + if dryRun { + return nil + } + + log := logf.Log + + c, err := proxy.NewClient() + if err != nil { + return errors.Wrap(err, "error creating client") + } + + for _, n := range nodes { + obj := &metav1.PartialObjectMetadata{ + ObjectMeta: metav1.ObjectMeta{ + Name: n.identity.Name, + Namespace: n.identity.Namespace, + }, + TypeMeta: metav1.TypeMeta{ + APIVersion: n.identity.APIVersion, + Kind: n.identity.Kind, + }, + } + key := client.ObjectKeyFromObject(obj) + + if err := retryWithExponentialBackoff(backoff, func() error { + if err := c.Get(ctx, key, obj); err != nil { + return errors.Wrapf(err, "error getting %s %s", obj.GroupVersionKind(), key) + } + + if _, exists := obj.GetAnnotations()[clusterctlv1.MoveBlockedAnnotation]; exists { + return errors.Errorf("resource is not ready to move: %s %s", obj.GroupVersionKind(), key) + } + log.Info("Resource is ready to move", "apiVersion", obj.GroupVersionKind(), "resource", klog.KObj(obj)) + return nil + }); err != nil { + return err + } + } + + return nil +} + // patchCluster applies a patch to a node referring to a Cluster object. func patchCluster(proxy Proxy, n *node, patch client.Patch, mutators ...ResourceMutatorFunc) error { cFrom, err := proxy.NewClient() diff --git a/cmd/clusterctl/client/cluster/mover_test.go b/cmd/clusterctl/client/cluster/mover_test.go index ade86d12733c..848d029038f7 100644 --- a/cmd/clusterctl/client/cluster/mover_test.go +++ b/cmd/clusterctl/client/cluster/mover_test.go @@ -17,6 +17,7 @@ limitations under the License. package cluster import ( + "context" "fmt" "os" "path/filepath" @@ -29,7 +30,9 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/utils/pointer" "sigs.k8s.io/controller-runtime/pkg/client" @@ -2240,3 +2243,69 @@ func Test_deleteSourceObject(t *testing.T) { }) } } + +func TestWaitReadyForMove(t *testing.T) { + tests := []struct { + name string + moveBlocked bool + wantErr bool + }{ + { + name: "moving blocked cluster should fail", + moveBlocked: true, + wantErr: true, + }, + { + name: "moving unblocked cluster should succeed", + moveBlocked: false, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + clusterName := "foo" + clusterNamespace := "ns1" + objs := test.NewFakeCluster(clusterNamespace, clusterName).Objs() + + // Create an objectGraph bound a source cluster with all the CRDs for the types involved in the test. + graph := getObjectGraphWithObjs(objs) + + if tt.moveBlocked { + c, err := graph.proxy.NewClient() + g.Expect(err).NotTo(HaveOccurred()) + + ctx := context.Background() + cluster := &clusterv1.Cluster{} + err = c.Get(ctx, types.NamespacedName{Namespace: clusterNamespace, Name: clusterName}, cluster) + g.Expect(err).NotTo(HaveOccurred()) + anns := cluster.GetAnnotations() + if anns == nil { + anns = make(map[string]string) + } + anns[clusterctlv1.MoveBlockedAnnotation] = "anything" + cluster.SetAnnotations(anns) + + g.Expect(c.Update(ctx, cluster)).To(Succeed()) + } + + // Get all the types to be considered for discovery + g.Expect(getFakeDiscoveryTypes(graph)).To(Succeed()) + + // trigger discovery the content of the source cluster + g.Expect(graph.Discovery("")).To(Succeed()) + + backoff := wait.Backoff{ + Steps: 1, + } + err := waitReadyForMove(graph.proxy, graph.getMoveNodes(), false, backoff) + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).NotTo(HaveOccurred()) + } + }) + } +} diff --git a/docs/book/src/clusterctl/commands/move.md b/docs/book/src/clusterctl/commands/move.md index 940766b32776..3b1356753dc8 100644 --- a/docs/book/src/clusterctl/commands/move.md +++ b/docs/book/src/clusterctl/commands/move.md @@ -30,6 +30,8 @@ to move the Cluster API objects defined in another namespace, you can use the `- Before moving a `Cluster`, clusterctl sets the `Cluster.Spec.Paused` field to `true` stopping the controllers from reconciling the workload cluster _in the source management cluster_. +clusterctl will then wait for the `clusterctl.cluster.x-k8s.io/move-blocked` annotation not +to exist on any resource to be moved. The `Cluster` object created in the target management cluster instead will be actively reconciled as soon as the move process completes.