✨ block move with annotation #8690

Merged
merged 1 commit into from Aug 17, 2023
10 changes: 10 additions & 0 deletions cmd/clusterctl/api/v1alpha3/annotations.go
@@ -31,4 +31,14 @@ const (
//
// It will help any validation webhook to make decisions based on it.
DeleteForMoveAnnotation = "clusterctl.cluster.x-k8s.io/delete-for-move"

// BlockMoveAnnotation prevents the cluster move operation from starting if it is defined on at least one
// of the objects in scope.
// Provider controllers are expected to set the annotation on resources that cannot be instantaneously
// paused and remove the annotation when the resource has been actually paused.
//
// e.g. If this annotation is defined with any value on an InfraMachine resource to be moved when
// `clusterctl move` is invoked, then NO resources for ANY workload cluster will be created on the
// destination management cluster until the annotation is removed.
BlockMoveAnnotation = "clusterctl.cluster.x-k8s.io/block-move"
)
64 changes: 64 additions & 0 deletions cmd/clusterctl/client/cluster/mover.go
@@ -21,6 +21,7 @@ import (
"fmt"
"os"
"path/filepath"
"time"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
@@ -32,6 +33,7 @@ import (
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"

@@ -333,6 +335,19 @@ func (o *objectMover) move(graph *objectGraph, toProxy Proxy, mutators ...ResourceMutatorFunc) error {
return errors.Wrap(err, "error pausing ClusterClasses")
}

log.Info("Waiting for all resources to be ready to move")
// exponential backoff configuration which returns durations for a total time of ~2m.
// Example: 0, 5s, 8s, 11s, 17s, 26s, 38s, 57s, 86s, 128s
Contributor: Just curious, are these the actual numbers it would use or did you pull this from somewhere?

Contributor (author): The jitter makes this inherently random to some degree, but I threw together something to simulate what these numbers could be and this is one example.
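A minimal sketch of such a simulation (hypothetical; not the author's actual script), stepping the same wait.Backoff settings and printing the per-step and cumulative delays:

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	// Same settings as waitForMoveUnblockedBackoff below; the leading 0 in the
	// example comment likely reflects the immediate first attempt before any sleep.
	b := wait.Backoff{
		Duration: 5 * time.Second,
		Factor:   1.5,
		Steps:    10,
		Jitter:   0.1,
	}
	var total time.Duration
	for b.Steps > 0 {
		d := b.Step() // consumes one step; Jitter makes every run slightly different
		total += d
		fmt.Printf("delay: %v, cumulative: %v\n", d.Round(time.Second), total.Round(time.Second))
	}
}
```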

waitForMoveUnblockedBackoff := wait.Backoff{
Duration: 5 * time.Second,
Factor: 1.5,
Steps: 10,
Jitter: 0.1,
}
if err := waitReadyForMove(o.fromProxy, graph.getMoveNodes(), o.dryRun, waitForMoveUnblockedBackoff); err != nil {
return errors.Wrap(err, "error waiting for resources to be ready to move")
}

// Nb. DO NOT call ensureNamespaces at this point because:
// - namespace will be ensured to exist before creating the resource.
// - If it's done here, we might create a namespace that can end up unused on target cluster (due to mutators).
@@ -595,6 +610,55 @@ func setClusterClassPause(proxy Proxy, clusterclasses []*node, pause bool, dryRun bool) error {
return nil
}

func waitReadyForMove(proxy Proxy, nodes []*node, dryRun bool, backoff wait.Backoff) error {
Member: It would be great if we can collect wait-for-move during discovery; then we can make this a no-op for most of the objects, and implement a better UX for objects that are actually blocking the move by printing "Waiting for ... to unblock move" before starting to wait.

Contributor (author): Yeah, that seems better. I can take a look at that as a follow-up.

if dryRun {
return nil
}

log := logf.Log

c, err := proxy.NewClient()
if err != nil {
return errors.Wrap(err, "error creating client")
}

for _, n := range nodes {
obj := &metav1.PartialObjectMetadata{
ObjectMeta: metav1.ObjectMeta{
Name: n.identity.Name,
Namespace: n.identity.Namespace,
},
TypeMeta: metav1.TypeMeta{
APIVersion: n.identity.APIVersion,
Kind: n.identity.Kind,
},
}
key := client.ObjectKeyFromObject(obj)
log := log.WithValues("apiVersion", obj.GroupVersionKind(), "resource", klog.KObj(obj))

blockLogged := false
if err := retryWithExponentialBackoff(backoff, func() error {
Member: As it is implemented, the backoff applies to every CR; thus, assuming that we are moving 100 objects, this can result in 100*5sec, which is a lot. Is that the expected behavior?

Contributor (author): That's what I had in mind for the time being. I generally wouldn't expect this to ever wait for N resources * the timeout for one resource, since while clusterctl is waiting for one resource to be paused, the provider will continue doing work out-of-band to pause that and other resources in parallel. Overall, I think the total time this waits will trend toward the longest wait for an individual resource in that case.

That same parallelism could be applied to the waits here as well, but that didn't seem worth the additional complexity under the assumption that resources are going to be paused in parallel.

Member (@fabriziopandini, Jul 3, 2023): I got your point, but it is not ideal because ultimately the time the users have to wait can vary in a significant way. However, I can be OK with this approach for the first iteration if we can get a better UX as suggested in other comments.

TL;DR: if (and only if) there is a block on an object, before starting the wait loop we should log "Waiting for the block to be removed on ..." or something similar that makes the users aware of where the move process is blocked at.

Contributor (author): Based on my read of the code, it looks like the "resource is not ready to move" error returned here will get logged before any time.Sleep:

log.V(5).Info("Retrying with backoff", "Cause", err.Error())

Does that cover what you describe?

Member: Not really, this is the debug log of every retry and it is very noisy; it will also be printed for every object, not only the objects with an actual block move.

So it does not comply with:

> if (and only if) there is a block on an object, before starting the wait loop we should log "Waiting for the block to be removed on ..." or something similar that makes the users aware of where the move process is blocked at

It just provides debug/trace details on every step of the wait loop when you are already in it.

Contributor (author): So you'd like to see a single log message indicating that at least one object is blocking the move?

Member (@fabriziopandini, Jul 10, 2023):

obj1: no block move annotation --> no message at all (and possibly skip the wait loop entirely)
obj2: block move annotation --> before starting the wait loop, "Waiting for the block to be removed on ..."
obj3-50: no block move annotation --> no message at all (and possibly skip the wait loop entirely)
obj51: block move annotation --> before starting the wait loop, "Waiting for the block to be removed on ..."
and so on

NOTE: this is consistent with most of the logs in clusterctl, where we inform about the next operation before starting it. That is even more important in this case because otherwise the system will seem to be stuck waiting. The caveat is that, according to the spec, we should wait only for objects with the annotation, not for all the objects (this will be much easier if you collect wait-for-move during discovery, as I have suggested in another comment on the same topic).

Contributor (author): That makes sense, I've updated this to do that. FWIW we need to enter the wait loop no matter what to determine if the annotation exists, but the loop is exited as soon as the annotation doesn't exist (which may be in the first iteration). You already suggested gathering whether the resource is blocked during object discovery, which could make this short-circuit earlier, but I'd like to do that as a follow-up if that's still acceptable.

Contributor:

> I'd like to do that as a follow-up if that's still acceptable

This would also be good to track in an issue.

Contributor (author): Opened #9196

if err := c.Get(ctx, key, obj); err != nil {
return errors.Wrapf(err, "error getting %s/%s", obj.GroupVersionKind(), key)
}

if _, exists := obj.GetAnnotations()[clusterctlv1.BlockMoveAnnotation]; exists {
if !blockLogged {
log.Info(fmt.Sprintf("Move blocked by %s annotation, waiting for it to be removed", clusterctlv1.BlockMoveAnnotation))
blockLogged = true
}
return errors.Errorf("resource is not ready to move: %s/%s", obj.GroupVersionKind(), key)
}
log.V(5).Info("Resource is ready to move")
return nil
}); err != nil {
return err
}
}

return nil
}
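The retryWithExponentialBackoff helper used above predates this PR and is not shown in the diff; below is a simplified sketch of its behavior for context, assuming clusterctl's logf package and wait.ExponentialBackoff semantics (the real helper may differ in its details):

```go
// Simplified sketch of the pre-existing retryWithExponentialBackoff helper
// referenced above (illustrative only).
package cluster

import (
	"github.com/pkg/errors"
	"k8s.io/apimachinery/pkg/util/wait"

	logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log"
)

func retryWithExponentialBackoffSketch(opts wait.Backoff, operation func() error) error {
	log := logf.Log

	attempt := 0
	err := wait.ExponentialBackoff(opts, func() (bool, error) {
		attempt++
		if err := operation(); err != nil {
			// The V(5) log line discussed in the review thread above.
			log.V(5).Info("Retrying with backoff", "Cause", err.Error())
			return false, nil // not done yet; sleep and retry
		}
		return true, nil // success; stop retrying
	})
	if err != nil {
		return errors.Wrapf(err, "action failed after %d attempts", attempt)
	}
	return nil
}
```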

// patchCluster applies a patch to a node referring to a Cluster object.
func patchCluster(proxy Proxy, n *node, patch client.Patch, mutators ...ResourceMutatorFunc) error {
cFrom, err := proxy.NewClient()
69 changes: 69 additions & 0 deletions cmd/clusterctl/client/cluster/mover_test.go
@@ -17,6 +17,7 @@ limitations under the License.
package cluster

import (
"context"
"fmt"
"os"
"path/filepath"
@@ -29,7 +30,9 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"

@@ -2288,3 +2291,69 @@ func Test_deleteSourceObject(t *testing.T) {
})
}
}

func TestWaitReadyForMove(t *testing.T) {
tests := []struct {
name string
moveBlocked bool
wantErr bool
}{
{
name: "moving blocked cluster should fail",
moveBlocked: true,
wantErr: true,
},
{
name: "moving unblocked cluster should succeed",
moveBlocked: false,
wantErr: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

clusterName := "foo"
clusterNamespace := "ns1"
objs := test.NewFakeCluster(clusterNamespace, clusterName).Objs()

// Create an objectGraph bound to a source cluster with all the CRDs for the types involved in the test.
graph := getObjectGraphWithObjs(objs)

if tt.moveBlocked {
c, err := graph.proxy.NewClient()
g.Expect(err).NotTo(HaveOccurred())

ctx := context.Background()
cluster := &clusterv1.Cluster{}
err = c.Get(ctx, types.NamespacedName{Namespace: clusterNamespace, Name: clusterName}, cluster)
g.Expect(err).NotTo(HaveOccurred())
anns := cluster.GetAnnotations()
if anns == nil {
anns = make(map[string]string)
}
anns[clusterctlv1.BlockMoveAnnotation] = "anything"
cluster.SetAnnotations(anns)

g.Expect(c.Update(ctx, cluster)).To(Succeed())
}

// Get all the types to be considered for discovery
g.Expect(getFakeDiscoveryTypes(graph)).To(Succeed())

// Trigger discovery of the content of the source cluster.
g.Expect(graph.Discovery("")).To(Succeed())

backoff := wait.Backoff{
Steps: 1,
}
err := waitReadyForMove(graph.proxy, graph.getMoveNodes(), false, backoff)
if tt.wantErr {
g.Expect(err).To(HaveOccurred())
} else {
g.Expect(err).NotTo(HaveOccurred())
}
})
}
}
2 changes: 2 additions & 0 deletions docs/book/src/clusterctl/commands/move.md
@@ -32,6 +32,8 @@ The discovery mechanism for determining the objects to be moved is in the [provi

Before moving a `Cluster`, clusterctl sets the `Cluster.Spec.Paused` field to `true`, stopping
the controllers from reconciling the workload cluster _in the source management cluster_.
clusterctl will then wait until the `clusterctl.cluster.x-k8s.io/block-move` annotation is no
longer present on any resource targeted by the move operation.

The `Cluster` object created in the target management cluster instead will be actively reconciled as soon as the move
process completes.
4 changes: 3 additions & 1 deletion docs/book/src/clusterctl/provider-contract.md
@@ -469,7 +469,9 @@ If moving some of the excluded objects is required, the provider authors should create documentation describing the
exact move sequence to be executed by the user.

Additionally, provider authors should be aware that `clusterctl move` assumes all the provider's Controllers respect the
`Cluster.Spec.Paused` field introduced in the v1alpha3 Cluster API specification.
`Cluster.Spec.Paused` field introduced in the v1alpha3 Cluster API specification. If a provider needs to perform extra work in response to a
cluster being paused, `clusterctl move` can be blocked from creating any resources on the destination
management cluster by annotating any resource to be moved with `clusterctl.cluster.x-k8s.io/block-move`.
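For illustration only (this is not code from this PR, and setBlockMove/clearBlockMove are hypothetical helper names), a provider controller might manage the annotation roughly like this:

```go
package example

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/client"
)

// BlockMoveAnnotation mirrors the clusterctl annotation key defined in this PR.
const BlockMoveAnnotation = "clusterctl.cluster.x-k8s.io/block-move"

// setBlockMove marks obj so that `clusterctl move` waits before creating any
// resources on the destination cluster. obj can be any provider-owned resource
// in the move scope (e.g. an InfraCluster).
func setBlockMove(ctx context.Context, c client.Client, obj client.Object) error {
	anns := obj.GetAnnotations()
	if anns == nil {
		anns = map[string]string{}
	}
	anns[BlockMoveAnnotation] = "true" // any value works; only presence is checked
	obj.SetAnnotations(anns)
	return c.Update(ctx, obj)
}

// clearBlockMove lifts the block once the provider's pause-related work is done.
func clearBlockMove(ctx context.Context, c client.Client, obj client.Object) error {
	anns := obj.GetAnnotations()
	if _, ok := anns[BlockMoveAnnotation]; !ok {
		return nil // nothing to do
	}
	delete(anns, BlockMoveAnnotation)
	obj.SetAnnotations(anns)
	return c.Update(ctx, obj)
}
```

Any annotation value works; clusterctl only checks for the key's presence, as the waitReadyForMove implementation above shows.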

<aside class="note warning">

@@ -27,6 +27,7 @@ maintainers of providers and consumers of our Go API.


### Other
- `clusterctl move` can be blocked temporarily by a provider when an object to be moved is annotated with `clusterctl.cluster.x-k8s.io/block-move`.


### Suggested changes for providers
1 change: 1 addition & 0 deletions docs/book/src/reference/labels_and_annotations.md
@@ -24,6 +24,7 @@
| :--- | :--- |
| clusterctl.cluster.x-k8s.io/skip-crd-name-preflight-check | Can be placed on provider CRDs, so that clusterctl doesn't emit an error if the CRD doesn't comply with Cluster APIs naming scheme. Only CRDs that are referenced by core Cluster API CRDs have to comply with the naming scheme. |
| clusterctl.cluster.x-k8s.io/delete-for-move | DeleteForMoveAnnotation will be set to objects that are going to be deleted from the source cluster after being moved to the target cluster during the clusterctl move operation. It will help any validation webhook to take decision based on it. |
| clusterctl.cluster.x-k8s.io/block-move | BlockMoveAnnotation prevents the cluster move operation from starting if it is defined on at least one of the objects in scope. Provider controllers are expected to set the annotation on resources that cannot be instantaneously paused and remove the annotation when the resource has been actually paused. |
| unsafe.topology.cluster.x-k8s.io/disable-update-class-name-check | It can be used to disable the webhook check on update that disallows a pre-existing Cluster to be populated with Topology information and Class. |
| cluster.x-k8s.io/cluster-name | It is set on nodes identifying the name of the cluster the node belongs to. |
| cluster.x-k8s.io/cluster-namespace | It is set on nodes identifying the namespace of the cluster the node belongs to. |