Skip to content

Commit

Permalink
Merge pull request #8690 from nojnhuh/wait-pause
Browse files Browse the repository at this point in the history
✨ block move with annotation
  • Loading branch information
k8s-ci-robot committed Aug 17, 2023
2 parents 62d734c + 29bb47d commit 390549a
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 1 deletion.
10 changes: 10 additions & 0 deletions cmd/clusterctl/api/v1alpha3/annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,14 @@ const (
//
// It will help any validation webhook to take decision based on it.
DeleteForMoveAnnotation = "clusterctl.cluster.x-k8s.io/delete-for-move"

// BlockMoveAnnotation prevents the cluster move operation from starting if it is defined on at least one
// of the objects in scope.
// Provider controllers are expected to set the annotation on resources that cannot be instantaneously
// paused and remove the annotation when the resource has been actually paused.
//
// e.g. If this annotation is defined with any value on an InfraMachine resource to be moved when
// `clusterctl move` is invoked, then NO resources for ANY workload cluster will be created on the
// destination management cluster until the annotation is removed.
BlockMoveAnnotation = "clusterctl.cluster.x-k8s.io/block-move"
)
64 changes: 64 additions & 0 deletions cmd/clusterctl/client/cluster/mover.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"os"
"path/filepath"
"time"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
Expand All @@ -32,6 +33,7 @@ import (
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"

Expand Down Expand Up @@ -333,6 +335,19 @@ func (o *objectMover) move(graph *objectGraph, toProxy Proxy, mutators ...Resour
return errors.Wrap(err, "error pausing ClusterClasses")
}

log.Info("Waiting for all resources to be ready to move")
// exponential backoff configuration which returns durations for a total time of ~2m.
// Example: 0, 5s, 8s, 11s, 17s, 26s, 38s, 57s, 86s, 128s
waitForMoveUnblockedBackoff := wait.Backoff{
Duration: 5 * time.Second,
Factor: 1.5,
Steps: 10,
Jitter: 0.1,
}
if err := waitReadyForMove(o.fromProxy, graph.getMoveNodes(), o.dryRun, waitForMoveUnblockedBackoff); err != nil {
return errors.Wrap(err, "error waiting for resources to be ready to move")
}

// Nb. DO NOT call ensureNamespaces at this point because:
// - namespace will be ensured to exist before creating the resource.
// - If it's done here, we might create a namespace that can end up unused on target cluster (due to mutators).
Expand Down Expand Up @@ -595,6 +610,55 @@ func setClusterClassPause(proxy Proxy, clusterclasses []*node, pause bool, dryRu
return nil
}

func waitReadyForMove(proxy Proxy, nodes []*node, dryRun bool, backoff wait.Backoff) error {
if dryRun {
return nil
}

log := logf.Log

c, err := proxy.NewClient()
if err != nil {
return errors.Wrap(err, "error creating client")
}

for _, n := range nodes {
obj := &metav1.PartialObjectMetadata{
ObjectMeta: metav1.ObjectMeta{
Name: n.identity.Name,
Namespace: n.identity.Namespace,
},
TypeMeta: metav1.TypeMeta{
APIVersion: n.identity.APIVersion,
Kind: n.identity.Kind,
},
}
key := client.ObjectKeyFromObject(obj)
log := log.WithValues("apiVersion", obj.GroupVersionKind(), "resource", klog.KObj(obj))

blockLogged := false
if err := retryWithExponentialBackoff(backoff, func() error {
if err := c.Get(ctx, key, obj); err != nil {
return errors.Wrapf(err, "error getting %s/%s", obj.GroupVersionKind(), key)
}

if _, exists := obj.GetAnnotations()[clusterctlv1.BlockMoveAnnotation]; exists {
if !blockLogged {
log.Info(fmt.Sprintf("Move blocked by %s annotation, waiting for it to be removed", clusterctlv1.BlockMoveAnnotation))
blockLogged = true
}
return errors.Errorf("resource is not ready to move: %s/%s", obj.GroupVersionKind(), key)
}
log.V(5).Info("Resource is ready to move")
return nil
}); err != nil {
return err
}
}

return nil
}

// patchCluster applies a patch to a node referring to a Cluster object.
func patchCluster(proxy Proxy, n *node, patch client.Patch, mutators ...ResourceMutatorFunc) error {
cFrom, err := proxy.NewClient()
Expand Down
69 changes: 69 additions & 0 deletions cmd/clusterctl/client/cluster/mover_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package cluster

import (
"context"
"fmt"
"os"
"path/filepath"
Expand All @@ -29,7 +30,9 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"

Expand Down Expand Up @@ -2288,3 +2291,69 @@ func Test_deleteSourceObject(t *testing.T) {
})
}
}

func TestWaitReadyForMove(t *testing.T) {
tests := []struct {
name string
moveBlocked bool
wantErr bool
}{
{
name: "moving blocked cluster should fail",
moveBlocked: true,
wantErr: true,
},
{
name: "moving unblocked cluster should succeed",
moveBlocked: false,
wantErr: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

clusterName := "foo"
clusterNamespace := "ns1"
objs := test.NewFakeCluster(clusterNamespace, clusterName).Objs()

// Create an objectGraph bound a source cluster with all the CRDs for the types involved in the test.
graph := getObjectGraphWithObjs(objs)

if tt.moveBlocked {
c, err := graph.proxy.NewClient()
g.Expect(err).NotTo(HaveOccurred())

ctx := context.Background()
cluster := &clusterv1.Cluster{}
err = c.Get(ctx, types.NamespacedName{Namespace: clusterNamespace, Name: clusterName}, cluster)
g.Expect(err).NotTo(HaveOccurred())
anns := cluster.GetAnnotations()
if anns == nil {
anns = make(map[string]string)
}
anns[clusterctlv1.BlockMoveAnnotation] = "anything"
cluster.SetAnnotations(anns)

g.Expect(c.Update(ctx, cluster)).To(Succeed())
}

// Get all the types to be considered for discovery
g.Expect(getFakeDiscoveryTypes(graph)).To(Succeed())

// trigger discovery the content of the source cluster
g.Expect(graph.Discovery("")).To(Succeed())

backoff := wait.Backoff{
Steps: 1,
}
err := waitReadyForMove(graph.proxy, graph.getMoveNodes(), false, backoff)
if tt.wantErr {
g.Expect(err).To(HaveOccurred())
} else {
g.Expect(err).NotTo(HaveOccurred())
}
})
}
}
2 changes: 2 additions & 0 deletions docs/book/src/clusterctl/commands/move.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ The discovery mechanism for determining the objects to be moved is in the [provi

Before moving a `Cluster`, clusterctl sets the `Cluster.Spec.Paused` field to `true` stopping
the controllers from reconciling the workload cluster _in the source management cluster_.
clusterctl will wait until the `clusterctl.cluster.x-k8s.io/block-move` annotation is not
present on any resource targeted by the move operation.

The `Cluster` object created in the target management cluster instead will be actively reconciled as soon as the move
process completes.
Expand Down
4 changes: 3 additions & 1 deletion docs/book/src/clusterctl/provider-contract.md
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,9 @@ If moving some of excluded object is required, the provider authors should creat
exact move sequence to be executed by the user.

Additionally, provider authors should be aware that `clusterctl move` assumes all the provider's Controllers respect the
`Cluster.Spec.Paused` field introduced in the v1alpha3 Cluster API specification.
`Cluster.Spec.Paused` field introduced in the v1alpha3 Cluster API specification. If a provider needs to perform extra work in response to a
cluster being paused, `clusterctl move` can be blocked from creating any resources on the destination
management cluster by annotating any resource to be moved with `clusterctl.cluster.x-k8s.io/block-move`.

<aside class="note warning">

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ maintainers of providers and consumers of our Go API.


### Other
- `clusterctl move` can be blocked temporarily by a provider when an object to be moved is annotated with `clusterctl.cluster.x-k8s.io/block-move`.


### Suggested changes for providers
Expand Down
1 change: 1 addition & 0 deletions docs/book/src/reference/labels_and_annotations.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
| :--------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| clusterctl.cluster.x-k8s.io/skip-crd-name-preflight-check | Can be placed on provider CRDs, so that clusterctl doesn't emit an error if the CRD doesn't comply with Cluster APIs naming scheme. Only CRDs that are referenced by core Cluster API CRDs have to comply with the naming scheme. |
| clusterctl.cluster.x-k8s.io/delete-for-move | DeleteForMoveAnnotation will be set to objects that are going to be deleted from the source cluster after being moved to the target cluster during the clusterctl move operation. It will help any validation webhook to take decision based on it. |
| clusterctl.cluster.x-k8s.io/block-move | BlockMoveAnnotation prevents the cluster move operation from starting if it is defined on at least one of the objects in scope. Provider controllers are expected to set the annotation on resources that cannot be instantaneously paused and remove the annotation when the resource has been actually paused. |
| unsafe.topology.cluster.x-k8s.io/disable-update-class-name-check | It can be used to disable the webhook check on update that disallows a pre-existing Cluster to be populated with Topology information and Class. |
| cluster.x-k8s.io/cluster-name | It is set on nodes identifying the name of the cluster the node belongs to. |
| cluster.x-k8s.io/cluster-namespace | It is set on nodes identifying the namespace of the cluster the node belongs to. |
Expand Down

0 comments on commit 390549a

Please sign in to comment.