diff --git a/pkg/operator/helpers/helpers.go b/pkg/operator/helpers/helpers.go
index 21c2a99fe..4d9781941 100644
--- a/pkg/operator/helpers/helpers.go
+++ b/pkg/operator/helpers/helpers.go
@@ -46,6 +46,8 @@ const (
 	FeatureGatesTypeValid             = "ValidFeatureGates"
 	FeatureGatesReasonAllValid        = "FeatureGatesAllValid"
 	FeatureGatesReasonInvalidExisting = "InvalidFeatureGatesExisting"
+
+	KlusterletRebootstrapProgressing = "RebootstrapProgressing"
 )
 
 var (
diff --git a/pkg/operator/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go b/pkg/operator/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go
index 9bc38cbcc..d387b5385 100644
--- a/pkg/operator/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go
+++ b/pkg/operator/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go
@@ -22,9 +22,12 @@ import (
 	certutil "k8s.io/client-go/util/cert"
 	"k8s.io/klog/v2"
 
+	operatorv1client "open-cluster-management.io/api/client/operator/clientset/versioned/typed/operator/v1"
 	operatorinformer "open-cluster-management.io/api/client/operator/informers/externalversions/operator/v1"
 	operatorlister "open-cluster-management.io/api/client/operator/listers/operator/v1"
+	operatorapiv1 "open-cluster-management.io/api/operator/v1"
 
+	"open-cluster-management.io/ocm/pkg/common/patcher"
 	"open-cluster-management.io/ocm/pkg/operator/helpers"
 )
 
@@ -40,19 +43,25 @@ var BootstrapControllerSyncInterval = 5 * time.Minute
 type bootstrapController struct {
 	kubeClient       kubernetes.Interface
 	klusterletLister operatorlister.KlusterletLister
+	klusterletClient operatorv1client.KlusterletInterface
 	secretInformers  map[string]coreinformer.SecretInformer
+	patcher          patcher.Patcher[*operatorapiv1.Klusterlet, operatorapiv1.KlusterletSpec, operatorapiv1.KlusterletStatus]
 }
 
 // NewBootstrapController returns a bootstrapController
 func NewBootstrapController(
 	kubeClient kubernetes.Interface,
+	klusterletClient operatorv1client.KlusterletInterface,
 	klusterletInformer operatorinformer.KlusterletInformer,
 	secretInformers map[string]coreinformer.SecretInformer,
 	recorder events.Recorder) factory.Controller {
 	controller := &bootstrapController{
 		kubeClient:       kubeClient,
+		klusterletClient: klusterletClient,
 		klusterletLister: klusterletInformer.Lister(),
 		secretInformers:  secretInformers,
+		patcher: patcher.NewPatcher[
+			*operatorapiv1.Klusterlet, operatorapiv1.KlusterletSpec, operatorapiv1.KlusterletStatus](klusterletClient),
 	}
 	return factory.New().WithSync(controller.sync).
 		WithInformersQueueKeysFunc(bootstrapSecretQueueKeyFunc(controller.klusterletLister),
@@ -93,6 +102,18 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor
 		return nil
 	}
 
+	// handle rebootstrap if the klusterlet is in rebootstrapping state
+	klusterlet, err := k.klusterletLister.Get(klusterletName)
+	if err != nil {
+		return err
+	}
+	requeueFunc := func(duration time.Duration) {
+		controllerContext.Queue().AddAfter(queueKey, duration)
+	}
+	if meta.IsStatusConditionTrue(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) {
+		return k.processRebootstrap(ctx, agentNamespace, klusterlet, controllerContext.Recorder(), requeueFunc)
+	}
+
 	bootstrapHubKubeconfigSecret, err := k.secretInformers[helpers.BootstrapHubKubeConfig].Lister().Secrets(agentNamespace).Get(helpers.BootstrapHubKubeConfig)
 	switch {
 	case errors.IsNotFound(err):
@@ -136,7 +157,7 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor
 		!bytes.Equal(bootstrapKubeconfig.CertificateAuthorityData, hubKubeconfig.CertificateAuthorityData) {
 		// the bootstrap kubeconfig secret is changed, reload the klusterlet agents
 		reloadReason := fmt.Sprintf("the bootstrap secret %s/%s is changed", agentNamespace, helpers.BootstrapHubKubeConfig)
-		return k.reloadAgents(ctx, controllerContext, agentNamespace, klusterletName, reloadReason)
+		return k.startRebootstrap(ctx, klusterlet, reloadReason, controllerContext.Recorder(), requeueFunc)
 	}
 
 	expired, err := isHubKubeconfigSecretExpired(hubKubeconfigSecret)
@@ -154,33 +175,72 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor
 
 	// the hub kubeconfig secret cert is expired, reload klusterlet to restart bootstrap
 	reloadReason := fmt.Sprintf("the hub kubeconfig secret %s/%s is expired", agentNamespace, helpers.HubKubeConfig)
-	return k.reloadAgents(ctx, controllerContext, agentNamespace, klusterletName, reloadReason)
+	return k.startRebootstrap(ctx, klusterlet, reloadReason, controllerContext.Recorder(), requeueFunc)
 }
 
-// reloadAgents reload klusterlet agents by
-// 1. make the registration agent re-bootstrap by deleting the current hub kubeconfig secret to
-// 2. restart the registration and work agents to reload the new hub ca by deleting the agent deployments
-func (k *bootstrapController) reloadAgents(ctx context.Context, ctrlContext factory.SyncContext, namespace, klusterletName, reason string) error {
-	if err := k.kubeClient.CoreV1().Secrets(namespace).Delete(ctx, helpers.HubKubeConfig, metav1.DeleteOptions{}); err != nil {
+func (k *bootstrapController) processRebootstrap(ctx context.Context, agentNamespace string, klusterlet *operatorapiv1.Klusterlet,
+	recorder events.Recorder, requeueFunc func(time.Duration)) error {
+	deploymentName := fmt.Sprintf("%s-registration-agent", klusterlet.Name)
+	deployment, err := k.kubeClient.AppsV1().Deployments(agentNamespace).Get(ctx, deploymentName, metav1.GetOptions{})
+	if errors.IsNotFound(err) {
+		return k.completeRebootstrap(ctx, agentNamespace, klusterlet, recorder)
+	}
+	if err != nil {
 		return err
 	}
-	ctrlContext.Recorder().Eventf("HubKubeconfigSecretDeleted", fmt.Sprintf("the hub kubeconfig secret %s/%s is deleted due to %s",
-		namespace, helpers.HubKubeConfig, reason))
 
-	registrationName := fmt.Sprintf("%s-registration-agent", klusterletName)
-	if err := k.kubeClient.AppsV1().Deployments(namespace).Delete(ctx, registrationName, metav1.DeleteOptions{}); err != nil {
-		return err
+	if deployment.Status.AvailableReplicas == 0 {
+		return k.completeRebootstrap(ctx, agentNamespace, klusterlet, recorder)
 	}
-	ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted", fmt.Sprintf("the deployment %s/%s is deleted due to %s",
-		namespace, registrationName, reason))
 
-	workName := fmt.Sprintf("%s-work-agent", klusterletName)
-	if err := k.kubeClient.AppsV1().Deployments(namespace).Delete(ctx, workName, metav1.DeleteOptions{}); err != nil {
+	// a registration agent pod is still running. Resync in 5 seconds
+	requeueFunc(5 * time.Second)
+	return nil
+}
+
+func (k *bootstrapController) startRebootstrap(ctx context.Context, klusterlet *operatorapiv1.Klusterlet, message string,
+	recorder events.Recorder, requeueFunc func(duration time.Duration)) error {
+	klusterletCopy := klusterlet.DeepCopy()
+	meta.SetStatusCondition(&klusterletCopy.Status.Conditions, metav1.Condition{
+		Type:    helpers.KlusterletRebootstrapProgressing,
+		Status:  metav1.ConditionTrue,
+		Reason:  "RebootstrapStarted",
+		Message: message,
+	})
+	_, err := k.patcher.PatchStatus(ctx, klusterlet, klusterletCopy.Status, klusterlet.Status)
+	if err != nil {
 		return err
 	}
-	ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted", fmt.Sprintf("the deployment %s/%s is deleted due to %s",
-		namespace, workName, reason))
+	recorder.Eventf("KlusterletRebootstrap", fmt.Sprintf("The klusterlet %q starts rebootstrapping due to %s",
+		klusterlet.Name, message))
+
+	// requeue and check the rebootstrap progress in 5 seconds
+	requeueFunc(5 * time.Second)
+	return nil
+}
+
+func (k *bootstrapController) completeRebootstrap(ctx context.Context, agentNamespace string, klusterlet *operatorapiv1.Klusterlet,
+	recorder events.Recorder) error {
+	// delete the existing hub kubeconfig
+	if err := k.kubeClient.CoreV1().Secrets(agentNamespace).Delete(ctx, helpers.HubKubeConfig, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
+		return err
+	}
+	recorder.Eventf("KlusterletRebootstrap", fmt.Sprintf("Secret %s/%s is deleted", agentNamespace, helpers.HubKubeConfig))
+
+	// update the condition of the klusterlet
+	klusterletCopy := klusterlet.DeepCopy()
+	meta.SetStatusCondition(&klusterletCopy.Status.Conditions, metav1.Condition{
+		Type:    helpers.KlusterletRebootstrapProgressing,
+		Status:  metav1.ConditionFalse,
+		Reason:  "RebootstrapCompleted",
+		Message: fmt.Sprintf("Secret %s/%s is deleted and bootstrap is triggered", agentNamespace, helpers.HubKubeConfig),
+	})
+
+	_, err := k.patcher.PatchStatus(ctx, klusterlet, klusterletCopy.Status, klusterlet.Status)
+	if err != nil {
+		return err
+	}
+	recorder.Eventf("KlusterletRebootstrap", fmt.Sprintf("Rebootstrap of the klusterlet %q is completed", klusterlet.Name))
 	return nil
 }
diff --git a/pkg/operator/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go b/pkg/operator/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go
index 59b4121ae..2e60341be 100644
--- a/pkg/operator/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go
+++ b/pkg/operator/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go
@@ -14,6 +14,7 @@ import (
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/equality"
+	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -29,16 +30,19 @@ import (
 	operatorinformers "open-cluster-management.io/api/client/operator/informers/externalversions"
 	operatorapiv1 "open-cluster-management.io/api/operator/v1"
 
+	"open-cluster-management.io/ocm/pkg/common/patcher"
 	testingcommon "open-cluster-management.io/ocm/pkg/common/testing"
 	"open-cluster-management.io/ocm/pkg/operator/helpers"
 )
 
 func TestSync(t *testing.T) {
 	cases := []struct {
-		name            string
-		queueKey        string
-		objects         []runtime.Object
-		validateActions func(t *testing.T, actions []clienttesting.Action)
+		name                    string
+		queueKey                string
+		initRebootstrapping     bool
+		objects                 []runtime.Object
+		expectedRebootstrapping bool
+		validateActions         func(t *testing.T, actions []clienttesting.Action)
 	}{
 		{
 			name: "the changed secret is not bootstrap secret",
@@ -50,18 +54,17 @@ func TestSync(t *testing.T) {
 			},
 		},
 		{
-			name:     "checking the hub kubeconfig secret",
+			name:     "client certificate expired",
 			queueKey: "test/test",
 			objects: []runtime.Object{
 				newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.47:6443")),
 				newHubKubeConfigSecret("test", time.Now().Add(-60*time.Second).UTC()),
-				newDeployment("test-registration-agent", "test"),
-				newDeployment("test-work-agent", "test"),
 			},
+			expectedRebootstrapping: true,
 			validateActions: func(t *testing.T, actions []clienttesting.Action) {
-				testingcommon.AssertDelete(t, actions[0], "secrets", "test", "hub-kubeconfig-secret")
-				testingcommon.AssertDelete(t, actions[1], "deployments", "test", "test-registration-agent")
-				testingcommon.AssertDelete(t, actions[2], "deployments", "test", "test-work-agent")
+				if len(actions) != 0 {
+					t.Errorf("expected no actions, but got %#v", actions)
+				}
 			},
 		},
 		{
@@ -90,16 +93,42 @@ func TestSync(t *testing.T) {
 		{
 			name:     "the bootstrap secret is changed",
 			queueKey: "test/test",
+			objects: []runtime.Object{
+				newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.48:6443")),
+				newHubKubeConfigSecret("test", time.Now().Add(60*time.Second).UTC()),
+			},
+			expectedRebootstrapping: true,
+			validateActions: func(t *testing.T, actions []clienttesting.Action) {
+				if len(actions) != 0 {
+					t.Errorf("expected no actions, but got %#v", actions)
+				}
+			},
+		},
+		{
+			name:                "wait for scaling down",
+			queueKey:            "test/test",
+			initRebootstrapping: true,
+			objects: []runtime.Object{
+				newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.48:6443")),
+				newHubKubeConfigSecret("test", time.Now().Add(60*time.Second).UTC()),
+				newDeploymentWithAvailableReplicas("test-registration-agent", "test", 1),
+			},
+			expectedRebootstrapping: true,
+			validateActions: func(t *testing.T, actions []clienttesting.Action) {
+				testingcommon.AssertGet(t, actions[0], "apps", "v1", "deployments")
+			},
+		},
+		{
+			name:                "rebootstrap is completed",
+			queueKey:            "test/test",
+			initRebootstrapping: true,
 			objects: []runtime.Object{
 				newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.48:6443")),
 				newHubKubeConfigSecret("test", time.Now().Add(60*time.Second).UTC()),
 				newDeployment("test-registration-agent", "test"),
-				newDeployment("test-work-agent", "test"),
 			},
 			validateActions: func(t *testing.T, actions []clienttesting.Action) {
-				testingcommon.AssertDelete(t, actions[0], "secrets", "test", "hub-kubeconfig-secret")
-				testingcommon.AssertDelete(t, actions[1], "deployments", "test", "test-registration-agent")
-				testingcommon.AssertDelete(t, actions[2], "deployments", "test", "test-work-agent")
+				testingcommon.AssertDelete(t, actions[1], "secrets", "test", "hub-kubeconfig-secret")
 			},
 		},
 	}
@@ -107,11 +136,19 @@ func TestSync(t *testing.T) {
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
 			fakeKubeClient := fakekube.NewSimpleClientset(c.objects...)
-
-			fakeOperatorClient := fakeoperatorclient.NewSimpleClientset()
+			klusterlet := newKlusterlet("test", "test")
+			if c.initRebootstrapping {
+				klusterlet.Status.Conditions = []metav1.Condition{
+					{
+						Type:   helpers.KlusterletRebootstrapProgressing,
+						Status: metav1.ConditionTrue,
+					},
+				}
+			}
+			fakeOperatorClient := fakeoperatorclient.NewSimpleClientset(klusterlet)
 			operatorInformers := operatorinformers.NewSharedInformerFactory(fakeOperatorClient, 5*time.Minute)
 			operatorStore := operatorInformers.Operator().V1().Klusterlets().Informer().GetStore()
-			if err := operatorStore.Add(newKlusterlet("test", "test")); err != nil {
+			if err := operatorStore.Add(klusterlet); err != nil {
 				t.Fatal(err)
 			}
 
@@ -151,10 +188,14 @@ func TestSync(t *testing.T) {
 				}
 			}
 
+			klusterletClient := fakeOperatorClient.OperatorV1().Klusterlets()
+			klusterletPatcher := patcher.NewPatcher[*operatorapiv1.Klusterlet, operatorapiv1.KlusterletSpec, operatorapiv1.KlusterletStatus](klusterletClient)
 			controller := &bootstrapController{
 				kubeClient:       fakeKubeClient,
+				klusterletClient: klusterletClient,
 				klusterletLister: operatorInformers.Operator().V1().Klusterlets().Lister(),
 				secretInformers:  secretInformers,
+				patcher:          klusterletPatcher,
 			}
 
 			syncContext := testingcommon.NewFakeSyncContext(t, c.queueKey)
@@ -163,6 +204,15 @@ func TestSync(t *testing.T) {
 			}
 
 			c.validateActions(t, fakeKubeClient.Actions())
+
+			klusterlet, err := fakeOperatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{})
+			if err != nil {
+				t.Errorf("Expected no errors, but got %v", err)
+			}
+			rebootstrapping := meta.IsStatusConditionTrue(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing)
+			if c.expectedRebootstrapping != rebootstrapping {
+				t.Errorf("Expected rebootstrapping to be %v, but got %v", c.expectedRebootstrapping, rebootstrapping)
+			}
 		})
 	}
 }
@@ -313,3 +363,9 @@ func newDeployment(name, namespace string) *appsv1.Deployment {
 		Spec: appsv1.DeploymentSpec{},
 	}
 }
+
+func newDeploymentWithAvailableReplicas(name, namespace string, availableReplicas int32) *appsv1.Deployment {
+	deploy := newDeployment(name, namespace)
+	deploy.Status.AvailableReplicas = availableReplicas
+	return deploy
+}
diff --git a/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller_test.go b/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller_test.go
index eda3c9d32..c074ec933 100644
--- a/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller_test.go
+++ b/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller_test.go
@@ -829,8 +829,32 @@ func TestReplica(t *testing.T) {
 		t.Errorf("Expected non error when sync, %v", err)
 	}
 
+	// should have 3 replicas for clusters with multiple nodes
 	assertRegistrationDeployment(t, controller.kubeClient.Actions(), "update", "", "cluster1", 3)
 	assertWorkDeployment(t, controller.kubeClient.Actions(), "update", "cluster1", operatorapiv1.InstallModeDefault, 3)
+
+	klusterlet = newKlusterlet("klusterlet", "testns", "cluster1")
+	klusterlet.Status.Conditions = []metav1.Condition{
+		{
+			Type:   helpers.KlusterletRebootstrapProgressing,
+			Status: metav1.ConditionTrue,
+		},
+	}
+	if err := controller.operatorStore.Update(klusterlet); err != nil {
+		t.Fatal(err)
+	}
+
+	controller.kubeClient.ClearActions()
+	controller.operatorClient.ClearActions()
+
+	err = controller.controller.sync(context.TODO(), syncContext)
+	if err != nil {
+		t.Errorf("Expected non error when sync, %v", err)
+	}
+
+	// should have 0 replicas for a klusterlet in rebootstrapping state
+	assertRegistrationDeployment(t, controller.kubeClient.Actions(), "update", "", "cluster1", 0)
+	assertWorkDeployment(t, controller.kubeClient.Actions(), "update", "cluster1", operatorapiv1.InstallModeDefault, 0)
 }
 
 func TestClusterNameChange(t *testing.T) {
diff --git a/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_runtime_reconcile.go b/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_runtime_reconcile.go
index cb4b9c38f..1360671d5 100644
--- a/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_runtime_reconcile.go
+++ b/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_runtime_reconcile.go
@@ -51,6 +51,14 @@ func (r *runtimeReconcile) reconcile(ctx context.Context, klusterlet *operatorap
 		}
 	}
 
+	// Check if the klusterlet is in rebootstrapping state.
+	// Both the registration agent and the work agent are scaled to 0 if the
+	// klusterlet is in rebootstrapping state.
+	runtimeConfig := config
+	if meta.IsStatusConditionTrue(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) {
+		runtimeConfig.Replica = 0
+	}
+
 	// Deploy registration agent
 	_, generationStatus, err := helpers.ApplyDeployment(
 		ctx,
@@ -62,7 +70,7 @@ func (r *runtimeReconcile) reconcile(ctx context.Context, klusterlet *operatorap
 			if err != nil {
 				return nil, err
 			}
-			objData := assets.MustCreateAssetFromTemplate(name, template, config).Data
+			objData := assets.MustCreateAssetFromTemplate(name, template, runtimeConfig).Data
 			helpers.SetRelatedResourcesStatusesWithObj(&klusterlet.Status.RelatedResources, objData)
 			return objData, nil
 		},
@@ -78,7 +86,7 @@ func (r *runtimeReconcile) reconcile(ctx context.Context, klusterlet *operatorap
 
 	// If cluster name is empty, read cluster name from hub config secret.
 	// registration-agent generated the cluster name and set it into hub config secret.
-	workConfig := config
+	workConfig := runtimeConfig
 	if workConfig.ClusterName == "" {
 		workConfig.ClusterName, err = r.getClusterNameFromHubKubeConfigSecret(ctx, config.AgentNamespace, klusterlet)
 		if err != nil {
@@ -87,11 +95,13 @@ func (r *runtimeReconcile) reconcile(ctx context.Context, klusterlet *operatorap
 		}
 	}
 	// Deploy work agent.
-	// * work agent is scaled to 0 only when degrade is true with the reason is HubKubeConfigSecretMissing.
-	// It is to ensure a fast startup of work agent when the klusterlet is bootstrapped at the first time.
-	// * The work agent should not be scaled to 0 in degraded condition with other reasons,
-	// because we still need work agent running even though the hub kubconfig is missing some certain permission.
-	// It can ensure work agent to clean up the resources defined in manifestworks when cluster is detaching from the hub.
+	// The work agent is scaled to 0 when
+	// 1) the klusterlet is in rebootstrapping state;
+	// 2) the HubConnectionDegraded condition is true with the reason HubKubeConfigSecretMissing. This ensures a fast
+	//    startup of the work agent when the klusterlet is bootstrapped for the first time. The work agent should not
+	//    be scaled to 0 in a degraded condition with other reasons, because we still need the work agent running even
+	//    though the hub kubeconfig is missing certain permissions, so that it can clean up the resources defined
+	//    in manifestworks when the cluster is detaching from the hub.
 	hubConnectionDegradedCondition := meta.FindStatusCondition(klusterlet.Status.Conditions, hubConnectionDegraded)
 	if hubConnectionDegradedCondition == nil {
 		workConfig.Replica = 0
diff --git a/pkg/operator/operators/klusterlet/options.go b/pkg/operator/operators/klusterlet/options.go
index beae069a7..37549909d 100644
--- a/pkg/operator/operators/klusterlet/options.go
+++ b/pkg/operator/operators/klusterlet/options.go
@@ -141,6 +141,7 @@ func (o *Options) RunKlusterletOperator(ctx context.Context, controllerContext *
 	bootstrapController := bootstrapcontroller.NewBootstrapController(
 		kubeClient,
+		operatorClient.OperatorV1().Klusterlets(),
 		operatorInformer.Operator().V1().Klusterlets(),
 		secretInformers,
 		controllerContext.EventRecorder,
diff --git a/test/integration/operator/klusterlet_test.go b/test/integration/operator/klusterlet_test.go
index ca9982c6b..0eceafa4c 100644
--- a/test/integration/operator/klusterlet_test.go
+++ b/test/integration/operator/klusterlet_test.go
@@ -12,6 +12,7 @@ import (
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/rand"
 	"k8s.io/client-go/rest"
@@ -479,11 +480,15 @@ var _ = ginkgo.Describe("Klusterlet", func() {
 			return true
 		}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
 
-		klusterlet, err = operatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{})
-		gomega.Expect(err).NotTo(gomega.HaveOccurred())
-		klusterlet.Spec.ClusterName = "cluster2"
-		_, err = operatorClient.OperatorV1().Klusterlets().Update(context.Background(), klusterlet, metav1.UpdateOptions{})
-		gomega.Expect(err).NotTo(gomega.HaveOccurred())
+		gomega.Eventually(func() error {
+			klusterlet, err = operatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{})
+			if err != nil {
+				return err
+			}
+			klusterlet.Spec.ClusterName = "cluster2"
+			_, err = operatorClient.OperatorV1().Klusterlets().Update(context.Background(), klusterlet, metav1.UpdateOptions{})
+			return err
+		}, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed())
 
 		gomega.Eventually(func() bool {
 			actual, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{})
@@ -751,6 +756,15 @@ var _ = ginkgo.Describe("Klusterlet", func() {
 			hubSecret.Data["kubeconfig"] = util.NewKubeConfig(&rest.Config{Host: "https://nohost"})
 			_, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Update(context.Background(), hubSecret, metav1.UpdateOptions{})
 			gomega.Expect(err).ToNot(gomega.HaveOccurred())
+
+			// Update the available replicas of the registration deployment
+			registrationDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{})
+			gomega.Expect(err).ToNot(gomega.HaveOccurred())
+			registrationDeployment = registrationDeployment.DeepCopy()
+			registrationDeployment.Status.AvailableReplicas = 0
+			_, err = kubeClient.AppsV1().Deployments(klusterletNamespace).UpdateStatus(context.Background(), registrationDeployment, metav1.UpdateOptions{})
+			gomega.Expect(err).ToNot(gomega.HaveOccurred())
+
 			util.AssertKlusterletCondition(
 				klusterlet.Name, operatorClient,
 				"HubConnectionDegraded",
@@ -799,7 +813,7 @@ var _ = ginkgo.Describe("Klusterlet", func() {
 		})
 	})
 
-	ginkgo.Context("bootstrap reconciliation", func() {
+	ginkgo.Context("rebootstrap", func() {
 		ginkgo.BeforeEach(func() {
 			registrationDeploymentName = fmt.Sprintf("%s-registration-agent", klusterlet.Name)
 			workDeploymentName = fmt.Sprintf("%s-work-agent", klusterlet.Name)
@@ -808,6 +822,74 @@ var _ = ginkgo.Describe("Klusterlet", func() {
 			gomega.Expect(operatorClient.OperatorV1().Klusterlets().Delete(context.Background(), klusterlet.Name, metav1.DeleteOptions{})).To(gomega.BeNil())
 		})
 
+		assertRebootstrap := func() {
+			// Check if the rebootstrap is started
+			gomega.Eventually(func() error {
+				klusterlet, err := operatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{})
+				if err != nil {
+					return err
+				}
+				if meta.IsStatusConditionTrue(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) {
+					return nil
+				}
+
+				return fmt.Errorf("Rebootstrap is not started yet")
+			}, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed())
+
+			// Make sure the deployments are scaled down to 0 during rebootstrapping
+			gomega.Eventually(func() error {
+				// return if the rebootstrap is completed
+				klusterlet, err := operatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{})
+				if err != nil {
+					return err
+				}
+				if meta.IsStatusConditionFalse(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) {
+					return nil
+				}
+
+				// check the replicas of the deployments
+				registrationDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(),
+					registrationDeploymentName, metav1.GetOptions{})
+				if err != nil {
+					return err
+				}
+
+				if *registrationDeployment.Spec.Replicas != 0 {
+					return fmt.Errorf("registrationDeployment.Spec.Replicas is not 0: %d", *registrationDeployment.Spec.Replicas)
+				}
+
+				workDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{})
+				if err != nil {
+					return err
+				}
+
+				if *workDeployment.Spec.Replicas != 0 {
+					return fmt.Errorf("workDeployment.Spec.Replicas is not 0: %d", *workDeployment.Spec.Replicas)
+				}
+
+				return nil
+			}, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed())
+
+			// Check if the rebootstrap is completed
+			gomega.Eventually(func() error {
+				klusterlet, err := operatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{})
+				if err != nil {
+					return err
+				}
+				if !meta.IsStatusConditionFalse(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) {
+					return fmt.Errorf("Rebootstrap is not completed yet")
+				}
+
+				_, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Get(context.Background(), helpers.HubKubeConfig, metav1.GetOptions{})
+				if errors.IsNotFound(err) {
+					return nil
+				} else if err != nil {
+					return err
+				}
+				return fmt.Errorf("hub kubeconfig secret %s/%s is not deleted yet", klusterletNamespace, helpers.HubKubeConfig)
+			}, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed())
+		}
+
 		ginkgo.It("should reload the klusterlet after the bootstrap secret is changed", func() {
 			_, err := operatorClient.OperatorV1().Klusterlets().Create(context.Background(), klusterlet, metav1.CreateOptions{})
 			gomega.Expect(err).NotTo(gomega.HaveOccurred())
@@ -840,21 +922,6 @@ var _ = ginkgo.Describe("Klusterlet", func() {
 				return true
 			}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
 
-			// Get the deployments
-			var registrationDeployment *appsv1.Deployment
-			var workDeployment *appsv1.Deployment
-			gomega.Eventually(func() bool {
-				if registrationDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(
-					context.Background(), registrationDeploymentName, metav1.GetOptions{}); err != nil {
-					return false
-				}
-				if workDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(
-					context.Background(), workDeploymentName, metav1.GetOptions{}); err != nil {
-					return false
-				}
-				return true
-			}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
-
 			// Change the bootstrap secret server address
 			bootStrapSecret, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Get(context.Background(), helpers.BootstrapHubKubeConfig, metav1.GetOptions{})
 			gomega.Expect(err).ToNot(gomega.HaveOccurred())
@@ -863,25 +930,7 @@ var _ = ginkgo.Describe("Klusterlet", func() {
 			_, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Update(context.Background(), bootStrapSecret, metav1.UpdateOptions{})
 			gomega.Expect(err).ToNot(gomega.HaveOccurred())
 
-			// Make sure the deployments are deleted and recreated
-			gomega.Eventually(func() bool {
-				lastRegistrationDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(
-					context.Background(), registrationDeploymentName, metav1.GetOptions{})
-				if err != nil {
-					return false
-				}
-				lastWorkDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{})
-				if err != nil {
-					return false
-				}
-				if registrationDeployment.UID == lastRegistrationDeployment.UID {
-					return false
-				}
-				if workDeployment.UID == lastWorkDeployment.UID {
-					return false
-				}
-				return true
-			}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
+			assertRebootstrap()
 		})
 
 		ginkgo.It("should reload the klusterlet after the hub secret is expired", func() {
@@ -901,21 +950,6 @@ var _ = ginkgo.Describe("Klusterlet", func() {
 			_, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Create(context.Background(), bootStrapSecret, metav1.CreateOptions{})
 			gomega.Expect(err).ToNot(gomega.HaveOccurred())
 
-			// Get the deployments
-			var registrationDeployment *appsv1.Deployment
-			var workDeployment *appsv1.Deployment
-			gomega.Eventually(func() bool {
-				if registrationDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(
-					context.Background(), registrationDeploymentName, metav1.GetOptions{}); err != nil {
-					return false
-				}
-				if workDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(
-					context.Background(), workDeploymentName, metav1.GetOptions{}); err != nil {
-					return false
-				}
-				return true
-			}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
-
 			// Update the hub secret and make it same with the bootstrap secret
 			gomega.Eventually(func() bool {
 				hubSecret, err := kubeClient.CoreV1().Secrets(klusterletNamespace).Get(context.Background(), helpers.HubKubeConfig, metav1.GetOptions{})
@@ -932,25 +966,7 @@ var _ = ginkgo.Describe("Klusterlet", func() {
 				return true
 			}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
 
-			// Make sure the deployments are deleted and recreated
-			gomega.Eventually(func() bool {
-				lastRegistrationDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(
-					context.Background(), registrationDeploymentName, metav1.GetOptions{})
-				if err != nil {
-					return false
-				}
-				lastWorkDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{})
-				if err != nil {
-					return false
-				}
-				if registrationDeployment.UID == lastRegistrationDeployment.UID {
-					return false
-				}
-				if workDeployment.UID == lastWorkDeployment.UID {
-					return false
-				}
-				return true
-			}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
+			assertRebootstrap()
 		})
 	})
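Illustrative note (not part of the patch): the short Go sketch below shows how code outside this diff might consume the new helpers.KlusterletRebootstrapProgressing condition; the package name and helper function here are assumptions for the example only, not code introduced by the change.

// Sketch (assumed example): pause other reconcile work while a klusterlet is rebootstrapping.
package example

import (
	"k8s.io/apimachinery/pkg/api/meta"

	operatorapiv1 "open-cluster-management.io/api/operator/v1"

	"open-cluster-management.io/ocm/pkg/operator/helpers"
)

// isRebootstrapping reports whether the klusterlet is currently rebootstrapping.
// The bootstrap controller sets the condition to True in startRebootstrap and back
// to False in completeRebootstrap once the hub kubeconfig secret has been deleted.
func isRebootstrapping(klusterlet *operatorapiv1.Klusterlet) bool {
	return meta.IsStatusConditionTrue(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing)
}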