From 73f58c68cc3b66fe01f1cf64575971737a1efcac Mon Sep 17 00:00:00 2001 From: xuezhaojun Date: Sun, 28 Apr 2024 23:20:23 +0800 Subject: [PATCH] suppport Klusterlet switch-hub. Signed-off-by: xuezhaojun --- ...pen-cluster-management.io_klusterlets.yaml | 45 ++ .../klusterlet-agent-deployment.yaml | 6 + .../klusterlet-registration-deployment.yaml | 6 + .../klusterlet_controller.go | 9 + .../bootstrapkubeconfig.go | 100 +++++ .../bootstrapkubeconfig_test.go | 202 +++++++++ .../bootstrapkubeconfiginuse.go | 53 +++ .../bootstrapkubeconfiginuse_test.go | 85 ++++ .../bootstrapkubeconfigsmanager.go | 117 +++++ .../bootstrapkubeconfigsmanager_test.go | 104 +++++ .../spoke/lease/lease_controller.go | 77 +++- .../spoke/lease/lease_controller_test.go | 109 ++++- pkg/registration/spoke/options.go | 20 +- .../registration/hub_accept_controller.go | 46 ++ .../hub_accept_controller_test.go | 71 +++ pkg/registration/spoke/spokeagent.go | 25 ++ .../registration/integration_suite_test.go | 1 + .../spokeagent_switch_hub_test.go | 408 ++++++++++++++++++ test/integration/util/authentication.go | 85 ++++ 19 files changed, 1546 insertions(+), 23 deletions(-) create mode 100644 pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfig.go create mode 100644 pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfig_test.go create mode 100644 pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfiginuse.go create mode 100644 pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfiginuse_test.go create mode 100644 pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfigsmanager.go create mode 100644 pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfigsmanager_test.go create mode 100644 pkg/registration/spoke/registration/hub_accept_controller.go create mode 100644 pkg/registration/spoke/registration/hub_accept_controller_test.go create mode 100644 test/integration/registration/spokeagent_switch_hub_test.go diff --git a/deploy/klusterlet/olm-catalog/0.13.1/manifests/operator.open-cluster-management.io_klusterlets.yaml b/deploy/klusterlet/olm-catalog/0.13.1/manifests/operator.open-cluster-management.io_klusterlets.yaml index 40d81f073..8701897dd 100644 --- a/deploy/klusterlet/olm-catalog/0.13.1/manifests/operator.open-cluster-management.io_klusterlets.yaml +++ b/deploy/klusterlet/olm-catalog/0.13.1/manifests/operator.open-cluster-management.io_klusterlets.yaml @@ -181,6 +181,51 @@ spec: description: RegistrationConfiguration contains the configuration of registration properties: + bootstrapKubeConfigs: + description: "BootstrapKubeConfigs defines the ordered list of + bootstrap kubeconfigs. The order decides which bootstrap kubeconfig + to use first when rebootstrap. \n When the agent loses the connection + to the current hub over HubConnectionTimeoutSeconds, or the + managedcluster CR is set `hubAcceptsClient=false` on the hub, + the controller marks the related bootstrap kubeconfig as \"failed\". + \n A failed bootstrapkubeconfig won't be used for the duration + specified by SkipFailedBootstrapKubeConfigSeconds. But if the + user updates the content of a failed bootstrapkubeconfig, the + \"failed\" mark will be cleared." + properties: + localSecretsConfig: + description: LocalSecretsConfig include a list of secrets + that contains the kubeconfigs for ordered bootstrap kubeconifigs. + The secrets must be in the same namespace where the agent + controller runs. 
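+                        # Illustrative only: a Klusterlet registrationConfiguration that enables
+                        # ordered bootstrap kubeconfigs might look like the following (secret names
+                        # and the timeout are example values; field names follow this schema):
+                        #   registrationConfiguration:
+                        #     bootstrapKubeConfigs:
+                        #       type: LocalSecrets
+                        #       localSecretsConfig:
+                        #         hubConnectionTimeoutSeconds: 600
+                        #         secretNames:
+                        #           - bootstrap-hub-kubeconfig-hub1
+                        #           - bootstrap-hub-kubeconfig-hub2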
+ properties: + hubConnectionTimeoutSeconds: + default: 600 + description: HubConnectionTimeoutSeconds is used to set + the timeout of connecting to the hub cluster. When agent + loses the connection to the hub over the timeout seconds, + the agent do a rebootstrap. By default is 10 mins. + format: int32 + minimum: 180 + type: integer + secretNames: + description: SecretNames is a list of secret names. The + secrets are in the same namespace where the agent controller + runs. + items: + type: string + type: array + type: object + type: + default: None + description: Type specifies the type of priority bootstrap + kubeconfigs. By default, it is set to None, representing + no priority bootstrap kubeconfigs are set. + enum: + - None + - LocalSecrets + type: string + type: object clientCertExpirationSeconds: description: clientCertExpirationSeconds represents the seconds of a client certificate to expire. If it is not set or 0, the diff --git a/manifests/klusterlet/management/klusterlet-agent-deployment.yaml b/manifests/klusterlet/management/klusterlet-agent-deployment.yaml index fdf29b37b..c70054cc8 100644 --- a/manifests/klusterlet/management/klusterlet-agent-deployment.yaml +++ b/manifests/klusterlet/management/klusterlet-agent-deployment.yaml @@ -90,6 +90,12 @@ spec: {{if gt .AgentKubeAPIBurst 0}} - "--kube-api-burst={{ .AgentKubeAPIBurst }}" {{end}} + {{range .BootStrapKubeConfigSecrets}} + - "--bootstrap-kubeconfig-secrets={{ . }}" + {{end}} + {{if gt .HubConnectionTimeoutSeconds 0}} + - "--hub-connection-timeout-seconds={{ .HubConnectionTimeoutSeconds }}" + {{end}} env: - name: POD_NAME valueFrom: diff --git a/manifests/klusterlet/management/klusterlet-registration-deployment.yaml b/manifests/klusterlet/management/klusterlet-registration-deployment.yaml index 80620b16b..da0730819 100644 --- a/manifests/klusterlet/management/klusterlet-registration-deployment.yaml +++ b/manifests/klusterlet/management/klusterlet-registration-deployment.yaml @@ -81,6 +81,12 @@ spec: {{if gt .RegistrationKubeAPIBurst 0}} - "--kube-api-burst={{ .RegistrationKubeAPIBurst }}" {{end}} + {{range .BootStrapKubeConfigSecrets}} + - "--bootstrap-kubeconfig-secrets={{ . 
}}" + {{end}} + {{if gt .HubConnectionTimeoutSeconds 0}} + - "--hub-connection-timeout-seconds={{ .HubConnectionTimeoutSeconds }}" + {{end}} env: - name: POD_NAME valueFrom: diff --git a/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller.go b/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller.go index 597639133..96bf48ab3 100644 --- a/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller.go +++ b/pkg/operator/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller.go @@ -126,6 +126,8 @@ type klusterletConfig struct { ExternalServerURL string HubKubeConfigSecret string BootStrapKubeConfigSecret string + BootStrapKubeConfigSecrets []string + HubConnectionTimeoutSeconds int32 OperatorNamespace string Replica int32 ClientCertExpirationSeconds int32 @@ -264,6 +266,13 @@ func (n *klusterletController) sync(ctx context.Context, controllerContext facto annotationsArray = append(annotationsArray, fmt.Sprintf("%s=%s", k, v)) } config.ClusterAnnotationsString = strings.Join(annotationsArray, ",") + + // get bootstrap kubeconfig secrets from the klusterlet configuration + bootstrapKubeConfigs := klusterlet.Spec.RegistrationConfiguration.BootstrapKubeConfigs + if bootstrapKubeConfigs.Type == operatorapiv1.LocalSecrets { + config.BootStrapKubeConfigSecrets = bootstrapKubeConfigs.LocalSecrets.SecretNames + config.HubConnectionTimeoutSeconds = bootstrapKubeConfigs.LocalSecrets.HubConnectionTimeoutSeconds + } } config.RegistrationFeatureGates, registrationFeatureMsgs = helpers.ConvertToFeatureGateFlags("Registration", registrationFeatureGates, ocmfeature.DefaultSpokeRegistrationFeatureGates) diff --git a/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfig.go b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfig.go new file mode 100644 index 000000000..b4565a24c --- /dev/null +++ b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfig.go @@ -0,0 +1,100 @@ +package bootstrapkubeconfigsmanager + +import ( + "context" + "fmt" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +type boostrapKubeConfigStatus string + +const ( + boostrapKubeConfigStatusInValid boostrapKubeConfigStatus = "InValid" + boostrapKubeConfigStatusValid boostrapKubeConfigStatus = "Valid" +) + +// bootstrapKubeConfig represents a bootstrap kubeconfig that agent can use to bootstrap a managed cluster. +type boostrapKubeConfig interface { + // Name returns the name of the bootstrap kubeconfig. It helps to identify the bootstrap kubeconfig. + Name() string + + // KubeConfigData returns the kubeconfig data of the bootstrap kubeconfig. + // KubeConfigData includes the kubeconfig and the credentials to connect to the hub cluster. + KubeConfigData() (map[string][]byte, error) + + // Status returns the status of the bootstrap kubeconfig. + // A bootstrap kubeconfig has two status: Valid and InValid. + Status() (boostrapKubeConfigStatus, error) + + // Fail means at the time t, the bootstrap kubeconfig failed to connect to the hub cluster. 
+ Fail(t time.Time) error +} + +var _ boostrapKubeConfig = &boostrapKubeConfigSecretImpl{} + +const ( + // BootstrapKubeconfigFailedTimeAnnotationKey represents the time when the bootstrap kubeconfig failed + BootstrapKubeconfigFailedTimeAnnotationKey = "agent.open-cluster-management.io/bootstrap-kubeconfig-failed-time" +) + +type boostrapKubeConfigSecretImpl struct { + secretName string + secretNamespace string + skipFailedBootstrapKubeconfigSeconds int32 // if a bootstrap kubeconfig failed, in 3 mins, it can't be used in rebootstrap. + kubeClient kubernetes.Interface +} + +func (b *boostrapKubeConfigSecretImpl) Name() string { + return b.secretName +} + +func (b *boostrapKubeConfigSecretImpl) KubeConfigData() (map[string][]byte, error) { + secret, err := b.kubeClient.CoreV1().Secrets(b.secretNamespace).Get(context.Background(), b.secretName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("get the bootstrap kubeconfig secret failed: %v", err) + } + return secret.Data, nil +} + +func (b *boostrapKubeConfigSecretImpl) Status() (boostrapKubeConfigStatus, error) { + secret, err := b.kubeClient.CoreV1().Secrets(b.secretNamespace).Get(context.Background(), b.secretName, metav1.GetOptions{}) + if err != nil { + return boostrapKubeConfigStatusInValid, fmt.Errorf("get the bootstrap kubeconfig secret failed: %v", err) + } + + if secret.Annotations == nil { + return boostrapKubeConfigStatusValid, nil + } + + now := time.Now() + if failedTime, ok := secret.Annotations[BootstrapKubeconfigFailedTimeAnnotationKey]; ok { + failedTimeParsed, err := time.Parse(time.RFC3339, failedTime) + if err != nil { + return boostrapKubeConfigStatusInValid, fmt.Errorf("failed to parse the failed time %s of the secret %s: %v", failedTime, secret.Name, err) + } + if now.Sub(failedTimeParsed).Seconds() < float64(b.skipFailedBootstrapKubeconfigSeconds) { + return boostrapKubeConfigStatusInValid, nil + } + } + return boostrapKubeConfigStatusValid, nil +} + +func (b *boostrapKubeConfigSecretImpl) Fail(t time.Time) error { + secret, err := b.kubeClient.CoreV1().Secrets(b.secretNamespace).Get(context.Background(), b.secretName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("get the bootstrap kubeconfig secret failed: %v", err) + } + secretCopy := secret.DeepCopy() + if secretCopy.Annotations == nil { + secretCopy.Annotations = make(map[string]string) + } + secretCopy.Annotations[BootstrapKubeconfigFailedTimeAnnotationKey] = t.Format(time.RFC3339) + _, err = b.kubeClient.CoreV1().Secrets(b.secretNamespace).Update(context.Background(), secretCopy, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("update the secret %s failed: %v", b.secretName, err) + } + return nil +} diff --git a/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfig_test.go b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfig_test.go new file mode 100644 index 000000000..880568457 --- /dev/null +++ b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfig_test.go @@ -0,0 +1,202 @@ +package bootstrapkubeconfigsmanager + +import ( + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" +) + +func TestBootstrapKubeConfigSecretImpl(t *testing.T) { + testcases := []struct { + name string + mockClient func(failedTime time.Time) *fake.Clientset + validate func(c boostrapKubeConfig) + }{ + { + name: "Valid 
BootstrapKubeConfig", + mockClient: func(_ time.Time) *fake.Clientset { + // Create a mock secret + mockSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mock-secret", + Namespace: "test-namespace", + }, + Data: map[string][]byte{ + "kubeconfig": []byte("mock-kubeconfig"), + }, + } + + // Create a mock kubeClient + mockKubeClient := &fake.Clientset{} + mockKubeClient.AddReactor("get", "secrets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, mockSecret, nil + }) + return mockKubeClient + }, + validate: func(c boostrapKubeConfig) { + if c.Name() != "mock-secret" { + t.Errorf("Expected name %v, but got %v", "mock-secret", c.Name()) + } + + kubeConfigData, err := c.KubeConfigData() + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + if string(kubeConfigData["kubeconfig"]) != "mock-kubeconfig" { + t.Errorf("Expected kubeconfig data %v, but got %v", "mock-kubeconfig", string(kubeConfigData["kubeconfig"])) + } + + status, err := c.Status() + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + if status != boostrapKubeConfigStatusValid { + t.Errorf("Expected status %v, but got %v", boostrapKubeConfigStatusValid, status) + } + }, + }, + { + name: "Once failed but now valid BootstrapKubeConfig", + mockClient: func(_ time.Time) *fake.Clientset { + // Create a mock secret + mockSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mock-secret", + Namespace: "test-namespace", + Annotations: map[string]string{ + BootstrapKubeconfigFailedTimeAnnotationKey: "2021-08-01T00:00:00Z", + }, + }, + Data: map[string][]byte{ + "kubeconfig": []byte("mock-kubeconfig"), + }, + } + + // Create a mock kubeClient + mockKubeClient := &fake.Clientset{} + mockKubeClient.AddReactor("get", "secrets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, mockSecret, nil + }) + return mockKubeClient + }, + validate: func(c boostrapKubeConfig) { + status, err := c.Status() + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + if status != boostrapKubeConfigStatusValid { + t.Errorf("Expected status %v, but got %v", boostrapKubeConfigStatusValid, status) + } + }, + }, + { + name: "Recently failed and invalid BootstrapKubeConfig", + mockClient: func(failedTime time.Time) *fake.Clientset { + // Create a mock secret + mockSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mock-secret", + Namespace: "test-namespace", + Annotations: map[string]string{ + BootstrapKubeconfigFailedTimeAnnotationKey: failedTime.Format(time.RFC3339), + }, + }, + Data: map[string][]byte{ + "kubeconfig": []byte("mock-kubeconfig"), + }, + } + + // Create a mock kubeClient + mockKubeClient := &fake.Clientset{} + mockKubeClient.AddReactor("get", "secrets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, mockSecret, nil + }) + return mockKubeClient + }, + validate: func(c boostrapKubeConfig) { + status, err := c.Status() + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + if status != boostrapKubeConfigStatusInValid { + t.Errorf("Expected status %v, but got %v", boostrapKubeConfigStatusInValid, status) + } + }, + }, + { + name: "Fail a BootstrapKubeConfig", + mockClient: func(_ time.Time) *fake.Clientset { + // Create a mock secret + mockSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mock-secret", + Namespace: "test-namespace", + }, + Data: map[string][]byte{ + "kubeconfig": []byte("mock-kubeconfig"), + }, + } 
+ + // Create a mock kubeClient + mockKubeClient := &fake.Clientset{} + mockKubeClient.AddReactor("get", "secrets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, mockSecret, nil + }) + mockKubeClient.AddReactor("update", "secrets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + // Get annotation from action + annotations := action.(k8stesting.UpdateAction).GetObject().(*corev1.Secret).Annotations + mockSecret.Annotations = annotations + return true, nil, nil + }) + return mockKubeClient + }, + validate: func(c boostrapKubeConfig) { + status, err := c.Status() + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + if status != boostrapKubeConfigStatusValid { + t.Errorf("Expected status %v, but got %v", boostrapKubeConfigStatusValid, status) + } + + // Fail the BootstrapKubeConfig + err = c.Fail(time.Now()) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + status, err = c.Status() + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + if status != boostrapKubeConfigStatusInValid { + t.Errorf("Expected status %v, but got %v", boostrapKubeConfigStatusInValid, status) + } + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + mockClient := tc.mockClient(time.Now()) + bootstrapKubeConfig := &boostrapKubeConfigSecretImpl{ + kubeClient: mockClient, + secretNamespace: "test-namespace", + secretName: "mock-secret", + skipFailedBootstrapKubeconfigSeconds: 60, + } + tc.validate(bootstrapKubeConfig) + }) + } +} diff --git a/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfiginuse.go b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfiginuse.go new file mode 100644 index 000000000..ab6b42ae3 --- /dev/null +++ b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfiginuse.go @@ -0,0 +1,53 @@ +package bootstrapkubeconfigsmanager + +import ( + "context" + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +// bootstrapKubeConfigInUse is the registration spoke's current in used bootstrap kubeconfig. +type bootstrapKubeConfigInUse interface { + // KubeConfigData returns the kubeconfig data of the bootstrap kubeconfig in use. + KubeConfigData() (map[string][]byte, error) + + // Update updates the kubeconfig data of the bootstrap kubeconfig in use. 
+ Update(ctx context.Context, kubeconfigData map[string][]byte) error +} + +var _ bootstrapKubeConfigInUse = &bootstrapKubeConfigInUseImpl{} + +type bootstrapKubeConfigInUseImpl struct { + secretName string + secretNamespace string + kubeClient kubernetes.Interface +} + +func (b *bootstrapKubeConfigInUseImpl) KubeConfigData() (map[string][]byte, error) { + secret, err := b.kubeClient.CoreV1().Secrets(b.secretNamespace).Get(context.Background(), b.secretName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("get the bootstrap kubeconfig secret failed: %v", err) + } + return secret.Data, nil +} + +func (b *bootstrapKubeConfigInUseImpl) Update(ctx context.Context, kubeconfigData map[string][]byte) error { + var err error + // Get the in-use bootstrapkubeconfig secret + inUse, err := b.kubeClient.CoreV1().Secrets(b.secretNamespace).Get(ctx, b.secretName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("get the current bootstrap kubeconfig secret failed: %v", err) + } + + // Update the in-use bootstrapkubeconfig secret + copy := inUse.DeepCopy() + copy.Data = kubeconfigData + + _, err = b.kubeClient.CoreV1().Secrets(b.secretNamespace).Update(ctx, copy, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("update the current bootstrap kubeconfig secret failed: %v", err) + } + return nil +} diff --git a/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfiginuse_test.go b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfiginuse_test.go new file mode 100644 index 000000000..4a8f95fb6 --- /dev/null +++ b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfiginuse_test.go @@ -0,0 +1,85 @@ +package bootstrapkubeconfigsmanager + +import ( + "context" + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" +) + +func TestBootstrapKubeConfigInUseImpl(t *testing.T) { + testcases := []struct { + name string + mockClient func() *fake.Clientset + validate func(c bootstrapKubeConfigInUse) + }{ + { + name: "Update KubeConfig Data", + mockClient: func() *fake.Clientset { + // Create a mock secret + mockSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mock-secret", + Namespace: "test-namespace", + }, + Data: map[string][]byte{ + "kubeconfig": []byte("mock-kubeconfig"), + }, + } + + // Create a mock kubeClient + mockKubeClient := &fake.Clientset{} + mockKubeClient.AddReactor("get", "secrets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, mockSecret, nil + }) + mockKubeClient.AddReactor("update", "secrets", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + // Get data from action + data := action.(k8stesting.UpdateAction).GetObject().(*corev1.Secret).Data + mockSecret.Data = data + return true, nil, nil + }) + return mockKubeClient + }, + validate: func(c bootstrapKubeConfigInUse) { + kubeConfigData, err := c.KubeConfigData() + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if string(kubeConfigData["kubeconfig"]) != "mock-kubeconfig" { + t.Errorf("Expected kubeconfig data %v, but got %v", "mock-kubeconfig", string(kubeConfigData["kubeconfig"])) + } + + // Update kubeconfig data + err = c.Update(context.TODO(), map[string][]byte{ + "kubeconfig": []byte("new-kubeconfig"), + }) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + kubeConfigData, err = 
c.KubeConfigData() + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if string(kubeConfigData["kubeconfig"]) != "new-kubeconfig" { + t.Errorf("Expected kubeconfig data %v, but got %v", "new-kubeconfig", string(kubeConfigData["kubeconfig"])) + } + }, + }, + } + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + mockClient := tc.mockClient() + bootstrapKubeConfig := &bootstrapKubeConfigInUseImpl{ + kubeClient: mockClient, + secretNamespace: "test-namespace", + secretName: "mock-secret", + } + tc.validate(bootstrapKubeConfig) + }) + } +} diff --git a/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfigsmanager.go b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfigsmanager.go new file mode 100644 index 000000000..d04099fa7 --- /dev/null +++ b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfigsmanager.go @@ -0,0 +1,117 @@ +package bootstrapkubeconfigsmanager + +import ( + "context" + "fmt" + "reflect" + "time" + + "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" +) + +// bootstrapkubeconfigsManager manages multiple bootstrapkubeconfigs. +// The only public method is ReSelect. +// ReSelect do 2 things: +// 1. Call Fail method of the bootstrapkubeconfig that has the same kubeconfig data with the current bootstrapkubeconfig in use. +// 2. Select a first valid bootstrapkubeconfig from the list and update it to the current bootstrapkubeconfig in use. +// +// Note: ReSelect should be called only once. Then it wait for registration-agent to rebootstrap. +type bootstrapkubeconfigsManager struct { + bootstrapKubeConfigInUse bootstrapKubeConfigInUse + bootstrapKubeConfigs []boostrapKubeConfig + reselected bool +} + +func NewBoostrapkubeconfigsManager(spokeNamespace string, + bootstrapKubeconfigSecretInUse string, + bootstrapKubeconfigSecrets []string, + kubeClient kubernetes.Interface) *bootstrapkubeconfigsManager { + var bootstrapKubeConfigs []boostrapKubeConfig + for _, secretName := range bootstrapKubeconfigSecrets { + bootstrapKubeConfigs = append(bootstrapKubeConfigs, &boostrapKubeConfigSecretImpl{ + secretName: secretName, + secretNamespace: spokeNamespace, + skipFailedBootstrapKubeconfigSeconds: 180, // 3 mins + kubeClient: kubeClient, + }) + } + return &bootstrapkubeconfigsManager{ + bootstrapKubeConfigInUse: &bootstrapKubeConfigInUseImpl{ + secretName: bootstrapKubeconfigSecretInUse, + secretNamespace: spokeNamespace, + kubeClient: kubeClient, + }, + bootstrapKubeConfigs: bootstrapKubeConfigs, + reselected: false, + } +} + +// ReSelect is the only public method of a bootstrapkubeconfigManager +func (m *bootstrapkubeconfigsManager) ReSelect(ctx context.Context) error { + if m.reselected { + klog.Info("ReSelect has been called") + return nil + } + + now := time.Now() + klog.Info("ReSelect Start") + + bootstrapKubeConfigInUse := m.bootstrapKubeConfigInUse + kubeconfigDataInUse, err := bootstrapKubeConfigInUse.KubeConfigData() + if err != nil { + return fmt.Errorf("get the current bootstrap kubeconfig failed: %v", err) + } + + bootstrapKubeConfigs := m.bootstrapKubeConfigs + if len(m.bootstrapKubeConfigs) == 0 { + klog.Info("No bootstrap kubeconfig found") + return nil + } + + // If kubeconfigData equals to the kubeConfigDataInUse, need to mark the bootstrapkubeconfig as invalid. 
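+	// Only entries whose data matches the in-use kubeconfig are marked as failed here; entries
+	// that already differ are left untouched so they remain candidates in the selection loop below.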
+ for _, bootstrapKubeConfig := range bootstrapKubeConfigs { + kubeconfigData, err := bootstrapKubeConfig.KubeConfigData() + if err != nil { + klog.Errorf("get the kubeconfig data of the secret %s failed: %v", bootstrapKubeConfig.Name(), err) + continue + } + + if reflect.DeepEqual(kubeconfigData, kubeconfigDataInUse) { + err := bootstrapKubeConfig.Fail(now) + if err != nil { + klog.Errorf("fail the bootstrapKubeConfig %s failed: %v", bootstrapKubeConfig.Name(), err) + } + } + } + + // Select a new valid bootstrapkubeconfig and update it to the current bootstrapkubeconfig in use. + for _, bootstrapKubeConfig := range bootstrapKubeConfigs { + status, err := bootstrapKubeConfig.Status() + if err != nil { + klog.Errorf("get the status of the secret %s failed: %v", bootstrapKubeConfig.Name(), err) + continue + } + if status == boostrapKubeConfigStatusInValid { + klog.Errorf("bootstrap kubeconfig %s is invalid", bootstrapKubeConfig.Name()) + continue + } + if status == boostrapKubeConfigStatusValid { + kubeconfigData, err := bootstrapKubeConfig.KubeConfigData() + if err != nil { + klog.Errorf("get the kubeconfig data of the bootstrapKubeConfig %s failed: %v", bootstrapKubeConfig.Name(), err) + continue + } + err = bootstrapKubeConfigInUse.Update(ctx, kubeconfigData) + if err != nil { + klog.Errorf("update the current bootstrap kubeconfig failed: %v", err) + continue + } + + m.reselected = true + klog.Info("ReSelect Success") + break + } + } + return nil +} diff --git a/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfigsmanager_test.go b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfigsmanager_test.go new file mode 100644 index 000000000..e5b12f54d --- /dev/null +++ b/pkg/registration/spoke/bootstrapkubeconfigsmanager/bootstrapkubeconfigsmanager_test.go @@ -0,0 +1,104 @@ +package bootstrapkubeconfigsmanager + +import ( + "context" + "testing" + "time" +) + +func TestBootstrapKubeConfigManager(t *testing.T) { + var err error + manager := &bootstrapkubeconfigsManager{ + bootstrapKubeConfigInUse: &fakeBootstrapKubeConfigInUse{ + kubeconfigData: map[string][]byte{ + "kubeconfig": []byte("hub1"), + }, + }, + bootstrapKubeConfigs: []boostrapKubeConfig{ + &fakeBootstrapKubeConfig{ + name: "hub1", + kubeconfigData: map[string][]byte{ + "kubeconfig": []byte("hub1"), + }, + status: boostrapKubeConfigStatusValid, + }, + &fakeBootstrapKubeConfig{ + name: "hub2", + kubeconfigData: map[string][]byte{ + "kubeconfig": []byte("hub2"), + }, + status: boostrapKubeConfigStatusInValid, + }, + &fakeBootstrapKubeConfig{ + name: "hub3", + kubeconfigData: map[string][]byte{ + "kubeconfig": []byte("hub3"), + }, + status: boostrapKubeConfigStatusValid, + }, + }, + } + + err = manager.ReSelect(context.TODO()) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + // the status of hub1 should change to invalid + if status, _ := manager.bootstrapKubeConfigs[0].Status(); status != boostrapKubeConfigStatusInValid { + t.Errorf("Expected status %v, but got %v", boostrapKubeConfigStatusInValid, status) + } + + // the kubeconfigData of bootstrapKubeConfigInUse should change to hub3 + kubeconfigData, _ := manager.bootstrapKubeConfigInUse.KubeConfigData() + if string(kubeconfigData["kubeconfig"]) != "hub3" { + t.Errorf("Expected kubeconfig %v, but got %v", "hub3", string(kubeconfigData["kubeconfig"])) + } + + // reselect again + err = manager.ReSelect(context.TODO()) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + // the status of hub3 should still be valid + if 
status, _ := manager.bootstrapKubeConfigs[2].Status(); status != boostrapKubeConfigStatusValid { + t.Errorf("Expected status %v, but got %v", boostrapKubeConfigStatusValid, status) + } +} + +type fakeBootstrapKubeConfigInUse struct { + kubeconfigData map[string][]byte +} + +func (f *fakeBootstrapKubeConfigInUse) KubeConfigData() (map[string][]byte, error) { + return f.kubeconfigData, nil +} + +func (f *fakeBootstrapKubeConfigInUse) Update(ctx context.Context, kubeconfigData map[string][]byte) error { + f.kubeconfigData = kubeconfigData + return nil +} + +type fakeBootstrapKubeConfig struct { + name string + kubeconfigData map[string][]byte + status boostrapKubeConfigStatus +} + +func (f *fakeBootstrapKubeConfig) Name() string { + return f.name +} + +func (f *fakeBootstrapKubeConfig) KubeConfigData() (map[string][]byte, error) { + return f.kubeconfigData, nil +} + +func (f *fakeBootstrapKubeConfig) Status() (boostrapKubeConfigStatus, error) { + return f.status, nil +} + +func (f *fakeBootstrapKubeConfig) Fail(failedTime time.Time) error { + f.status = boostrapKubeConfigStatusInValid + return nil +} diff --git a/pkg/registration/spoke/lease/lease_controller.go b/pkg/registration/spoke/lease/lease_controller.go index ef7ab9f29..fd347678a 100644 --- a/pkg/registration/spoke/lease/lease_controller.go +++ b/pkg/registration/spoke/lease/lease_controller.go @@ -26,7 +26,7 @@ type managedClusterLeaseController struct { clusterName string hubClusterLister clusterv1listers.ManagedClusterLister lastLeaseDurationSeconds int32 - leaseUpdater leaseUpdaterInterface + leaseUpdater leaseUpdater } // NewManagedClusterLeaseController creates a new managed cluster lease controller on the managed cluster. @@ -34,15 +34,19 @@ func NewManagedClusterLeaseController( clusterName string, hubClient clientset.Interface, hubClusterInformer clusterv1informer.ManagedClusterInformer, + timeoutSeconds int32, + handleTimeout func(ctx context.Context) error, recorder events.Recorder) factory.Controller { c := &managedClusterLeaseController{ clusterName: clusterName, hubClusterLister: hubClusterInformer.Lister(), - leaseUpdater: &leaseUpdater{ - hubClient: hubClient, - clusterName: clusterName, - leaseName: "managed-cluster-lease", - recorder: recorder, + leaseUpdater: &leaseUpdaterImpl{ + hubClient: hubClient, + clusterName: clusterName, + leaseName: "managed-cluster-lease", + recorder: recorder, + timeoutSeconds: timeoutSeconds, + handleTimeout: handleTimeout, }, } @@ -85,23 +89,27 @@ func (c *managedClusterLeaseController) sync(ctx context.Context, syncCtx factor return nil } -type leaseUpdaterInterface interface { +type leaseUpdater interface { start(ctx context.Context, leaseDuration time.Duration) stop() } // leaseUpdater periodically updates the lease of a managed cluster -type leaseUpdater struct { +type leaseUpdaterImpl struct { hubClient clientset.Interface clusterName string leaseName string lock sync.Mutex cancel context.CancelFunc recorder events.Recorder + + lastSuccuessUpdateTime time.Time + timeoutSeconds int32 + handleTimeout func(ctx context.Context) error } // start a lease update routine to update the lease of a managed cluster periodically. 
-func (u *leaseUpdater) start(ctx context.Context, leaseDuration time.Duration) { +func (u *leaseUpdaterImpl) start(ctx context.Context, leaseDuration time.Duration) { u.lock.Lock() defer u.lock.Unlock() @@ -112,11 +120,13 @@ func (u *leaseUpdater) start(ctx context.Context, leaseDuration time.Duration) { var updateCtx context.Context updateCtx, u.cancel = context.WithCancel(ctx) go wait.JitterUntilWithContext(updateCtx, u.update, leaseDuration, leaseUpdateJitterFactor, true) - u.recorder.Eventf("ManagedClusterLeaseUpdateStarted", "Start to update lease %q on cluster %q", u.leaseName, u.clusterName) + u.recorder.Eventf("ManagedClusterLeaseUpdateStarted", + "Start to update lease %q on cluster %q, leaseDuration: %d, timeoutSeconds: %d", + u.leaseName, u.clusterName, leaseDuration, u.timeoutSeconds) } // stop the lease update routine. -func (u *leaseUpdater) stop() { +func (u *leaseUpdaterImpl) stop() { u.lock.Lock() defer u.lock.Unlock() @@ -129,15 +139,52 @@ func (u *leaseUpdater) stop() { } // update the lease of a given managed cluster. -func (u *leaseUpdater) update(ctx context.Context) { +func (u *leaseUpdaterImpl) update(ctx context.Context) { + u.lock.Lock() + defer u.lock.Unlock() + + now := time.Now() + + handleTimeoutIfExpired := func(now time.Time) { + // if handleTimeout is nil, do nothing. + if u.handleTimeout == nil { + return + } + // if now is after the lastSuccuessRenewTime + timeoutSeconds, log timeout event and call timeoutThen function. + if !u.lastSuccuessUpdateTime.IsZero() && now.After(u.lastSuccuessUpdateTime.Add(time.Duration(u.timeoutSeconds)*time.Second)) { + utilruntime.HandleError(fmt.Errorf("Timeout to update lease cluster lease %q on hub cluster", u.leaseName)) + if err := u.handleTimeout(ctx); err != nil { + utilruntime.HandleError(fmt.Errorf("unable to handle timeout of cluster lease %q on hub cluster: %w", u.leaseName, err)) + } + return + } + } + + handleErrorAndTimeout := func(err error, now time.Time) { + utilruntime.HandleError(fmt.Errorf("unable to operate cluster lease %q on hub cluster: %w", u.leaseName, err)) + handleTimeoutIfExpired(now) + } + + // Get lease on the hub. lease, err := u.hubClient.CoordinationV1().Leases(u.clusterName).Get(ctx, u.leaseName, metav1.GetOptions{}) if err != nil { - utilruntime.HandleError(fmt.Errorf("unable to get cluster lease %q on hub cluster: %w", u.leaseName, err)) + handleErrorAndTimeout(fmt.Errorf("get lease err: %w", err), now) return } - lease.Spec.RenewTime = &metav1.MicroTime{Time: time.Now()} + // Update the lease RenewTime. + lease.Spec.RenewTime = &metav1.MicroTime{Time: now} if _, err = u.hubClient.CoordinationV1().Leases(u.clusterName).Update(ctx, lease, metav1.UpdateOptions{}); err != nil { - utilruntime.HandleError(fmt.Errorf("unable to update cluster lease %q on hub cluster: %w", u.leaseName, err)) + handleErrorAndTimeout(fmt.Errorf("update lease err: %w", err), now) + return } + + // Update the lastSuccuessUpdateTime. 
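+	// A successful get+update resets the timeout window; handleTimeout (wired by the spoke agent
+	// to bootstrap kubeconfig reselection) only fires once timeoutSeconds pass without a
+	// successful update.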
+ u.lastSuccuessUpdateTime = now +} + +func (u *leaseUpdaterImpl) getLastSuccuessUpdateTime() time.Time { + u.lock.Lock() + defer u.lock.Unlock() + return u.lastSuccuessUpdateTime } diff --git a/pkg/registration/spoke/lease/lease_controller_test.go b/pkg/registration/spoke/lease/lease_controller_test.go index 0c555de64..e08d60882 100644 --- a/pkg/registration/spoke/lease/lease_controller_test.go +++ b/pkg/registration/spoke/lease/lease_controller_test.go @@ -130,14 +130,21 @@ func (f *fakeLeaseUpdater) stop() { f.stopCalled = true } -func TestLeaseUpdater(t *testing.T) { +func TestLeaseUpdater_UpdateSuccess(t *testing.T) { initRenewTime := time.Now() + handled := false hubClient := kubefake.NewSimpleClientset(testinghelpers.NewManagedClusterLease("managed-cluster-lease", initRenewTime)) - leaseUpdater := &leaseUpdater{ - hubClient: hubClient, - clusterName: testinghelpers.TestManagedClusterName, - leaseName: "managed-cluster-lease", - recorder: eventstesting.NewTestingEventRecorder(t), + leaseUpdater := &leaseUpdaterImpl{ + hubClient: hubClient, + clusterName: testinghelpers.TestManagedClusterName, + leaseName: "managed-cluster-lease", + recorder: eventstesting.NewTestingEventRecorder(t), + timeoutSeconds: 3, + lastSuccuessUpdateTime: initRenewTime, + handleTimeout: func(ctx context.Context) error { + handled = true + return nil + }, } // start the updater @@ -145,6 +152,7 @@ func TestLeaseUpdater(t *testing.T) { leaseUpdater.start(ctx, time.Second*1) // wait for 3 second, the all actions should be in get,update pairs + // and the lastSuccuessUpdateTime should be updated time.Sleep(time.Second * 3) actions := hubClient.Actions() if len(actions) == 0 { @@ -159,6 +167,12 @@ func TestLeaseUpdater(t *testing.T) { t.Errorf("expect update action, but got %s", actions[i+1].GetVerb()) } } + if !leaseUpdater.getLastSuccuessUpdateTime().After(initRenewTime) { + t.Errorf("expect lastSuccuessRenewTime after %v, but got %v", initRenewTime, leaseUpdater.lastSuccuessUpdateTime) + } + if handled { + t.Error("expect handled to be false, but got true") + } // stop the updater leaseUpdater.stop() @@ -171,3 +185,86 @@ func TestLeaseUpdater(t *testing.T) { t.Errorf("expect %d actions, but got %d", actionLen, len(actions)) } } + +func TestLeaseUpdater_TimeoutWithHandle(t *testing.T) { + initRenewTime := time.Now() + hubClient := kubefake.NewSimpleClientset() + leaseUpdater := &leaseUpdaterImpl{ + hubClient: hubClient, + clusterName: testinghelpers.TestManagedClusterName, + leaseName: "managed-cluster-lease", + recorder: eventstesting.NewTestingEventRecorder(t), + timeoutSeconds: 3, + lastSuccuessUpdateTime: initRenewTime, + } + + // start the updater + ctx := context.Background() + leaseUpdater.start(ctx, time.Second*1) + + // wait for 4 second, there should only be get actions + time.Sleep(time.Second * 4) + actions := hubClient.Actions() + if len(actions) == 0 { + t.Error("expect at least 1 action, but got 0") + return + } + for _, action := range actions { + if action.GetVerb() != "get" { + t.Errorf("expect get action, but got %s", action.GetVerb()) + } + } + + if !leaseUpdater.getLastSuccuessUpdateTime().Equal(initRenewTime) { + t.Errorf("expect lastSuccuessRenewTime equal to %v, but got %v", initRenewTime, leaseUpdater.lastSuccuessUpdateTime) + } + + // stop the updater + leaseUpdater.stop() +} + +func TestLeaseUpdater_TimeoutWithoutHandle(t *testing.T) { + initRenewTime := time.Now() + hubClient := kubefake.NewSimpleClientset() + timeout := false + leaseUpdater := &leaseUpdaterImpl{ + hubClient: 
hubClient,
+		clusterName: testinghelpers.TestManagedClusterName,
+		leaseName: "managed-cluster-lease",
+		recorder: eventstesting.NewTestingEventRecorder(t),
+		timeoutSeconds: 3,
+		lastSuccuessUpdateTime: initRenewTime,
+		handleTimeout: func(ctx context.Context) error {
+			timeout = true
+			return nil
+		},
+	}
+
+	// start the updater
+	ctx := context.Background()
+	leaseUpdater.start(ctx, time.Second*1)
+
+	// wait for 4 seconds, there should only be get actions
+	time.Sleep(time.Second * 4)
+	actions := hubClient.Actions()
+	if len(actions) == 0 {
+		t.Error("expect at least 1 action, but got 0")
+		return
+	}
+	for _, action := range actions {
+		if action.GetVerb() != "get" {
+			t.Errorf("expect get action, but got %s", action.GetVerb())
+		}
+	}
+
+	if !leaseUpdater.getLastSuccuessUpdateTime().Equal(initRenewTime) {
+		t.Errorf("expect lastSuccuessRenewTime equal to %v, but got %v", initRenewTime, leaseUpdater.lastSuccuessUpdateTime)
+	}
+
+	if !timeout {
+		t.Error("expect timeout to be true, but got false")
+	}
+
+	// stop the updater
+	leaseUpdater.stop()
+}
diff --git a/pkg/registration/spoke/options.go b/pkg/registration/spoke/options.go
index 4b65a4af2..0379fd442 100644
--- a/pkg/registration/spoke/options.go
+++ b/pkg/registration/spoke/options.go
@@ -15,8 +15,16 @@ var ClientCertHealthCheckInterval = 30 * time.Second
 // SpokeAgentOptions holds configuration for spoke cluster agent
 type SpokeAgentOptions struct {
-	BootstrapKubeconfig string
-	BootstrapKubeconfigSecret string
+	// The differences among BootstrapKubeconfig, BootstrapKubeconfigSecret, and BootstrapKubeconfigSecrets are:
+	// 1. BootstrapKubeconfig is a file path; the controller uses it to build the client.
+	// 2. BootstrapKubeconfigSecret is a secret name; an event handler watches the secret and, if its content changes, the agent rebootstraps.
+	// 3. BootstrapKubeconfigSecrets is a list of secret names; when rebootstrapping, the controller chooses one from the list.
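+	// For illustration only, the corresponding flags registered in AddFlags below could be set as:
+	//   --bootstrap-kubeconfig-secret=bootstrap-hub-kubeconfig
+	//   --bootstrap-kubeconfig-secrets=bootstrap-hub-kubeconfig-hub1
+	//   --bootstrap-kubeconfig-secrets=bootstrap-hub-kubeconfig-hub2
+	//   --hub-connection-timeout-seconds=600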
+ BootstrapKubeconfig string + BootstrapKubeconfigSecret string + BootstrapKubeconfigSecrets []string + + HubConnectionTimeoutSeconds int32 + HubKubeconfigSecret string SpokeExternalServerURLs []string ClusterHealthCheckPeriod time.Duration @@ -53,6 +61,10 @@ func (o *SpokeAgentOptions) AddFlags(fs *pflag.FlagSet) { "The path of the kubeconfig file for agent bootstrap.") fs.StringVar(&o.BootstrapKubeconfigSecret, "bootstrap-kubeconfig-secret", o.BootstrapKubeconfigSecret, "The name of secret in component namespace storing kubeconfig for agent bootstrap.") + fs.StringArrayVar(&o.BootstrapKubeconfigSecrets, "bootstrap-kubeconfig-secrets", o.BootstrapKubeconfigSecrets, + "The name of secrets in component namespace storing bootstrap kubeconfigs for agent bootstrap.") + fs.Int32Var(&o.HubConnectionTimeoutSeconds, "hub-connection-timeout-seconds", o.HubConnectionTimeoutSeconds, + "The timeout in seconds to connect to hub cluster.") fs.StringVar(&o.HubKubeconfigSecret, "hub-kubeconfig-secret", o.HubKubeconfigSecret, "The name of secret in component namespace storing kubeconfig for hub.") fs.StringArrayVar(&o.SpokeExternalServerURLs, "spoke-external-server-urls", o.SpokeExternalServerURLs, @@ -91,6 +103,10 @@ func (o *SpokeAgentOptions) Validate() error { return errors.New("client certificate expiration seconds must greater or qual to 3600") } + if o.HubConnectionTimeoutSeconds == 0 { + return errors.New("hub connection timeout seconds must greater than zero") + } + return nil } diff --git a/pkg/registration/spoke/registration/hub_accept_controller.go b/pkg/registration/spoke/registration/hub_accept_controller.go new file mode 100644 index 000000000..d75803643 --- /dev/null +++ b/pkg/registration/spoke/registration/hub_accept_controller.go @@ -0,0 +1,46 @@ +package registration + +import ( + "context" + + "github.com/openshift/library-go/pkg/controller/factory" + "github.com/openshift/library-go/pkg/operator/events" + + clusterv1informer "open-cluster-management.io/api/client/cluster/informers/externalversions/cluster/v1" + clusterv1listers "open-cluster-management.io/api/client/cluster/listers/cluster/v1" +) + +// hubAcceptController watch ManagedCluster CR on hub after spoke bootstrap done +// If the managedCluster.Spec.HubAccetpsClient is false, then mark the connection as failed. +type hubAcceptController struct { + clusterName string + hubClusterLister clusterv1listers.ManagedClusterLister + handleAcceptFalse func(ctx context.Context) error +} + +func NewHubAcceptController(clusterName string, hubClusterInformer clusterv1informer.ManagedClusterInformer, + handleAcceptFalse func(ctx context.Context) error, recorder events.Recorder) factory.Controller { + c := &hubAcceptController{ + clusterName: clusterName, + hubClusterLister: hubClusterInformer.Lister(), + handleAcceptFalse: handleAcceptFalse, + } + return factory.New(). + WithInformers(hubClusterInformer.Informer()). + WithSync(c.sync). 
+ ToController("HubAcceptController", recorder) +} + +func (c *hubAcceptController) sync(ctx context.Context, _ factory.SyncContext) error { + cluster, err := c.hubClusterLister.Get(c.clusterName) + if err != nil { + return err + } + if cluster.Spec.HubAcceptsClient { + return nil + } + if c.handleAcceptFalse == nil { + return nil + } + return c.handleAcceptFalse(ctx) +} diff --git a/pkg/registration/spoke/registration/hub_accept_controller_test.go b/pkg/registration/spoke/registration/hub_accept_controller_test.go new file mode 100644 index 000000000..53533d1ee --- /dev/null +++ b/pkg/registration/spoke/registration/hub_accept_controller_test.go @@ -0,0 +1,71 @@ +package registration + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + clusterv1lister "open-cluster-management.io/api/client/cluster/listers/cluster/v1" + clusterv1 "open-cluster-management.io/api/cluster/v1" +) + +func TestHubAcceptController_sync(t *testing.T) { + var err error + clusterName := "testCluster" + handled := false + mockHubClusterLister := &MockManagedClusterLister{} + hacontroller := &hubAcceptController{ + clusterName: clusterName, + hubClusterLister: mockHubClusterLister, + handleAcceptFalse: func(ctx context.Context) error { + handled = true + return nil + }, + } + + // Create a mock hub cluster with HubAcceptsClient set to false + mockHubClusterLister.mockHubCluster = &clusterv1.ManagedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + }, + Spec: clusterv1.ManagedClusterSpec{ + HubAcceptsClient: true, + }, + } + + // Call the sync method + err = hacontroller.sync(context.TODO(), nil) + assert.NoError(t, err, "Expected no error") + + // Expect handled to be false + assert.False(t, handled, "Expected handled to be false") + + // Create a mock hub cluster with HubAcceptsClient set to true + mockHubClusterLister.mockHubCluster = &clusterv1.ManagedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + }, + Spec: clusterv1.ManagedClusterSpec{ + HubAcceptsClient: false, + }, + } + + // Call the sync method again + err = hacontroller.sync(context.TODO(), nil) + assert.NoError(t, err, "Expected no error") + + // Expect handled to be true + assert.True(t, handled, "Expected handled to be true") +} + +type MockManagedClusterLister struct { + clusterv1lister.ManagedClusterLister + mockHubCluster *clusterv1.ManagedCluster +} + +func (m *MockManagedClusterLister) Get(name string) (*clusterv1.ManagedCluster, error) { + // Return a dummy ManagedCluster or an error based on your test case. 
+ return m.mockHubCluster, nil +} diff --git a/pkg/registration/spoke/spokeagent.go b/pkg/registration/spoke/spokeagent.go index 6aad644c2..0376cbc20 100644 --- a/pkg/registration/spoke/spokeagent.go +++ b/pkg/registration/spoke/spokeagent.go @@ -32,6 +32,7 @@ import ( "open-cluster-management.io/ocm/pkg/features" "open-cluster-management.io/ocm/pkg/registration/clientcert" "open-cluster-management.io/ocm/pkg/registration/spoke/addon" + bootstrapkubeconfigsmanager "open-cluster-management.io/ocm/pkg/registration/spoke/bootstrapkubeconfigsmanager" "open-cluster-management.io/ocm/pkg/registration/spoke/lease" "open-cluster-management.io/ocm/pkg/registration/spoke/managedcluster" "open-cluster-management.io/ocm/pkg/registration/spoke/registration" @@ -135,6 +136,13 @@ func (o *SpokeAgentConfig) RunSpokeAgentWithSpokeInformers(ctx context.Context, return err } + bootstrapKubeconfigManager := bootstrapkubeconfigsmanager.NewBoostrapkubeconfigsManager( + o.agentOptions.ComponentNamespace, + o.registrationOption.BootstrapKubeconfigSecret, // the in-use bootstrap kubeconfig secret + o.registrationOption.BootstrapKubeconfigSecrets, + managementKubeClient, + ) + // dump data in hub kubeconfig secret into file system if it exists err = registration.DumpSecret( managementKubeClient.CoreV1(), o.agentOptions.ComponentNamespace, o.registrationOption.HubKubeconfigSecret, @@ -368,6 +376,11 @@ func (o *SpokeAgentConfig) RunSpokeAgentWithSpokeInformers(ctx context.Context, o.agentOptions.SpokeClusterName, hubKubeClient, hubClusterInformerFactory.Cluster().V1().ManagedClusters(), + o.registrationOption.HubConnectionTimeoutSeconds, + func(ctx context.Context) error { + klog.Info("Failed to connect to hub because of lease out-of-date, reselect a new bootstrap kubeconfig") + return bootstrapKubeconfigManager.ReSelect(ctx) + }, recorder, ) @@ -411,6 +424,16 @@ func (o *SpokeAgentConfig) RunSpokeAgentWithSpokeInformers(ctx context.Context, ) } + hubAcceptController := registration.NewHubAcceptController( + o.agentOptions.SpokeClusterName, + hubClusterInformerFactory.Cluster().V1().ManagedClusters(), + func(ctx context.Context) error { + klog.Info("Failed to connect to hub because of hubAcceptClient set to false, reselect a new bootstrap kubeconfig") + return bootstrapKubeconfigManager.ReSelect(ctx) + }, + recorder, + ) + go hubKubeInformerFactory.Start(ctx.Done()) go hubClusterInformerFactory.Start(ctx.Done()) go namespacedManagementKubeInformerFactory.Start(ctx.Done()) @@ -429,6 +452,8 @@ func (o *SpokeAgentConfig) RunSpokeAgentWithSpokeInformers(ctx context.Context, go addOnRegistrationController.Run(ctx, 1) } + go hubAcceptController.Run(ctx, 1) + // start health checking of hub client certificate if o.registrationOption.clientCertHealthChecker != nil { tlsCertFile := path.Join(o.agentOptions.HubKubeconfigDir, clientcert.TLSCertFile) diff --git a/test/integration/registration/integration_suite_test.go b/test/integration/registration/integration_suite_test.go index d906a1031..c0d1cbc7f 100644 --- a/test/integration/registration/integration_suite_test.go +++ b/test/integration/registration/integration_suite_test.go @@ -91,6 +91,7 @@ func runAgentWithContext(ctx context.Context, name string, opt *spoke.SpokeAgent EventRecorder: util.NewIntegrationTestEventRecorder(name), }) if err != nil { + fmt.Printf("Failed to run agent %s: %v\n", name, err) return } }() diff --git a/test/integration/registration/spokeagent_switch_hub_test.go b/test/integration/registration/spokeagent_switch_hub_test.go new file mode 100644 
index 000000000..9a69e6616 --- /dev/null +++ b/test/integration/registration/spokeagent_switch_hub_test.go @@ -0,0 +1,408 @@ +package registration_test + +import ( + "context" + "fmt" + "path" + "time" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + . "github.com/onsi/gomega" + "github.com/openshift/library-go/pkg/controller/controllercmd" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/apiserver/pkg/server/healthz" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/envtest" + + addonclientset "open-cluster-management.io/api/client/addon/clientset/versioned" + clusterclientset "open-cluster-management.io/api/client/cluster/clientset/versioned" + clusterv1 "open-cluster-management.io/api/cluster/v1" + + commonoptions "open-cluster-management.io/ocm/pkg/common/options" + "open-cluster-management.io/ocm/pkg/registration/hub" + "open-cluster-management.io/ocm/pkg/registration/spoke" + "open-cluster-management.io/ocm/test/integration/util" +) + +// In the switch-hub test, we will start 2 hubs, and a spoke, we will test 2 cases: +// 1. the spoke first connects to hub1, we set the hub1 to not accept the spoke, then the spoke should switch to hub2. +// 2. the spoke first connects to hub1, we stop hub1, then the spoke should switch to hub2. +var _ = Describe("switch-hub", Ordered, func() { + var managedClusterName, bootstrapFile, hubKubeconfigSecret, suffix string + + var hub1bootstrapFilePath string + var hub2bootstrapFilePath string + var hub1KubeClient, hub2KubeClient kubernetes.Interface + var hub1ClusterClient, hub2ClusterClient clusterclientset.Interface + var hub1Env, hub2Env *envtest.Environment + var hub1NewAuthn, hub2NewAuthn *util.TestAuthn + var spokeCancel context.CancelFunc + + BeforeEach(func() { + var err error + suffix = rand.String(5) + managedClusterName = fmt.Sprintf("cluster-%s", suffix) + hubKubeconfigSecret = fmt.Sprintf("hub-kubeconfig-secret-%s", suffix) + hubKubeconfigDir := path.Join(util.TestDir, fmt.Sprintf("switch-hub-%s", suffix), "hub-kubeconfig") + testDir := fmt.Sprintf("switch-hub-%s", suffix) + bootstrapFile = path.Join(util.TestDir, testDir, "bootstrap-kubeconfig") + + // Ensure there is no remaining bootstrap-hub-kubeconfig secret + err = kubeClient.CoreV1().Secrets(testNamespace).Delete(context.Background(), "bootstrap-hub-kubeconfig", metav1.DeleteOptions{}) + if err != nil { + gomega.Expect(apierrors.IsNotFound(err)).To(gomega.BeTrue()) + } + + // Start 2 hubs + hub1bootstrapFilePath, hub1KubeClient, hub1ClusterClient, _, hub1Env, hub1NewAuthn = startNewHub(context.Background(), fmt.Sprintf("hub1-%s", suffix)) + fmt.Println("hub1 bootstrap file path: ", hub1bootstrapFilePath) + + hub2bootstrapFilePath, hub2KubeClient, hub2ClusterClient, _, hub2Env, hub2NewAuthn = startNewHub(context.Background(), fmt.Sprintf("hub2-%s", suffix)) + fmt.Println("hub2 bootstrap file path: ", hub2bootstrapFilePath) + + // Create: + // 1. agent bootstrapkubeconfig secret(based on hub1) and sync to files + // 2. hub1 bootstrapkubeconfig secret + // 3. 
hub2 bootstrapkubeconfig secret + + err = util.SyncBootstrapKubeConfigDataToSecret( + hub1bootstrapFilePath, + testNamespace, + "bootstrap-hub-kubeconfig", + kubeClient, + ) + Expect(err).NotTo(HaveOccurred()) + err = util.SyncSecretToBootstrapKubeConfigFiles(bootstrapFile, testNamespace, "bootstrap-hub-kubeconfig", kubeClient) + Expect(err).NotTo(HaveOccurred()) + + err = util.SyncBootstrapKubeConfigDataToSecret( + hub1bootstrapFilePath, + testNamespace, + "bootstrap-hub-kubeconfig-hub1", + kubeClient, + ) + Expect(err).NotTo(HaveOccurred()) + + err = util.SyncBootstrapKubeConfigDataToSecret( + hub2bootstrapFilePath, + testNamespace, + "bootstrap-hub-kubeconfig-hub2", + kubeClient, + ) + Expect(err).NotTo(HaveOccurred()) + + // start a auto restart agent + By("Starting a auto restart spoke agent") + var spokeCtx context.Context + spokeCtx, spokeCancel = context.WithCancel(context.Background()) + go startAutoRestartAgent(spokeCtx, + managedClusterName, hubKubeconfigDir, + func() *spoke.SpokeAgentOptions { + agentOptions := spoke.NewSpokeAgentOptions() + agentOptions.BootstrapKubeconfig = bootstrapFile + agentOptions.HubKubeconfigSecret = hubKubeconfigSecret + agentOptions.BootstrapKubeconfigSecret = "bootstrap-hub-kubeconfig" + agentOptions.BootstrapKubeconfigSecrets = []string{"bootstrap-hub-kubeconfig-hub1", "bootstrap-hub-kubeconfig-hub2"} + agentOptions.HubConnectionTimeoutSeconds = 10 + return agentOptions + }, + func(ctx context.Context, stopAgent context.CancelFunc, agentOptions *spoke.SpokeAgentOptions) { + startAgentHealthChecker(ctx, stopAgent, agentOptions.GetHealthCheckers()) + }) + + // start sync secret to file bootstrapFile + // this is used to simulate the agent to sync the bootstrap kubeconfig secret to file + // the interval is 3s, it must be less than the agent restart interval, because: + // when secret is updated, the agent will be triggered to restart + // and a new agent need to use the new kubeconfig file + startAgentBootstrapKubeConfigSecretToFileSyncer(spokeCtx, bootstrapFile, kubeClient) + + approveAndAcceptManagedCluster(testNamespace, managedClusterName, hubKubeconfigSecret, + hub1KubeClient, kubeClient, hub1ClusterClient, hub1NewAuthn, 10*time.Minute) + }) + + AfterEach(func() { + // stop hubs + hub1Env.Stop() + hub2Env.Stop() + + // stop spoke + spokeCancel() + }) + + Context("Hub1 doesn't accept client", func() { + It("Should switch to hub2", func() { + // Update managed cluster to not accept + By("Update ManagedCluster to not accept") + Eventually(func() error { + mc, err := util.GetManagedCluster(hub1ClusterClient, managedClusterName) + if err != nil { + return err + } + mc.Spec.HubAcceptsClient = false + _, err = hub1ClusterClient.ClusterV1().ManagedClusters().Update(context.TODO(), mc, metav1.UpdateOptions{}) + return err + }, eventuallyTimeout, eventuallyInterval).Should(BeNil()) + + // The spoke should switch to hub2 + approveAndAcceptManagedCluster(testNamespace, managedClusterName, hubKubeconfigSecret, + hub2KubeClient, kubeClient, hub2ClusterClient, hub2NewAuthn, 10*time.Minute) + }) + }) + + Context("Hub1 is down", func() { + It("Should switch to hub2", func() { + // Stop hub1 + hub1Env.Stop() + + // The timeoutSeconds is 10s, so we need to wait for 30s to make sure the agent is restarted + time.Sleep(30 * time.Second) + + // The spoke should switch to hub2 + approveAndAcceptManagedCluster(testNamespace, managedClusterName, hubKubeconfigSecret, + hub2KubeClient, kubeClient, hub2ClusterClient, hub2NewAuthn, 10*time.Minute) + }) + }) +}) + +func 
startNewHub(ctx context.Context, hubName string) ( + string, + kubernetes.Interface, + clusterclientset.Interface, + addonclientset.Interface, + *envtest.Environment, *util.TestAuthn) { + apiserver := &envtest.APIServer{} + newAuthn := util.NewTestAuthn(path.Join(util.CertDir, fmt.Sprintf("%s-another-ca.crt", hubName)), + path.Join(util.CertDir, fmt.Sprintf("%s-another-ca.key", hubName))) + apiserver.SecureServing.Authn = newAuthn + + env := &envtest.Environment{ + ControlPlane: envtest.ControlPlane{ + APIServer: apiserver, + }, + ErrorIfCRDPathMissing: true, + CRDDirectoryPaths: CRDPaths, + } + + cfg, err := env.Start() + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(cfg).ToNot(gomega.BeNil()) + + err = clusterv1.Install(scheme.Scheme) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // prepare configs + newSecurePort := env.ControlPlane.APIServer.SecureServing.Port + gomega.Expect(len(newSecurePort)).ToNot(gomega.BeZero()) + + anotherServerCertFile := fmt.Sprintf("%s/apiserver.crt", env.ControlPlane.APIServer.CertDir) + + // If the input is hub1, eventually we wil have: /tmp//hub1/bootstrap-kubeconfig. + // This is because under the /tmp//hub1, we will also create cert files for hub1. + bootstrapKubeConfigFile := path.Join(util.TestDir, hubName, "bootstrap-kubeconfig") + err = newAuthn.CreateBootstrapKubeConfigWithCertAge(bootstrapKubeConfigFile, anotherServerCertFile, newSecurePort, 24*time.Hour) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Prepare clients + kubeClient, err := kubernetes.NewForConfig(cfg) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(kubeClient).ToNot(gomega.BeNil()) + + clusterClient, err := clusterclientset.NewForConfig(cfg) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(clusterClient).ToNot(gomega.BeNil()) + + addOnClient, err := addonclientset.NewForConfig(cfg) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(clusterClient).ToNot(gomega.BeNil()) + + // Start hub controller + go func() { + err := hub.NewHubManagerOptions().RunControllerManager(ctx, &controllercmd.ControllerContext{ + KubeConfig: cfg, + EventRecorder: util.NewIntegrationTestEventRecorder(hubName), + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }() + + return bootstrapKubeConfigFile, kubeClient, clusterClient, addOnClient, env, newAuthn +} + +func startAgent(ctx context.Context, managedClusterName, hubKubeconfigDir string, + agentOptions *spoke.SpokeAgentOptions) (context.Context, context.CancelFunc) { + ginkgo.By("run registration agent") + commOptions := commonoptions.NewAgentOptions() + commOptions.HubKubeconfigDir = hubKubeconfigDir + commOptions.SpokeClusterName = managedClusterName + + agentCtx, stopAgent := context.WithCancel(ctx) + runAgentWithContext(agentCtx, "switch-hub", agentOptions, commOptions, spokeCfg) + + return agentCtx, stopAgent +} + +func approveAndAcceptManagedCluster(testNamespace, managedClusterName, hubKubeconfigSecret string, + hubKubeClient, spokeKubeClient kubernetes.Interface, hubClusterClient clusterclientset.Interface, + auth *util.TestAuthn, certAget time.Duration) { + // The spoke cluster and csr should be created after bootstrap + ginkgo.By("Check existence of ManagedCluster & CSR") + gomega.Eventually(func() error { + if _, err := util.GetManagedCluster(hubClusterClient, managedClusterName); err != nil { + return err + } + return nil + }, eventuallyTimeout, eventuallyInterval).Should(Succeed()) + + gomega.Eventually(func() error { + if _, err := 
+func approveAndAcceptManagedCluster(testNamespace, managedClusterName, hubKubeconfigSecret string,
+	hubKubeClient, spokeKubeClient kubernetes.Interface, hubClusterClient clusterclientset.Interface,
+	auth *util.TestAuthn, certAge time.Duration) {
+	// The spoke cluster and csr should be created after bootstrap
+	ginkgo.By("Check existence of ManagedCluster & CSR")
+	gomega.Eventually(func() error {
+		if _, err := util.GetManagedCluster(hubClusterClient, managedClusterName); err != nil {
+			return err
+		}
+		return nil
+	}, eventuallyTimeout, eventuallyInterval).Should(Succeed())
+
+	gomega.Eventually(func() error {
+		if _, err := util.FindUnapprovedSpokeCSR(hubKubeClient, managedClusterName); err != nil {
+			return err
+		}
+		return nil
+	}, eventuallyTimeout, eventuallyInterval).ShouldNot(gomega.HaveOccurred())
+
+	// The spoke cluster should have the finalizer that is added by the hub controller
+	gomega.Eventually(func() bool {
+		spokeCluster, err := util.GetManagedCluster(hubClusterClient, managedClusterName)
+		if err != nil {
+			return false
+		}
+		if len(spokeCluster.Finalizers) != 1 {
+			return false
+		}
+
+		if spokeCluster.Finalizers[0] != clusterv1.ManagedClusterFinalizer {
+			return false
+		}
+
+		return true
+	}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
+
+	ginkgo.By("Accept and approve the ManagedCluster")
+	// Simulate the hub cluster admin accepting the managedcluster and approving the csr
+	gomega.Eventually(func() error {
+		return util.AcceptManagedCluster(hubClusterClient, managedClusterName)
+	}, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed())
+	err := auth.ApproveSpokeClusterCSR(hubKubeClient, managedClusterName, certAge)
+	gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+	// The managed cluster should have the accepted condition after it is accepted
+	gomega.Eventually(func() error {
+		spokeCluster, err := util.GetManagedCluster(hubClusterClient, managedClusterName)
+		if err != nil {
+			return err
+		}
+		if meta.IsStatusConditionFalse(spokeCluster.Status.Conditions, clusterv1.ManagedClusterConditionHubAccepted) {
+			return fmt.Errorf("cluster should be accepted")
+		}
+		return nil
+	}, eventuallyTimeout, eventuallyInterval).ShouldNot(gomega.HaveOccurred())
+
+	// The hub kubeconfig secret should be filled after the csr is approved
+	gomega.Eventually(func() error {
+		if _, err := util.GetFilledHubKubeConfigSecret(spokeKubeClient, testNamespace, hubKubeconfigSecret); err != nil {
+			return err
+		}
+		return nil
+	}, eventuallyTimeout, eventuallyInterval).ShouldNot(gomega.HaveOccurred())
+
+	ginkgo.By("ManagedCluster joins the hub")
+	// The spoke cluster should eventually have the joined condition
+	gomega.Eventually(func() error {
+		spokeCluster, err := util.GetManagedCluster(hubClusterClient, managedClusterName)
+		if err != nil {
+			return err
+		}
+		joined := meta.FindStatusCondition(spokeCluster.Status.Conditions, clusterv1.ManagedClusterConditionJoined)
+		if joined == nil {
+			return fmt.Errorf("cluster should be joined")
+		}
+		return nil
+	}, eventuallyTimeout, eventuallyInterval).ShouldNot(gomega.HaveOccurred())
+
+	// Ensure cluster namespace is in place
+	gomega.Eventually(func() error {
+		_, err := hubKubeClient.CoreV1().Namespaces().Get(context.TODO(), managedClusterName, metav1.GetOptions{})
+		if err != nil {
+			return err
+		}
+		return nil
+	}, eventuallyTimeout, eventuallyInterval).ShouldNot(gomega.HaveOccurred())
+}
+
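+// startAutoRestartAgent runs the spoke agent and restarts it with freshly built
+// options whenever the agent context is cancelled, for example by the health
+// checker once the hub becomes unreachable. It keeps doing so until the outer
+// context is done.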
+func startAutoRestartAgent(ctx context.Context,
+	managedClusterName, hubKubeconfigDir string,
+	getNewAgentOptions func() *spoke.SpokeAgentOptions,
+	watchStop func(ctx context.Context, stopAgent context.CancelFunc, agentOptions *spoke.SpokeAgentOptions),
+) {
+	fmt.Println("[auto-restart-agent] - start agent...")
+	newAgentOptions := getNewAgentOptions()
+	agentCtx, stopAgent := startAgent(ctx, managedClusterName, hubKubeconfigDir, newAgentOptions)
+	go watchStop(ctx, stopAgent, newAgentOptions)
+	for {
+		select {
+		case <-agentCtx.Done():
+			// restart agent
+			time.Sleep(10 * time.Second) // Wait for the new secret to be synced to files
+
+			fmt.Println("[auto-restart-agent] - restart agent...")
+			newAgentOptions := getNewAgentOptions()
+			agentCtx, stopAgent = startAgent(ctx,
+				managedClusterName, hubKubeconfigDir, newAgentOptions)
+			go watchStop(ctx, stopAgent, newAgentOptions)
+		case <-ctx.Done():
+			// exit
+			fmt.Println("[auto-restart-agent] - shutting down...")
+			return
+		}
+	}
+}
+
+func startAgentHealthChecker(ctx context.Context, stopAgent context.CancelFunc, healthCheckers []healthz.HealthChecker) {
+	ticker := time.NewTicker(3 * time.Second)
+	fmt.Println("[agent-health-checker] - start health checking...")
+	for {
+		select {
+		case <-ticker.C:
+			for _, healthchecker := range healthCheckers {
+				if err := healthchecker.Check(nil); err != nil {
+					fmt.Printf("[agent-health-checker] - agent is not healthy: %v\n", err)
+					stopAgent()
+					return
+				}
+			}
+			fmt.Println("[agent-health-checker] - agent is healthy")
+		case <-ctx.Done():
+			// exit
+			fmt.Println("[agent-health-checker] - shutting down...")
+			return
+		}
+	}
+}
+
+func startAgentBootstrapKubeConfigSecretToFileSyncer(ctx context.Context, bootstrapFile string, kubeClient kubernetes.Interface) {
+	// Simulate the agent syncing the bootstrap kubeconfig secret to files
+	fmt.Println("[agent-bootstrap-kubeconfig-secret-syncer] - start sync secret to file...")
+	ticker := time.NewTicker(3 * time.Second)
+	go func() {
+		for {
+			select {
+			case <-ticker.C:
+				fmt.Printf("[agent-bootstrap-kubeconfig-secret-syncer] - sync secret to file...\n")
+				err := util.SyncSecretToBootstrapKubeConfigFiles(bootstrapFile, testNamespace, "bootstrap-hub-kubeconfig", kubeClient)
+				if err != nil {
+					fmt.Printf("[agent-bootstrap-kubeconfig-secret-syncer] - failed to sync bootstrap kubeconfig secret to file: %v\n", err)
+				}
+			case <-ctx.Done():
+				// exit
+				fmt.Println("[agent-bootstrap-kubeconfig-secret-syncer] - shutting down...")
+				return
+			}
+		}
+	}()
+
+}
diff --git a/test/integration/util/authentication.go b/test/integration/util/authentication.go
index 6b5fd2768..a3b0f3608 100644
--- a/test/integration/util/authentication.go
+++ b/test/integration/util/authentication.go
@@ -19,6 +19,7 @@ import (
 	certificates "k8s.io/api/certificates/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/rest"
@@ -279,6 +280,90 @@ func (t *TestAuthn) CreateBootstrapKubeConfig(configFileName, serverCertFile, se
 	return clientcmd.WriteToFile(*config, configFileName)
 }
 
+func GetBootstrapKubeConfigData(filePath string) (map[string][]byte, error) {
+	bootstrapKubeconfigDir := path.Dir(filePath)
+	files, err := os.ReadDir(bootstrapKubeconfigDir)
+	if err != nil {
+		return nil, err
+	}
+
+	data := map[string][]byte{}
+	for _, file := range files {
+		if file.IsDir() {
+			continue
+		}
+		filePath := path.Join(bootstrapKubeconfigDir, file.Name())
+		fileData, err := os.ReadFile(filePath)
+		if err != nil {
+			return nil, err
+		}
+		data[file.Name()] = fileData
+	}
+
+	return data, nil
+}
+
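+// SyncBootstrapKubeConfigDataToSecret creates or updates the given secret so that
+// its Data mirrors all regular files under the directory of filePath, i.e. the
+// bootstrap kubeconfig together with the cert files stored next to it.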
+func SyncBootstrapKubeConfigDataToSecret(
+	filePath, secretNS, secretName string,
+	kubeClient kubernetes.Interface) error {
+	data, err := GetBootstrapKubeConfigData(filePath)
+	if err != nil {
+		return err
+	}
+
+	secret, err := kubeClient.CoreV1().Secrets(secretNS).Get(context.Background(), secretName, metav1.GetOptions{})
+	if apierrors.IsNotFound(err) {
+		secret = &corev1.Secret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      secretName,
+				Namespace: secretNS,
+			},
+			Data: data,
+		}
+		_, err = kubeClient.CoreV1().Secrets(secretNS).Create(context.Background(), secret, metav1.CreateOptions{})
+		if err != nil {
+			return err
+		}
+	}
+	if err != nil {
+		return err
+	}
+
+	if reflect.DeepEqual(secret.Data, data) {
+		return nil
+	}
+	secretCopy := secret.DeepCopy()
+	secretCopy.Data = data
+	_, err = kubeClient.CoreV1().Secrets(secretNS).Update(context.Background(), secretCopy, metav1.UpdateOptions{})
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+func SyncSecretToBootstrapKubeConfigFiles(filePath, secretNS, secretName string, kubeClient kubernetes.Interface) error {
+	secret, err := kubeClient.CoreV1().Secrets(secretNS).Get(context.Background(), secretName, metav1.GetOptions{})
+	if err != nil {
+		return err
+	}
+
+	dir := path.Dir(filePath)
+
+	if _, err := os.Stat(dir); os.IsNotExist(err) {
+		if err = os.MkdirAll(dir, 0755); err != nil {
+			return err
+		}
+	}
+
+	for k, v := range secret.Data {
+		f := path.Join(dir, k)
+		if err := os.WriteFile(f, v, 0600); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
 func (t *TestAuthn) signClientCertKeyWithCA(user string, groups []string, maxAge time.Duration) ([]byte, []byte, error) {
 	now := time.Now()
 	caData, err := os.ReadFile(t.caFile)