From a8e965321e33e50943b7df27e171508c7af50552 Mon Sep 17 00:00:00 2001 From: RainbowMango Date: Fri, 29 Nov 2024 18:45:54 +0800 Subject: [PATCH] Disable cluster failover by default which should be explicitly enabled by administrators after a fully evaluation. Signed-off-by: RainbowMango --- .../deploy/karmada-controller-manager.yaml | 2 +- .../app/controllermanager.go | 6 ----- pkg/controllers/applicationfailover/common.go | 8 +------ .../crb_application_failover_controller.go | 6 ----- .../rb_application_failover_controller.go | 6 ----- .../cluster/cluster_controller_test.go | 12 +++++++--- pkg/controllers/cluster/taint_manager.go | 6 ----- pkg/features/features.go | 23 +++++++++++++++---- 8 files changed, 30 insertions(+), 39 deletions(-) diff --git a/artifacts/deploy/karmada-controller-manager.yaml b/artifacts/deploy/karmada-controller-manager.yaml index 692822b0fecc..814a02d7fe92 100644 --- a/artifacts/deploy/karmada-controller-manager.yaml +++ b/artifacts/deploy/karmada-controller-manager.yaml @@ -30,7 +30,7 @@ spec: - --cluster-status-update-frequency=10s - --failover-eviction-timeout=30s - --controllers=*,hpaScaleTargetMarker,deploymentReplicasSyncer - - --feature-gates=PropagationPolicyPreemption=true,MultiClusterService=true,StatefulFailoverInjection=true + - --feature-gates=Failover=true,PropagationPolicyPreemption=true,MultiClusterService=true,StatefulFailoverInjection=true - --health-probe-bind-address=0.0.0.0:10357 - --v=4 livenessProbe: diff --git a/cmd/controller-manager/app/controllermanager.go b/cmd/controller-manager/app/controllermanager.go index 626daa6367f3..23172eb9b27d 100644 --- a/cmd/controller-manager/app/controllermanager.go +++ b/cmd/controller-manager/app/controllermanager.go @@ -567,9 +567,6 @@ func startFederatedResourceQuotaStatusController(ctx controllerscontext.Context) } func startGracefulEvictionController(ctx controllerscontext.Context) (enabled bool, err error) { - if !features.FeatureGate.Enabled(features.GracefulEviction) { - return false, nil - } rbGracefulEvictionController := &gracefuleviction.RBGracefulEvictionController{ Client: ctx.Mgr.GetClient(), EventRecorder: ctx.Mgr.GetEventRecorderFor(gracefuleviction.RBGracefulEvictionControllerName), @@ -594,9 +591,6 @@ func startGracefulEvictionController(ctx controllerscontext.Context) (enabled bo } func startApplicationFailoverController(ctx controllerscontext.Context) (enabled bool, err error) { - if !features.FeatureGate.Enabled(features.Failover) { - return false, nil - } rbApplicationFailoverController := applicationfailover.RBApplicationFailoverController{ Client: ctx.Mgr.GetClient(), EventRecorder: ctx.Mgr.GetEventRecorderFor(applicationfailover.RBApplicationFailoverControllerName), diff --git a/pkg/controllers/applicationfailover/common.go b/pkg/controllers/applicationfailover/common.go index 64160d715ac1..26313001bb5f 100644 --- a/pkg/controllers/applicationfailover/common.go +++ b/pkg/controllers/applicationfailover/common.go @@ -219,13 +219,7 @@ func buildTaskOptions(failoverBehavior *policyv1alpha1.ApplicationFailoverBehavi return nil, err } case policyv1alpha1.Never: - if features.FeatureGate.Enabled(features.GracefulEviction) { - taskOpts = append(taskOpts, workv1alpha2.WithSuppressDeletion(ptr.To[bool](true))) - } else { - err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Never) - klog.Error(err) - return nil, err - } + taskOpts = append(taskOpts, workv1alpha2.WithSuppressDeletion(ptr.To[bool](true))) } return taskOpts, nil diff --git a/pkg/controllers/applicationfailover/crb_application_failover_controller.go b/pkg/controllers/applicationfailover/crb_application_failover_controller.go index 9e06736f5092..6ab9907ef652 100644 --- a/pkg/controllers/applicationfailover/crb_application_failover_controller.go +++ b/pkg/controllers/applicationfailover/crb_application_failover_controller.go @@ -36,7 +36,6 @@ import ( configv1alpha1 "github.com/karmada-io/karmada/pkg/apis/config/v1alpha1" workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" - "github.com/karmada-io/karmada/pkg/features" "github.com/karmada-io/karmada/pkg/resourceinterpreter" "github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag" "github.com/karmada-io/karmada/pkg/util/helper" @@ -173,11 +172,6 @@ func (c *CRBApplicationFailoverController) updateBinding(ctx context.Context, bi for _, cluster := range needEvictClusters { allClusters.Delete(cluster) } - if !features.FeatureGate.Enabled(features.GracefulEviction) { - for _, cluster := range needEvictClusters { - helper.EmitClusterEvictionEventForClusterResourceBinding(binding, cluster, c.EventRecorder, nil) - } - } return nil } diff --git a/pkg/controllers/applicationfailover/rb_application_failover_controller.go b/pkg/controllers/applicationfailover/rb_application_failover_controller.go index 572d69f24795..dd5f86048efc 100644 --- a/pkg/controllers/applicationfailover/rb_application_failover_controller.go +++ b/pkg/controllers/applicationfailover/rb_application_failover_controller.go @@ -36,7 +36,6 @@ import ( configv1alpha1 "github.com/karmada-io/karmada/pkg/apis/config/v1alpha1" workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" - "github.com/karmada-io/karmada/pkg/features" "github.com/karmada-io/karmada/pkg/resourceinterpreter" "github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag" "github.com/karmada-io/karmada/pkg/util/helper" @@ -173,11 +172,6 @@ func (c *RBApplicationFailoverController) updateBinding(ctx context.Context, bin for _, cluster := range needEvictClusters { allClusters.Delete(cluster) } - if !features.FeatureGate.Enabled(features.GracefulEviction) { - for _, cluster := range needEvictClusters { - helper.EmitClusterEvictionEventForResourceBinding(binding, cluster, c.EventRecorder, nil) - } - } return nil } diff --git a/pkg/controllers/cluster/cluster_controller_test.go b/pkg/controllers/cluster/cluster_controller_test.go index cbdcef296ada..e1a915dc9e77 100644 --- a/pkg/controllers/cluster/cluster_controller_test.go +++ b/pkg/controllers/cluster/cluster_controller_test.go @@ -18,6 +18,7 @@ package cluster import ( "context" + "fmt" "reflect" "testing" "time" @@ -33,6 +34,7 @@ import ( clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1" workv1alpha1 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha1" workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" + "github.com/karmada-io/karmada/pkg/features" "github.com/karmada-io/karmada/pkg/util" "github.com/karmada-io/karmada/pkg/util/gclient" "github.com/karmada-io/karmada/pkg/util/names" @@ -420,20 +422,24 @@ func TestController_monitorClusterHealth(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + err := features.FeatureGate.Set(fmt.Sprintf("%s=%t", features.Failover, true)) + if err != nil { + t.Fatalf("Failed to enable failover feature gate: %v", err) + } c := newClusterController() if tt.cluster != nil { - if err := c.Create(context.Background(), tt.cluster, &client.CreateOptions{}); err != nil { + if err = c.Create(context.Background(), tt.cluster, &client.CreateOptions{}); err != nil { t.Fatalf("failed to create cluster: %v", err) } } - if err := c.monitorClusterHealth(context.Background()); (err != nil) != tt.wantErr { + if err = c.monitorClusterHealth(context.Background()); (err != nil) != tt.wantErr { t.Errorf("Controller.monitorClusterHealth() error = %v, wantErr %v", err, tt.wantErr) return } cluster := &clusterv1alpha1.Cluster{} - if err := c.Get(context.Background(), types.NamespacedName{Name: "test-cluster"}, cluster, &client.GetOptions{}); err != nil { + if err = c.Get(context.Background(), types.NamespacedName{Name: "test-cluster"}, cluster, &client.GetOptions{}); err != nil { t.Errorf("failed to get cluster: %v", err) return } diff --git a/pkg/controllers/cluster/taint_manager.go b/pkg/controllers/cluster/taint_manager.go index 285643c13af9..6d4c53768e25 100644 --- a/pkg/controllers/cluster/taint_manager.go +++ b/pkg/controllers/cluster/taint_manager.go @@ -190,9 +190,6 @@ func (tc *NoExecuteTaintManager) syncBindingEviction(key util.QueueKey) error { } klog.V(2).Infof("Success to evict Cluster(%s) from ResourceBinding(%s) schedule result", fedKey.ClusterWideKey.NamespaceKey(), fedKey.Cluster) - if !features.FeatureGate.Enabled(features.GracefulEviction) { - helper.EmitClusterEvictionEventForResourceBinding(binding, cluster, tc.EventRecorder, nil) - } } else if tolerationTime > 0 { tc.bindingEvictionWorker.AddAfter(fedKey, tolerationTime) } @@ -252,9 +249,6 @@ func (tc *NoExecuteTaintManager) syncClusterBindingEviction(key util.QueueKey) e } klog.V(2).Infof("Success to evict Cluster(%s) from ClusterResourceBinding(%s) schedule result", fedKey.ClusterWideKey.NamespaceKey(), fedKey.Cluster) - if !features.FeatureGate.Enabled(features.GracefulEviction) { - helper.EmitClusterEvictionEventForClusterResourceBinding(binding, cluster, tc.EventRecorder, nil) - } } else if tolerationTime > 0 { tc.clusterBindingEvictionWorker.AddAfter(fedKey, tolerationTime) return nil diff --git a/pkg/features/features.go b/pkg/features/features.go index 2836a0c5d18f..84e9f513d309 100644 --- a/pkg/features/features.go +++ b/pkg/features/features.go @@ -22,11 +22,22 @@ import ( ) const ( - // Failover indicates if scheduler should reschedule on cluster failure. + // Failover controls whether the scheduler should reschedule + // workloads on cluster failure. + // When enabled, Karmada will automatically migrate workloads + // from a failed cluster to other available clusters. + // + // Note: This feature does not control application failover, + // which is managed separately via the PropagationPolicy or + // ClusterPropagationPolicy. Failover featuregate.Feature = "Failover" - // GracefulEviction indicates if enable grace eviction. - // Takes effect only when the Failover feature is enabled. + // GracefulEviction controls whether to perform graceful evictions + // during both cluster failover and application failover. + // When used for cluster failover, it takes effect only when the + // Failover feature is enabled. + // Graceful eviction ensures that workloads are migrated in a + // controlled manner, minimizing disruption to applications. GracefulEviction featuregate.Feature = "GracefulEviction" // PropagateDeps indicates if relevant resources should be propagated automatically @@ -60,7 +71,11 @@ var ( // DefaultFeatureGates is the default feature gates of Karmada. DefaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ - Failover: {Default: true, PreRelease: featuregate.Beta}, + // Failover(cluster failover) is disabled by default because it involves migrating + // all resources in the cluster, which can have significant impacts, it should be + // explicitly enabled by administrators after fully evaluation to avoid unexpected + // incidents. + Failover: {Default: false, PreRelease: featuregate.Beta}, GracefulEviction: {Default: true, PreRelease: featuregate.Beta}, PropagateDeps: {Default: true, PreRelease: featuregate.Beta}, CustomizedClusterResourceModeling: {Default: true, PreRelease: featuregate.Beta},