Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable cluster failover by default which should be explicitly enabled by administrators #5899

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion artifacts/deploy/karmada-controller-manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ spec:
- --cluster-status-update-frequency=10s
- --failover-eviction-timeout=30s
- --controllers=*,hpaScaleTargetMarker,deploymentReplicasSyncer
- --feature-gates=PropagationPolicyPreemption=true,MultiClusterService=true,StatefulFailoverInjection=true
- --feature-gates=Failover=true,PropagationPolicyPreemption=true,MultiClusterService=true,StatefulFailoverInjection=true
- --health-probe-bind-address=0.0.0.0:10357
- --v=4
livenessProbe:
Expand Down
6 changes: 0 additions & 6 deletions cmd/controller-manager/app/controllermanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,9 +567,6 @@ func startFederatedResourceQuotaStatusController(ctx controllerscontext.Context)
}

func startGracefulEvictionController(ctx controllerscontext.Context) (enabled bool, err error) {
if !features.FeatureGate.Enabled(features.GracefulEviction) {
return false, nil
}
rbGracefulEvictionController := &gracefuleviction.RBGracefulEvictionController{
Client: ctx.Mgr.GetClient(),
EventRecorder: ctx.Mgr.GetEventRecorderFor(gracefuleviction.RBGracefulEvictionControllerName),
Expand All @@ -594,9 +591,6 @@ func startGracefulEvictionController(ctx controllerscontext.Context) (enabled bo
}

func startApplicationFailoverController(ctx controllerscontext.Context) (enabled bool, err error) {
if !features.FeatureGate.Enabled(features.Failover) {
return false, nil
}
rbApplicationFailoverController := applicationfailover.RBApplicationFailoverController{
Client: ctx.Mgr.GetClient(),
EventRecorder: ctx.Mgr.GetEventRecorderFor(applicationfailover.RBApplicationFailoverControllerName),
Expand Down
8 changes: 1 addition & 7 deletions pkg/controllers/applicationfailover/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,13 +219,7 @@ func buildTaskOptions(failoverBehavior *policyv1alpha1.ApplicationFailoverBehavi
return nil, err
}
case policyv1alpha1.Never:
if features.FeatureGate.Enabled(features.GracefulEviction) {
taskOpts = append(taskOpts, workv1alpha2.WithSuppressDeletion(ptr.To[bool](true)))
} else {
err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Never)
klog.Error(err)
return nil, err
}
taskOpts = append(taskOpts, workv1alpha2.WithSuppressDeletion(ptr.To[bool](true)))
}

return taskOpts, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ import (

configv1alpha1 "github.com/karmada-io/karmada/pkg/apis/config/v1alpha1"
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
"github.com/karmada-io/karmada/pkg/features"
"github.com/karmada-io/karmada/pkg/resourceinterpreter"
"github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag"
"github.com/karmada-io/karmada/pkg/util/helper"
Expand Down Expand Up @@ -173,11 +172,6 @@ func (c *CRBApplicationFailoverController) updateBinding(ctx context.Context, bi
for _, cluster := range needEvictClusters {
allClusters.Delete(cluster)
}
if !features.FeatureGate.Enabled(features.GracefulEviction) {
for _, cluster := range needEvictClusters {
helper.EmitClusterEvictionEventForClusterResourceBinding(binding, cluster, c.EventRecorder, nil)
}
}
RainbowMango marked this conversation as resolved.
Show resolved Hide resolved

return nil
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ import (

configv1alpha1 "github.com/karmada-io/karmada/pkg/apis/config/v1alpha1"
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
"github.com/karmada-io/karmada/pkg/features"
"github.com/karmada-io/karmada/pkg/resourceinterpreter"
"github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag"
"github.com/karmada-io/karmada/pkg/util/helper"
Expand Down Expand Up @@ -173,11 +172,6 @@ func (c *RBApplicationFailoverController) updateBinding(ctx context.Context, bin
for _, cluster := range needEvictClusters {
allClusters.Delete(cluster)
}
if !features.FeatureGate.Enabled(features.GracefulEviction) {
for _, cluster := range needEvictClusters {
helper.EmitClusterEvictionEventForResourceBinding(binding, cluster, c.EventRecorder, nil)
}
}

return nil
}
Expand Down
12 changes: 9 additions & 3 deletions pkg/controllers/cluster/cluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package cluster

import (
"context"
"fmt"
"reflect"
"testing"
"time"
Expand All @@ -33,6 +34,7 @@ import (
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
workv1alpha1 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha1"
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
"github.com/karmada-io/karmada/pkg/features"
"github.com/karmada-io/karmada/pkg/util"
"github.com/karmada-io/karmada/pkg/util/gclient"
"github.com/karmada-io/karmada/pkg/util/names"
Expand Down Expand Up @@ -420,20 +422,24 @@ func TestController_monitorClusterHealth(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := features.FeatureGate.Set(fmt.Sprintf("%s=%t", features.Failover, true))
if err != nil {
t.Fatalf("Failed to enable failover feature gate: %v", err)
}
c := newClusterController()
if tt.cluster != nil {
if err := c.Create(context.Background(), tt.cluster, &client.CreateOptions{}); err != nil {
if err = c.Create(context.Background(), tt.cluster, &client.CreateOptions{}); err != nil {
t.Fatalf("failed to create cluster: %v", err)
}
}

if err := c.monitorClusterHealth(context.Background()); (err != nil) != tt.wantErr {
if err = c.monitorClusterHealth(context.Background()); (err != nil) != tt.wantErr {
t.Errorf("Controller.monitorClusterHealth() error = %v, wantErr %v", err, tt.wantErr)
return
}

cluster := &clusterv1alpha1.Cluster{}
if err := c.Get(context.Background(), types.NamespacedName{Name: "test-cluster"}, cluster, &client.GetOptions{}); err != nil {
if err = c.Get(context.Background(), types.NamespacedName{Name: "test-cluster"}, cluster, &client.GetOptions{}); err != nil {
t.Errorf("failed to get cluster: %v", err)
return
}
Expand Down
6 changes: 0 additions & 6 deletions pkg/controllers/cluster/taint_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,6 @@ func (tc *NoExecuteTaintManager) syncBindingEviction(key util.QueueKey) error {
}
klog.V(2).Infof("Success to evict Cluster(%s) from ResourceBinding(%s) schedule result",
fedKey.ClusterWideKey.NamespaceKey(), fedKey.Cluster)
if !features.FeatureGate.Enabled(features.GracefulEviction) {
helper.EmitClusterEvictionEventForResourceBinding(binding, cluster, tc.EventRecorder, nil)
}
} else if tolerationTime > 0 {
tc.bindingEvictionWorker.AddAfter(fedKey, tolerationTime)
}
Expand Down Expand Up @@ -252,9 +249,6 @@ func (tc *NoExecuteTaintManager) syncClusterBindingEviction(key util.QueueKey) e
}
klog.V(2).Infof("Success to evict Cluster(%s) from ClusterResourceBinding(%s) schedule result",
fedKey.ClusterWideKey.NamespaceKey(), fedKey.Cluster)
if !features.FeatureGate.Enabled(features.GracefulEviction) {
helper.EmitClusterEvictionEventForClusterResourceBinding(binding, cluster, tc.EventRecorder, nil)
}
} else if tolerationTime > 0 {
tc.clusterBindingEvictionWorker.AddAfter(fedKey, tolerationTime)
return nil
Expand Down
23 changes: 19 additions & 4 deletions pkg/features/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,22 @@ import (
)

const (
// Failover indicates if scheduler should reschedule on cluster failure.
// Failover controls whether the scheduler should reschedule
// workloads on cluster failure.
// When enabled, Karmada will automatically migrate workloads
// from a failed cluster to other available clusters.
//
// Note: This feature does not control application failover,
// which is managed separately via the PropagationPolicy or
// ClusterPropagationPolicy.
Failover featuregate.Feature = "Failover"

// GracefulEviction indicates if enable grace eviction.
// Takes effect only when the Failover feature is enabled.
// GracefulEviction controls whether to perform graceful evictions
// during both cluster failover and application failover.
// When used for cluster failover, it takes effect only when the
// Failover feature is enabled.
// Graceful eviction ensures that workloads are migrated in a
// controlled manner, minimizing disruption to applications.
GracefulEviction featuregate.Feature = "GracefulEviction"

// PropagateDeps indicates if relevant resources should be propagated automatically
Expand Down Expand Up @@ -60,7 +71,11 @@ var (

// DefaultFeatureGates is the default feature gates of Karmada.
DefaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
Failover: {Default: true, PreRelease: featuregate.Beta},
// Failover (cluster failover) is disabled by default because it involves migrating
// all resources in the cluster, which can have a significant impact. It should be
// explicitly enabled by administrators after full evaluation to avoid unexpected
// incidents.
Failover: {Default: false, PreRelease: featuregate.Beta},
GracefulEviction: {Default: true, PreRelease: featuregate.Beta},
PropagateDeps: {Default: true, PreRelease: featuregate.Beta},
CustomizedClusterResourceModeling: {Default: true, PreRelease: featuregate.Beta},
Expand Down