Skip to content

Commit

Permalink
Run cluster status monitor on unsharded controller only
Browse files Browse the repository at this point in the history
Running one cluster status monitor per Fleet controller pod is
unnecessary and may cause conflicts in sharded setups, so the monitor
is now started only by the unsharded controller (empty shard ID).
  • Loading branch information
weyfonk committed Dec 18, 2024
1 parent fdae320 commit 2aaefd8
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions internal/cmd/controller/operator.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -178,16 +178,18 @@ func start(
 return err
 }

-setupLog.Info("starting cluster status monitor")
-cfg := fleetcfg.Get()
-// No need to run a similar check on the threshold, since its minimum value will be a multiple of the agent check-in
-// interval anyway.
-if cfg.ClusterMonitorInterval.Seconds() == 0 {
-err := errors.New("cluster status monitor interval cannot be 0")
-setupLog.Error(err, "cannot start cluster status monitor")
-return err
+if shardID == "" { // only one instance of the cluster status monitor needs to run.
+setupLog.Info("starting cluster status monitor")
+cfg := fleetcfg.Get()
+// No need to run a similar check on the threshold, since its minimum value will be a multiple of the agent check-in
+// interval anyway.
+if cfg.ClusterMonitorInterval.Seconds() == 0 {
+err := errors.New("cluster status monitor interval cannot be 0")
+setupLog.Error(err, "cannot start cluster status monitor")
+return err
 }
+go clustermonitor.Run(ctx, mgr.GetClient(), cfg.ClusterMonitorInterval.Duration, cfg.ClusterMonitorThreshold.Duration)
+}
-go clustermonitor.Run(ctx, mgr.GetClient(), cfg.ClusterMonitorInterval.Duration, cfg.ClusterMonitorThreshold.Duration)

 setupLog.Info("starting job scheduler")
 jobCtx, cancel := context.WithCancel(ctx)
Expand Down

0 comments on commit 2aaefd8

Please sign in to comment.