From e52774032b4ce325c9e78f1d4b3d4e4d3d8be0e2 Mon Sep 17 00:00:00 2001 From: Qing Hao Date: Sun, 8 Oct 2023 21:08:12 -0500 Subject: [PATCH] upgrade addon rollout library (#212) Signed-off-by: haoqing0110 --- go.mod | 2 +- go.sum | 4 +- .../controllers/addonconfiguration/graph.go | 78 +++-- .../addonconfiguration/graph_test.go | 89 ++++-- vendor/modules.txt | 2 +- .../api/cluster/v1alpha1/helpers.go | 294 +++++++++++------- .../cluster/v1alpha1/zz_generated.deepcopy.go | 4 +- 7 files changed, 292 insertions(+), 181 deletions(-) diff --git a/go.mod b/go.mod index da286d928..0435ac5cd 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( k8s.io/component-base v0.26.7 k8s.io/klog/v2 v2.80.1 k8s.io/utils v0.0.0-20221128185143-99ec85e7a448 - open-cluster-management.io/api v0.12.0 + open-cluster-management.io/api v0.12.1-0.20230925140632-bf4f47ea90d1 sigs.k8s.io/controller-runtime v0.14.4 ) diff --git a/go.sum b/go.sum index 479e2167e..c472e1651 100644 --- a/go.sum +++ b/go.sum @@ -819,8 +819,8 @@ k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 h1:+70TFaan3hfJzs+7VK2o+O k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280/go.mod h1:+Axhij7bCpeqhklhUTe3xmOn6bWxolyZEeyaFpjGtl4= k8s.io/utils v0.0.0-20221128185143-99ec85e7a448 h1:KTgPnR10d5zhztWptI952TNtt/4u5h3IzDXkdIMuo2Y= k8s.io/utils v0.0.0-20221128185143-99ec85e7a448/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -open-cluster-management.io/api v0.12.0 h1:sNkj4k2XyWA/GLsTiFg82bLIZ7JDZKkLLLyZjJUlJMs= -open-cluster-management.io/api v0.12.0/go.mod h1:/CZhelEH+30/pX7vXGSZOzLMX0zvjthYOkT/5ZTzVTQ= +open-cluster-management.io/api v0.12.1-0.20230925140632-bf4f47ea90d1 h1:8r0fdost7Yhvvz+xJb7xkj/tLZ4DigxiGoEJGakXhUg= +open-cluster-management.io/api v0.12.1-0.20230925140632-bf4f47ea90d1/go.mod h1:/CZhelEH+30/pX7vXGSZOzLMX0zvjthYOkT/5ZTzVTQ= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/pkg/manager/controllers/addonconfiguration/graph.go b/pkg/manager/controllers/addonconfiguration/graph.go index c07438a04..3bccd522a 100644 --- a/pkg/manager/controllers/addonconfiguration/graph.go +++ b/pkg/manager/controllers/addonconfiguration/graph.go @@ -48,9 +48,11 @@ type addonConfigMap map[addonv1alpha1.ConfigGroupResource]addonv1alpha1.ConfigRe // set addon rollout status func (n *addonNode) setRolloutStatus() { + n.status = &clusterv1alpha1.ClusterRolloutStatus{ClusterName: n.mca.Namespace} + // desired configs doesn't match actual configs, set to ToApply if len(n.mca.Status.ConfigReferences) != len(n.desiredConfigs) { - n.status = &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply} + n.status.Status = clusterv1alpha1.ToApply return } @@ -66,38 +68,33 @@ func (n *addonNode) setRolloutStatus() { if desired, ok := n.desiredConfigs[actual.ConfigGroupResource]; ok { // desired config spec hash doesn't match actual, set to ToApply if !equality.Semantic.DeepEqual(desired.DesiredConfig, actual.DesiredConfig) { - n.status = &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply} + n.status.Status = clusterv1alpha1.ToApply return // desired config spec hash matches actual, but last applied config spec hash doesn't match actual } else if !equality.Semantic.DeepEqual(actual.LastAppliedConfig, actual.DesiredConfig) { switch progressingCond.Reason { case addonv1alpha1.ProgressingReasonInstallFailed, 
addonv1alpha1.ProgressingReasonUpgradeFailed: - n.status = &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.Failed, LastTransitionTime: &progressingCond.LastTransitionTime} + n.status.Status = clusterv1alpha1.Failed + n.status.LastTransitionTime = &progressingCond.LastTransitionTime case addonv1alpha1.ProgressingReasonInstalling, addonv1alpha1.ProgressingReasonUpgrading: - n.status = &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.Progressing, LastTransitionTime: &progressingCond.LastTransitionTime} + n.status.Status = clusterv1alpha1.Progressing + n.status.LastTransitionTime = &progressingCond.LastTransitionTime default: - n.status = &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.Progressing} + n.status.Status = clusterv1alpha1.Progressing } return } } else { - n.status = &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply} + n.status.Status = clusterv1alpha1.ToApply return } } // succeed + n.status.Status = clusterv1alpha1.Succeeded if progressingCond.Reason == addonv1alpha1.ProgressingReasonInstallSucceed || progressingCond.Reason == addonv1alpha1.ProgressingReasonUpgradeSucceed { - n.status = &clusterv1alpha1.ClusterRolloutStatus{ - Status: clusterv1alpha1.Succeeded, - LastTransitionTime: &progressingCond.LastTransitionTime, - } - } else { - n.status = &clusterv1alpha1.ClusterRolloutStatus{ - Status: clusterv1alpha1.Succeeded, - } + n.status.LastTransitionTime = &progressingCond.LastTransitionTime } - } func (d addonConfigMap) copy() addonConfigMap { @@ -311,20 +308,39 @@ func (n *installStrategyNode) generateRolloutResult() error { if n.placementRef.Name == "" { // default addons rolloutResult := clusterv1alpha1.RolloutResult{} - rolloutResult.ClustersToRollout = map[string]clusterv1alpha1.ClusterRolloutStatus{} - for k, addon := range n.children { + rolloutResult.ClustersToRollout = []clusterv1alpha1.ClusterRolloutStatus{} + for name, addon := range n.children { + if addon.status == nil { + return fmt.Errorf("failed to get rollout status on cluster %v", name) + } if addon.status.Status != clusterv1alpha1.Succeeded { - rolloutResult.ClustersToRollout[k] = *addon.status + rolloutResult.ClustersToRollout = append(rolloutResult.ClustersToRollout, *addon.status) } } n.rolloutResult = rolloutResult } else { // placement addons - rolloutHandler, err := clusterv1alpha1.NewRolloutHandler(n.pdTracker) + rolloutHandler, err := clusterv1alpha1.NewRolloutHandler(n.pdTracker, getClusterRolloutStatus) if err != nil { return err } - _, rolloutResult, err := rolloutHandler.GetRolloutCluster(n.rolloutStrategy, n.getUpgradeStatus) + + // get existing addons + existingRolloutClusters := []clusterv1alpha1.ClusterRolloutStatus{} + for name, addon := range n.children { + clsRolloutStatus, err := getClusterRolloutStatus(name, addon) + if err != nil { + return err + } + existingRolloutClusters = append(existingRolloutClusters, clsRolloutStatus) + } + + // sort by cluster name + sort.SliceStable(existingRolloutClusters, func(i, j int) bool { + return existingRolloutClusters[i].ClusterName < existingRolloutClusters[j].ClusterName + }) + + _, rolloutResult, err := rolloutHandler.GetRolloutCluster(n.rolloutStrategy, existingRolloutClusters) if err != nil { return err } @@ -334,23 +350,16 @@ func (n *installStrategyNode) generateRolloutResult() error { return nil } -func (n *installStrategyNode) getUpgradeStatus(clusterName string) clusterv1alpha1.ClusterRolloutStatus { - if node, exist := n.children[clusterName]; exist { - return *node.status - } else { 
- // if children not exist, return succeed status to skip - return clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.Skip} - } -} - // addonToUpdate finds the addons to be updated by placement func (n *installStrategyNode) getAddonsToUpdate() []*addonNode { var addons []*addonNode var clusters []string // get addon to update from rollout result - for c := range n.rolloutResult.ClustersToRollout { - clusters = append(clusters, c) + for _, c := range n.rolloutResult.ClustersToRollout { + if _, exist := n.children[c.ClusterName]; exist { + clusters = append(clusters, c.ClusterName) + } } // sort addons by name @@ -385,6 +394,13 @@ func (n *installStrategyNode) countAddonTimeOut() int { return len(n.rolloutResult.ClustersTimeOut) } +func getClusterRolloutStatus(clusterName string, addonNode *addonNode) (clusterv1alpha1.ClusterRolloutStatus, error) { + if addonNode.status == nil { + return clusterv1alpha1.ClusterRolloutStatus{}, fmt.Errorf("failed to get rollout status on cluster %v", clusterName) + } + return *addonNode.status, nil +} + func desiredConfigsEqual(a, b addonConfigMap) bool { if len(a) != len(b) { return false diff --git a/pkg/manager/controllers/addonconfiguration/graph_test.go b/pkg/manager/controllers/addonconfiguration/graph_test.go index a5f6fd6aa..e482ecae1 100644 --- a/pkg/manager/controllers/addonconfiguration/graph_test.go +++ b/pkg/manager/controllers/addonconfiguration/graph_test.go @@ -63,8 +63,10 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster1"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster1"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster1", + Status: clusterv1alpha1.ToApply}, }, { desiredConfigs: map[addonv1alpha1.ConfigGroupResource]addonv1alpha1.ConfigReference{ @@ -77,8 +79,10 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster2"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster2"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster2", + Status: clusterv1alpha1.ToApply}, }, }, }, @@ -146,8 +150,10 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster1"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster1"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster1", + Status: clusterv1alpha1.ToApply}, }, { desiredConfigs: map[addonv1alpha1.ConfigGroupResource]addonv1alpha1.ConfigReference{ @@ -168,8 +174,11 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster2"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster2"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster2", + Status: clusterv1alpha1.ToApply, + }, }, { desiredConfigs: map[addonv1alpha1.ConfigGroupResource]addonv1alpha1.ConfigReference{ @@ -190,8 +199,11 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster3"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster3"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster3", + Status: clusterv1alpha1.ToApply, 
+ }, }, }, }, @@ -306,8 +318,12 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster1"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.Failed, LastTransitionTime: &fakeTime}, + mca: addontesting.NewAddon("test", "cluster1"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster1", + Status: clusterv1alpha1.Failed, + LastTransitionTime: &fakeTime, + }, }, { desiredConfigs: map[addonv1alpha1.ConfigGroupResource]addonv1alpha1.ConfigReference{ @@ -320,8 +336,12 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster2"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.Progressing, LastTransitionTime: &fakeTime}, + mca: addontesting.NewAddon("test", "cluster2"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster2", + Status: clusterv1alpha1.Progressing, + LastTransitionTime: &fakeTime, + }, }, { desiredConfigs: map[addonv1alpha1.ConfigGroupResource]addonv1alpha1.ConfigReference{ @@ -334,8 +354,11 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster4"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster4"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster4", + Status: clusterv1alpha1.ToApply, + }, }, }, }, @@ -403,8 +426,10 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster1"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster1"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster1", + Status: clusterv1alpha1.ToApply}, }, { desiredConfigs: map[addonv1alpha1.ConfigGroupResource]addonv1alpha1.ConfigReference{ @@ -425,8 +450,10 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster2"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster2"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster2", + Status: clusterv1alpha1.ToApply}, }, { desiredConfigs: map[addonv1alpha1.ConfigGroupResource]addonv1alpha1.ConfigReference{ @@ -447,8 +474,10 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster3"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster3"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster3", + Status: clusterv1alpha1.ToApply}, }, }, }, @@ -523,7 +552,9 @@ func TestConfigurationGraph(t *testing.T) { {ConfigGroupResource: addonv1alpha1.ConfigGroupResource{Group: "core", Resource: "Bar"}, ConfigReferent: addonv1alpha1.ConfigReferent{Name: "test1"}}, }, nil, nil), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster1", + Status: clusterv1alpha1.ToApply}, }, { desiredConfigs: map[addonv1alpha1.ConfigGroupResource]addonv1alpha1.ConfigReference{ @@ -544,8 +575,10 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster2"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster2"), + status: 
&clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster2", + Status: clusterv1alpha1.ToApply}, }, { desiredConfigs: map[addonv1alpha1.ConfigGroupResource]addonv1alpha1.ConfigReference{ @@ -566,8 +599,10 @@ func TestConfigurationGraph(t *testing.T) { }, }, }, - mca: addontesting.NewAddon("test", "cluster3"), - status: &clusterv1alpha1.ClusterRolloutStatus{Status: clusterv1alpha1.ToApply}, + mca: addontesting.NewAddon("test", "cluster3"), + status: &clusterv1alpha1.ClusterRolloutStatus{ + ClusterName: "cluster3", + Status: clusterv1alpha1.ToApply}, }, }, }, diff --git a/vendor/modules.txt b/vendor/modules.txt index fee482df9..c8efa4625 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1226,7 +1226,7 @@ k8s.io/utils/path k8s.io/utils/pointer k8s.io/utils/strings/slices k8s.io/utils/trace -# open-cluster-management.io/api v0.12.0 +# open-cluster-management.io/api v0.12.1-0.20230925140632-bf4f47ea90d1 ## explicit; go 1.19 open-cluster-management.io/api/addon/v1alpha1 open-cluster-management.io/api/client/addon/clientset/versioned diff --git a/vendor/open-cluster-management.io/api/cluster/v1alpha1/helpers.go b/vendor/open-cluster-management.io/api/cluster/v1alpha1/helpers.go index d70865825..9d25e05d5 100644 --- a/vendor/open-cluster-management.io/api/cluster/v1alpha1/helpers.go +++ b/vendor/open-cluster-management.io/api/cluster/v1alpha1/helpers.go @@ -37,11 +37,10 @@ const ( Skip ) -// ClusterRolloutStatusFunc defines a function to return the rollout status for a managed cluster. -type ClusterRolloutStatusFunc func(clusterName string) ClusterRolloutStatus - // ClusterRolloutStatus holds the rollout status information for a cluster. type ClusterRolloutStatus struct { + // cluster name + ClusterName string // GroupKey represents the cluster group key (optional field). GroupKey clusterv1beta1.GroupKey // Status is the required field indicating the rollout status. @@ -53,50 +52,57 @@ type ClusterRolloutStatus struct { TimeOutTime *metav1.Time } -// RolloutResult contains the clusters to be rolled out and the clusters that have timed out. +// RolloutResult contains list of clusters that are timeOut, removed and required to rollOut type RolloutResult struct { - // ClustersToRollout is a map where the key is the cluster name and the value is the ClusterRolloutStatus. - ClustersToRollout map[string]ClusterRolloutStatus - // ClustersTimeOut is a map where the key is the cluster name and the value is the ClusterRolloutStatus. - ClustersTimeOut map[string]ClusterRolloutStatus + // ClustersToRollout is a slice of ClusterRolloutStatus that will be rolled out. + ClustersToRollout []ClusterRolloutStatus + // ClustersTimeOut is a slice of ClusterRolloutStatus that are timeout. + ClustersTimeOut []ClusterRolloutStatus + // ClustersRemoved is a slice of ClusterRolloutStatus that are removed. + ClustersRemoved []ClusterRolloutStatus } +// ClusterRolloutStatusFunc defines a function that return the rollout status for a given workload. +type ClusterRolloutStatusFunc[T any] func(clusterName string, workload T) (ClusterRolloutStatus, error) + +// The RolloutHandler required workload type (interface/struct) to be assigned to the generic type. +// The custom implementation of the ClusterRolloutStatusFunc is required to use the RolloutHandler. 
// +k8s:deepcopy-gen=false -type RolloutHandler struct { +type RolloutHandler[T any] struct { // placement decision tracker - pdTracker *clusterv1beta1.PlacementDecisionClustersTracker + pdTracker *clusterv1beta1.PlacementDecisionClustersTracker + statusFunc ClusterRolloutStatusFunc[T] } -func NewRolloutHandler(pdTracker *clusterv1beta1.PlacementDecisionClustersTracker) (*RolloutHandler, error) { +// NewRolloutHandler creates a new RolloutHandler with the give workload type. +func NewRolloutHandler[T any](pdTracker *clusterv1beta1.PlacementDecisionClustersTracker, statusFunc ClusterRolloutStatusFunc[T]) (*RolloutHandler[T], error) { if pdTracker == nil { return nil, fmt.Errorf("invalid placement decision tracker %v", pdTracker) } - return &RolloutHandler{pdTracker: pdTracker}, nil + return &RolloutHandler[T]{pdTracker: pdTracker, statusFunc: statusFunc}, nil } -// The input is a duck type RolloutStrategy and a ClusterRolloutStatusFunc to return the rollout status on each managed cluster. -// Return the strategy actual take effect and a list of clusters that need to rollout and that are timeout. +// The input are a RolloutStrategy and existingClusterRolloutStatus list. +// The existing ClusterRolloutStatus list should be created using the ClusterRolloutStatusFunc to determine the current workload rollout status. +// The existing ClusterRolloutStatus list should contain all the current workloads rollout status such as ToApply, Progressing, Succeeded, +// Failed, TimeOut and Skip in order to determine the added, removed, timeout clusters and next clusters to rollout. // -// ClustersToRollout: If mandatory decision groups are defined in strategy, will return the clusters to rollout in mandatory decision groups first. -// When all the mandatory decision groups rollout successfully, will return the rest of the clusters that need to rollout. -// -// ClustersTimeOut: If the cluster status is Progressing or Failed, and the status lasts longer than timeout defined in strategy, -// will list them RolloutResult.ClustersTimeOut with status TimeOut. -func (r *RolloutHandler) GetRolloutCluster(rolloutStrategy RolloutStrategy, statusFunc ClusterRolloutStatusFunc) (*RolloutStrategy, RolloutResult, error) { +// Return the actual RolloutStrategy that take effect and a RolloutResult contain list of ClusterToRollout, ClustersTimeout and ClusterRemoved. 
+func (r *RolloutHandler[T]) GetRolloutCluster(rolloutStrategy RolloutStrategy, existingClusterStatus []ClusterRolloutStatus) (*RolloutStrategy, RolloutResult, error) { switch rolloutStrategy.Type { case All: - return r.getRolloutAllClusters(rolloutStrategy, statusFunc) + return r.getRolloutAllClusters(rolloutStrategy, existingClusterStatus) case Progressive: - return r.getProgressiveClusters(rolloutStrategy, statusFunc) + return r.getProgressiveClusters(rolloutStrategy, existingClusterStatus) case ProgressivePerGroup: - return r.getProgressivePerGroupClusters(rolloutStrategy, statusFunc) + return r.getProgressivePerGroupClusters(rolloutStrategy, existingClusterStatus) default: return nil, RolloutResult{}, fmt.Errorf("incorrect rollout strategy type %v", rolloutStrategy.Type) } } -func (r *RolloutHandler) getRolloutAllClusters(rolloutStrategy RolloutStrategy, statusFunc ClusterRolloutStatusFunc) (*RolloutStrategy, RolloutResult, error) { +func (r *RolloutHandler[T]) getRolloutAllClusters(rolloutStrategy RolloutStrategy, existingClusterStatus []ClusterRolloutStatus) (*RolloutStrategy, RolloutResult, error) { // Prepare the rollout strategy strategy := RolloutStrategy{Type: All} strategy.All = rolloutStrategy.All.DeepCopy() @@ -110,15 +116,18 @@ func (r *RolloutHandler) getRolloutAllClusters(rolloutStrategy RolloutStrategy, return &strategy, RolloutResult{}, err } - // Get all clusters and perform progressive rollout - totalClusterGroups := r.pdTracker.ExistingClusterGroupsBesides() - totalClusters := totalClusterGroups.GetClusters().UnsortedList() - rolloutResult := progressivePerCluster(totalClusterGroups, len(totalClusters), failureTimeout, statusFunc) + allClusterGroups := r.pdTracker.ExistingClusterGroupsBesides() + allClusters := allClusterGroups.GetClusters().UnsortedList() + + // Check for removed Clusters + currentClusterStatus, removedClusterStatus := r.getRemovedClusters(allClusterGroups, existingClusterStatus) + rolloutResult := progressivePerCluster(allClusterGroups, len(allClusters), failureTimeout, currentClusterStatus) + rolloutResult.ClustersRemoved = removedClusterStatus return &strategy, rolloutResult, nil } -func (r *RolloutHandler) getProgressiveClusters(rolloutStrategy RolloutStrategy, statusFunc ClusterRolloutStatusFunc) (*RolloutStrategy, RolloutResult, error) { +func (r *RolloutHandler[T]) getProgressiveClusters(rolloutStrategy RolloutStrategy, existingClusterStatus []ClusterRolloutStatus) (*RolloutStrategy, RolloutResult, error) { // Prepare the rollout strategy strategy := RolloutStrategy{Type: Progressive} strategy.Progressive = rolloutStrategy.Progressive.DeepCopy() @@ -126,37 +135,45 @@ func (r *RolloutHandler) getProgressiveClusters(rolloutStrategy RolloutStrategy, strategy.Progressive = &RolloutProgressive{} } - // Upgrade mandatory decision groups first - groupKeys := decisionGroupsToGroupKeys(strategy.Progressive.MandatoryDecisionGroups.MandatoryDecisionGroups) - clusterGroups := r.pdTracker.ExistingClusterGroups(groupKeys...) 
- - // Perform progressive rollout for mandatory decision groups - rolloutResult := progressivePerGroup(clusterGroups, maxTimeDuration, statusFunc) - if len(rolloutResult.ClustersToRollout) > 0 { - return &strategy, rolloutResult, nil - } - // Parse timeout for non-mandatory decision groups failureTimeout, err := parseTimeout(strategy.Progressive.Timeout.Timeout) if err != nil { return &strategy, RolloutResult{}, err } - // Calculate the length for progressive rollout - totalClusters := r.pdTracker.ExistingClusterGroupsBesides().GetClusters() - length, err := calculateLength(strategy.Progressive.MaxConcurrency, len(totalClusters)) + // Check for removed clusters + clusterGroups := r.pdTracker.ExistingClusterGroupsBesides() + currentClusterStatus, removedClusterStatus := r.getRemovedClusters(clusterGroups, existingClusterStatus) + + // Upgrade mandatory decision groups first + groupKeys := decisionGroupsToGroupKeys(strategy.Progressive.MandatoryDecisionGroups.MandatoryDecisionGroups) + clusterGroups = r.pdTracker.ExistingClusterGroups(groupKeys...) + + // Perform progressive rollOut for mandatory decision groups first. + if len(clusterGroups) > 0 { + rolloutResult := progressivePerGroup(clusterGroups, failureTimeout, currentClusterStatus) + if len(rolloutResult.ClustersToRollout) > 0 || len(rolloutResult.ClustersTimeOut) > 0 { + rolloutResult.ClustersRemoved = removedClusterStatus + return &strategy, rolloutResult, nil + } + } + + // Calculate the size of progressive rollOut + // If the MaxConcurrency not defined, total clusters length is considered as maxConcurrency. + clusterGroups = r.pdTracker.ExistingClusterGroupsBesides(groupKeys...) + length, err := calculateRolloutSize(strategy.Progressive.MaxConcurrency, len(clusterGroups.GetClusters())) if err != nil { return &strategy, RolloutResult{}, err } - // Upgrade the remaining clusters - restClusterGroups := r.pdTracker.ExistingClusterGroupsBesides(clusterGroups.GetOrderedGroupKeys()...) 
- rolloutResult = progressivePerCluster(restClusterGroups, length, failureTimeout, statusFunc) + // Rollout the remaining clusters + rolloutResult := progressivePerCluster(clusterGroups, length, failureTimeout, currentClusterStatus) + rolloutResult.ClustersRemoved = removedClusterStatus return &strategy, rolloutResult, nil } -func (r *RolloutHandler) getProgressivePerGroupClusters(rolloutStrategy RolloutStrategy, statusFunc ClusterRolloutStatusFunc) (*RolloutStrategy, RolloutResult, error) { +func (r *RolloutHandler[T]) getProgressivePerGroupClusters(rolloutStrategy RolloutStrategy, existingClusterStatus []ClusterRolloutStatus) (*RolloutStrategy, RolloutResult, error) { // Prepare the rollout strategy strategy := RolloutStrategy{Type: ProgressivePerGroup} strategy.ProgressivePerGroup = rolloutStrategy.ProgressivePerGroup.DeepCopy() @@ -164,65 +181,99 @@ func (r *RolloutHandler) getProgressivePerGroupClusters(rolloutStrategy RolloutS strategy.ProgressivePerGroup = &RolloutProgressivePerGroup{} } + // Parse timeout for non-mandatory decision groups + failureTimeout, err := parseTimeout(strategy.ProgressivePerGroup.Timeout.Timeout) + if err != nil { + return &strategy, RolloutResult{}, err + } + + // Check for removed Clusters + clusterGroups := r.pdTracker.ExistingClusterGroupsBesides() + currentClusterStatus, removedClusterStatus := r.getRemovedClusters(clusterGroups, existingClusterStatus) + // Upgrade mandatory decision groups first mandatoryDecisionGroups := strategy.ProgressivePerGroup.MandatoryDecisionGroups.MandatoryDecisionGroups groupKeys := decisionGroupsToGroupKeys(mandatoryDecisionGroups) - clusterGroups := r.pdTracker.ExistingClusterGroups(groupKeys...) + clusterGroups = r.pdTracker.ExistingClusterGroups(groupKeys...) - // Perform progressive rollout per group for mandatory decision groups - rolloutResult := progressivePerGroup(clusterGroups, maxTimeDuration, statusFunc) - if len(rolloutResult.ClustersToRollout) > 0 { - return &strategy, rolloutResult, nil - } + // Perform progressive rollout per group for mandatory decision groups first + if len(clusterGroups) > 0 { + rolloutResult := progressivePerGroup(clusterGroups, failureTimeout, currentClusterStatus) - // Parse timeout for non-mandatory decision groups - failureTimeout, err := parseTimeout(strategy.ProgressivePerGroup.Timeout.Timeout) - if err != nil { - return &strategy, RolloutResult{}, err + if len(rolloutResult.ClustersToRollout) > 0 || len(rolloutResult.ClustersTimeOut) > 0 { + rolloutResult.ClustersRemoved = removedClusterStatus + return &strategy, rolloutResult, nil + } } - // Upgrade the rest of the decision groups - restClusterGroups := r.pdTracker.ExistingClusterGroupsBesides(clusterGroups.GetOrderedGroupKeys()...) + // RollOut the rest of the decision groups + restClusterGroups := r.pdTracker.ExistingClusterGroupsBesides(groupKeys...) 
// Perform progressive rollout per group for the remaining decision groups - rolloutResult = progressivePerGroup(restClusterGroups, failureTimeout, statusFunc) + rolloutResult := progressivePerGroup(restClusterGroups, failureTimeout, currentClusterStatus) + rolloutResult.ClustersRemoved = removedClusterStatus + return &strategy, rolloutResult, nil } -func progressivePerCluster(clusterGroupsMap clusterv1beta1.ClusterGroupsMap, length int, timeout time.Duration, statusFunc ClusterRolloutStatusFunc) RolloutResult { - rolloutClusters := map[string]ClusterRolloutStatus{} - timeoutClusters := map[string]ClusterRolloutStatus{} +func (r *RolloutHandler[T]) getRemovedClusters(clusterGroupsMap clusterv1beta1.ClusterGroupsMap, existingClusterStatus []ClusterRolloutStatus) ([]ClusterRolloutStatus, []ClusterRolloutStatus) { + var currentClusterStatus, removedClusterStatus []ClusterRolloutStatus - if length == 0 { - return RolloutResult{ - ClustersToRollout: rolloutClusters, - ClustersTimeOut: timeoutClusters, + clusters := clusterGroupsMap.GetClusters().UnsortedList() + for _, clusterStatus := range existingClusterStatus { + exist := false + for _, cluster := range clusters { + if clusterStatus.ClusterName == cluster { + exist = true + currentClusterStatus = append(currentClusterStatus, clusterStatus) + break + } + } + + if !exist { + removedClusterStatus = append(removedClusterStatus, clusterStatus) + } + } + return currentClusterStatus, removedClusterStatus +} + +func progressivePerCluster(clusterGroupsMap clusterv1beta1.ClusterGroupsMap, length int, timeout time.Duration, existingClusterStatus []ClusterRolloutStatus) RolloutResult { + var rolloutClusters, timeoutClusters []ClusterRolloutStatus + existingClusters := make(map[string]bool) + + for _, status := range existingClusterStatus { + if status.ClusterName == "" { + continue + } + + existingClusters[status.ClusterName] = true + rolloutClusters, timeoutClusters = determineRolloutStatus(status, timeout, rolloutClusters, timeoutClusters) + + if len(rolloutClusters) >= length { + return RolloutResult{ + ClustersToRollout: rolloutClusters, + ClustersTimeOut: timeoutClusters, + } } } clusters := clusterGroupsMap.GetClusters().UnsortedList() clusterToGroupKey := clusterGroupsMap.ClusterToGroupKey() - // Sort the clusters in alphabetical order to ensure consistency. 
sort.Strings(clusters) for _, cluster := range clusters { - status := statusFunc(cluster) - if groupKey, exists := clusterToGroupKey[cluster]; exists { - status.GroupKey = groupKey + if existingClusters[cluster] { + continue } - newStatus, needToRollout := determineRolloutStatusAndContinue(status, timeout) - status.Status = newStatus.Status - status.TimeOutTime = newStatus.TimeOutTime - - if needToRollout { - rolloutClusters[cluster] = status - } - if status.Status == TimeOut { - timeoutClusters[cluster] = status + status := ClusterRolloutStatus{ + ClusterName: cluster, + Status: ToApply, + GroupKey: clusterToGroupKey[cluster], } + rolloutClusters = append(rolloutClusters, status) - if len(rolloutClusters)%length == 0 && len(rolloutClusters) > 0 { + if len(rolloutClusters) >= length { return RolloutResult{ ClustersToRollout: rolloutClusters, ClustersTimeOut: timeoutClusters, @@ -236,32 +287,44 @@ func progressivePerCluster(clusterGroupsMap clusterv1beta1.ClusterGroupsMap, len } } -func progressivePerGroup(clusterGroupsMap clusterv1beta1.ClusterGroupsMap, timeout time.Duration, statusFunc ClusterRolloutStatusFunc) RolloutResult { - rolloutClusters := map[string]ClusterRolloutStatus{} - timeoutClusters := map[string]ClusterRolloutStatus{} +func progressivePerGroup(clusterGroupsMap clusterv1beta1.ClusterGroupsMap, timeout time.Duration, existingClusterStatus []ClusterRolloutStatus) RolloutResult { + var rolloutClusters, timeoutClusters []ClusterRolloutStatus + existingClusters := make(map[string]bool) - clusterGroupKeys := clusterGroupsMap.GetOrderedGroupKeys() + for _, status := range existingClusterStatus { + if status.ClusterName == "" { + continue + } + + if status.Status == ToApply { + // Set as false to consider the cluster in the decisionGroups iteration. + existingClusters[status.ClusterName] = false + } else { + existingClusters[status.ClusterName] = true + rolloutClusters, timeoutClusters = determineRolloutStatus(status, timeout, rolloutClusters, timeoutClusters) + } + } + clusterGroupKeys := clusterGroupsMap.GetOrderedGroupKeys() for _, key := range clusterGroupKeys { if subclusters, ok := clusterGroupsMap[key]; ok { // Iterate through clusters in the group - for _, cluster := range subclusters.UnsortedList() { - status := statusFunc(cluster) - status.GroupKey = key - - newStatus, needToRollout := determineRolloutStatusAndContinue(status, timeout) - status.Status = newStatus.Status - status.TimeOutTime = newStatus.TimeOutTime - - if needToRollout { - rolloutClusters[cluster] = status + clusters := subclusters.UnsortedList() + sort.Strings(clusters) + for _, cluster := range clusters { + if existingClusters[cluster] { + continue } - if status.Status == TimeOut { - timeoutClusters[cluster] = status + + status := ClusterRolloutStatus{ + ClusterName: cluster, + Status: ToApply, + GroupKey: key, } + rolloutClusters = append(rolloutClusters, status) } - // Return if there are clusters to rollout + // As it is perGroup Return if there are clusters to rollOut if len(rolloutClusters) > 0 { return RolloutResult{ ClustersToRollout: rolloutClusters, @@ -277,36 +340,33 @@ func progressivePerGroup(clusterGroupsMap clusterv1beta1.ClusterGroupsMap, timeo } } -// determineRolloutStatusAndContinue checks whether a cluster should continue its rollout based on -// its current status and timeout. The function returns an updated cluster status and a boolean -// indicating whether the rollout should continue. 
+// determineRolloutStatus checks whether a cluster should continue its rollout based on its current status and timeout. +// The function update the cluster status and append it to the expected slice. // -// The timeout parameter is utilized for handling progressing and failed statuses: -// 1. If timeout is set to None (maxTimeDuration), the function will wait until cluster reaching a success status. -// It returns true to include the cluster in the result and halts the rollout of other clusters or groups. -// 2. If timeout is set to 0, the function proceeds with upgrading other clusters without waiting. -// It returns false to skip waiting for the cluster to reach a success status and continues to rollout others. -func determineRolloutStatusAndContinue(status ClusterRolloutStatus, timeout time.Duration) (*ClusterRolloutStatus, bool) { - newStatus := status.DeepCopy() +// The timeout parameter is utilized for handling progressing and failed statuses and any other unknown status: +// 1. If timeout is set to None (maxTimeDuration), the function will append the clusterStatus to the rollOut Clusters. +// 2. If timeout is set to 0, the function append the clusterStatus to the timeOut clusters. +func determineRolloutStatus(status ClusterRolloutStatus, timeout time.Duration, rolloutClusters []ClusterRolloutStatus, timeoutClusters []ClusterRolloutStatus) ([]ClusterRolloutStatus, []ClusterRolloutStatus) { + switch status.Status { case ToApply: - return newStatus, true + rolloutClusters = append(rolloutClusters, status) case TimeOut, Succeeded, Skip: - return newStatus, false - case Progressing, Failed: + return rolloutClusters, timeoutClusters + default: // For progressing, failed status and any other unknown status. timeOutTime := getTimeOutTime(status.LastTransitionTime, timeout) - newStatus.TimeOutTime = timeOutTime + status.TimeOutTime = timeOutTime // check if current time is before the timeout time if RolloutClock.Now().Before(timeOutTime.Time) { - return newStatus, true + rolloutClusters = append(rolloutClusters, status) } else { - newStatus.Status = TimeOut - return newStatus, false + status.Status = TimeOut + timeoutClusters = append(timeoutClusters, status) } - default: - return newStatus, true } + + return rolloutClusters, timeoutClusters } // get the timeout time @@ -320,7 +380,7 @@ func getTimeOutTime(startTime *metav1.Time, timeout time.Duration) *metav1.Time return &metav1.Time{Time: timeoutTime} } -func calculateLength(maxConcurrency intstr.IntOrString, total int) (int, error) { +func calculateRolloutSize(maxConcurrency intstr.IntOrString, total int) (int, error) { length := total switch maxConcurrency.Type { diff --git a/vendor/open-cluster-management.io/api/cluster/v1alpha1/zz_generated.deepcopy.go b/vendor/open-cluster-management.io/api/cluster/v1alpha1/zz_generated.deepcopy.go index a72d4c816..00938a885 100644 --- a/vendor/open-cluster-management.io/api/cluster/v1alpha1/zz_generated.deepcopy.go +++ b/vendor/open-cluster-management.io/api/cluster/v1alpha1/zz_generated.deepcopy.go @@ -315,14 +315,14 @@ func (in *RolloutResult) DeepCopyInto(out *RolloutResult) { *out = *in if in.ClustersToRollout != nil { in, out := &in.ClustersToRollout, &out.ClustersToRollout - *out = make(map[string]ClusterRolloutStatus, len(*in)) + *out = make([]ClusterRolloutStatus, len(*in)) for key, val := range *in { (*out)[key] = *val.DeepCopy() } } if in.ClustersTimeOut != nil { in, out := &in.ClustersTimeOut, &out.ClustersTimeOut - *out = make(map[string]ClusterRolloutStatus, len(*in)) + *out = 
make([]ClusterRolloutStatus, len(*in)) for key, val := range *in { (*out)[key] = *val.DeepCopy() }
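
For reference, a minimal consumer sketch of the upgraded rollout handler API introduced by this patch (not part of the patch itself). It only uses identifiers visible in the diff above — ClusterRolloutStatusFunc[T], NewRolloutHandler, GetRolloutCluster, RolloutResult, ClusterRolloutStatus — while the workload type myWorkload, the workloadStatus function, and the rollout helper are hypothetical stand-ins for the caller's own types (in this PR the real workload is the addonNode in graph.go):

// Illustrative sketch only; assumes an already-initialized
// PlacementDecisionClustersTracker and a made-up workload type.
package example

import (
	"fmt"

	clusterv1alpha1 "open-cluster-management.io/api/cluster/v1alpha1"
	clusterv1beta1 "open-cluster-management.io/api/cluster/v1beta1"
)

// myWorkload is a hypothetical per-cluster workload; in this PR the
// equivalent is the addonNode wrapping a ManagedClusterAddOn.
type myWorkload struct {
	cluster string
	applied bool
}

// workloadStatus satisfies ClusterRolloutStatusFunc[myWorkload]: it maps one
// workload to its ClusterRolloutStatus, including the new ClusterName field
// that the slice-based RolloutResult relies on.
func workloadStatus(clusterName string, w myWorkload) (clusterv1alpha1.ClusterRolloutStatus, error) {
	status := clusterv1alpha1.ToApply
	if w.applied {
		status = clusterv1alpha1.Succeeded
	}
	return clusterv1alpha1.ClusterRolloutStatus{ClusterName: clusterName, Status: status}, nil
}

func rollout(pdTracker *clusterv1beta1.PlacementDecisionClustersTracker, workloads map[string]myWorkload) error {
	// The status function is now supplied when the handler is constructed,
	// instead of being passed to GetRolloutCluster on every call.
	handler, err := clusterv1alpha1.NewRolloutHandler[myWorkload](pdTracker, workloadStatus)
	if err != nil {
		return err
	}

	// The caller builds the list of existing per-cluster rollout statuses,
	// mirroring what generateRolloutResult does in graph.go.
	existing := []clusterv1alpha1.ClusterRolloutStatus{}
	for name, w := range workloads {
		s, err := workloadStatus(name, w)
		if err != nil {
			return err
		}
		existing = append(existing, s)
	}

	strategy := clusterv1alpha1.RolloutStrategy{Type: clusterv1alpha1.All}
	_, result, err := handler.GetRolloutCluster(strategy, existing)
	if err != nil {
		return err
	}

	fmt.Printf("to rollout: %d, timed out: %d, removed: %d\n",
		len(result.ClustersToRollout), len(result.ClustersTimeOut), len(result.ClustersRemoved))
	return nil
}

The sketch reflects the two API shifts this patch adopts: RolloutResult now carries slices of ClusterRolloutStatus (each tagged with ClusterName) rather than maps keyed by cluster name, and the handler is generic over the caller's workload type so the library can report removed clusters from the existing status list instead of querying a per-cluster callback.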