feat: calc group score by weight
Signed-off-by: ipsum-0320 <trueman.0320@zju.edu.cn>

chore: update

Signed-off-by: ipsum-0320 <trueman.0320@zju.edu.cn>

re-test

Signed-off-by: ipsum-0320 <trueman.0320@zju.edu.cn>

feat: update basic score

Signed-off-by: ipsum-0320 <trueman.0320@zju.edu.cn>

chore: add comment

Signed-off-by: ipsum-0320 <trueman.0320@zju.edu.cn>

feat: calcGroupScoreForDivided

Signed-off-by: ipsum-0320 <trueman.0320@zju.edu.cn>

fix: lint error

Signed-off-by: ipsum-0320 <trueman.0320@zju.edu.cn>

chore: fix the comment

Signed-off-by: ipsum-0320 <trueman.0320@zju.edu.cn>

feat: resolve the comments

Signed-off-by: ipsum-0320 <trueman.0320@zju.edu.cn>
ipsum-0320 committed Oct 26, 2024
1 parent 58612d3 commit b6d597f
Showing 1 changed file with 155 additions and 12 deletions.
167 changes: 155 additions & 12 deletions pkg/scheduler/core/spreadconstraint/group_clusters.go
@@ -17,6 +17,8 @@ limitations under the License.
package spreadconstraint

import (
"math"

"k8s.io/utils/ptr"

clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
@@ -40,7 +42,7 @@ type GroupClustersInfo struct {
// ProviderInfo indicates the provider information
type ProviderInfo struct {
Name string
- Score int64 // the highest score in all clusters of the provider
+ Score int64 // the comprehensive score in all clusters of the provider
AvailableReplicas int64

// Regions under this provider
@@ -54,7 +56,7 @@ type ProviderInfo struct {
// RegionInfo indicates the region information
type RegionInfo struct {
Name string
- Score int64 // the highest score in all clusters of the region
+ Score int64 // the comprehensive score in all clusters of the region
AvailableReplicas int64

// Zones under this region
@@ -66,7 +68,7 @@ type RegionInfo struct {
// ZoneInfo indicates the zone information
type ZoneInfo struct {
Name string
- Score int64 // the highest score in all clusters of the zone
+ Score int64 // the comprehensive score in all clusters of the zone
AvailableReplicas int64

// Clusters under this zone, sorted by cluster.Score descending.
@@ -109,9 +111,9 @@ func groupClustersBasedTopology(
}
groupClustersInfo.calAvailableReplicasFunc = calAvailableReplicasFunc
groupClustersInfo.generateClustersInfo(clustersScore, rbSpec)
- groupClustersInfo.generateZoneInfo(spreadConstraints)
- groupClustersInfo.generateRegionInfo(spreadConstraints)
- groupClustersInfo.generateProviderInfo(spreadConstraints)
+ groupClustersInfo.generateZoneInfo(spreadConstraints, rbSpec)
+ groupClustersInfo.generateRegionInfo(spreadConstraints, rbSpec)
+ groupClustersInfo.generateProviderInfo(spreadConstraints, rbSpec)

return groupClustersInfo
}
@@ -128,6 +130,121 @@ func groupClustersIgnoringTopology(
return groupClustersInfo
}

// weightUnit is used to minimize the impact of avg(cluster.Score).
// By multiplying by weightUnit, the unit of targetReplica becomes 1000, while avg(cluster.Score) stays in [0, 100].
// Thus, when sorting by group score, targetReplica is considered first; only when the weighted values are equal is the score considered.
const weightUnit int64 = 1000

func (info *GroupClustersInfo) calcGroupScore(
clusters []ClusterDetailInfo,
rbSpec *workv1alpha2.ResourceBindingSpec,
minGroups int) int64 {
// If the replica scheduling type is Divided, the score is calculated as follows.
float64MinGroups := float64(minGroups)
targetReplica := int64(math.Ceil(float64(rbSpec.Replicas) / float64MinGroups))

// Get the minGroups of the Cluster spread constraint.
var clusterMinGroups int
if rbSpec.Placement != nil && rbSpec.Placement.SpreadConstraints != nil {
for _, sc := range rbSpec.Placement.SpreadConstraints {
if sc.SpreadByField == policyv1alpha1.SpreadByFieldCluster {
clusterMinGroups = sc.MinGroups
}
}
}

// If the Cluster minGroups is less than this group's minGroups, raise it to the group's minGroups.
if clusterMinGroups < minGroups {
clusterMinGroups = minGroups
}
int64ClusterMinGroups := int64(clusterMinGroups)

// clusters are sorted by cluster.Score in descending order;
// when scores are equal, they are ordered by availableReplica in ascending order.
var sumAvailableReplica int64
var sumScore int64
var validClusters int64
for _, cluster := range clusters {
sumAvailableReplica += cluster.AvailableReplicas
sumScore += cluster.Score
validClusters++
if validClusters >= int64ClusterMinGroups && sumAvailableReplica >= targetReplica {
break
}
}

// cluster.Score is either 0 or 100. To minimize the impact of Score,
// scale targetReplica by weightUnit (1000). This way,
// when sorting by group score, targetReplica is considered first,
// and only if the weighted values are equal is Score considered.

// Here is an example: rbSpec.Replicas == 100, Group.minGroups == 2, and Cluster.minGroups == 1.
// Thus targetReplica is 50 and int64ClusterMinGroups == 2, because int64ClusterMinGroups == max(Group.minGroups, Cluster.minGroups).

// Group 1 has five clusters as follows.
// -----------------------------------------------------------------------
// | clusterName       | member1 | member2 | member3 | member4 | member5 |
// |-------------------|---------|---------|---------|---------|---------|
// | score             |     100 |     100 |     100 |     100 |     100 |
// | AvailableReplicas |      10 |      10 |      10 |      10 |       5 |
// -----------------------------------------------------------------------

// Group 2 has four clusters as follows.
// -------------------------------------------------------------
// | clusterName       | member1 | member2 | member3 | member4 |
// |-------------------|---------|---------|---------|---------|
// | score             |       0 |       0 |       0 |       0 |
// | AvailableReplicas |      40 |      30 |      10 |      10 |
// -------------------------------------------------------------

// As expected, Group 2 is the better choice: it has more available
// replica capacity, which meets the needs of replica allocation,
// even though Group 1 has higher cluster scores.
// Under this algorithm, Group 2's score is indeed higher than Group 1's.

// Group1's Score = 45 * 1000 + 100 = 45100
// Group2's Score = 50 * 1000 + 0 = 50000

// Here is another example: targetReplica is still 50 and int64ClusterMinGroups == 2.
// The difference is that both groups now satisfy the available replica capacity requirement.

// Group 1 has five clusters as follows.
// -----------------------------------------------------------------------
// | clusterName       | member1 | member2 | member3 | member4 | member5 |
// |-------------------|---------|---------|---------|---------|---------|
// | score             |     100 |     100 |     100 |     100 |     100 |
// | AvailableReplicas |      40 |      40 |      10 |      10 |       5 |
// -----------------------------------------------------------------------

// Group 2 has four clusters as follows.
// -------------------------------------------------------------
// | clusterName       | member1 | member2 | member3 | member4 |
// |-------------------|---------|---------|---------|---------|
// | score             |       0 |       0 |       0 |       0 |
// | AvailableReplicas |     100 |     100 |      10 |      10 |
// -------------------------------------------------------------

// As expected, Group 1 is now the better choice: both groups can meet
// the replica allocation requirement, but Group 1 has higher cluster
// scores (even though Group 2 has more available replicas).
// Under this algorithm, Group 1's score is indeed higher than Group 2's.

// Group1's Score = 50 * 1000 + 100 = 50100
// Group2's Score = 50 * 1000 + 0 = 50000

if sumAvailableReplica < targetReplica {
sumAvailableReplica = sumAvailableReplica * weightUnit
return sumAvailableReplica + sumScore/int64(len(clusters))
}

targetReplica = targetReplica * weightUnit
return targetReplica + sumScore/validClusters
}
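To make the weighting concrete, here is a minimal, self-contained sketch (hypothetical names, not code from this commit) that reproduces the arithmetic of the first example above under the same capping and averaging rules as calcGroupScore:

package main

import (
	"fmt"
	"math"
)

const weightUnit int64 = 1000

// groupScore mirrors the weighted formula above: available capacity
// (capped at targetReplica) is scaled by weightUnit so that it dominates,
// and the average cluster score (in [0, 100]) only breaks ties.
func groupScore(scores, replicas []int64, totalReplicas, minGroups, clusterMinGroups int64) int64 {
	if clusterMinGroups < minGroups {
		clusterMinGroups = minGroups
	}
	targetReplica := int64(math.Ceil(float64(totalReplicas) / float64(minGroups)))
	var sumAvailable, sumScore, valid int64
	for i := range scores {
		sumAvailable += replicas[i]
		sumScore += scores[i]
		valid++
		if valid >= clusterMinGroups && sumAvailable >= targetReplica {
			break
		}
	}
	if sumAvailable < targetReplica {
		return sumAvailable*weightUnit + sumScore/int64(len(scores))
	}
	return targetReplica*weightUnit + sumScore/valid
}

func main() {
	// First example: Group 1 cannot reach targetReplica = 50, Group 2 can.
	fmt.Println(groupScore([]int64{100, 100, 100, 100, 100}, []int64{10, 10, 10, 10, 5}, 100, 2, 1)) // 45100
	fmt.Println(groupScore([]int64{0, 0, 0, 0}, []int64{40, 30, 10, 10}, 100, 2, 1))                 // 50000
}

Running it prints 45100 and 50000, matching the scores derived in the comments: the group that satisfies targetReplica wins regardless of its cluster scores.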

func (info *GroupClustersInfo) generateClustersInfo(clustersScore framework.ClusterScoreList, rbSpec *workv1alpha2.ResourceBindingSpec) {
var clusters []*clusterv1alpha1.Cluster
for _, clusterScore := range clustersScore {
@@ -153,7 +270,7 @@ func (info *GroupClustersInfo) generateClustersInfo(clustersScore framework.Clus
})
}

- func (info *GroupClustersInfo) generateZoneInfo(spreadConstraints []policyv1alpha1.SpreadConstraint) {
+ func (info *GroupClustersInfo) generateZoneInfo(spreadConstraints []policyv1alpha1.SpreadConstraint, rbSpec *workv1alpha2.ResourceBindingSpec) {
if !IsSpreadConstraintExisted(spreadConstraints, policyv1alpha1.SpreadByFieldZone) {
return
}
@@ -178,13 +295,22 @@ func (info *GroupClustersInfo) generateZoneInfo(spreadConstraints []policyv1alph
}
}

var minGroups int
if spreadConstraints != nil {
Check failure on line 299 in pkg/scheduler/core/spreadconstraint/group_clusters.go (GitHub Actions / lint): S1031: unnecessary nil check around range (gosimple)
for _, sc := range spreadConstraints {
if sc.SpreadByField == policyv1alpha1.SpreadByFieldZone {
minGroups = sc.MinGroups
}
}
}

for zone, zoneInfo := range info.Zones {
- zoneInfo.Score = zoneInfo.Clusters[0].Score
+ zoneInfo.Score = info.calcGroupScore(zoneInfo.Clusters, rbSpec, minGroups)
info.Zones[zone] = zoneInfo
}
}
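The MinGroups lookup above is repeated verbatim in generateRegionInfo and generateProviderInfo below. A small helper along these lines (hypothetical, not part of this commit; it assumes SpreadByField has type policyv1alpha1.SpreadFieldValue, as in the karmada policy API) would remove the duplication and also the S1031 lint failures, since ranging over a nil slice in Go simply iterates zero times:

// minGroupsFor returns the MinGroups of the spread constraint that spreads
// by the given field, or 0 if no such constraint exists. No nil check is
// needed: ranging over a nil slice is a no-op in Go.
func minGroupsFor(spreadConstraints []policyv1alpha1.SpreadConstraint, field policyv1alpha1.SpreadFieldValue) int {
	var minGroups int
	for _, sc := range spreadConstraints {
		if sc.SpreadByField == field {
			minGroups = sc.MinGroups
		}
	}
	return minGroups
}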

- func (info *GroupClustersInfo) generateRegionInfo(spreadConstraints []policyv1alpha1.SpreadConstraint) {
+ func (info *GroupClustersInfo) generateRegionInfo(spreadConstraints []policyv1alpha1.SpreadConstraint, rbSpec *workv1alpha2.ResourceBindingSpec) {
if !IsSpreadConstraintExisted(spreadConstraints, policyv1alpha1.SpreadByFieldRegion) {
return
}
@@ -212,13 +338,22 @@ func (info *GroupClustersInfo) generateRegionInfo(spreadConstraints []policyv1al
info.Regions[region] = regionInfo
}

var minGroups int
if spreadConstraints != nil {
Check failure on line 342 in pkg/scheduler/core/spreadconstraint/group_clusters.go (GitHub Actions / lint): S1031: unnecessary nil check around range (gosimple)
for _, sc := range spreadConstraints {
if sc.SpreadByField == policyv1alpha1.SpreadByFieldRegion {
minGroups = sc.MinGroups
}
}
}

for region, regionInfo := range info.Regions {
- regionInfo.Score = regionInfo.Clusters[0].Score
+ regionInfo.Score = info.calcGroupScore(regionInfo.Clusters, rbSpec, minGroups)
info.Regions[region] = regionInfo
}
}

- func (info *GroupClustersInfo) generateProviderInfo(spreadConstraints []policyv1alpha1.SpreadConstraint) {
+ func (info *GroupClustersInfo) generateProviderInfo(spreadConstraints []policyv1alpha1.SpreadConstraint, rbSpec *workv1alpha2.ResourceBindingSpec) {
if !IsSpreadConstraintExisted(spreadConstraints, policyv1alpha1.SpreadByFieldProvider) {
return
}
@@ -252,8 +387,16 @@ func (info *GroupClustersInfo) generateProviderInfo(spreadConstraints []policyv1
info.Providers[provider] = providerInfo
}

var minGroups int
if spreadConstraints != nil {
Check failure on line 391 in pkg/scheduler/core/spreadconstraint/group_clusters.go (GitHub Actions / lint): S1031: unnecessary nil check around range (gosimple)
for _, sc := range spreadConstraints {
if sc.SpreadByField == policyv1alpha1.SpreadByFieldProvider {
minGroups = sc.MinGroups
}
}
}
for provider, providerInfo := range info.Providers {
- providerInfo.Score = providerInfo.Clusters[0].Score
+ providerInfo.Score = info.calcGroupScore(providerInfo.Clusters, rbSpec, minGroups)
info.Providers[provider] = providerInfo
}
}
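For context, here is a sketch of how these weighted scores could then be consumed when ranking groups (illustrative only; the actual selection logic lives elsewhere in this package, and this assumes the standard library sort package is imported):

// rankProviders returns providers ordered by descending weighted Score.
// Any group whose capacity meets targetReplica (Score >= targetReplica*weightUnit)
// outranks every group that falls short; among groups at the same capacity
// level, the average cluster score folded into Score breaks the tie.
func rankProviders(providers map[string]ProviderInfo) []ProviderInfo {
	ranked := make([]ProviderInfo, 0, len(providers))
	for _, p := range providers {
		ranked = append(ranked, p)
	}
	sort.Slice(ranked, func(i, j int) bool {
		return ranked[i].Score > ranked[j].Score
	})
	return ranked
}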
