slo-controller: refactor code for readability
Signed-off-by: Fansong Zeng <fanster.z@gmail.com>
hormes committed Mar 26, 2024
1 parent cd58ac6 commit e11de1d
Showing 4 changed files with 40 additions and 27 deletions.
Binary file added docs/images/node-resource-model.png
21 changes: 17 additions & 4 deletions pkg/slo-controller/noderesource/plugins/batchresource/plugin.go
@@ -211,6 +211,18 @@ func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *cor
}, nil
}

// To support the colocation requirements of different enterprise environments, a configurable colocation strategy is provided.
// The resource view from the node perspective is as follows:
//
// https://github.com/koordinator-sh/koordinator/blob/main/docs/images/node-resource-model.png
//
// Typical colocation scenarios:
// 1. Default policy: the CPU and memory available for colocation are calculated automatically based on the load level of the node.
// 2. Default policy for CPU, while memory is configured not to be overcommitted. This reduces the probability of batch pods
// being killed due to high memory water levels (i.e. it lowers the kill rate).
//
// In each scenario, users can also adjust the resource water-level configuration to their own needs to control the deployment
// density of batch pods.
func (p *Plugin) calculateOnNode(strategy *configuration.ColocationStrategy, node *corev1.Node, podList *corev1.PodList,
resourceMetrics *framework.ResourceMetrics) (corev1.ResourceList, string, string) {
// compute the requests and usages according to the pods' priority classes.
@@ -293,7 +305,7 @@ func (p *Plugin) calculateOnNode(strategy *configuration.ColocationStrategy, nod
"cpu", podsDanglingUsed.Cpu().String(), "memory", podsDanglingUsed.Memory().String())

nodeCapacity := getNodeCapacity(node)
nodeReservation := getNodeReservation(strategy, nodeCapacity)
nodeSafetyMargin := getNodeSafetyMargin(strategy, nodeCapacity)

systemUsed := getResourceListForCPUAndMemory(nodeMetric.Status.NodeMetric.SystemUsage.ResourceList)
// resource usage of host applications with prod priority will be counted as host system usage since they consume the
Expand All @@ -303,9 +315,10 @@ func (p *Plugin) calculateOnNode(strategy *configuration.ColocationStrategy, nod
// System.Reserved = max(Node.Anno.Reserved, Node.Kubelet.Reserved)
nodeAnnoReserved := util.GetNodeReservationFromAnnotation(node.Annotations)
nodeKubeletReserved := util.GetNodeReservationFromKubelet(node)
systemReserved := quotav1.Max(nodeKubeletReserved, nodeAnnoReserved)
// FIXME: resource reservation taking max is rather confusing.
nodeReserved := quotav1.Max(nodeKubeletReserved, nodeAnnoReserved)

batchAllocatable, cpuMsg, memMsg := calculateBatchResourceByPolicy(strategy, nodeCapacity, nodeReservation, systemReserved,
batchAllocatable, cpuMsg, memMsg := calculateBatchResourceByPolicy(strategy, nodeCapacity, nodeSafetyMargin, nodeReserved,
systemUsed, podsHPRequest, podsHPUsed, podsHPMaxUsedReq)
metrics.RecordNodeExtendedResourceAllocatableInternal(node, string(extension.BatchCPU), metrics.UnitInteger, float64(batchAllocatable.Cpu().MilliValue())/1000)
metrics.RecordNodeExtendedResourceAllocatableInternal(node, string(extension.BatchMemory), metrics.UnitByte, float64(batchAllocatable.Memory().Value()))
@@ -380,7 +393,7 @@ func (p *Plugin) calculateOnNUMALevel(strategy *configuration.ColocationStrategy
nodeZoneAllocatable[i][corev1.ResourceName(resourceInfo.Name)] = resourceInfo.Allocatable.DeepCopy()
}
}
nodeZoneReserve[i] = getNodeReservation(strategy, nodeZoneAllocatable[i])
nodeZoneReserve[i] = getNodeSafetyMargin(strategy, nodeZoneAllocatable[i])
systemZoneUsed[i] = divideResourceList(systemUsed, float64(zoneNum))
systemZoneReserved[i] = divideResourceList(systemReserved, float64(zoneNum))
}
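For readers following the rename, the quantities above combine roughly as in the standalone sketch below. It is illustrative only, not the plugin's code, and it assumes getReserveRatio simply turns a reclaim threshold percent into the fraction of node capacity held back as the safety margin.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// safetyMarginMilli assumes the margin is capacity * (100 - reclaimThresholdPercent) / 100,
// i.e. the share of the node that is never handed out to batch pods.
func safetyMarginMilli(capacityMilli, reclaimThresholdPercent int64) int64 {
	return capacityMilli * (100 - reclaimThresholdPercent) / 100
}

func main() {
	// Illustrative numbers: a 32-core node with a CPU reclaim threshold of 65%.
	capacity := resource.MustParse("32")
	capacityMilli := capacity.MilliValue()         // 32000m
	margin := safetyMarginMilli(capacityMilli, 65) // 11200m held back

	// Node(Batch).Alloc[usage] := Node.Total - Node.SafetyMargin - System.Used - sum(Pod(Prod/Mid).Used)
	systemUsedMilli := int64(2000) // stands in for max(systemUsage, nodeReserved)
	podsHPUsedMilli := int64(9000) // usage of high-priority (Prod/Mid) pods
	batchMilli := capacityMilli - margin - systemUsedMilli - podsHPUsedMilli
	if batchMilli < 0 {
		batchMilli = 0 // the plugin likewise clamps at zero
	}
	fmt.Printf("batch CPU allocatable: %dm\n", batchMilli) // prints 9800m
}

With these made-up numbers the node would expose 9800m of batch CPU, following the same breakdown the plugin reports in its cpuMsg log line.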
44 changes: 22 additions & 22 deletions pkg/slo-controller/noderesource/plugins/batchresource/util.go
@@ -35,56 +35,56 @@ import (
"github.com/koordinator-sh/koordinator/pkg/util"
)

func calculateBatchResourceByPolicy(strategy *configuration.ColocationStrategy, nodeCapacity, nodeReserved, systemReserved,
func calculateBatchResourceByPolicy(strategy *configuration.ColocationStrategy, nodeCapacity, nodeSafetyMargin, nodeReserved,
systemUsed, podHPReq, podHPUsed, podHPMaxUsedReq corev1.ResourceList) (corev1.ResourceList, string, string) {
// Node(Batch).Alloc[usage] := Node.Total - Node.Reserved - System.Used - sum(Pod(Prod/Mid).Used)
// Node(Batch).Alloc[usage] := Node.Total - Node.SafetyMargin - System.Used - sum(Pod(Prod/Mid).Used)
// System.Used = max(Node.Used - Pod(All).Used, Node.Anno.Reserved, Node.Kubelet.Reserved)
systemUsed = quotav1.Max(systemUsed, systemReserved)
systemUsed = quotav1.Max(systemUsed, nodeReserved)
batchAllocatableByUsage := quotav1.Max(quotav1.Subtract(quotav1.Subtract(quotav1.Subtract(
nodeCapacity, nodeReserved), systemUsed), podHPUsed), util.NewZeroResourceList())
nodeCapacity, nodeSafetyMargin), systemUsed), podHPUsed), util.NewZeroResourceList())

// Node(Batch).Alloc[request] := Node.Total - Node.Reserved - System.Reserved - sum(Pod(Prod/Mid).Request)
// Node(Batch).Alloc[request] := Node.Total - Node.SafetyMargin - System.Reserved - sum(Pod(Prod/Mid).Request)
// System.Reserved = max(Node.Anno.Reserved, Node.Kubelet.Reserved)
batchAllocatableByRequest := quotav1.Max(quotav1.Subtract(quotav1.Subtract(quotav1.Subtract(
nodeCapacity, nodeReserved), systemReserved), podHPReq), util.NewZeroResourceList())
nodeCapacity, nodeSafetyMargin), nodeReserved), podHPReq), util.NewZeroResourceList())

// Node(Batch).Alloc[maxUsageRequest] := Node.Total - Node.Reserved - System.Used - sum(max(Pod(Prod/Mid).Request, Pod(Prod/Mid).Used))
// Node(Batch).Alloc[maxUsageRequest] := Node.Total - Node.SafetyMargin - System.Used - sum(max(Pod(Prod/Mid).Request, Pod(Prod/Mid).Used))
batchAllocatableByMaxUsageRequest := quotav1.Max(quotav1.Subtract(quotav1.Subtract(quotav1.Subtract(
nodeCapacity, nodeReserved), systemUsed), podHPMaxUsedReq), util.NewZeroResourceList())
nodeCapacity, nodeSafetyMargin), systemUsed), podHPMaxUsedReq), util.NewZeroResourceList())

batchAllocatable := batchAllocatableByUsage

var cpuMsg string
// batch cpu supports the policies "usage" and "maxUsageRequest"
if strategy != nil && strategy.CPUCalculatePolicy != nil && *strategy.CPUCalculatePolicy == configuration.CalculateByPodMaxUsageRequest {
batchAllocatable[corev1.ResourceCPU] = *batchAllocatableByMaxUsageRequest.Cpu()
cpuMsg = fmt.Sprintf("batchAllocatable[CPU(Milli-Core)]:%v = nodeCapacity:%v - nodeReservation:%v - systemUsageOrReserved:%v - podHPMaxUsedRequest:%v",
batchAllocatable.Cpu().MilliValue(), nodeCapacity.Cpu().MilliValue(), nodeReserved.Cpu().MilliValue(),
cpuMsg = fmt.Sprintf("batchAllocatable[CPU(Milli-Core)]:%v = nodeCapacity:%v - nodeSafetyMargin:%v - systemUsageOrNodeReserved:%v - podHPMaxUsedRequest:%v",
batchAllocatable.Cpu().MilliValue(), nodeCapacity.Cpu().MilliValue(), nodeSafetyMargin.Cpu().MilliValue(),
systemUsed.Cpu().MilliValue(), podHPMaxUsedReq.Cpu().MilliValue())
} else { // use CalculatePolicy "usage" by default
cpuMsg = fmt.Sprintf("batchAllocatable[CPU(Milli-Core)]:%v = nodeCapacity:%v - nodeReservation:%v - systemUsageOrReserved:%v - podHPUsed:%v",
batchAllocatable.Cpu().MilliValue(), nodeCapacity.Cpu().MilliValue(), nodeReserved.Cpu().MilliValue(),
cpuMsg = fmt.Sprintf("batchAllocatable[CPU(Milli-Core)]:%v = nodeCapacity:%v - nodeSafetyMargin:%v - systemUsageOrNodeReserved:%v - podHPUsed:%v",
batchAllocatable.Cpu().MilliValue(), nodeCapacity.Cpu().MilliValue(), nodeSafetyMargin.Cpu().MilliValue(),
systemUsed.Cpu().MilliValue(), podHPUsed.Cpu().MilliValue())
}

var memMsg string
// batch memory supports the policies "usage", "request" and "maxUsageRequest"
if strategy != nil && strategy.MemoryCalculatePolicy != nil && *strategy.MemoryCalculatePolicy == configuration.CalculateByPodRequest {
batchAllocatable[corev1.ResourceMemory] = *batchAllocatableByRequest.Memory()
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeReservation:%v - systemReserved:%v - podHPRequest:%v",
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeSafetyMargin:%v - nodeReserved:%v - podHPRequest:%v",
batchAllocatable.Memory().ScaledValue(resource.Giga), nodeCapacity.Memory().ScaledValue(resource.Giga),
nodeReserved.Memory().ScaledValue(resource.Giga), systemReserved.Memory().ScaledValue(resource.Giga),
nodeSafetyMargin.Memory().ScaledValue(resource.Giga), nodeReserved.Memory().ScaledValue(resource.Giga),
podHPReq.Memory().ScaledValue(resource.Giga))
} else if strategy != nil && strategy.MemoryCalculatePolicy != nil && *strategy.MemoryCalculatePolicy == configuration.CalculateByPodMaxUsageRequest {
batchAllocatable[corev1.ResourceMemory] = *batchAllocatableByMaxUsageRequest.Memory()
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeReservation:%v - systemUsage:%v - podHPMaxUsedRequest:%v",
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeSafetyMargin:%v - systemUsage:%v - podHPMaxUsedRequest:%v",
batchAllocatable.Memory().ScaledValue(resource.Giga), nodeCapacity.Memory().ScaledValue(resource.Giga),
nodeReserved.Memory().ScaledValue(resource.Giga), systemUsed.Memory().ScaledValue(resource.Giga),
nodeSafetyMargin.Memory().ScaledValue(resource.Giga), systemUsed.Memory().ScaledValue(resource.Giga),
podHPMaxUsedReq.Memory().ScaledValue(resource.Giga))
} else { // use CalculatePolicy "usage" by default
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeReservation:%v - systemUsage:%v - podHPUsed:%v",
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeSafetyMargin:%v - systemUsage:%v - podHPUsed:%v",
batchAllocatable.Memory().ScaledValue(resource.Giga), nodeCapacity.Memory().ScaledValue(resource.Giga),
nodeReserved.Memory().ScaledValue(resource.Giga), systemUsed.Memory().ScaledValue(resource.Giga),
nodeSafetyMargin.Memory().ScaledValue(resource.Giga), systemUsed.Memory().ScaledValue(resource.Giga),
podHPUsed.Memory().ScaledValue(resource.Giga))
}
return batchAllocatable, cpuMsg, memMsg
@@ -201,10 +201,10 @@ func getNodeCapacity(node *corev1.Node) corev1.ResourceList {
return getResourceListForCPUAndMemory(node.Status.Capacity)
}

// getNodeReservation gets node-level safe-guarding reservation with the node's allocatable
func getNodeReservation(strategy *configuration.ColocationStrategy, nodeAllocatable corev1.ResourceList) corev1.ResourceList {
cpuReserveQuant := util.MultiplyMilliQuant(nodeAllocatable[corev1.ResourceCPU], getReserveRatio(*strategy.CPUReclaimThresholdPercent))
memReserveQuant := util.MultiplyQuant(nodeAllocatable[corev1.ResourceMemory], getReserveRatio(*strategy.MemoryReclaimThresholdPercent))
// getNodeSafetyMargin gets the node-level safe-guarding reservation (safety margin) based on the node's capacity
func getNodeSafetyMargin(strategy *configuration.ColocationStrategy, nodeCapacity corev1.ResourceList) corev1.ResourceList {
cpuReserveQuant := util.MultiplyMilliQuant(nodeCapacity[corev1.ResourceCPU], getReserveRatio(*strategy.CPUReclaimThresholdPercent))
memReserveQuant := util.MultiplyQuant(nodeCapacity[corev1.ResourceMemory], getReserveRatio(*strategy.MemoryReclaimThresholdPercent))

return corev1.ResourceList{
corev1.ResourceCPU: cpuReserveQuant,
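To see how the "usage", "request" and "maxUsageRequest" policies can diverge on the same node, here is a small self-contained sketch with made-up memory figures; only the subtraction pattern mirrors the formulas above, none of the numbers come from the commit.

package main

import "fmt"

func main() {
	// Illustrative values in GiB.
	const (
		nodeCapacity     = 256.0
		nodeSafetyMargin = 25.6  // e.g. 10% of capacity held back by the reclaim threshold
		nodeReserved     = 8.0   // max(Node.Anno.Reserved, Node.Kubelet.Reserved)
		systemUsed       = 12.0  // max(systemUsage, nodeReserved)
		podHPUsed        = 90.0  // usage of Prod/Mid pods
		podHPRequest     = 120.0 // requests of Prod/Mid pods
		podHPMaxUsedReq  = 130.0 // sum of per-pod max(request, usage)
	)

	byUsage := nodeCapacity - nodeSafetyMargin - systemUsed - podHPUsed
	byRequest := nodeCapacity - nodeSafetyMargin - nodeReserved - podHPRequest
	byMaxUsageRequest := nodeCapacity - nodeSafetyMargin - systemUsed - podHPMaxUsedReq

	fmt.Printf("usage=%.1f request=%.1f maxUsageRequest=%.1f (GiB)\n",
		byUsage, byRequest, byMaxUsageRequest) // 128.4, 102.4, 88.4
}

The "request" policy trades responsiveness to actual load for stability of the batch allocatable, which is why the comment in plugin.go recommends it when memory should not be overcommitted.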
Original file line number Diff line number Diff line change
@@ -121,7 +121,7 @@ func Test_getNodeReservation(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := getNodeReservation(tt.args.strategy, tt.args.nodeAllocatable)
got := getNodeSafetyMargin(tt.args.strategy, tt.args.nodeAllocatable)
testingCorrectResourceList(t, &tt.want, &got)
})
}
