slo-controller: refactor code for readability
Signed-off-by: Fansong Zeng <fanster.z@gmail.com>
hormes committed Mar 26, 2024
1 parent cd58ac6 commit e11de1d
Showing 4 changed files with 40 additions and 27 deletions.
Binary file added docs/images/node-resource-model.png
21 changes: 17 additions & 4 deletions pkg/slo-controller/noderesource/plugins/batchresource/plugin.go
@@ -211,6 +211,18 @@ func (p *Plugin) calculate(strategy *configuration.ColocationStrategy, node *cor
}, nil
}

// To support the colocation requirements of different enterprise environments, a configurable colocation strategy is provided.
// The resource view from the node perspective is as follows:
//
// https://github.com/koordinator-sh/koordinator/blob/main/docs/images/node-resource-model.png
//
// Typical colocation scenarios:
// 1. Default policy: the CPU and memory available for colocation are calculated automatically based on the load level of the node.
// 2. Default policy for CPU, while memory is configured not to be overcommitted. This reduces the probability of batch pods
// being killed due to high memory water levels (i.e. it lowers the kill rate).
//
// In each scenario, users can also adjust the resource water-level configuration to their own needs to control the deployment
// density of batch pods.
func (p *Plugin) calculateOnNode(strategy *configuration.ColocationStrategy, node *corev1.Node, podList *corev1.PodList,
resourceMetrics *framework.ResourceMetrics) (corev1.ResourceList, string, string) {
// compute the requests and usages according to the pods' priority classes.
@@ -293,7 +305,7 @@ func (p *Plugin) calculateOnNode(strategy *configuration.ColocationStrategy, nod
"cpu", podsDanglingUsed.Cpu().String(), "memory", podsDanglingUsed.Memory().String())

nodeCapacity := getNodeCapacity(node)
nodeReservation := getNodeReservation(strategy, nodeCapacity)
nodeSafetyMargin := getNodeSafetyMargin(strategy, nodeCapacity)

systemUsed := getResourceListForCPUAndMemory(nodeMetric.Status.NodeMetric.SystemUsage.ResourceList)
// resource usage of host applications with prod priority will be counted as host system usage since they consume the
Expand All @@ -303,9 +315,10 @@ func (p *Plugin) calculateOnNode(strategy *configuration.ColocationStrategy, nod
// System.Reserved = max(Node.Anno.Reserved, Node.Kubelet.Reserved)
nodeAnnoReserved := util.GetNodeReservationFromAnnotation(node.Annotations)
nodeKubeletReserved := util.GetNodeReservationFromKubelet(node)
systemReserved := quotav1.Max(nodeKubeletReserved, nodeAnnoReserved)
// FIXME: resource reservation taking max is rather confusing.
nodeReserved := quotav1.Max(nodeKubeletReserved, nodeAnnoReserved)

batchAllocatable, cpuMsg, memMsg := calculateBatchResourceByPolicy(strategy, nodeCapacity, nodeReservation, systemReserved,
batchAllocatable, cpuMsg, memMsg := calculateBatchResourceByPolicy(strategy, nodeCapacity, nodeSafetyMargin, nodeReserved,
systemUsed, podsHPRequest, podsHPUsed, podsHPMaxUsedReq)
metrics.RecordNodeExtendedResourceAllocatableInternal(node, string(extension.BatchCPU), metrics.UnitInteger, float64(batchAllocatable.Cpu().MilliValue())/1000)
metrics.RecordNodeExtendedResourceAllocatableInternal(node, string(extension.BatchMemory), metrics.UnitByte, float64(batchAllocatable.Memory().Value()))
@@ -380,7 +393,7 @@ func (p *Plugin) calculateOnNUMALevel(strategy *configuration.ColocationStrategy
nodeZoneAllocatable[i][corev1.ResourceName(resourceInfo.Name)] = resourceInfo.Allocatable.DeepCopy()
}
}
nodeZoneReserve[i] = getNodeReservation(strategy, nodeZoneAllocatable[i])
nodeZoneReserve[i] = getNodeSafetyMargin(strategy, nodeZoneAllocatable[i])
systemZoneUsed[i] = divideResourceList(systemUsed, float64(zoneNum))
systemZoneReserved[i] = divideResourceList(systemReserved, float64(zoneNum))
}
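For readers following the rename, the quantities above combine roughly as in the standalone sketch below. It is illustrative only, not the plugin's code, and it assumes getReserveRatio simply turns a reclaim threshold percent into the fraction of node capacity held back as the safety margin.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// safetyMarginMilli assumes the margin is capacity * (100 - reclaimThresholdPercent) / 100,
// i.e. the share of the node that is never handed out to batch pods.
func safetyMarginMilli(capacityMilli, reclaimThresholdPercent int64) int64 {
	return capacityMilli * (100 - reclaimThresholdPercent) / 100
}

func main() {
	// Illustrative numbers: a 32-core node with a CPU reclaim threshold of 65%.
	capacity := resource.MustParse("32")
	capacityMilli := capacity.MilliValue()         // 32000m
	margin := safetyMarginMilli(capacityMilli, 65) // 11200m held back

	// Node(Batch).Alloc[usage] := Node.Total - Node.SafetyMargin - System.Used - sum(Pod(Prod/Mid).Used)
	systemUsedMilli := int64(2000) // stands in for max(systemUsage, nodeReserved)
	podsHPUsedMilli := int64(9000) // usage of high-priority (Prod/Mid) pods
	batchMilli := capacityMilli - margin - systemUsedMilli - podsHPUsedMilli
	if batchMilli < 0 {
		batchMilli = 0 // the plugin likewise clamps at zero
	}
	fmt.Printf("batch CPU allocatable: %dm\n", batchMilli) // prints 9800m
}

With these made-up numbers the node would expose 9800m of batch CPU, following the same breakdown the plugin reports in its cpuMsg log line.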
44 changes: 22 additions & 22 deletions pkg/slo-controller/noderesource/plugins/batchresource/util.go
@@ -35,56 +35,56 @@ import (
"github.com/koordinator-sh/koordinator/pkg/util"
)

func calculateBatchResourceByPolicy(strategy *configuration.ColocationStrategy, nodeCapacity, nodeReserved, systemReserved,
func calculateBatchResourceByPolicy(strategy *configuration.ColocationStrategy, nodeCapacity, nodeSafetyMargin, nodeReserved,
systemUsed, podHPReq, podHPUsed, podHPMaxUsedReq corev1.ResourceList) (corev1.ResourceList, string, string) {
// Node(Batch).Alloc[usage] := Node.Total - Node.Reserved - System.Used - sum(Pod(Prod/Mid).Used)
// Node(Batch).Alloc[usage] := Node.Total - Node.SafetyMargin - System.Used - sum(Pod(Prod/Mid).Used)
// System.Used = max(Node.Used - Pod(All).Used, Node.Anno.Reserved, Node.Kubelet.Reserved)
systemUsed = quotav1.Max(systemUsed, systemReserved)
systemUsed = quotav1.Max(systemUsed, nodeReserved)
batchAllocatableByUsage := quotav1.Max(quotav1.Subtract(quotav1.Subtract(quotav1.Subtract(
nodeCapacity, nodeReserved), systemUsed), podHPUsed), util.NewZeroResourceList())
nodeCapacity, nodeSafetyMargin), systemUsed), podHPUsed), util.NewZeroResourceList())

// Node(Batch).Alloc[request] := Node.Total - Node.Reserved - System.Reserved - sum(Pod(Prod/Mid).Request)
// Node(Batch).Alloc[request] := Node.Total - Node.SafetyMargin - System.Reserved - sum(Pod(Prod/Mid).Request)
// System.Reserved = max(Node.Anno.Reserved, Node.Kubelet.Reserved)
batchAllocatableByRequest := quotav1.Max(quotav1.Subtract(quotav1.Subtract(quotav1.Subtract(
nodeCapacity, nodeReserved), systemReserved), podHPReq), util.NewZeroResourceList())
nodeCapacity, nodeSafetyMargin), nodeReserved), podHPReq), util.NewZeroResourceList())

// Node(Batch).Alloc[maxUsageRequest] := Node.Total - Node.Reserved - System.Used - sum(max(Pod(Prod/Mid).Request, Pod(Prod/Mid).Used))
// Node(Batch).Alloc[maxUsageRequest] := Node.Total - Node.SafetyMargin - System.Used - sum(max(Pod(Prod/Mid).Request, Pod(Prod/Mid).Used))
batchAllocatableByMaxUsageRequest := quotav1.Max(quotav1.Subtract(quotav1.Subtract(quotav1.Subtract(
nodeCapacity, nodeReserved), systemUsed), podHPMaxUsedReq), util.NewZeroResourceList())
nodeCapacity, nodeSafetyMargin), systemUsed), podHPMaxUsedReq), util.NewZeroResourceList())

batchAllocatable := batchAllocatableByUsage

var cpuMsg string
// batch cpu supports the policies "usage" and "maxUsageRequest"
if strategy != nil && strategy.CPUCalculatePolicy != nil && *strategy.CPUCalculatePolicy == configuration.CalculateByPodMaxUsageRequest {
batchAllocatable[corev1.ResourceCPU] = *batchAllocatableByMaxUsageRequest.Cpu()
cpuMsg = fmt.Sprintf("batchAllocatable[CPU(Milli-Core)]:%v = nodeCapacity:%v - nodeReservation:%v - systemUsageOrReserved:%v - podHPMaxUsedRequest:%v",
batchAllocatable.Cpu().MilliValue(), nodeCapacity.Cpu().MilliValue(), nodeReserved.Cpu().MilliValue(),
cpuMsg = fmt.Sprintf("batchAllocatable[CPU(Milli-Core)]:%v = nodeCapacity:%v - nodeSafetyMargin:%v - systemUsageOrNodeReserved:%v - podHPMaxUsedRequest:%v",
batchAllocatable.Cpu().MilliValue(), nodeCapacity.Cpu().MilliValue(), nodeSafetyMargin.Cpu().MilliValue(),
systemUsed.Cpu().MilliValue(), podHPMaxUsedReq.Cpu().MilliValue())
} else { // use CalculatePolicy "usage" by default
cpuMsg = fmt.Sprintf("batchAllocatable[CPU(Milli-Core)]:%v = nodeCapacity:%v - nodeReservation:%v - systemUsageOrReserved:%v - podHPUsed:%v",
batchAllocatable.Cpu().MilliValue(), nodeCapacity.Cpu().MilliValue(), nodeReserved.Cpu().MilliValue(),
cpuMsg = fmt.Sprintf("batchAllocatable[CPU(Milli-Core)]:%v = nodeCapacity:%v - nodeSafetyMargin:%v - systemUsageOrNodeReserved:%v - podHPUsed:%v",
batchAllocatable.Cpu().MilliValue(), nodeCapacity.Cpu().MilliValue(), nodeSafetyMargin.Cpu().MilliValue(),
systemUsed.Cpu().MilliValue(), podHPUsed.Cpu().MilliValue())
}

var memMsg string
// batch memory supports the policies "usage", "request" and "maxUsageRequest"
if strategy != nil && strategy.MemoryCalculatePolicy != nil && *strategy.MemoryCalculatePolicy == configuration.CalculateByPodRequest {
batchAllocatable[corev1.ResourceMemory] = *batchAllocatableByRequest.Memory()
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeReservation:%v - systemReserved:%v - podHPRequest:%v",
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeSafetyMargin:%v - nodeReserved:%v - podHPRequest:%v",
batchAllocatable.Memory().ScaledValue(resource.Giga), nodeCapacity.Memory().ScaledValue(resource.Giga),
nodeReserved.Memory().ScaledValue(resource.Giga), systemReserved.Memory().ScaledValue(resource.Giga),
nodeSafetyMargin.Memory().ScaledValue(resource.Giga), nodeReserved.Memory().ScaledValue(resource.Giga),
podHPReq.Memory().ScaledValue(resource.Giga))
} else if strategy != nil && strategy.MemoryCalculatePolicy != nil && *strategy.MemoryCalculatePolicy == configuration.CalculateByPodMaxUsageRequest {
batchAllocatable[corev1.ResourceMemory] = *batchAllocatableByMaxUsageRequest.Memory()
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeReservation:%v - systemUsage:%v - podHPMaxUsedRequest:%v",
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeSafetyMargin:%v - systemUsage:%v - podHPMaxUsedRequest:%v",
batchAllocatable.Memory().ScaledValue(resource.Giga), nodeCapacity.Memory().ScaledValue(resource.Giga),
nodeReserved.Memory().ScaledValue(resource.Giga), systemUsed.Memory().ScaledValue(resource.Giga),
nodeSafetyMargin.Memory().ScaledValue(resource.Giga), systemUsed.Memory().ScaledValue(resource.Giga),
podHPMaxUsedReq.Memory().ScaledValue(resource.Giga))
} else { // use CalculatePolicy "usage" by default
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeReservation:%v - systemUsage:%v - podHPUsed:%v",
memMsg = fmt.Sprintf("batchAllocatable[Mem(GB)]:%v = nodeCapacity:%v - nodeSafetyMargin:%v - systemUsage:%v - podHPUsed:%v",
batchAllocatable.Memory().ScaledValue(resource.Giga), nodeCapacity.Memory().ScaledValue(resource.Giga),
nodeReserved.Memory().ScaledValue(resource.Giga), systemUsed.Memory().ScaledValue(resource.Giga),
nodeSafetyMargin.Memory().ScaledValue(resource.Giga), systemUsed.Memory().ScaledValue(resource.Giga),
podHPUsed.Memory().ScaledValue(resource.Giga))
}
return batchAllocatable, cpuMsg, memMsg
@@ -201,10 +201,10 @@ func getNodeCapacity(node *corev1.Node) corev1.ResourceList {
return getResourceListForCPUAndMemory(node.Status.Capacity)
}

// getNodeReservation gets node-level safe-guarding reservation with the node's allocatable
func getNodeReservation(strategy *configuration.ColocationStrategy, nodeAllocatable corev1.ResourceList) corev1.ResourceList {
cpuReserveQuant := util.MultiplyMilliQuant(nodeAllocatable[corev1.ResourceCPU], getReserveRatio(*strategy.CPUReclaimThresholdPercent))
memReserveQuant := util.MultiplyQuant(nodeAllocatable[corev1.ResourceMemory], getReserveRatio(*strategy.MemoryReclaimThresholdPercent))
// getNodeSafetyMargin gets the node-level safe-guarding reservation (safety margin) based on the node's capacity
func getNodeSafetyMargin(strategy *configuration.ColocationStrategy, nodeCapacity corev1.ResourceList) corev1.ResourceList {
cpuReserveQuant := util.MultiplyMilliQuant(nodeCapacity[corev1.ResourceCPU], getReserveRatio(*strategy.CPUReclaimThresholdPercent))
memReserveQuant := util.MultiplyQuant(nodeCapacity[corev1.ResourceMemory], getReserveRatio(*strategy.MemoryReclaimThresholdPercent))

return corev1.ResourceList{
corev1.ResourceCPU: cpuReserveQuant,
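To see how the "usage", "request" and "maxUsageRequest" policies can diverge on the same node, here is a small self-contained sketch with made-up memory figures; only the subtraction pattern mirrors the formulas above, none of the numbers come from the commit.

package main

import "fmt"

func main() {
	// Illustrative values in GiB.
	const (
		nodeCapacity     = 256.0
		nodeSafetyMargin = 25.6  // e.g. 10% of capacity held back by the reclaim threshold
		nodeReserved     = 8.0   // max(Node.Anno.Reserved, Node.Kubelet.Reserved)
		systemUsed       = 12.0  // max(systemUsage, nodeReserved)
		podHPUsed        = 90.0  // usage of Prod/Mid pods
		podHPRequest     = 120.0 // requests of Prod/Mid pods
		podHPMaxUsedReq  = 130.0 // sum of per-pod max(request, usage)
	)

	byUsage := nodeCapacity - nodeSafetyMargin - systemUsed - podHPUsed
	byRequest := nodeCapacity - nodeSafetyMargin - nodeReserved - podHPRequest
	byMaxUsageRequest := nodeCapacity - nodeSafetyMargin - systemUsed - podHPMaxUsedReq

	fmt.Printf("usage=%.1f request=%.1f maxUsageRequest=%.1f (GiB)\n",
		byUsage, byRequest, byMaxUsageRequest) // 128.4, 102.4, 88.4
}

The "request" policy trades responsiveness to actual load for stability of the batch allocatable, which is why the comment in plugin.go recommends it when memory should not be overcommitted.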
Original file line number Diff line number Diff line change
@@ -121,7 +121,7 @@ func Test_getNodeReservation(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := getNodeReservation(tt.args.strategy, tt.args.nodeAllocatable)
got := getNodeSafetyMargin(tt.args.strategy, tt.args.nodeAllocatable)
testingCorrectResourceList(t, &tt.want, &got)
})
}
