Skip to content

Commit

Permalink
WIP: topology-aware: support for CPU allocator priorities.
Browse files Browse the repository at this point in the history
Add support for configurable default and annotated per-container
CPU priority preferences. These determine the preferred priority
for CPUs when doing fully or partially exclusive CPU allocation.
Priorities are calculated for such allocations and passed on to
the CPU allocator which then tries to fulfill these preferences.

It should now be possible to configure the policy to allocate
(exclusive) E-cores by default and P-cores to containers which
are annotated so, or to do it the other way around.

Signed-off-by: Krisztian Litkey <krisztian.litkey@intel.com>
  • Loading branch information
klihub committed Mar 14, 2024
1 parent ecb4ced commit 7a8e9fe
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 37 deletions.
66 changes: 51 additions & 15 deletions cmd/plugins/topology-aware/policy/pod-preferences.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ const (
keyColdStartPreference = "cold-start"
// annotation key for reserved pools
keyReservedCPUsPreference = "prefer-reserved-cpus"
// annotation key for CPU Priority preference
keyCpuPriorityPreference = "prefer-cpu-priority"

// effective annotation key for isolated CPU preference
preferIsolatedCPUsKey = keyIsolationPreference + "." + kubernetes.ResmgrKeyNamespace
Expand All @@ -53,6 +55,8 @@ const (
preferColdStartKey = keyColdStartPreference + "." + kubernetes.ResmgrKeyNamespace
// annotation key for reserved pools
preferReservedCPUsKey = keyReservedCPUsPreference + "." + kubernetes.ResmgrKeyNamespace
// effective annotation key for CPU priority preference
preferCpuPriorityKey = keyCpuPriorityPreference + "." + kubernetes.ResmgrKeyNamespace
)

// cpuClass is a type of CPU to allocate
Expand Down Expand Up @@ -153,6 +157,36 @@ func sharedCPUsPreference(pod cache.Pod, container cache.Container) (bool, bool)
return preference, true
}

// cpuPrioPreference returns the CPU priority preference for the given container
// and whether the container was explicitly annotated with this setting.
func cpuPrioPreference(pod cache.Pod, container cache.Container, fallback cpuPrio) (cpuPrio, bool) {
	value, ok := pod.GetEffectiveAnnotation(preferCpuPriorityKey, container.GetName())
	if !ok {
		// No annotation present; fall back to the caller-supplied default.
		log.Debug("%s: implicit CPU priority preference %q", container.PrettyName(), fallback)
		return fallback, false
	}

	// The literal value "default" explicitly selects the built-in default priority.
	if value == "default" {
		log.Debug("%s: explicit CPU priority preference %q", container.PrettyName(), defaultPrio)
		return defaultPrio, true
	}

	if prio, found := cpuPrioByName[value]; found {
		log.Debug("%s: explicit CPU priority preference %q", container.PrettyName(), prio)
		return prio, true
	}

	// Unrecognized value: report it, then behave exactly as if unannotated.
	log.Error("%s: invalid CPU priority preference %q", container.PrettyName(), value)
	log.Debug("%s: implicit CPU priority preference %q", container.PrettyName(), fallback)
	return fallback, false
}

// memoryTypePreference returns what type of memory should be allocated for the container.
//
// If the effective annotations are not found, this function falls back to
Expand Down Expand Up @@ -370,7 +404,7 @@ func checkReservedCPUsAnnotations(c cache.Container) (bool, bool) {
// 2. fraction: amount of fractional CPU in milli-CPU
// 3. isolate: (bool) whether to prefer isolated full CPUs
// 4. cpuType: (cpuClass) class of CPU to allocate (reserved vs. normal)
func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, int, bool, cpuClass) {
func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, int, bool, cpuClass, cpuPrio) {
//
// CPU allocation preferences for a container consist of
//
Expand Down Expand Up @@ -439,60 +473,62 @@ func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, in
request := reqs.Requests[corev1.ResourceCPU]
qosClass := pod.GetQOSClass()
fraction := int(request.MilliValue())
prio := defaultPrio // ignored for fractional allocations

// easy cases: kube-system namespace, Burstable or BestEffort QoS class containers
preferReserved, explicitReservation := checkReservedCPUsAnnotations(container)
switch {
case container.PreserveCpuResources():
return 0, fraction, false, cpuPreserve
return 0, fraction, false, cpuPreserve, prio
case preferReserved == true:
return 0, fraction, false, cpuReserved
return 0, fraction, false, cpuReserved, prio
case checkReservedPoolNamespaces(namespace) && !explicitReservation:
return 0, fraction, false, cpuReserved
return 0, fraction, false, cpuReserved, prio
case qosClass == corev1.PodQOSBurstable:
return 0, fraction, false, cpuNormal
return 0, fraction, false, cpuNormal, prio
case qosClass == corev1.PodQOSBestEffort:
return 0, 0, false, cpuNormal
return 0, 0, false, cpuNormal, prio
}

// complex case: Guaranteed QoS class containers
cores := fraction / 1000
fraction = fraction % 1000
preferIsolated, explicitIsolated := isolatedCPUsPreference(pod, container)
preferShared, explicitShared := sharedCPUsPreference(pod, container)
prio, _ = cpuPrioPreference(pod, container, defaultPrio) // ignored for fractional allocations

switch {
// sub-core CPU request
case cores == 0:
return 0, fraction, false, cpuNormal
return 0, fraction, false, cpuNormal, prio
// 1 <= CPU request < 2
case cores < 2:
// fractional allocation, potentially mixed
if fraction > 0 {
if preferShared {
return 0, 1000*cores + fraction, false, cpuNormal
return 0, 1000*cores + fraction, false, cpuNormal, prio
}
return cores, fraction, preferIsolated, cpuNormal
return cores, fraction, preferIsolated, cpuNormal, prio
}
// non-fractional allocation
if preferShared && explicitShared {
return 0, 1000*cores + fraction, false, cpuNormal
return 0, 1000*cores + fraction, false, cpuNormal, prio
}
return cores, fraction, preferIsolated, cpuNormal
return cores, fraction, preferIsolated, cpuNormal, prio
// CPU request >= 2
default:
// fractional allocation, only mixed if explicitly annotated as unshared
if fraction > 0 {
if !preferShared && explicitShared {
return cores, fraction, preferIsolated && explicitIsolated, cpuNormal
return cores, fraction, preferIsolated && explicitIsolated, cpuNormal, prio
}
return 0, 1000*cores + fraction, false, cpuNormal
return 0, 1000*cores + fraction, false, cpuNormal, prio
}
// non-fractional allocation
if preferShared && explicitShared {
return 0, 1000 * cores, false, cpuNormal
return 0, 1000 * cores, false, cpuNormal, prio
}
return cores, fraction, preferIsolated && explicitIsolated, cpuNormal
return cores, fraction, preferIsolated && explicitIsolated, cpuNormal, prio
}
}

Expand Down
47 changes: 47 additions & 0 deletions cmd/plugins/topology-aware/policy/pools.go
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,37 @@ func (p *policy) compareScores(request Request, pools []Node, scores map[int]Sco
}
}

// X. for LowPrio and HighPrio CPU preference, the only fulfilling node wins
log.Debug(" - preferred CPU priority is %s", request.CPUPrio())
switch request.CPUPrio() {
case lowPrio:
	lp1, lp2 := score1.LowPrio(), score2.LowPrio()
	log.Debug(" - lp1 %d vs. lp2 %d", lp1, lp2)
	switch {
	case lp1 == lp2:
		log.Debug(" - LowPrio CPU capacity is a TIE")
	case lp1 >= 0 && lp2 < 0:
		log.Debug(" => %s WINS based on LowPrio CPU capacity", node1.Name())
		return true
	case lp1 < 0 && lp2 >= 0:
		// Returning false means node2 wins, so log node2 (was node1).
		log.Debug(" => %s WINS based on LowPrio CPU capacity", node2.Name())
		return false
	}

case highPrio:
	hp1, hp2 := score1.HighPrio(), score2.HighPrio()
	// Log the compared values, mirroring the lowPrio branch above.
	log.Debug(" - hp1 %d vs. hp2 %d", hp1, hp2)
	switch {
	case hp1 == hp2:
		log.Debug(" - HighPrio CPU capacity is a TIE")
	case hp1 >= 0 && hp2 < 0:
		log.Debug(" => %s WINS based on HighPrio CPU capacity", node1.Name())
		return true
	case hp1 < 0 && hp2 >= 0:
		// Returning false means node2 wins, so log node2 (was node1).
		log.Debug(" => %s WINS based on HighPrio CPU capacity", node2.Name())
		return false
	}
}

// 5) a lower node wins
if depth1 > depth2 {
log.Debug(" => %s WINS on depth", node1.Name())
Expand Down Expand Up @@ -1001,6 +1032,22 @@ func (p *policy) compareScores(request Request, pools []Node, scores map[int]Sco
return id1 < id2
}

// X. for NormalPrio CPU preference, the only fulfilling node wins
log.Debug(" - preferred CPU priority is %s", request.CPUPrio())
if request.CPUPrio() == normalPrio {
	np1, np2 := score1.NormalPrio(), score2.NormalPrio()
	switch {
	case np1 == np2:
		log.Debug(" - NormalPrio CPU capacity is a TIE")
	case np1 >= 0 && np2 < 0:
		log.Debug(" => %s WINS based on NormalPrio CPU capacity", node1.Name())
		return true
	case np1 < 0 && np2 >= 0:
		// Returning false means node2 wins, so log node2 (was node1);
		// message text also made consistent with the winning branch above.
		log.Debug(" => %s WINS based on NormalPrio CPU capacity", node2.Name())
		return false
	}
}

// 8) more slicable shared capacity wins
if request.FullCPUs() > 0 && (shared1 > 0 || shared2 > 0) {
if shared1 > shared2 {
Expand Down
Loading

0 comments on commit 7a8e9fe

Please sign in to comment.