diff --git a/cmd/plugins/topology-aware/policy/pod-preferences.go b/cmd/plugins/topology-aware/policy/pod-preferences.go index af1bb1b90..ce35215c9 100644 --- a/cmd/plugins/topology-aware/policy/pod-preferences.go +++ b/cmd/plugins/topology-aware/policy/pod-preferences.go @@ -42,6 +42,8 @@ const ( keyColdStartPreference = "cold-start" // annotation key for reserved pools keyReservedCPUsPreference = "prefer-reserved-cpus" + // annotation key for CPU Priority preference + keyCpuPriorityPreference = "prefer-cpu-priority" // effective annotation key for isolated CPU preference preferIsolatedCPUsKey = keyIsolationPreference + "." + kubernetes.ResmgrKeyNamespace @@ -53,6 +55,8 @@ const ( preferColdStartKey = keyColdStartPreference + "." + kubernetes.ResmgrKeyNamespace // annotation key for reserved pools preferReservedCPUsKey = keyReservedCPUsPreference + "." + kubernetes.ResmgrKeyNamespace + // effective annotation key for CPU priority preference + preferCpuPriorityKey = keyCpuPriorityPreference + "." + kubernetes.ResmgrKeyNamespace ) // cpuClass is a type of CPU to allocate @@ -153,6 +157,36 @@ func sharedCPUsPreference(pod cache.Pod, container cache.Container) (bool, bool) return preference, true } +// cpuPrioPreference returns the CPU priority preference for the given container +// and whether the container was explicitly annotated with this setting. 
+func cpuPrioPreference(pod cache.Pod, container cache.Container, fallback cpuPrio) (cpuPrio, bool) { + key := preferCpuPriorityKey + value, ok := pod.GetEffectiveAnnotation(key, container.GetName()) + + if !ok { + prio := fallback + log.Debug("%s: implicit CPU priority preference %q", container.PrettyName(), prio) + return prio, false + } + + if value == "default" { + prio := defaultPrio + log.Debug("%s: explicit CPU priority preference %q", container.PrettyName(), prio) + return prio, true + } + + prio, ok := cpuPrioByName[value] + if !ok { + log.Error("%s: invalid CPU priority preference %q", container.PrettyName(), value) + prio := fallback + log.Debug("%s: implicit CPU priority preference %q", container.PrettyName(), prio) + return prio, false + } + + log.Debug("%s: explicit CPU priority preference %q", container.PrettyName(), prio) + return prio, true +} + // memoryTypePreference returns what type of memory should be allocated for the container. // // If the effective annotations are not found, this function falls back to @@ -370,7 +404,7 @@ func checkReservedCPUsAnnotations(c cache.Container) (bool, bool) { // 2. fraction: amount of fractional CPU in milli-CPU // 3. isolate: (bool) whether to prefer isolated full CPUs // 4. cpuType: (cpuClass) class of CPU to allocate (reserved vs. 
normal) -func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, int, bool, cpuClass) { +func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, int, bool, cpuClass, cpuPrio) { // // CPU allocation preferences for a container consist of // @@ -439,20 +473,21 @@ func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, in request := reqs.Requests[corev1.ResourceCPU] qosClass := pod.GetQOSClass() fraction := int(request.MilliValue()) + prio := defaultPrio // ignored for fractional allocations // easy cases: kube-system namespace, Burstable or BestEffort QoS class containers preferReserved, explicitReservation := checkReservedCPUsAnnotations(container) switch { case container.PreserveCpuResources(): - return 0, fraction, false, cpuPreserve + return 0, fraction, false, cpuPreserve, prio case preferReserved == true: - return 0, fraction, false, cpuReserved + return 0, fraction, false, cpuReserved, prio case checkReservedPoolNamespaces(namespace) && !explicitReservation: - return 0, fraction, false, cpuReserved + return 0, fraction, false, cpuReserved, prio case qosClass == corev1.PodQOSBurstable: - return 0, fraction, false, cpuNormal + return 0, fraction, false, cpuNormal, prio case qosClass == corev1.PodQOSBestEffort: - return 0, 0, false, cpuNormal + return 0, 0, false, cpuNormal, prio } // complex case: Guaranteed QoS class containers @@ -460,39 +495,40 @@ func cpuAllocationPreferences(pod cache.Pod, container cache.Container) (int, in fraction = fraction % 1000 preferIsolated, explicitIsolated := isolatedCPUsPreference(pod, container) preferShared, explicitShared := sharedCPUsPreference(pod, container) + prio, _ = cpuPrioPreference(pod, container, defaultPrio) // ignored for fractional allocations switch { // sub-core CPU request case cores == 0: - return 0, fraction, false, cpuNormal + return 0, fraction, false, cpuNormal, prio // 1 <= CPU request < 2 case cores < 2: // fractional allocation, 
potentially mixed if fraction > 0 { if preferShared { - return 0, 1000*cores + fraction, false, cpuNormal + return 0, 1000*cores + fraction, false, cpuNormal, prio } - return cores, fraction, preferIsolated, cpuNormal + return cores, fraction, preferIsolated, cpuNormal, prio } // non-fractional allocation if preferShared && explicitShared { - return 0, 1000*cores + fraction, false, cpuNormal + return 0, 1000*cores + fraction, false, cpuNormal, prio } - return cores, fraction, preferIsolated, cpuNormal + return cores, fraction, preferIsolated, cpuNormal, prio // CPU request >= 2 default: // fractional allocation, only mixed if explicitly annotated as unshared if fraction > 0 { if !preferShared && explicitShared { - return cores, fraction, preferIsolated && explicitIsolated, cpuNormal + return cores, fraction, preferIsolated && explicitIsolated, cpuNormal, prio } - return 0, 1000*cores + fraction, false, cpuNormal + return 0, 1000*cores + fraction, false, cpuNormal, prio } // non-fractional allocation if preferShared && explicitShared { - return 0, 1000 * cores, false, cpuNormal + return 0, 1000 * cores, false, cpuNormal, prio } - return cores, fraction, preferIsolated && explicitIsolated, cpuNormal + return cores, fraction, preferIsolated && explicitIsolated, cpuNormal, prio } } diff --git a/cmd/plugins/topology-aware/policy/pools.go b/cmd/plugins/topology-aware/policy/pools.go index e3975e522..d9dde1f8c 100644 --- a/cmd/plugins/topology-aware/policy/pools.go +++ b/cmd/plugins/topology-aware/policy/pools.go @@ -960,6 +960,37 @@ func (p *policy) compareScores(request Request, pools []Node, scores map[int]Sco } } + // X. for LowPrio and HighPrio CPU preference, the only fulfilling node wins + log.Debug(" - preferred CPU priority is %s", request.CPUPrio()) + switch request.CPUPrio() { + case lowPrio: + lp1, lp2 := score1.LowPrio(), score2.LowPrio() + log.Debug(" - lp1 %d vs. 
lp2 %d", lp1, lp2) + switch { + case lp1 == lp2: + log.Debug(" - LowPrio CPU capacity is a TIE") + case lp1 >= 0 && lp2 < 0: + log.Debug(" => %s WINS based on LowPrio CPU capacity", node1.Name()) + return true + case lp1 < 0 && lp2 >= 0: + log.Debug(" => %s WINS based on LowPrio CPU capacity", node2.Name()) + return false + } + + case highPrio: + hp1, hp2 := score1.HighPrio(), score2.HighPrio() + switch { + case hp1 == hp2: + log.Debug(" - HighPrio CPU capacity is a TIE") + case hp1 >= 0 && hp2 < 0: + log.Debug(" => %s WINS based on HighPrio CPU capacity", node1.Name()) + return true + case hp1 < 0 && hp2 >= 0: + log.Debug(" => %s WINS based on HighPrio CPU capacity", node2.Name()) + return false + } + } + // 5) a lower node wins if depth1 > depth2 { log.Debug(" => %s WINS on depth", node1.Name()) @@ -1001,6 +1032,22 @@ func (p *policy) compareScores(request Request, pools []Node, scores map[int]Sco return id1 < id2 } + // X. for NormalPrio CPU preference, the only fulfilling node wins + log.Debug(" - preferred CPU priority is %s", request.CPUPrio()) + if request.CPUPrio() == normalPrio { + np1, np2 := score1.NormalPrio(), score2.NormalPrio() + switch { + case np1 == np2: + log.Debug(" - NormalPrio CPU capacity is a TIE") + case np1 >= 0 && np2 < 0: + log.Debug(" => %s WINS based on NormalPrio CPU capacity", node1.Name()) + return true + case np1 < 0 && np2 >= 0: + log.Debug(" => %s WINS based on NormalPrio CPU capacity", node2.Name()) + return false + } + } + // 8) more slicable shared capacity wins if request.FullCPUs() > 0 && (shared1 > 0 || shared2 > 0) { if shared1 > shared2 { diff --git a/cmd/plugins/topology-aware/policy/resources.go b/cmd/plugins/topology-aware/policy/resources.go index b2e4ef210..7af1e0277 100644 --- a/cmd/plugins/topology-aware/policy/resources.go +++ b/cmd/plugins/topology-aware/policy/resources.go @@ -19,6 +19,7 @@ import ( "strconv" "time" + "github.com/containers/nri-plugins/pkg/sysfs" "github.com/containers/nri-plugins/pkg/utils/cpuset"
v1 "k8s.io/api/core/v1" @@ -28,6 +29,28 @@ import ( idset "github.com/intel/goresctrl/pkg/utils" ) +type ( + cpuPrio = cpuallocator.CPUPriority +) + +const ( + highPrio = cpuallocator.PriorityHigh + normalPrio = cpuallocator.PriorityNormal + lowPrio = cpuallocator.PriorityLow + nonePrio = cpuallocator.PriorityNone +) + +var ( + defaultPrio = nonePrio + + cpuPrioByName = map[string]cpuPrio{ + "high": highPrio, + "normal": normalPrio, + "low": lowPrio, + "none": nonePrio, + } +) + // Supply represents avaialbe CPU and memory capacity of a node. type Supply interface { // GetNode returns the node supplying this capacity. @@ -95,6 +118,8 @@ type Request interface { String() string // CPUType returns the type of requested CPU. CPUType() cpuClass + // CPUPrio returns the preferred priority of requested CPU. + CPUPrio() cpuPrio // SetCPUType sets the type of requested CPU. SetCPUType(cpuType cpuClass) // FullCPUs return the number of full CPUs requested. @@ -195,6 +220,9 @@ type Score interface { SharedCapacity() int Colocated() int HintScores() map[string]float64 + LowPrio() int + HighPrio() int + NormalPrio() int String() string } @@ -223,6 +251,7 @@ type request struct { fraction int // amount of fractional CPU requested isolate bool // prefer isolated exclusive CPUs cpuType cpuClass // preferred CPU type (normal, reserved) + prio cpuPrio // CPU priority preference, ignored for fraction requests memReq uint64 // memory request memLim uint64 // memory limit @@ -257,13 +286,16 @@ var _ Grant = &grant{} // score implements our Score interface. 
type score struct { - supply Supply // CPU supply (node) - req Request // CPU request (container) - isolated int // remaining isolated CPUs - reserved int // remaining reserved CPUs - shared int // remaining shared capacity - colocated int // number of colocated containers - hints map[string]float64 // hint scores + supply Supply // CPU supply (node) + req Request // CPU request (container) + isolated int // remaining isolated CPUs + reserved int // remaining reserved CPUs + shared int // remaining shared capacity + lowPrio int // remaining low-priority CPUs + highPrio int // remaining high-priority CPUs + normalPrio int // normal-priority CPUs + colocated int // number of colocated containers + hints map[string]float64 // hint scores } var _ Score = &score{} @@ -575,7 +607,7 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) { // allocate isolated exclusive CPUs or slice them off the sharable set switch { case full > 0 && cs.isolated.Size() >= full && cr.isolate: - exclusive, err = cs.takeCPUs(&cs.isolated, nil, full) + exclusive, err = cs.takeCPUs(&cs.isolated, nil, full, cr.CPUPrio()) if err != nil { return nil, policyError("internal error: "+ "%s: can't take %d exclusive isolated CPUs from %s: %v", @@ -583,7 +615,7 @@ func (cs *supply) AllocateCPU(r Request) (Grant, error) { } case full > 0 && cs.AllocatableSharedCPU() > 1000*full: - exclusive, err = cs.takeCPUs(&cs.sharable, nil, full) + exclusive, err = cs.takeCPUs(&cs.sharable, nil, full, cr.CPUPrio()) if err != nil { return nil, policyError("internal error: "+ "%s: can't take %d exclusive CPUs from %s: %v", @@ -764,8 +796,8 @@ func (cs *supply) ReserveMemory(g Grant) error { } // takeCPUs takes up to cnt CPUs from a given CPU set to another. 
-func (cs *supply) takeCPUs(from, to *cpuset.CPUSet, cnt int) (cpuset.CPUSet, error) { - cset, err := cs.node.Policy().cpuAllocator.AllocateCpus(from, cnt, cpuallocator.PriorityHigh) +func (cs *supply) takeCPUs(from, to *cpuset.CPUSet, cnt int, prio cpuPrio) (cpuset.CPUSet, error) { + cset, err := cs.node.Policy().cpuAllocator.AllocateCpus(from, cnt, prio) if err != nil { return cset, err } @@ -942,12 +974,12 @@ func (cs *supply) DumpMemoryState(prefix string) { // newRequest creates a new request for the given container. func newRequest(container cache.Container) Request { pod, _ := container.GetPod() - full, fraction, isolate, cpuType := cpuAllocationPreferences(pod, container) + full, fraction, isolate, cpuType, prio := cpuAllocationPreferences(pod, container) req, lim, mtype := memoryAllocationPreference(pod, container) coldStart := time.Duration(0) - log.Debug("%s: CPU preferences: cpuType=%s, full=%v, fraction=%v, isolate=%v", - container.PrettyName(), cpuType, full, fraction, isolate) + log.Debug("%s: CPU preferences: cpuType=%s, full=%v, fraction=%v, isolate=%v, prio=%v", + container.PrettyName(), cpuType, full, fraction, isolate, prio) if mtype == memoryUnspec { mtype = defaultMemoryType @@ -984,6 +1016,7 @@ func newRequest(container cache.Container) Request { memLim: lim, memType: mtype, coldStart: coldStart, + prio: prio, } } @@ -998,19 +1031,19 @@ func (cr *request) String() string { isolated := map[bool]string{false: "", true: "isolated "}[cr.isolate] switch { case cr.full == 0 && cr.fraction == 0: - return fmt.Sprintf("") + mem + return fmt.Sprintf("<%s CPU request "+cr.container.PrettyName()+": ->", cr.prio) + mem case cr.full > 0 && cr.fraction > 0: - return fmt.Sprintf("", isolated, cr.full, cr.fraction) + mem + return fmt.Sprintf("<%s CPU request "+cr.container.PrettyName()+": "+ + "%sexclusive: %d, shared: %d>", cr.prio, isolated, cr.full, cr.fraction) + mem case cr.full > 0: - return fmt.Sprintf("", isolated, cr.full) + mem + return 
fmt.Sprintf("<%s CPU request "+ + cr.container.PrettyName()+": %sexclusive: %d>", cr.prio, isolated, cr.full) + mem default: - return fmt.Sprintf("", cr.fraction) + mem + return fmt.Sprintf("<%s CPU request "+ + cr.container.PrettyName()+": shared: %d>", cr.prio, cr.fraction) + mem } } @@ -1019,6 +1052,10 @@ func (cr *request) CPUType() cpuClass { return cr.cpuType } +func (cr *request) CPUPrio() cpuPrio { + return cr.prio +} + // SetCPUType sets the requested type of CPU for the grant. func (cr *request) SetCPUType(cpuType cpuClass) { cr.cpuType = cpuType @@ -1105,6 +1142,29 @@ func (cs *supply) GetScore(req Request) Score { // calculate fractional capacity score.shared -= part + + ecores := cs.GetNode().System().CoreKindCPUs(sysfs.EfficientCore) + lpCPUs := ecores + if ecores.Size() == 0 { + lpCPUs = cs.GetNode().Policy().cpuAllocator.GetCPUPriorities()[lowPrio] + } + lpCPUs = lpCPUs.Intersection(cs.SharableCPUs()) + lpCnt := lpCPUs.Size() + score.lowPrio = lpCnt*1000 - (1000*full + part) + + pcores := cs.GetNode().System().CoreKindCPUs(sysfs.PerformanceCore) + hpCPUs := pcores + if pcores.Size() == 0 { + hpCPUs = cs.GetNode().Policy().cpuAllocator.GetCPUPriorities()[highPrio] + } + hpCPUs = hpCPUs.Intersection(cs.SharableCPUs()) + hpCnt := hpCPUs.Size() + score.highPrio = hpCnt*1000 - (1000*full + part) + + npCPUs := cs.GetNode().Policy().cpuAllocator.GetCPUPriorities()[normalPrio] + npCPUs = npCPUs.Intersection(cs.SharableCPUs()) + npCnt := npCPUs.Size() + score.normalPrio = npCnt*1000 - (1000*full + part) } // calculate colocation score @@ -1204,6 +1267,18 @@ func (score *score) HintScores() map[string]float64 { return score.hints } +func (score *score) LowPrio() int { + return score.lowPrio +} + +func (score *score) HighPrio() int { + return score.highPrio +} + +func (score *score) NormalPrio() int { + return score.normalPrio +} + func (score *score) String() string { return fmt.Sprintf("",
score.supply.GetNode().Name(), score.isolated, score.reserved, score.shared, score.colocated, score.hints) diff --git a/cmd/plugins/topology-aware/policy/topology-aware-policy.go b/cmd/plugins/topology-aware/policy/topology-aware-policy.go index e25984c2f..efc829cc3 100644 --- a/cmd/plugins/topology-aware/policy/topology-aware-policy.go +++ b/cmd/plugins/topology-aware/policy/topology-aware-policy.go @@ -98,6 +98,7 @@ func (p *policy) Setup(opts *policyapi.BackendOptions) error { p.cpuAllocator = cpuallocator.NewCPUAllocator(opts.System) opt = cfg + defaultPrio = cfg.DefaultCPUPriority.Value() if err := p.initialize(); err != nil { return policyError("failed to initialize %s policy: %w", PolicyName, err) @@ -105,6 +106,8 @@ func (p *policy) Setup(opts *policyapi.BackendOptions) error { p.registerImplicitAffinities() + + log.Info("default CPU priority is %s", defaultPrio) + return nil } @@ -425,6 +428,7 @@ func (p *policy) Reconfigure(newCfg interface{}) error { opt = cfg p.cfg = cfg + defaultPrio = cfg.DefaultCPUPriority.Value() if err := p.initialize(); err != nil { *p = savedPolicy @@ -435,6 +439,7 @@ func (p *policy) Reconfigure(newCfg interface{}) error { if err := grant.RefetchNodes(); err != nil { *p = savedPolicy opt = p.cfg + defaultPrio = p.cfg.DefaultCPUPriority.Value() return policyError("failed to reconfigure: %v", err) } } @@ -523,7 +528,7 @@ func (p *policy) checkConstraints() error { // Use CpuAllocator to pick reserved CPUs among // allowed ones. Because using those CPUs is allowed, // they remain (they are put back) in the allowed set.
- cset, err := p.cpuAllocator.AllocateCpus(&p.allowed, p.reserveCnt, cpuallocator.PriorityNormal) + cset, err := p.cpuAllocator.AllocateCpus(&p.allowed, p.reserveCnt, normalPrio) p.allowed = p.allowed.Union(cset) if err != nil { log.Fatal("cannot reserve %dm CPUs for ReservedResources from AvailableResources: %s", qty.MilliValue(), err) diff --git a/config/crd/bases/config.nri_topologyawarepolicies.yaml b/config/crd/bases/config.nri_topologyawarepolicies.yaml index af7faacee..802b4da15 100644 --- a/config/crd/bases/config.nri_topologyawarepolicies.yaml +++ b/config/crd/bases/config.nri_topologyawarepolicies.yaml @@ -94,6 +94,19 @@ spec: - classes type: object type: object + defaultCPUPriority: + default: none + description: |- + DefaultCPUPriority (high, normal, low, none) is the preferred CPU + priority for allocated CPUs when a container has not been annotated + with any other CPU preference. + Notes: Currently this option only affects exclusive CPU allocations. + enum: + - high + - normal + - low + - none + type: string instrumentation: description: Config provides runtime configuration for instrumentation. properties: diff --git a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml index af7faacee..802b4da15 100644 --- a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml +++ b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml @@ -94,6 +94,19 @@ spec: - classes type: object type: object + defaultCPUPriority: + default: none + description: |- + DefaultCPUPriority (high, normal, low, none) is the preferred CPU + priority for allocated CPUs when a container has not been annotated + with any other CPU preference. + Notes: Currently this option only affects exclusive CPU allocations. 
+ enum: + - high + - normal + - low + - none + type: string instrumentation: description: Config provides runtime configuration for instrumentation. properties: diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware/config.go b/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware/config.go index 5adfca087..d1e537215 100644 --- a/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware/config.go +++ b/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware/config.go @@ -15,7 +15,10 @@ package topologyaware import ( + "strings" + policy "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/resmgr/policy" + "github.com/containers/nri-plugins/pkg/cpuallocator" ) type ( @@ -33,6 +36,27 @@ const ( AmountCPUSet = policy.AmountCPUSet ) +type CPUPriority string + +const ( + PriorityHigh CPUPriority = "high" + PriorityNormal CPUPriority = "normal" + PriorityLow CPUPriority = "low" + PriorityNone CPUPriority = "none" +) + +func (p CPUPriority) Value() cpuallocator.CPUPriority { + switch strings.ToLower(string(p)) { + case string(PriorityHigh): + return cpuallocator.PriorityHigh + case string(PriorityNormal): + return cpuallocator.PriorityNormal + case string(PriorityLow): + return cpuallocator.PriorityLow + } + return cpuallocator.PriorityNone +} + // +k8s:deepcopy-gen=true // +optional type Config struct { @@ -77,4 +101,12 @@ type Config struct { // of it. // +kubebuilder:validation:Required ReservedResources Constraints `json:"reservedResources"` + // DefaultCPUPriority (high, normal, low, none) is the preferred CPU + // priority for allocated CPUs when a container has not been annotated + // with any other CPU preference. + // Notes: Currently this option only affects exclusive CPU allocations. + // +kubebuilder:validation:Enum=high;normal;low;none + // +kubebuilder:default=none + // +kubebuilder:validation:Format:string + DefaultCPUPriority CPUPriority `json:"defaultCPUPriority,omitempty"` }