diff --git a/client/clientset/versioned/fake/register.go b/client/clientset/versioned/fake/register.go index bde8c44..cf5c71b 100644 --- a/client/clientset/versioned/fake/register.go +++ b/client/clientset/versioned/fake/register.go @@ -41,14 +41,14 @@ var localSchemeBuilder = runtime.SchemeBuilder{ // AddToScheme adds all types of this clientset into the given scheme. This allows composition // of clientsets, like in: // -// import ( -// "k8s.io/client-go/kubernetes" -// clientsetscheme "k8s.io/client-go/kubernetes/scheme" -// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" -// ) +// import ( +// "k8s.io/client-go/kubernetes" +// clientsetscheme "k8s.io/client-go/kubernetes/scheme" +// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" +// ) // -// kclientset, _ := kubernetes.NewForConfig(c) -// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) +// kclientset, _ := kubernetes.NewForConfig(c) +// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) // // After this, RawExtensions in Kubernetes types will serialize kube-aggregator types // correctly. diff --git a/client/clientset/versioned/scheme/register.go b/client/clientset/versioned/scheme/register.go index b329218..01b4b17 100644 --- a/client/clientset/versioned/scheme/register.go +++ b/client/clientset/versioned/scheme/register.go @@ -41,14 +41,14 @@ var localSchemeBuilder = runtime.SchemeBuilder{ // AddToScheme adds all types of this clientset into the given scheme. 
This allows composition // of clientsets, like in: // -// import ( -// "k8s.io/client-go/kubernetes" -// clientsetscheme "k8s.io/client-go/kubernetes/scheme" -// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" -// ) +// import ( +// "k8s.io/client-go/kubernetes" +// clientsetscheme "k8s.io/client-go/kubernetes/scheme" +// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" +// ) // -// kclientset, _ := kubernetes.NewForConfig(c) -// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) +// kclientset, _ := kubernetes.NewForConfig(c) +// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) // // After this, RawExtensions in Kubernetes types will serialize kube-aggregator types // correctly. diff --git a/config/v1alpha1/cluster_colocation_profile_types.go b/config/v1alpha1/cluster_colocation_profile_types.go index 286f5b0..120b8c9 100644 --- a/config/v1alpha1/cluster_colocation_profile_types.go +++ b/config/v1alpha1/cluster_colocation_profile_types.go @@ -74,6 +74,16 @@ type ClusterColocationProfileSpec struct { // +optional Annotations map[string]string `json:"annotations,omitempty"` + // LabelKeysMapping describes the labels that need to be injected into Pod.Labels with the same values. + // It sets the Pod.Labels[LabelKeysMapping[k]] = Pod.Labels[k] for each key k. + // +optional + LabelKeysMapping map[string]string `json:"labelKeysMapping,omitempty"` + + // AnnotationKeysMapping describes the annotations that need to be injected into Pod.Annotations with the same values. + // It sets the Pod.Annotations[AnnotationKeysMapping[k]] = Pod.Annotations[k] for each key k. + // +optional + AnnotationKeysMapping map[string]string `json:"annotationKeysMapping,omitempty"` + // If specified, the pod will be dispatched by specified scheduler.
// +optional SchedulerName string `json:"schedulerName,omitempty"` diff --git a/config/v1alpha1/zz_generated.deepcopy.go b/config/v1alpha1/zz_generated.deepcopy.go index 91de457..954e864 100644 --- a/config/v1alpha1/zz_generated.deepcopy.go +++ b/config/v1alpha1/zz_generated.deepcopy.go @@ -123,6 +123,20 @@ func (in *ClusterColocationProfileSpec) DeepCopyInto(out *ClusterColocationProfi (*out)[key] = val } } + if in.LabelKeysMapping != nil { + in, out := &in.LabelKeysMapping, &out.LabelKeysMapping + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.AnnotationKeysMapping != nil { + in, out := &in.AnnotationKeysMapping, &out.AnnotationKeysMapping + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } in.Patch.DeepCopyInto(&out.Patch) } diff --git a/configuration/slo_controller_config.go b/configuration/slo_controller_config.go index 174abbd..7d0e6ec 100644 --- a/configuration/slo_controller_config.go +++ b/configuration/slo_controller_config.go @@ -30,6 +30,8 @@ const ( ResourceQOSConfigKey = "resource-qos-config" CPUBurstConfigKey = "cpu-burst-config" SystemConfigKey = "system-config" + HostApplicationConfigKey = "host-application-config" + CPUNormalizationConfigKey = "cpu-normalization-config" ) // +k8s:deepcopy-gen=true @@ -88,6 +90,18 @@ type SystemCfg struct { NodeStrategies []NodeSystemStrategy `json:"nodeStrategies,omitempty" validate:"dive"` } +// +k8s:deepcopy-gen=true +type NodeHostApplicationCfg struct { + NodeCfgProfile `json:",inline"` + Applications []slov1alpha1.HostApplicationSpec `json:"applications,omitempty"` +} + +// +k8s:deepcopy-gen=true +type HostApplicationCfg struct { + Applications []slov1alpha1.HostApplicationSpec `json:"applications,omitempty"` + NodeConfigs []NodeHostApplicationCfg `json:"nodeConfigs,omitempty"` +} + // +k8s:deepcopy-gen=true type ResourceQOSCfg struct { ClusterStrategy *slov1alpha1.ResourceQOSStrategy 
`json:"clusterStrategy,omitempty"` @@ -159,11 +173,29 @@ func (in *NodeExtensionStrategy) DeepCopy() *NodeExtensionStrategy { return out } +// CalculatePolicy defines the calculate policy for resource overcommitment. +// Default is "usage". type CalculatePolicy string const ( - CalculateByPodUsage CalculatePolicy = "usage" + // CalculateByPodUsage is the calculate policy according to the pod resource usage. + // When the policy="usage", the low-priority (LP) resources are calculated according to the high-priority (HP) pods' + // usages, so LP pod can reclaim the requested but unused resources of the HP pods. + // It is the default policy where the resources are over-committed between priority bands. + CalculateByPodUsage CalculatePolicy = "usage" + // CalculateByPodRequest is the calculate policy according to the pod resource request. + // When the policy="request", the low-priority (LP) resources are calculated according to the high-priority (HP) + // pods' requests, so LP pod can allocate the unallocated resources of the HP pods but can NOT reclaim the + // requested but unused resources of the HP pods. + // It is the policy where the resources are NOT over-committed between priority bands. CalculateByPodRequest CalculatePolicy = "request" + // CalculateByPodMaxUsageRequest is the calculate policy according to the maximum of the pod usage and request. + // When the policy="maxUsageRequest", the low-priority (LP) resources are calculated according to the sum of the + // high-priority (HP) pods' maximum of its usage and its request, so LP pod can allocate the resources both + // unallocated and unused by the HP pods. + // It is the conservative policy where the resources are NOT over-committed between priority bands while HP's usage + // is also protected from the overcommitment. 
+ CalculateByPodMaxUsageRequest CalculatePolicy = "maxUsageRequest" ) // +k8s:deepcopy-gen=true @@ -195,17 +227,23 @@ func (in *ExtraFields) DeepCopy() *ExtraFields { // ColocationStrategy defines the strategy for node colocation. // +k8s:deepcopy-gen=true type ColocationStrategy struct { - Enable *bool `json:"enable,omitempty"` - MetricAggregateDurationSeconds *int64 `json:"metricAggregateDurationSeconds,omitempty" validate:"omitempty,min=1"` - MetricReportIntervalSeconds *int64 `json:"metricReportIntervalSeconds,omitempty" validate:"omitempty,min=1"` - MetricAggregatePolicy *slov1alpha1.AggregatePolicy `json:"metricAggregatePolicy,omitempty"` - - CPUReclaimThresholdPercent *int64 `json:"cpuReclaimThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"` + Enable *bool `json:"enable,omitempty"` + MetricAggregateDurationSeconds *int64 `json:"metricAggregateDurationSeconds,omitempty" validate:"omitempty,min=1"` + MetricReportIntervalSeconds *int64 `json:"metricReportIntervalSeconds,omitempty" validate:"omitempty,min=1"` + MetricAggregatePolicy *slov1alpha1.AggregatePolicy `json:"metricAggregatePolicy,omitempty"` + MetricMemoryCollectPolicy *slov1alpha1.NodeMemoryCollectPolicy `json:"metricMemoryCollectPolicy,omitempty"` + + CPUReclaimThresholdPercent *int64 `json:"cpuReclaimThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"` + // CPUCalculatePolicy determines the calculation policy of the CPU resources for the Batch pods. + // Supported: "usage" (default), "maxUsageRequest". 
+ CPUCalculatePolicy *CalculatePolicy `json:"cpuCalculatePolicy,omitempty"` MemoryReclaimThresholdPercent *int64 `json:"memoryReclaimThresholdPercent,omitempty" validate:"omitempty,min=0,max=100"` - MemoryCalculatePolicy *CalculatePolicy `json:"memoryCalculatePolicy,omitempty"` - DegradeTimeMinutes *int64 `json:"degradeTimeMinutes,omitempty" validate:"omitempty,min=1"` - UpdateTimeThresholdSeconds *int64 `json:"updateTimeThresholdSeconds,omitempty" validate:"omitempty,min=1"` - ResourceDiffThreshold *float64 `json:"resourceDiffThreshold,omitempty" validate:"omitempty,gt=0,max=1"` + // MemoryCalculatePolicy determines the calculation policy of the memory resources for the Batch pods. + // Supported: "usage" (default), "request", "maxUsageRequest". + MemoryCalculatePolicy *CalculatePolicy `json:"memoryCalculatePolicy,omitempty"` + DegradeTimeMinutes *int64 `json:"degradeTimeMinutes,omitempty" validate:"omitempty,min=1"` + UpdateTimeThresholdSeconds *int64 `json:"updateTimeThresholdSeconds,omitempty" validate:"omitempty,min=1"` + ResourceDiffThreshold *float64 `json:"resourceDiffThreshold,omitempty" validate:"omitempty,gt=0,max=1"` // MidCPUThresholdPercent defines the maximum percentage of the Mid-tier cpu resource dividing the node allocatable. // MidCPUAllocatable <= NodeCPUAllocatable * MidCPUThresholdPercent / 100. @@ -217,6 +255,44 @@ type ColocationStrategy struct { ColocationStrategyExtender `json:",inline"` // for third-party extension } +// CPUNormalizationCfg is the cluster-level configuration of the CPU normalization strategy. +// +k8s:deepcopy-gen=true +type CPUNormalizationCfg struct { + CPUNormalizationStrategy `json:",inline"` + NodeConfigs []NodeCPUNormalizationCfg `json:"nodeConfigs,omitempty" validate:"dive"` +} + +// NodeCPUNormalizationCfg is the node-level configuration of the CPU normalization strategy. 
+// +k8s:deepcopy-gen=true +type NodeCPUNormalizationCfg struct { + NodeCfgProfile `json:",inline"` + CPUNormalizationStrategy +} + +// CPUNormalizationStrategy is the CPU normalization strategy. +// +k8s:deepcopy-gen=true +type CPUNormalizationStrategy struct { + // Enable defines whether the cpu normalization is enabled. + // If set to false, the node cpu normalization ratio will be removed. + Enable *bool `json:"enable,omitempty"` + // RatioModel defines the cpu normalization ratio of each CPU model. + // It maps the CPUModel of BasicInfo into the ratios. + RatioModel map[string]ModelRatioCfg `json:"ratioModel,omitempty"` +} + +// ModelRatioCfg defines the cpu normalization ratio of a CPU model. +// +k8s:deepcopy-gen=true +type ModelRatioCfg struct { + // BaseRatio defines the ratio of which the CPU neither enables Hyper Thread, nor the Turbo. + BaseRatio *float64 `json:"baseRatio,omitempty"` + // HyperThreadEnabledRatio defines the ratio of which the CPU enables the Hyper Thread. + HyperThreadEnabledRatio *float64 `json:"hyperThreadEnabledRatio,omitempty"` + // TurboEnabledRatio defines the ratio of which the CPU enables the Turbo. + TurboEnabledRatio *float64 `json:"turboEnabledRatio,omitempty"` + // HyperThreadTurboEnabledRatio defines the ratio of which the CPU enables the Hyper Thread and Turbo. 
+ HyperThreadTurboEnabledRatio *float64 `json:"hyperThreadTurboEnabledRatio,omitempty"` +} + /* Koordinator uses configmap to manage the configuration of SLO, the configmap is stored in /, with the following keys respectively: @@ -244,6 +320,7 @@ data: "15m" ] }, + "metricMemoryCollectPolicy": "usageWithoutPageCache", "cpuReclaimThresholdPercent": 60, "memoryReclaimThresholdPercent": 65, "memoryCalculatePolicy": "usage", @@ -453,6 +530,72 @@ data: } ] } + host-application-config: | + { + "applications": [ + { + "name": "nginx", + "priority": "koord-prod", + "qos": "LS", + "cgroupPath": { + "base": "CgroupRoot", + "parentDir": "host-latency-sensitive/", + "relativePath": "nginx/" + } + } + ], + "nodeConfigs": [ + { + "name": "colocation-pool", + "nodeSelector": { + "matchLabels": { + "node-pool": "colocation" + } + }, + "applications": [ + { + "name": "nginx", + "priority": "koord-prod", + "qos": "LS", + "cgroupPath": { + "base": "CgroupRoot", + "parentDir": "host-latency-sensitive/", + "relativePath": "nginx/" + } + } + ] + } + ] + } + cpu-normalization-config: | + { + "enable": false, + "ratioModel": { + "Intel(R) Xeon(R) Platinum XXX CPU @ 2.50GHz": { + "baseRatio": 1.5, + "hyperThreadEnabledRatio": 1.0, + "turboEnabledRatio": 1.8, + "hyperThreadTurboEnabledRatio": 1.2 + }, + "Intel(R) Xeon(R) Platinum YYY CPU @ 2.50GHz": { + "baseRatio": 1.8, + "hyperThreadEnabledRatio": 1.2, + "turboEnabledRatio": 2.16, + "hyperThreadTurboEnabledRatio": 1.44 + } + }, + "nodeConfigs": [ + { + "name": "test", + "nodeSelector": { + "matchLabels": { + "AAA": "BBB" + } + }, + "enable": true + } + ] + } kind: ConfigMap metadata: annotations: diff --git a/configuration/zz_generated.deepcopy.go b/configuration/zz_generated.deepcopy.go index bcb0b99..f98ec67 100644 --- a/configuration/zz_generated.deepcopy.go +++ b/configuration/zz_generated.deepcopy.go @@ -53,6 +53,56 @@ func (in *CPUBurstCfg) DeepCopy() *CPUBurstCfg { return out } +// DeepCopyInto is an autogenerated deepcopy
function, copying the receiver, writing into out. in must be non-nil. +func (in *CPUNormalizationCfg) DeepCopyInto(out *CPUNormalizationCfg) { + *out = *in + in.CPUNormalizationStrategy.DeepCopyInto(&out.CPUNormalizationStrategy) + if in.NodeConfigs != nil { + in, out := &in.NodeConfigs, &out.NodeConfigs + *out = make([]NodeCPUNormalizationCfg, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CPUNormalizationCfg. +func (in *CPUNormalizationCfg) DeepCopy() *CPUNormalizationCfg { + if in == nil { + return nil + } + out := new(CPUNormalizationCfg) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CPUNormalizationStrategy) DeepCopyInto(out *CPUNormalizationStrategy) { + *out = *in + if in.Enable != nil { + in, out := &in.Enable, &out.Enable + *out = new(bool) + **out = **in + } + if in.RatioModel != nil { + in, out := &in.RatioModel, &out.RatioModel + *out = make(map[string]ModelRatioCfg, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CPUNormalizationStrategy. +func (in *CPUNormalizationStrategy) DeepCopy() *CPUNormalizationStrategy { + if in == nil { + return nil + } + out := new(CPUNormalizationStrategy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ColocationCfg) DeepCopyInto(out *ColocationCfg) { *out = *in @@ -99,11 +149,21 @@ func (in *ColocationStrategy) DeepCopyInto(out *ColocationStrategy) { *out = new(v1alpha1.AggregatePolicy) (*in).DeepCopyInto(*out) } + if in.MetricMemoryCollectPolicy != nil { + in, out := &in.MetricMemoryCollectPolicy, &out.MetricMemoryCollectPolicy + *out = new(v1alpha1.NodeMemoryCollectPolicy) + **out = **in + } if in.CPUReclaimThresholdPercent != nil { in, out := &in.CPUReclaimThresholdPercent, &out.CPUReclaimThresholdPercent *out = new(int64) **out = **in } + if in.CPUCalculatePolicy != nil { + in, out := &in.CPUCalculatePolicy, &out.CPUCalculatePolicy + *out = new(CalculatePolicy) + **out = **in + } if in.MemoryReclaimThresholdPercent != nil { in, out := &in.MemoryReclaimThresholdPercent, &out.MemoryReclaimThresholdPercent *out = new(int64) @@ -190,6 +250,70 @@ func (in *ExtensionCfgMap) DeepCopy() *ExtensionCfgMap { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostApplicationCfg) DeepCopyInto(out *HostApplicationCfg) { + *out = *in + if in.Applications != nil { + in, out := &in.Applications, &out.Applications + *out = make([]v1alpha1.HostApplicationSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.NodeConfigs != nil { + in, out := &in.NodeConfigs, &out.NodeConfigs + *out = make([]NodeHostApplicationCfg, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostApplicationCfg. +func (in *HostApplicationCfg) DeepCopy() *HostApplicationCfg { + if in == nil { + return nil + } + out := new(HostApplicationCfg) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ModelRatioCfg) DeepCopyInto(out *ModelRatioCfg) { + *out = *in + if in.BaseRatio != nil { + in, out := &in.BaseRatio, &out.BaseRatio + *out = new(float64) + **out = **in + } + if in.HyperThreadEnabledRatio != nil { + in, out := &in.HyperThreadEnabledRatio, &out.HyperThreadEnabledRatio + *out = new(float64) + **out = **in + } + if in.TurboEnabledRatio != nil { + in, out := &in.TurboEnabledRatio, &out.TurboEnabledRatio + *out = new(float64) + **out = **in + } + if in.HyperThreadTurboEnabledRatio != nil { + in, out := &in.HyperThreadTurboEnabledRatio, &out.HyperThreadTurboEnabledRatio + *out = new(float64) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelRatioCfg. +func (in *ModelRatioCfg) DeepCopy() *ModelRatioCfg { + if in == nil { + return nil + } + out := new(ModelRatioCfg) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NodeCPUBurstCfg) DeepCopyInto(out *NodeCPUBurstCfg) { *out = *in @@ -211,6 +335,23 @@ func (in *NodeCPUBurstCfg) DeepCopy() *NodeCPUBurstCfg { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeCPUNormalizationCfg) DeepCopyInto(out *NodeCPUNormalizationCfg) { + *out = *in + in.NodeCfgProfile.DeepCopyInto(&out.NodeCfgProfile) + in.CPUNormalizationStrategy.DeepCopyInto(&out.CPUNormalizationStrategy) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeCPUNormalizationCfg. +func (in *NodeCPUNormalizationCfg) DeepCopy() *NodeCPUNormalizationCfg { + if in == nil { + return nil + } + out := new(NodeCPUNormalizationCfg) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *NodeCfgProfile) DeepCopyInto(out *NodeCfgProfile) { *out = *in @@ -248,6 +389,29 @@ func (in *NodeColocationCfg) DeepCopy() *NodeColocationCfg { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeHostApplicationCfg) DeepCopyInto(out *NodeHostApplicationCfg) { + *out = *in + in.NodeCfgProfile.DeepCopyInto(&out.NodeCfgProfile) + if in.Applications != nil { + in, out := &in.Applications, &out.Applications + *out = make([]v1alpha1.HostApplicationSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeHostApplicationCfg. +func (in *NodeHostApplicationCfg) DeepCopy() *NodeHostApplicationCfg { + if in == nil { + return nil + } + out := new(NodeHostApplicationCfg) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NodeResourceQOSStrategy) DeepCopyInto(out *NodeResourceQOSStrategy) { *out = *in diff --git a/extension/constants.go b/extension/constants.go index d5d1ca3..17cce79 100644 --- a/extension/constants.go +++ b/extension/constants.go @@ -26,6 +26,7 @@ const ( SchedulingDomainPrefix = "scheduling.koordinator.sh" // NodeDomainPrefix represents the node domain prefix NodeDomainPrefix = "node.koordinator.sh" + PodDomainPrefix = "pod.koordinator.sh" LabelPodQoS = DomainPrefix + "qosClass" LabelPodPriority = DomainPrefix + "priority" @@ -35,6 +36,10 @@ const ( LabelPodPriorityClass = DomainPrefix + "priority-class" LabelManagedBy = "app.kubernetes.io/managed-by" + + // LabelPodMutatingUpdate is a label key that pods with `pod.koordinator.sh/mutating-update=true` will + // be mutated by Koordinator webhook when updating. 
+ LabelPodMutatingUpdate = PodDomainPrefix + "/mutating-update" ) type AggregationType string diff --git a/extension/cpu_normalization.go b/extension/cpu_normalization.go new file mode 100644 index 0000000..c7b00e9 --- /dev/null +++ b/extension/cpu_normalization.go @@ -0,0 +1,144 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package extension + +import ( + "encoding/json" + "fmt" + "strconv" + + corev1 "k8s.io/api/core/v1" + "k8s.io/utils/pointer" +) + +const ( + // AnnotationCPUNormalizationRatio denotes the cpu normalization ratio of the node. + AnnotationCPUNormalizationRatio = NodeDomainPrefix + "/cpu-normalization-ratio" + + // LabelCPUNormalizationEnabled indicates whether the cpu normalization is enabled on the node. + // If both the label and node-level CPUNormalizationStrategy is set, the label overrides the strategy. + LabelCPUNormalizationEnabled = NodeDomainPrefix + "/cpu-normalization-enabled" + + // AnnotationCPUBasicInfo denotes the basic CPU info of the node. + AnnotationCPUBasicInfo = NodeDomainPrefix + "/cpu-basic-info" + + // NormalizationRatioDiffEpsilon is the min difference between two cpu normalization ratios. + NormalizationRatioDiffEpsilon = 0.01 +) + +// GetCPUNormalizationRatio gets the cpu normalization ratio from the node. +// It returns -1 without an error when the cpu normalization annotation is missing. 
+func GetCPUNormalizationRatio(node *corev1.Node) (float64, error) { + if node.Annotations == nil { + return -1, nil + } + s, ok := node.Annotations[AnnotationCPUNormalizationRatio] + if !ok { + return -1, nil + } + + v, err := strconv.ParseFloat(s, 64) + if err != nil { + return -1, fmt.Errorf("parse cpu normalization ratio failed, err: %w", err) + } + if v <= 0 { + return -1, fmt.Errorf("illegal cpu normalization ratio: %v", v) + } + + return v, nil +} + +// SetCPUNormalizationRatio sets the node annotation according to the cpu-normalization-ratio. +// It returns true if the annotation value changes. +// NOTE: The ratio will be converted to string with the precision 2. e.g. 3.1415926 -> 3.14. +func SetCPUNormalizationRatio(node *corev1.Node, ratio float64) bool { + s := strconv.FormatFloat(ratio, 'f', 2, 64) + if node.Annotations == nil { + node.Annotations = map[string]string{} + } + if old := node.Annotations[AnnotationCPUNormalizationRatio]; old == s { + return false + } + + node.Annotations[AnnotationCPUNormalizationRatio] = s + return true +} + +func GetCPUNormalizationEnabled(node *corev1.Node) (*bool, error) { + if node.Labels == nil { + return nil, nil + } + s, ok := node.Labels[LabelCPUNormalizationEnabled] + if !ok { + return nil, nil + } + v, err := strconv.ParseBool(s) + if err != nil { + return nil, fmt.Errorf("parse cpu normalization enabled failed, err: %w", err) + } + return pointer.Bool(v), nil +} + +func IsCPUNormalizationRatioDifferent(old, new float64) bool { + return old > new+NormalizationRatioDiffEpsilon || old < new-NormalizationRatioDiffEpsilon +} + +// CPUBasicInfo describes the cpu basic features and status.
+type CPUBasicInfo struct { + CPUModel string `json:"cpuModel,omitempty"` + HyperThreadEnabled bool `json:"hyperThreadEnabled,omitempty"` + TurboEnabled bool `json:"turboEnabled,omitempty"` + CatL3CbmMask string `json:"catL3CbmMask,omitempty"` + VendorID string `json:"vendorID,omitempty"` +} + +func (c *CPUBasicInfo) Key() string { + return fmt.Sprintf("%s_%v_%v", c.CPUModel, c.HyperThreadEnabled, c.TurboEnabled) +} + +// GetCPUBasicInfo gets the cpu basic info from the node-level annotations. +// It returns nil info without an error when the cpu basic info annotation is missing. +func GetCPUBasicInfo(annotations map[string]string) (*CPUBasicInfo, error) { + if annotations == nil { + return nil, nil + } + s, ok := annotations[AnnotationCPUBasicInfo] + if !ok { + return nil, nil + } + + var info CPUBasicInfo + err := json.Unmarshal([]byte(s), &info) + if err != nil { + return nil, fmt.Errorf("unmarshal cpu basic info failed, err: %w", err) + } + return &info, nil +} + +// SetCPUBasicInfo sets the cpu basic info at the node-level annotations. +// It returns true if the annotations changes. +func SetCPUBasicInfo(annotations map[string]string, info *CPUBasicInfo) bool { + b, _ := json.Marshal(info) + s := string(b) + + if old := annotations[AnnotationCPUBasicInfo]; s == old { + return false + } + + annotations[AnnotationCPUBasicInfo] = s + return true +} diff --git a/extension/device_share.go b/extension/device_share.go new file mode 100644 index 0000000..a90c401 --- /dev/null +++ b/extension/device_share.go @@ -0,0 +1,226 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package extension + +import ( + "encoding/json" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + schedulingv1alpha1 "github.com/koordinator-sh/apis/scheduling/v1alpha1" +) + +const ( + // AnnotationDeviceAllocated represents the device allocated by the pod + AnnotationDeviceAllocated = SchedulingDomainPrefix + "/device-allocated" + // AnnotationDeviceAllocateHint guides the scheduler in selecting and allocating specialized hardware resources + AnnotationDeviceAllocateHint = SchedulingDomainPrefix + "/device-allocate-hint" + // AnnotationDeviceJointAllocate guides the scheduler joint-allocates devices + AnnotationDeviceJointAllocate = SchedulingDomainPrefix + "/device-joint-allocate" +) + +const ( + ResourceNvidiaGPU corev1.ResourceName = "nvidia.com/gpu" + ResourceHygonDCU corev1.ResourceName = "dcu.com/gpu" + ResourceRDMA corev1.ResourceName = DomainPrefix + "rdma" + ResourceFPGA corev1.ResourceName = DomainPrefix + "fpga" + ResourceGPU corev1.ResourceName = DomainPrefix + "gpu" + ResourceGPUShared corev1.ResourceName = DomainPrefix + "gpu.shared" + ResourceGPUCore corev1.ResourceName = DomainPrefix + "gpu-core" + ResourceGPUMemory corev1.ResourceName = DomainPrefix + "gpu-memory" + ResourceGPUMemoryRatio corev1.ResourceName = DomainPrefix + "gpu-memory-ratio" +) + +const ( + LabelGPUModel string = NodeDomainPrefix + "/gpu-model" + LabelGPUDriverVersion string = NodeDomainPrefix + "/gpu-driver-version" +) + +// DeviceAllocations would be injected into Pod as form of annotation during Pre-bind stage. 
+/* +{ + "gpu": [ + { + "minor": 0, + "resources": { + "koordinator.sh/gpu-core": 100, + "koordinator.sh/gpu-mem-ratio": 100, + "koordinator.sh/gpu-mem": "16Gi" + } + }, + { + "minor": 1, + "resources": { + "koordinator.sh/gpu-core": 100, + "koordinator.sh/gpu-mem-ratio": 100, + "koordinator.sh/gpu-mem": "16Gi" + } + } + ] +} +*/ +type DeviceAllocations map[schedulingv1alpha1.DeviceType][]*DeviceAllocation + +type DeviceAllocation struct { + Minor int32 `json:"minor"` + Resources corev1.ResourceList `json:"resources"` + Extension *DeviceAllocationExtension `json:"extension,omitempty"` +} + +type DeviceAllocationExtension struct { + VirtualFunctions []VirtualFunction `json:"vfs,omitempty"` +} + +type VirtualFunction struct { + Minor int `json:"minor,omitempty"` + BusID string `json:"busID,omitempty"` +} + +type DeviceJointAllocate struct { + // DeviceTypes indicates that the specified types of devices are grouped and allocated according to topology. + DeviceTypes []schedulingv1alpha1.DeviceType `json:"deviceTypes,omitempty"` + // RequiredScope specifies the allocation scope required for the joint allocation of devices. + // It defines the granularity at which devices should be joint-allocated, e.g. in the same PCIe. + RequiredScope DeviceJointAllocateScope `json:"requiredScope,omitempty"` +} + +type DeviceJointAllocateScope string + +const ( + SamePCIeDeviceJointAllocateScope DeviceJointAllocateScope = "SamePCIe" +) + +type DeviceAllocateHints map[schedulingv1alpha1.DeviceType]*DeviceHint + +type DeviceHint struct { + // Selector selects devices by label selector. + Selector *metav1.LabelSelector `json:"selector,omitempty"` + // VFSelector selects VFs by label selector. + // If specified the VFSelector, scheduler will allocate VFs from PFs which satisfy VFSelector. 
+ VFSelector *metav1.LabelSelector `json:"vfSelector,omitempty"` + // AllocateStrategy controls the allocation strategy + AllocateStrategy DeviceAllocateStrategy `json:"allocateStrategy,omitempty"` + // ExclusivePolicy indicates the exclusive policy. + ExclusivePolicy DeviceExclusivePolicy `json:"exclusivePolicy,omitempty"` +} + +type DeviceAllocateStrategy string + +const ( + ApplyForAllDeviceAllocateStrategy DeviceAllocateStrategy = "ApplyForAll" + RequestsAsCountAllocateStrategy DeviceAllocateStrategy = "RequestsAsCount" +) + +type DeviceExclusivePolicy string + +const ( + // DeviceLevelDeviceExclusivePolicy represents mutual exclusion in the device instance dimension + DeviceLevelDeviceExclusivePolicy DeviceExclusivePolicy = "DeviceLevel" + // PCIExpressLevelDeviceExclusivePolicy represents mutual exclusion in the PCIe dimension + PCIExpressLevelDeviceExclusivePolicy DeviceExclusivePolicy = "PCIeLevel" +) + +func GetDeviceAllocations(podAnnotations map[string]string) (DeviceAllocations, error) { + deviceAllocations := DeviceAllocations{} + data, ok := podAnnotations[AnnotationDeviceAllocated] + if !ok { + return nil, nil + } + err := json.Unmarshal([]byte(data), &deviceAllocations) + if err != nil { + return nil, err + } + return deviceAllocations, nil +} + +func SetDeviceAllocations(obj metav1.Object, allocations DeviceAllocations) error { + annotations := obj.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + + data, err := json.Marshal(allocations) + if err != nil { + return err + } + + annotations[AnnotationDeviceAllocated] = string(data) + obj.SetAnnotations(annotations) + return nil +} + +func SetDeviceAllocateHints(obj metav1.Object, hint DeviceAllocateHints) error { + if hint == nil { + return nil + } + + data, err := json.Marshal(hint) + if err != nil { + return err + } + annotations := obj.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[AnnotationDeviceAllocateHint] = 
string(data) + obj.SetAnnotations(annotations) + return nil +} + +func GetDeviceAllocateHints(annotations map[string]string) (DeviceAllocateHints, error) { + var hint DeviceAllocateHints + if val, ok := annotations[AnnotationDeviceAllocateHint]; ok { + hint = DeviceAllocateHints{} + err := json.Unmarshal([]byte(val), &hint) + if err != nil { + return nil, err + } + } + return hint, nil +} + +func SetDeviceJointAllocate(obj metav1.Object, jointAllocate *DeviceJointAllocate) error { + if jointAllocate == nil { + return nil + } + + data, err := json.Marshal(jointAllocate) + if err != nil { + return err + } + annotations := obj.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[AnnotationDeviceJointAllocate] = string(data) + obj.SetAnnotations(annotations) + return nil +} + +func GetDeviceJointAllocate(annotations map[string]string) (*DeviceJointAllocate, error) { + val, ok := annotations[AnnotationDeviceJointAllocate] + if !ok { + return nil, nil + } + var jointAllocate DeviceJointAllocate + err := json.Unmarshal([]byte(val), &jointAllocate) + if err != nil { + return nil, err + } + return &jointAllocate, nil +} diff --git a/extension/elastic_quota.go b/extension/elastic_quota.go index d1fd292..b061eb8 100644 --- a/extension/elastic_quota.go +++ b/extension/elastic_quota.go @@ -27,22 +27,36 @@ import ( // RootQuotaName means quotaTree's root\head. 
const ( - SystemQuotaName = "koordinator-system-quota" - RootQuotaName = "koordinator-root-quota" - DefaultQuotaName = "koordinator-default-quota" - QuotaKoordinatorPrefix = "quota.scheduling.koordinator.sh" - LabelQuotaIsParent = QuotaKoordinatorPrefix + "/is-parent" - LabelQuotaParent = QuotaKoordinatorPrefix + "/parent" - LabelAllowLentResource = QuotaKoordinatorPrefix + "/allow-lent-resource" - LabelQuotaName = QuotaKoordinatorPrefix + "/name" - AnnotationSharedWeight = QuotaKoordinatorPrefix + "/shared-weight" - AnnotationRuntime = QuotaKoordinatorPrefix + "/runtime" - AnnotationRequest = QuotaKoordinatorPrefix + "/request" + SystemQuotaName = "koordinator-system-quota" + RootQuotaName = "koordinator-root-quota" + DefaultQuotaName = "koordinator-default-quota" + QuotaKoordinatorPrefix = "quota.scheduling.koordinator.sh" + LabelQuotaIsParent = QuotaKoordinatorPrefix + "/is-parent" + LabelQuotaParent = QuotaKoordinatorPrefix + "/parent" + LabelAllowLentResource = QuotaKoordinatorPrefix + "/allow-lent-resource" + LabelQuotaName = QuotaKoordinatorPrefix + "/name" + LabelQuotaProfile = QuotaKoordinatorPrefix + "/profile" + LabelQuotaIsRoot = QuotaKoordinatorPrefix + "/is-root" + LabelQuotaTreeID = QuotaKoordinatorPrefix + "/tree-id" + LabelQuotaIgnoreDefaultTree = QuotaKoordinatorPrefix + "/ignore-default-tree" + LabelPreemptible = QuotaKoordinatorPrefix + "/preemptible" + LabelAllowForceUpdate = QuotaKoordinatorPrefix + "/allow-force-update" + AnnotationSharedWeight = QuotaKoordinatorPrefix + "/shared-weight" + AnnotationRuntime = QuotaKoordinatorPrefix + "/runtime" + AnnotationRequest = QuotaKoordinatorPrefix + "/request" + AnnotationChildRequest = QuotaKoordinatorPrefix + "/child-request" + AnnotationResourceKeys = QuotaKoordinatorPrefix + "/resource-keys" + AnnotationTotalResource = QuotaKoordinatorPrefix + "/total-resource" + AnnotationQuotaNamespaces = QuotaKoordinatorPrefix + "/namespaces" + AnnotationGuaranteed = QuotaKoordinatorPrefix + "/guaranteed" + 
AnnotationAllocated = QuotaKoordinatorPrefix + "/allocated" + AnnotationNonPreemptibleRequest = QuotaKoordinatorPrefix + "/non-preemptible-request" + AnnotationNonPreemptibleUsed = QuotaKoordinatorPrefix + "/non-preemptible-used" ) func GetParentQuotaName(quota *v1alpha1.ElasticQuota) string { parentName := quota.Labels[LabelQuotaParent] - if parentName == "" { + if parentName == "" && quota.Name != RootQuotaName { return RootQuotaName //default return RootQuotaName } return parentName @@ -56,6 +70,22 @@ func IsAllowLentResource(quota *v1alpha1.ElasticQuota) bool { return quota.Labels[LabelAllowLentResource] != "false" } +func IsAllowForceUpdate(quota *v1alpha1.ElasticQuota) bool { + return quota.Labels[LabelAllowForceUpdate] == "true" +} + +func IsTreeRootQuota(quota *v1alpha1.ElasticQuota) bool { + return quota.Labels[LabelQuotaIsRoot] == "true" +} + +func IsPodNonPreemptible(pod *corev1.Pod) bool { + return pod.Labels[LabelPreemptible] == "false" +} + +func GetQuotaTreeID(quota *v1alpha1.ElasticQuota) string { + return quota.Labels[LabelQuotaTreeID] +} + func GetSharedWeight(quota *v1alpha1.ElasticQuota) corev1.ResourceList { value, exist := quota.Annotations[AnnotationSharedWeight] if exist { @@ -77,6 +107,91 @@ func IsForbiddenModify(quota *v1alpha1.ElasticQuota) (bool, error) { return false, nil } -var GetQuotaName = func(pod *corev1.Pod) string { +func GetQuotaName(pod *corev1.Pod) string { return pod.Labels[LabelQuotaName] } + +func GetAnnotationQuotaNamespaces(quota *v1alpha1.ElasticQuota) []string { + if quota.Annotations == nil { + return nil + } + if quota.Annotations[AnnotationQuotaNamespaces] == "" { + return nil + } + + var namespaces []string + if err := json.Unmarshal([]byte(quota.Annotations[AnnotationQuotaNamespaces]), &namespaces); err != nil { + return nil + } + return namespaces +} + +func GetNonPreemptibleRequest(quota *v1alpha1.ElasticQuota) (corev1.ResourceList, error) { + nonPreemptibleRequest := corev1.ResourceList{} + if 
quota.Annotations[AnnotationNonPreemptibleRequest] != "" { + if err := json.Unmarshal([]byte(quota.Annotations[AnnotationNonPreemptibleRequest]), &nonPreemptibleRequest); err != nil { + return nonPreemptibleRequest, err + } + } + return nonPreemptibleRequest, nil +} + +func GetNonPreemptibleUsed(quota *v1alpha1.ElasticQuota) (corev1.ResourceList, error) { + nonPreemptibleUsed := corev1.ResourceList{} + if quota.Annotations[AnnotationNonPreemptibleUsed] != "" { + if err := json.Unmarshal([]byte(quota.Annotations[AnnotationNonPreemptibleUsed]), &nonPreemptibleUsed); err != nil { + return nonPreemptibleUsed, err + } + } + return nonPreemptibleUsed, nil +} + +func GetGuaranteed(quota *v1alpha1.ElasticQuota) (corev1.ResourceList, error) { + guaranteed := corev1.ResourceList{} + if quota.Annotations[AnnotationGuaranteed] != "" { + if err := json.Unmarshal([]byte(quota.Annotations[AnnotationGuaranteed]), &guaranteed); err != nil { + return guaranteed, err + } + } + return guaranteed, nil +} + +func GetAllocated(quota *v1alpha1.ElasticQuota) (corev1.ResourceList, error) { + allocated := corev1.ResourceList{} + if quota.Annotations[AnnotationAllocated] != "" { + if err := json.Unmarshal([]byte(quota.Annotations[AnnotationAllocated]), &allocated); err != nil { + return allocated, err + } + } + return allocated, nil +} + +func GetRuntime(quota *v1alpha1.ElasticQuota) (corev1.ResourceList, error) { + runtime := corev1.ResourceList{} + if quota.Annotations[AnnotationRuntime] != "" { + if err := json.Unmarshal([]byte(quota.Annotations[AnnotationRuntime]), &runtime); err != nil { + return runtime, err + } + } + return runtime, nil +} + +func GetRequest(quota *v1alpha1.ElasticQuota) (corev1.ResourceList, error) { + request := corev1.ResourceList{} + if quota.Annotations[AnnotationRequest] != "" { + if err := json.Unmarshal([]byte(quota.Annotations[AnnotationRequest]), &request); err != nil { + return request, err + } + } + return request, nil +} + +func GetChildRequest(quota 
*v1alpha1.ElasticQuota) (corev1.ResourceList, error) { + request := corev1.ResourceList{} + if quota.Annotations[AnnotationChildRequest] != "" { + if err := json.Unmarshal([]byte(quota.Annotations[AnnotationChildRequest]), &request); err != nil { + return request, err + } + } + return request, nil +} diff --git a/extension/scheduling.go b/extension/load_aware.go similarity index 60% rename from extension/scheduling.go rename to extension/load_aware.go index f569c4d..46b2745 100644 --- a/extension/scheduling.go +++ b/extension/load_aware.go @@ -21,17 +21,12 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - schedulingv1alpha1 "github.com/koordinator-sh/apis/scheduling/v1alpha1" ) const ( // AnnotationCustomUsageThresholds represents the user-defined resource utilization threshold. // For specific value definitions, see CustomUsageThresholds AnnotationCustomUsageThresholds = SchedulingDomainPrefix + "/usage-thresholds" - - // AnnotationDeviceAllocated represents the device allocated by the pod - AnnotationDeviceAllocated = SchedulingDomainPrefix + "/device-allocated" ) // CustomUsageThresholds supports user-defined node resource utilization thresholds. @@ -65,63 +60,3 @@ func GetCustomUsageThresholds(node *corev1.Node) (*CustomUsageThresholds, error) } return usageThresholds, nil } - -// DeviceAllocations would be injected into Pod as form of annotation during Pre-bind stage. 
-/* -{ - "gpu": [ - { - "minor": 0, - "resources": { - "koordinator.sh/gpu-core": 100, - "koordinator.sh/gpu-mem-ratio": 100, - "koordinator.sh/gpu-mem": "16Gi" - } - }, - { - "minor": 1, - "resources": { - "koordinator.sh/gpu-core": 100, - "koordinator.sh/gpu-mem-ratio": 100, - "koordinator.sh/gpu-mem": "16Gi" - } - } - ] -} -*/ -type DeviceAllocations map[schedulingv1alpha1.DeviceType][]*DeviceAllocation - -type DeviceAllocation struct { - Minor int32 `json:"minor"` - Resources corev1.ResourceList `json:"resources"` - Extension json.RawMessage `json:"extension,omitempty"` -} - -func GetDeviceAllocations(podAnnotations map[string]string) (DeviceAllocations, error) { - deviceAllocations := DeviceAllocations{} - data, ok := podAnnotations[AnnotationDeviceAllocated] - if !ok { - return nil, nil - } - err := json.Unmarshal([]byte(data), &deviceAllocations) - if err != nil { - return nil, err - } - return deviceAllocations, nil -} - -func SetDeviceAllocations(obj metav1.Object, allocations DeviceAllocations) error { - annotations := obj.GetAnnotations() - if annotations == nil { - annotations = map[string]string{} - } - - data, err := json.Marshal(allocations) - if err != nil { - return err - } - - annotations[AnnotationDeviceAllocated] = string(data) - obj.SetAnnotations(annotations) - return nil -} diff --git a/extension/node.go b/extension/node.go deleted file mode 100644 index da42910..0000000 --- a/extension/node.go +++ /dev/null @@ -1,155 +0,0 @@ -/* -Copyright 2022 The Koordinator Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package extension - -import ( - "encoding/json" - - "k8s.io/apimachinery/pkg/types" -) - -const ( - // AnnotationNodeCPUTopology describes the detailed CPU topology. - AnnotationNodeCPUTopology = NodeDomainPrefix + "/cpu-topology" - // AnnotationNodeCPUAllocs describes K8s Guaranteed Pods. - AnnotationNodeCPUAllocs = NodeDomainPrefix + "/pod-cpu-allocs" - // AnnotationNodeCPUSharedPools describes the CPU Shared Pool defined by Koordinator. - // The shared pool is mainly used by Koordinator LS Pods or K8s Burstable Pods. - AnnotationNodeCPUSharedPools = NodeDomainPrefix + "/cpu-shared-pools" - - // LabelNodeCPUBindPolicy constrains how to bind CPU logical CPUs when scheduling. - LabelNodeCPUBindPolicy = NodeDomainPrefix + "/cpu-bind-policy" - // LabelNodeNUMAAllocateStrategy indicates how to choose satisfied NUMA Nodes when scheduling. - LabelNodeNUMAAllocateStrategy = NodeDomainPrefix + "/numa-allocate-strategy" -) - -const ( - // NodeCPUBindPolicyNone does not perform any bind policy - NodeCPUBindPolicyNone = "None" - // NodeCPUBindPolicyFullPCPUsOnly requires that the scheduler must allocate full physical cores. - // Equivalent to kubelet CPU manager policy option full-pcpus-only=true. 
- NodeCPUBindPolicyFullPCPUsOnly = "FullPCPUsOnly" - // NodeCPUBindPolicySpreadByPCPUs requires that the scheduler must evenly allocate logical cpus across physical cores - NodeCPUBindPolicySpreadByPCPUs = "SpreadByPCPUs" -) - -const ( - NodeNUMAAllocateStrategyLeastAllocated = string(NUMALeastAllocated) - NodeNUMAAllocateStrategyMostAllocated = string(NUMAMostAllocated) -) - -const ( - // AnnotationKubeletCPUManagerPolicy describes the cpu manager policy options of kubelet - AnnotationKubeletCPUManagerPolicy = "kubelet.koordinator.sh/cpu-manager-policy" - - KubeletCPUManagerPolicyStatic = "static" - KubeletCPUManagerPolicyNone = "none" - KubeletCPUManagerPolicyFullPCPUsOnlyOption = "full-pcpus-only" - KubeletCPUManagerPolicyDistributeCPUsAcrossNUMAOption = "distribute-cpus-across-numa" -) - -type CPUTopology struct { - Detail []CPUInfo `json:"detail,omitempty"` -} - -type CPUInfo struct { - ID int32 `json:"id"` - Core int32 `json:"core"` - Socket int32 `json:"socket"` - Node int32 `json:"node"` -} - -type PodCPUAlloc struct { - Namespace string `json:"namespace,omitempty"` - Name string `json:"name,omitempty"` - UID types.UID `json:"uid,omitempty"` - CPUSet string `json:"cpuset,omitempty"` - ManagedByKubelet bool `json:"managedByKubelet,omitempty"` -} - -type PodCPUAllocs []PodCPUAlloc - -type KubeletCPUManagerPolicy struct { - Policy string `json:"policy,omitempty"` - Options map[string]string `json:"options,omitempty"` - ReservedCPUs string `json:"reservedCPUs,omitempty"` -} - -func GetCPUTopology(annotations map[string]string) (*CPUTopology, error) { - topology := &CPUTopology{} - data, ok := annotations[AnnotationNodeCPUTopology] - if !ok { - return topology, nil - } - err := json.Unmarshal([]byte(data), topology) - if err != nil { - return nil, err - } - return topology, nil -} - -func GetPodCPUAllocs(annotations map[string]string) (PodCPUAllocs, error) { - var allocs PodCPUAllocs - data, ok := annotations[AnnotationNodeCPUAllocs] - if !ok { - return allocs, 
nil - } - err := json.Unmarshal([]byte(data), &allocs) - if err != nil { - return nil, err - } - return allocs, nil -} - -func GetNodeCPUSharePools(nodeTopoAnnotations map[string]string) ([]CPUSharedPool, error) { - var cpuSharePools []CPUSharedPool - data, ok := nodeTopoAnnotations[AnnotationNodeCPUSharedPools] - if !ok { - return cpuSharePools, nil - } - err := json.Unmarshal([]byte(data), &cpuSharePools) - if err != nil { - return nil, err - } - return cpuSharePools, nil -} - -func GetKubeletCPUManagerPolicy(annotations map[string]string) (*KubeletCPUManagerPolicy, error) { - cpuManagerPolicy := &KubeletCPUManagerPolicy{} - data, ok := annotations[AnnotationKubeletCPUManagerPolicy] - if !ok { - return cpuManagerPolicy, nil - } - err := json.Unmarshal([]byte(data), cpuManagerPolicy) - if err != nil { - return nil, err - } - return cpuManagerPolicy, nil -} - -func GetNodeCPUBindPolicy(nodeLabels map[string]string, kubeletCPUPolicy *KubeletCPUManagerPolicy) string { - nodeCPUBindPolicy := nodeLabels[LabelNodeCPUBindPolicy] - if nodeCPUBindPolicy == NodeCPUBindPolicyFullPCPUsOnly || - (kubeletCPUPolicy != nil && kubeletCPUPolicy.Policy == KubeletCPUManagerPolicyStatic && - kubeletCPUPolicy.Options[KubeletCPUManagerPolicyFullPCPUsOnlyOption] == "true") { - return NodeCPUBindPolicyFullPCPUsOnly - } - if nodeCPUBindPolicy == NodeCPUBindPolicySpreadByPCPUs { - return nodeCPUBindPolicy - } - return NodeCPUBindPolicyNone -} diff --git a/extension/node_colocation.go b/extension/node_colocation.go new file mode 100644 index 0000000..7052742 --- /dev/null +++ b/extension/node_colocation.go @@ -0,0 +1,33 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
// Node-level colocation tuning knobs. All three override the cluster-wide
// slo-controller-config for the individual node they are set on.
const (
	// AnnotationNodeColocationStrategy denotes the annotation key of the node colocation strategy.
	// The value is the ColocationStrategy. It takes precedence to the ColocationStrategy in the slo-controller-config.
	// The illegal value will be ignored.
	AnnotationNodeColocationStrategy = NodeDomainPrefix + "/colocation-strategy"

	// LabelCPUReclaimRatio denotes the CPU reclaim ratio of a node. The value is a float number.
	// It takes precedence to the CPUReclaimThresholdPercent in the slo-controller-config and the node annotations.
	// The illegal value will be ignored.
	LabelCPUReclaimRatio = NodeDomainPrefix + "/cpu-reclaim-ratio"
	// LabelMemoryReclaimRatio denotes the memory reclaim ratio of a node. The value is a float number.
	// It takes precedence to the MemoryReclaimThresholdPercent in the slo-controller-config and the node annotations.
	// The illegal value will be ignored.
	LabelMemoryReclaimRatio = NodeDomainPrefix + "/memory-reclaim-ratio"
)
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package extension + +import ( + "encoding/json" + "fmt" + "math" + "strconv" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +const ( + // AnnotationNodeResourceAmplificationRatio denotes the resource amplification ratio of the node. + AnnotationNodeResourceAmplificationRatio = NodeDomainPrefix + "/resource-amplification-ratio" + + // AnnotationNodeRawAllocatable denotes the un-amplified raw allocatable of the node. + AnnotationNodeRawAllocatable = NodeDomainPrefix + "/raw-allocatable" +) + +// Ratio is a float64 wrapper which will always be json marshalled with precision 2. +type Ratio float64 + +func (f Ratio) MarshalJSON() ([]byte, error) { + return []byte(strconv.FormatFloat(float64(f), 'f', 2, 64)), nil +} + +// GetNodeResourceAmplificationRatios gets the resource amplification ratios of node from annotations. +func GetNodeResourceAmplificationRatios(annotations map[string]string) (map[corev1.ResourceName]Ratio, error) { + s, ok := annotations[AnnotationNodeResourceAmplificationRatio] + if !ok { + return nil, nil + } + + var ratios map[corev1.ResourceName]Ratio + if err := json.Unmarshal([]byte(s), &ratios); err != nil { + return nil, fmt.Errorf("failed to unmarshal node resource amplification ratio: %w", err) + } + + return ratios, nil +} + +// GetNodeResourceAmplificationRatio gets the amplification ratio of a specific resource of node from annotations. +// It returns -1 without an error when the amplification ratio is not set for this resource. 
+func GetNodeResourceAmplificationRatio(annotations map[string]string, resource corev1.ResourceName) (Ratio, error) { + ratios, err := GetNodeResourceAmplificationRatios(annotations) + if err != nil { + return -1, err + } + + ratio, ok := ratios[resource] + if !ok { + return -1, nil + } + + return ratio, nil +} + +// SetNodeResourceAmplificationRatios sets the node annotation according to the resource amplification ratios. +// NOTE: The ratio will be converted to string with the precision 2. e.g. 3.1415926 -> 3.14. +func SetNodeResourceAmplificationRatios(node *corev1.Node, ratios map[corev1.ResourceName]Ratio) { + s, _ := json.Marshal(ratios) + if node.Annotations == nil { + node.Annotations = map[string]string{} + } + node.Annotations[AnnotationNodeResourceAmplificationRatio] = string(s) +} + +// SetNodeResourceAmplificationRatio sets the amplification ratio of a specific resource of the node. +// It returns true if the ratio changes. +// NOTE: The ratio will be converted to string with the precision 2. e.g. 3.1415926 -> 3.14. +func SetNodeResourceAmplificationRatio(node *corev1.Node, resource corev1.ResourceName, ratio Ratio) (bool, error) { + ratios, err := GetNodeResourceAmplificationRatios(node.Annotations) + if err != nil { + return false, err + } + + if old := ratios[resource]; old == ratio { + return false, nil + } + + if ratios == nil { + ratios = map[corev1.ResourceName]Ratio{} + } + ratios[resource] = ratio + SetNodeResourceAmplificationRatios(node, ratios) + return true, nil +} + +// HasNodeRawAllocatable checks if the node has raw allocatable annotation. +func HasNodeRawAllocatable(annotations map[string]string) bool { + _, ok := annotations[AnnotationNodeRawAllocatable] + return ok +} + +// GetNodeRawAllocatable gets the raw allocatable of node from annotations. 
+func GetNodeRawAllocatable(annotations map[string]string) (corev1.ResourceList, error) { + s, ok := annotations[AnnotationNodeRawAllocatable] + if !ok { + return nil, nil + } + + var allocatable corev1.ResourceList + if err := json.Unmarshal([]byte(s), &allocatable); err != nil { + return nil, fmt.Errorf("failed to unmarshal node raw allocatable: %w", err) + } + + return allocatable, nil +} + +// SetNodeRawAllocatable sets the node annotation according to the raw allocatable. +func SetNodeRawAllocatable(node *corev1.Node, allocatable corev1.ResourceList) { + s, _ := json.Marshal(allocatable) + if node.Annotations == nil { + node.Annotations = map[string]string{} + } + node.Annotations[AnnotationNodeRawAllocatable] = string(s) +} + +func AmplifyResourceList(requests corev1.ResourceList, amplificationRatios map[corev1.ResourceName]Ratio, resourceNames ...corev1.ResourceName) { + fn := func(resourceName corev1.ResourceName) { + ratio := amplificationRatios[resourceName] + if ratio <= 1 { + return + } + quantity := requests[resourceName] + if quantity.IsZero() { + return + } + + if resourceName == corev1.ResourceCPU { + cpu := Amplify(quantity.MilliValue(), ratio) + requests[resourceName] = *resource.NewMilliQuantity(cpu, resource.DecimalSI) + } else if resourceName == corev1.ResourceMemory || resourceName == corev1.ResourceEphemeralStorage { + val := Amplify(quantity.Value(), ratio) + requests[resourceName] = *resource.NewQuantity(val, resource.BinarySI) + } else { + val := Amplify(quantity.Value(), ratio) + requests[resourceName] = *resource.NewQuantity(val, resource.DecimalSI) + } + } + + if len(resourceNames) > 0 { + for _, name := range resourceNames { + fn(name) + } + } else { + for name := range requests { + fn(name) + } + } +} + +func Amplify(origin int64, ratio Ratio) int64 { + if ratio <= 1 { + return origin + } + return int64(math.Ceil(float64(origin) * float64(ratio))) +} diff --git a/extension/numa_aware.go b/extension/numa_aware.go new file mode 100644 
/*
Copyright 2022 The Koordinator Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package extension

import (
	"encoding/json"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

// Defines the pod level annotations and labels
const (
	// AnnotationResourceSpec represents resource allocation API defined by Koordinator.
	// The user specifies the desired CPU orchestration policy by setting the annotation.
	AnnotationResourceSpec = SchedulingDomainPrefix + "/resource-spec"
	// AnnotationResourceStatus represents resource allocation result.
	// koord-scheduler patches the Pod with this annotation before binding to a node.
	AnnotationResourceStatus = SchedulingDomainPrefix + "/resource-status"
)

// Defines the node level annotations and labels
const (
	// AnnotationNodeCPUTopology describes the detailed CPU topology.
	AnnotationNodeCPUTopology = NodeDomainPrefix + "/cpu-topology"
	// AnnotationNodeCPUAllocs describes K8s Guaranteed Pods.
	AnnotationNodeCPUAllocs = NodeDomainPrefix + "/pod-cpu-allocs"
	// AnnotationNodeCPUSharedPools describes the CPU Shared Pool defined by Koordinator.
	// The shared pool is mainly used by Koordinator LS Pods or K8s Burstable Pods.
	AnnotationNodeCPUSharedPools = NodeDomainPrefix + "/cpu-shared-pools"
	// AnnotationNodeBECPUSharedPools describes the CPU Shared Pool defined by Koordinator.
	// The shared pool is mainly used by Koordinator BE Pods or K8s Besteffort Pods.
	AnnotationNodeBECPUSharedPools = NodeDomainPrefix + "/be-cpu-shared-pools"

	// LabelNodeCPUBindPolicy constrains how to bind CPU logical CPUs when scheduling.
	LabelNodeCPUBindPolicy = NodeDomainPrefix + "/cpu-bind-policy"
	// LabelNodeNUMAAllocateStrategy indicates how to choose satisfied NUMA Nodes when scheduling.
	LabelNodeNUMAAllocateStrategy = NodeDomainPrefix + "/numa-allocate-strategy"

	// LabelNUMATopologyPolicy represents that how to align resource allocation according to the NUMA topology
	LabelNUMATopologyPolicy = NodeDomainPrefix + "/numa-topology-policy"
)

// ResourceSpec describes extra attributes of the resource requirements.
type ResourceSpec struct {
	// RequiredCPUBindPolicy indicates that the CPU is allocated strictly
	// according to the specified CPUBindPolicy, otherwise the scheduling fails
	RequiredCPUBindPolicy CPUBindPolicy `json:"requiredCPUBindPolicy,omitempty"`
	// PreferredCPUBindPolicy represents best-effort CPU bind policy.
	PreferredCPUBindPolicy CPUBindPolicy `json:"preferredCPUBindPolicy,omitempty"`
	// PreferredCPUExclusivePolicy represents best-effort CPU exclusive policy.
	PreferredCPUExclusivePolicy CPUExclusivePolicy `json:"preferredCPUExclusivePolicy,omitempty"`
}

// ResourceStatus describes resource allocation result, such as how to bind CPU.
type ResourceStatus struct {
	// CPUSet represents the allocated CPUs. It is Linux CPU list formatted string.
	// When LSE/LSR Pod requested, koord-scheduler will update the field.
	CPUSet string `json:"cpuset,omitempty"`
	// NUMANodeResources indicates that the Pod is constrained to run on the specified NUMA Node.
	NUMANodeResources []NUMANodeResource `json:"numaNodeResources,omitempty"`
}

// NUMANodeResource is the resource amount allocated from a single NUMA node.
type NUMANodeResource struct {
	Node      int32               `json:"node"`
	Resources corev1.ResourceList `json:"resources,omitempty"`
}

// CPUBindPolicy defines the CPU binding policy
type CPUBindPolicy string

const (
	// CPUBindPolicyDefault performs the default bind policy that specified in koord-scheduler configuration
	CPUBindPolicyDefault CPUBindPolicy = "Default"
	// CPUBindPolicyFullPCPUs favor cpuset allocation that pack in few physical cores
	CPUBindPolicyFullPCPUs CPUBindPolicy = "FullPCPUs"
	// CPUBindPolicySpreadByPCPUs favor cpuset allocation that evenly allocate logical cpus across physical cores
	CPUBindPolicySpreadByPCPUs CPUBindPolicy = "SpreadByPCPUs"
	// CPUBindPolicyConstrainedBurst constrains the CPU Shared Pool range of the Burstable Pod
	CPUBindPolicyConstrainedBurst CPUBindPolicy = "ConstrainedBurst"
)

// CPUExclusivePolicy defines in which dimension allocated CPUs are kept exclusive.
type CPUExclusivePolicy string

const (
	// CPUExclusivePolicyNone does not perform any exclusive policy
	CPUExclusivePolicyNone CPUExclusivePolicy = "None"
	// CPUExclusivePolicyPCPULevel represents mutual exclusion in the physical core dimension
	CPUExclusivePolicyPCPULevel CPUExclusivePolicy = "PCPULevel"
	// CPUExclusivePolicyNUMANodeLevel indicates mutual exclusion in the NUMA topology dimension
	CPUExclusivePolicyNUMANodeLevel CPUExclusivePolicy = "NUMANodeLevel"
)

// NodeCPUBindPolicy is the node-wide CPU bind constraint set via LabelNodeCPUBindPolicy.
type NodeCPUBindPolicy string

const (
	// NodeCPUBindPolicyNone does not perform any bind policy
	NodeCPUBindPolicyNone NodeCPUBindPolicy = "None"
	// NodeCPUBindPolicyFullPCPUsOnly requires that the scheduler must allocate full physical cores.
	// Equivalent to kubelet CPU manager policy option full-pcpus-only=true.
	NodeCPUBindPolicyFullPCPUsOnly NodeCPUBindPolicy = "FullPCPUsOnly"
	// NodeCPUBindPolicySpreadByPCPUs requires that the scheduler must evenly allocate logical cpus across physical cores
	NodeCPUBindPolicySpreadByPCPUs NodeCPUBindPolicy = "SpreadByPCPUs"
)

// NUMAAllocateStrategy indicates how to choose satisfied NUMA Nodes
type NUMAAllocateStrategy string

const (
	// NUMAMostAllocated allocates from the NUMA Node with the least amount of available resource
	// (i.e. the most already allocated).
	NUMAMostAllocated NUMAAllocateStrategy = "MostAllocated"
	// NUMALeastAllocated allocates from the NUMA Node with the most amount of available resource
	// (i.e. the least already allocated).
	NUMALeastAllocated NUMAAllocateStrategy = "LeastAllocated"
	// NUMADistributeEvenly indicates that evenly distribute CPUs across NUMA Nodes.
	NUMADistributeEvenly NUMAAllocateStrategy = "DistributeEvenly"
)

// Aliases kept for backward compatibility with the former node.go names.
const (
	NodeNUMAAllocateStrategyLeastAllocated = NUMALeastAllocated
	NodeNUMAAllocateStrategyMostAllocated  = NUMAMostAllocated
)

// NUMATopologyPolicy mirrors the kubelet topology manager policies.
type NUMATopologyPolicy string

const (
	NUMATopologyPolicyNone           NUMATopologyPolicy = ""
	NUMATopologyPolicyBestEffort     NUMATopologyPolicy = "BestEffort"
	NUMATopologyPolicyRestricted     NUMATopologyPolicy = "Restricted"
	NUMATopologyPolicySingleNUMANode NUMATopologyPolicy = "SingleNUMANode"
)

const (
	// AnnotationKubeletCPUManagerPolicy describes the cpu manager policy options of kubelet
	AnnotationKubeletCPUManagerPolicy = "kubelet.koordinator.sh/cpu-manager-policy"

	KubeletCPUManagerPolicyStatic                         = "static"
	KubeletCPUManagerPolicyNone                           = "none"
	KubeletCPUManagerPolicyFullPCPUsOnlyOption            = "full-pcpus-only"
	KubeletCPUManagerPolicyDistributeCPUsAcrossNUMAOption = "distribute-cpus-across-numa"
)

// CPUTopology describes the CPU topology of a node as a flat list of logical CPUs.
type CPUTopology struct {
	Detail []CPUInfo `json:"detail,omitempty"`
}

// CPUInfo locates one logical CPU within the core/socket/NUMA-node hierarchy.
type CPUInfo struct {
	ID     int32 `json:"id"`
	Core   int32 `json:"core"`
	Socket int32 `json:"socket"`
	Node   int32 `json:"node"`
}

// PodCPUAlloc records the cpuset assigned to one pod on the node.
type PodCPUAlloc struct {
	Namespace        string    `json:"namespace,omitempty"`
	Name             string    `json:"name,omitempty"`
	UID              types.UID `json:"uid,omitempty"`
	CPUSet           string    `json:"cpuset,omitempty"`
	ManagedByKubelet bool      `json:"managedByKubelet,omitempty"`
}

// PodCPUAllocs is the list form stored under AnnotationNodeCPUAllocs.
type PodCPUAllocs []PodCPUAlloc

// CPUSharedPool is a cpuset shared by pods, scoped to one socket/NUMA node.
type CPUSharedPool struct {
	Socket int32  `json:"socket"`
	Node   int32  `json:"node"`
	CPUSet string `json:"cpuset,omitempty"`
}

// KubeletCPUManagerPolicy mirrors the kubelet cpu-manager configuration
// reported under AnnotationKubeletCPUManagerPolicy.
type KubeletCPUManagerPolicy struct {
	Policy       string            `json:"policy,omitempty"`
	Options      map[string]string `json:"options,omitempty"`
	ReservedCPUs string            `json:"reservedCPUs,omitempty"`
}

// GetResourceSpec parses ResourceSpec from annotations.
// A missing annotation yields a zero-valued spec and no error.
func GetResourceSpec(annotations map[string]string) (*ResourceSpec, error) {
	resourceSpec := &ResourceSpec{}
	data, ok := annotations[AnnotationResourceSpec]
	if !ok {
		return resourceSpec, nil
	}
	err := json.Unmarshal([]byte(data), resourceSpec)
	if err != nil {
		return nil, err
	}
	return resourceSpec, nil
}

// SetResourceSpec serializes spec into obj's AnnotationResourceSpec annotation.
func SetResourceSpec(obj metav1.Object, spec *ResourceSpec) error {
	data, err := json.Marshal(spec)
	if err != nil {
		return err
	}
	annotations := obj.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}
	annotations[AnnotationResourceSpec] = string(data)
	obj.SetAnnotations(annotations)
	return nil
}

// GetResourceStatus parses ResourceStatus from annotations.
// A missing annotation yields a zero-valued status and no error.
func GetResourceStatus(annotations map[string]string) (*ResourceStatus, error) {
	resourceStatus := &ResourceStatus{}
	data, ok := annotations[AnnotationResourceStatus]
	if !ok {
		return resourceStatus, nil
	}
	err := json.Unmarshal([]byte(data), resourceStatus)
	if err != nil {
		return nil, err
	}
	return resourceStatus, nil
}

// SetResourceStatus serializes status into obj's AnnotationResourceStatus
// annotation. A nil obj is a no-op.
func SetResourceStatus(obj metav1.Object, status *ResourceStatus) error {
	if obj == nil {
		return nil
	}
	annotations := obj.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}
	data, err := json.Marshal(status)
	if err != nil {
		return err
	}
	annotations[AnnotationResourceStatus] = string(data)
	obj.SetAnnotations(annotations)
	return nil
}

// GetCPUTopology parses the node CPU topology from annotations.
// A missing annotation yields an empty topology and no error.
func GetCPUTopology(annotations map[string]string) (*CPUTopology, error) {
	topology := &CPUTopology{}
	data, ok := annotations[AnnotationNodeCPUTopology]
	if !ok {
		return topology, nil
	}
	err := json.Unmarshal([]byte(data), topology)
	if err != nil {
		return nil, err
	}
	return topology, nil
}

// GetPodCPUAllocs parses the per-pod cpuset allocations from node annotations.
// A missing annotation yields nil allocs and no error.
func GetPodCPUAllocs(annotations map[string]string) (PodCPUAllocs, error) {
	var allocs PodCPUAllocs
	data, ok := annotations[AnnotationNodeCPUAllocs]
	if !ok {
		return allocs, nil
	}
	err := json.Unmarshal([]byte(data), &allocs)
	if err != nil {
		return nil, err
	}
	return allocs, nil
}

// GetNodeCPUSharePools parses the LS/Burstable CPU shared pools from the
// node topology annotations. A missing annotation yields nil and no error.
func GetNodeCPUSharePools(nodeTopoAnnotations map[string]string) ([]CPUSharedPool, error) {
	var cpuSharePools []CPUSharedPool
	data, ok := nodeTopoAnnotations[AnnotationNodeCPUSharedPools]
	if !ok {
		return cpuSharePools, nil
	}
	err := json.Unmarshal([]byte(data), &cpuSharePools)
	if err != nil {
		return nil, err
	}
	return cpuSharePools, nil
}

// GetNodeBECPUSharePools parses the BE/Besteffort CPU shared pools from the
// node topology annotations. A missing annotation yields nil and no error.
func GetNodeBECPUSharePools(nodeTopoAnnotations map[string]string) ([]CPUSharedPool, error) {
	var beCPUSharePools []CPUSharedPool
	data, ok := nodeTopoAnnotations[AnnotationNodeBECPUSharedPools]
	if !ok {
		return beCPUSharePools, nil
	}
	err := json.Unmarshal([]byte(data), &beCPUSharePools)
	if err != nil {
		return nil, err
	}
	return beCPUSharePools, nil
}

// GetKubeletCPUManagerPolicy parses the reported kubelet cpu-manager policy
// from annotations. A missing annotation yields a zero-valued policy and no error.
func GetKubeletCPUManagerPolicy(annotations map[string]string) (*KubeletCPUManagerPolicy, error) {
	cpuManagerPolicy := &KubeletCPUManagerPolicy{}
	data, ok := annotations[AnnotationKubeletCPUManagerPolicy]
	if !ok {
		return cpuManagerPolicy, nil
	}
	err := json.Unmarshal([]byte(data), cpuManagerPolicy)
	if err != nil {
		return nil, err
	}
	return cpuManagerPolicy, nil
}

// GetNodeCPUBindPolicy resolves the effective node CPU bind policy from the
// node label and the kubelet cpu-manager policy: FullPCPUsOnly wins when either
// the label says so or kubelet runs static policy with full-pcpus-only=true;
// SpreadByPCPUs passes through; anything else maps to None.
func GetNodeCPUBindPolicy(nodeLabels map[string]string, kubeletCPUPolicy *KubeletCPUManagerPolicy) NodeCPUBindPolicy {
	nodeCPUBindPolicy := NodeCPUBindPolicy(nodeLabels[LabelNodeCPUBindPolicy])
	if nodeCPUBindPolicy == NodeCPUBindPolicyFullPCPUsOnly ||
		(kubeletCPUPolicy != nil && kubeletCPUPolicy.Policy == KubeletCPUManagerPolicyStatic &&
			kubeletCPUPolicy.Options[KubeletCPUManagerPolicyFullPCPUsOnlyOption] == "true") {
		return NodeCPUBindPolicyFullPCPUsOnly
	}
	if nodeCPUBindPolicy == NodeCPUBindPolicySpreadByPCPUs {
		return nodeCPUBindPolicy
	}
	return NodeCPUBindPolicyNone
}

// GetNodeNUMATopologyPolicy returns the NUMA topology policy label of the node
// ("" when unset, which is NUMATopologyPolicyNone).
func GetNodeNUMATopologyPolicy(labels map[string]string) NUMATopologyPolicy {
	return NUMATopologyPolicy(labels[LabelNUMATopologyPolicy])
}

// SetNodeNUMATopologyPolicy stores policy in obj's LabelNUMATopologyPolicy
// label, creating the label map if needed.
func SetNodeNUMATopologyPolicy(obj metav1.Object, policy NUMATopologyPolicy) {
	labels := obj.GetLabels()
	if labels == nil {
		labels = map[string]string{}
	}
	labels[LabelNUMATopologyPolicy] = string(policy)
	obj.SetLabels(labels)
	// NOTE(review): trailing bare return in a void function is redundant (staticcheck S1023).
	return
}

// IsReservationOperatingMode reports whether the pod is labeled to operate as a reservation.
func IsReservationOperatingMode(pod *corev1.Pod) bool {
	return pod.Labels[LabelPodOperatingMode] == string(ReservationPodOperatingMode)
}

// SetReservationOwners serializes owners into obj's
// AnnotationReservationOwners annotation, creating the annotation map if needed.
func SetReservationOwners(obj metav1.Object, owners []schedulingv1alpha1.ReservationOwner) error {
	data, err := json.Marshal(owners)
	if err != nil {
		return err
	}
	annotations := obj.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}
	annotations[AnnotationReservationOwners] = string(data)
	obj.SetAnnotations(annotations)
	return nil
}
a/extension/reservation.go b/extension/reservation.go index 653e30a..0236848 100644 --- a/extension/reservation.go +++ b/extension/reservation.go @@ -108,3 +108,17 @@ func GetReservationAffinity(annotations map[string]string) (*ReservationAffinity } return &affinity, nil } + +func SetReservationAffinity(obj metav1.Object, affinity *ReservationAffinity) error { + data, err := json.Marshal(affinity) + if err != nil { + return err + } + annotations := obj.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[AnnotationReservationAffinity] = string(data) + obj.SetAnnotations(annotations) + return nil +} diff --git a/extension/resource.go b/extension/resource.go index 7f23b4d..263f0c2 100644 --- a/extension/resource.go +++ b/extension/resource.go @@ -20,7 +20,6 @@ import ( "encoding/json" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) const ( @@ -28,30 +27,9 @@ const ( BatchMemory corev1.ResourceName = ResourceDomainPrefix + "batch-memory" MidCPU corev1.ResourceName = ResourceDomainPrefix + "mid-cpu" MidMemory corev1.ResourceName = ResourceDomainPrefix + "mid-memory" - - ResourceNvidiaGPU corev1.ResourceName = "nvidia.com/gpu" - ResourceHygonDCU corev1.ResourceName = "dcu.com/gpu" - ResourceRDMA corev1.ResourceName = DomainPrefix + "rdma" - ResourceFPGA corev1.ResourceName = DomainPrefix + "fpga" - ResourceGPU corev1.ResourceName = DomainPrefix + "gpu" - ResourceGPUCore corev1.ResourceName = DomainPrefix + "gpu-core" - ResourceGPUMemory corev1.ResourceName = DomainPrefix + "gpu-memory" - ResourceGPUMemoryRatio corev1.ResourceName = DomainPrefix + "gpu-memory-ratio" -) - -const ( - LabelGPUModel string = NodeDomainPrefix + "/gpu-model" - LabelGPUDriverVersion string = NodeDomainPrefix + "/gpu-driver-version" ) const ( - // AnnotationResourceSpec represents resource allocation API defined by Koordinator. - // The user specifies the desired CPU orchestration policy by setting the annotation. 
- AnnotationResourceSpec = SchedulingDomainPrefix + "/resource-spec" - // AnnotationResourceStatus represents resource allocation result. - // koord-scheduler patch Pod with the annotation before binding to node. - AnnotationResourceStatus = SchedulingDomainPrefix + "/resource-status" - // AnnotationExtendedResourceSpec specifies the resource requirements of extended resources for internal usage. // It annotates the requests/limits of extended resources and can be used by runtime proxy and koordlet that // cannot get the original pod spec in CRI requests. @@ -71,129 +49,6 @@ var ( } ) -// ResourceSpec describes extra attributes of the resource requirements. -type ResourceSpec struct { - // PreferredCPUBindPolicy represents best-effort CPU bind policy. - PreferredCPUBindPolicy CPUBindPolicy `json:"preferredCPUBindPolicy,omitempty"` - // PreferredCPUExclusivePolicy represents best-effort CPU exclusive policy. - PreferredCPUExclusivePolicy CPUExclusivePolicy `json:"preferredCPUExclusivePolicy,omitempty"` -} - -// ResourceStatus describes resource allocation result, such as how to bind CPU. -type ResourceStatus struct { - // CPUSet represents the allocated CPUs. It is Linux CPU list formatted string. - // When LSE/LSR Pod requested, koord-scheduler will update the field. - CPUSet string `json:"cpuset,omitempty"` - // CPUSharedPools represents the desired CPU Shared Pools used by LS Pods. 
- CPUSharedPools []CPUSharedPool `json:"cpuSharedPools,omitempty"` -} - -// CPUBindPolicy defines the CPU binding policy -type CPUBindPolicy string - -const ( - // CPUBindPolicyDefault performs the default bind policy that specified in koord-scheduler configuration - CPUBindPolicyDefault CPUBindPolicy = "Default" - // CPUBindPolicyFullPCPUs favor cpuset allocation that pack in few physical cores - CPUBindPolicyFullPCPUs CPUBindPolicy = "FullPCPUs" - // CPUBindPolicySpreadByPCPUs favor cpuset allocation that evenly allocate logical cpus across physical cores - CPUBindPolicySpreadByPCPUs CPUBindPolicy = "SpreadByPCPUs" - // CPUBindPolicyConstrainedBurst constrains the CPU Shared Pool range of the Burstable Pod - CPUBindPolicyConstrainedBurst CPUBindPolicy = "ConstrainedBurst" -) - -type CPUExclusivePolicy string - -const ( - // CPUExclusivePolicyNone does not perform any exclusive policy - CPUExclusivePolicyNone CPUExclusivePolicy = "None" - // CPUExclusivePolicyPCPULevel represents mutual exclusion in the physical core dimension - CPUExclusivePolicyPCPULevel CPUExclusivePolicy = "PCPULevel" - // CPUExclusivePolicyNUMANodeLevel indicates mutual exclusion in the NUMA topology dimension - CPUExclusivePolicyNUMANodeLevel CPUExclusivePolicy = "NUMANodeLevel" -) - -// NUMAAllocateStrategy indicates how to choose satisfied NUMA Nodes -type NUMAAllocateStrategy string - -const ( - // NUMAMostAllocated indicates that allocates from the NUMA Node with the least amount of available resource. - NUMAMostAllocated NUMAAllocateStrategy = "MostAllocated" - // NUMALeastAllocated indicates that allocates from the NUMA Node with the most amount of available resource. - NUMALeastAllocated NUMAAllocateStrategy = "LeastAllocated" - // NUMADistributeEvenly indicates that evenly distribute CPUs across NUMA Nodes. 
- NUMADistributeEvenly NUMAAllocateStrategy = "DistributeEvenly" -) - -type NUMACPUSharedPools []CPUSharedPool - -type CPUSharedPool struct { - Socket int32 `json:"socket"` - Node int32 `json:"node"` - CPUSet string `json:"cpuset,omitempty"` -} - -// GetResourceSpec parses ResourceSpec from annotations -func GetResourceSpec(annotations map[string]string) (*ResourceSpec, error) { - resourceSpec := &ResourceSpec{ - PreferredCPUBindPolicy: CPUBindPolicyDefault, - } - data, ok := annotations[AnnotationResourceSpec] - if !ok { - return resourceSpec, nil - } - err := json.Unmarshal([]byte(data), resourceSpec) - if err != nil { - return nil, err - } - return resourceSpec, nil -} - -func SetResourceSpec(obj metav1.Object, spec *ResourceSpec) error { - data, err := json.Marshal(spec) - if err != nil { - return err - } - annotations := obj.GetAnnotations() - if annotations == nil { - annotations = map[string]string{} - } - annotations[AnnotationResourceSpec] = string(data) - obj.SetAnnotations(annotations) - return nil -} - -// GetResourceStatus parses ResourceStatus from annotations -func GetResourceStatus(annotations map[string]string) (*ResourceStatus, error) { - resourceStatus := &ResourceStatus{} - data, ok := annotations[AnnotationResourceStatus] - if !ok { - return resourceStatus, nil - } - err := json.Unmarshal([]byte(data), resourceStatus) - if err != nil { - return nil, err - } - return resourceStatus, nil -} - -func SetResourceStatus(obj metav1.Object, status *ResourceStatus) error { - if obj == nil { - return nil - } - annotations := obj.GetAnnotations() - if annotations == nil { - annotations = map[string]string{} - } - data, err := json.Marshal(status) - if err != nil { - return err - } - annotations[AnnotationResourceStatus] = string(data) - obj.SetAnnotations(annotations) - return nil -} - // TranslateResourceNameByPriorityClass translates defaultResourceName to extend resourceName by PriorityClass func TranslateResourceNameByPriorityClass(priorityClass 
PriorityClass, defaultResourceName corev1.ResourceName) corev1.ResourceName { if priorityClass == PriorityProd || priorityClass == PriorityNone { diff --git a/go.mod b/go.mod index 02beda2..865ffb6 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/koordinator-sh/apis -go 1.17 +go 1.18 require ( github.com/golang/protobuf v1.5.2 diff --git a/hack/clone-api-files.sh b/hack/clone-api-files.sh index a34b4cd..f3d5503 100755 --- a/hack/clone-api-files.sh +++ b/hack/clone-api-files.sh @@ -11,6 +11,7 @@ API_PATHS_MAP=( "apis/scheme:scheme" "apis/extension:extension" "apis/scheduling:scheduling" + "apis/quota:quota" ) help() { diff --git a/quota/v1alpha1/elastic_quota_profile_types.go b/quota/v1alpha1/elastic_quota_profile_types.go new file mode 100644 index 0000000..7839c7c --- /dev/null +++ b/quota/v1alpha1/elastic_quota_profile_types.go @@ -0,0 +1,69 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. + +type ElasticQuotaProfileSpec struct { + // QuotaName defines the associated quota name of the profile. + // +required + QuotaName string `json:"quotaName"` + // QuotaLabels defines the labels of the quota. 
+ QuotaLabels map[string]string `json:"quotaLabels,omitempty"` + // ResourceRatio is a ratio, we will use it to fix the resource fragmentation problem. + // If the total resource is 100 and the resource ratio is 0.9, the allocable resource is 100*0.9=90 + ResourceRatio *string `json:"resourceRatio,omitempty"` + // NodeSelector defines a node selector to select nodes. + // +required + NodeSelector *metav1.LabelSelector `json:"nodeSelector"` +} + +type ElasticQuotaProfileStatus struct { +} + +// ElasticQuotaProfile is the Schema for the ElasticQuotaProfile API +// +k8s:openapi-gen=true +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +genclient +// +kubebuilder:resource:shortName=eqp +// +kubebuilder:object:root=true + +type ElasticQuotaProfile struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec ElasticQuotaProfileSpec `json:"spec,omitempty"` + Status ElasticQuotaProfileStatus `json:"status,omitempty"` +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// ElasticQuotaProfileList contains a list of ElasticQuotaProfile +type ElasticQuotaProfileList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []ElasticQuotaProfile `json:"items"` +} + +func init() { + SchemeBuilder.Register(&ElasticQuotaProfile{}, &ElasticQuotaProfileList{}) +} diff --git a/quota/v1alpha1/groupversion_info.go b/quota/v1alpha1/groupversion_info.go new file mode 100644 index 0000000..41377b7 --- /dev/null +++ b/quota/v1alpha1/groupversion_info.go @@ -0,0 +1,44 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package v1alpha1 contains API Schema definitions for the scheduling v1alpha1 API group +// +kubebuilder:object:generate=true +// +groupName=quota.koordinator.sh +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + + "github.com/koordinator-sh/apis/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "quota.koordinator.sh", Version: "v1alpha1"} + + SchemeGroupVersion = GroupVersion + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. + AddToScheme = SchemeBuilder.AddToScheme +) + +// Resource is required by pkg/client/listers/... +func Resource(resource string) schema.GroupResource { + return SchemeGroupVersion.WithResource(resource).GroupResource() +} diff --git a/quota/v1alpha1/zz_generated.deepcopy.go b/quota/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 0000000..d3e3825 --- /dev/null +++ b/quota/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,133 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ElasticQuotaProfile) DeepCopyInto(out *ElasticQuotaProfile) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + out.Status = in.Status +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticQuotaProfile. +func (in *ElasticQuotaProfile) DeepCopy() *ElasticQuotaProfile { + if in == nil { + return nil + } + out := new(ElasticQuotaProfile) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ElasticQuotaProfile) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ElasticQuotaProfileList) DeepCopyInto(out *ElasticQuotaProfileList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]ElasticQuotaProfile, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticQuotaProfileList. +func (in *ElasticQuotaProfileList) DeepCopy() *ElasticQuotaProfileList { + if in == nil { + return nil + } + out := new(ElasticQuotaProfileList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ElasticQuotaProfileList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ElasticQuotaProfileSpec) DeepCopyInto(out *ElasticQuotaProfileSpec) { + *out = *in + if in.QuotaLabels != nil { + in, out := &in.QuotaLabels, &out.QuotaLabels + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.ResourceRatio != nil { + in, out := &in.ResourceRatio, &out.ResourceRatio + *out = new(string) + **out = **in + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = new(v1.LabelSelector) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticQuotaProfileSpec. +func (in *ElasticQuotaProfileSpec) DeepCopy() *ElasticQuotaProfileSpec { + if in == nil { + return nil + } + out := new(ElasticQuotaProfileSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ElasticQuotaProfileStatus) DeepCopyInto(out *ElasticQuotaProfileStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElasticQuotaProfileStatus. +func (in *ElasticQuotaProfileStatus) DeepCopy() *ElasticQuotaProfileStatus { + if in == nil { + return nil + } + out := new(ElasticQuotaProfileStatus) + in.DeepCopyInto(out) + return out +} diff --git a/scheduling/v1alpha1/device_types.go b/scheduling/v1alpha1/device_types.go index fd55f4a..aad7f2a 100644 --- a/scheduling/v1alpha1/device_types.go +++ b/scheduling/v1alpha1/device_types.go @@ -45,7 +45,8 @@ type DeviceInfo struct { // ModuleID represents the physical id of Device ModuleID *int32 `json:"moduleID,omitempty"` // Health indicates whether the device is normal - Health bool `json:"health,omitempty"` + // +kubebuilder:default=false + Health bool `json:"health"` // Resources is a set of (resource name, quantity) pairs Resources corev1.ResourceList `json:"resources,omitempty"` // Topology represents the topology information about the device @@ -55,19 +56,27 @@ type DeviceInfo struct { } type DeviceTopology struct { - SocketID int32 `json:"socketID"` - NodeID int32 `json:"nodeID"` - PCIEID int32 `json:"pcieID"` - BusID string `json:"busID,omitempty"` + // SocketID is the ID of CPU Socket to which the device belongs + SocketID int32 `json:"socketID"` + // NodeID is the ID of NUMA Node to which the device belongs, it should be unique across different CPU Sockets + NodeID int32 `json:"nodeID"` + // PCIEID is the ID of PCIE Switch to which the device is connected, it should be unique across different NUMA Nodes + PCIEID string `json:"pcieID"` + // BusID is the domain:bus:device.function formatted identifier of PCI/PCIE device + BusID string `json:"busID,omitempty"` } type VirtualFunctionGroup struct { + // Labels represents the Virtual Function properties that can be used to organize and categorize (scope and select) objects Labels 
map[string]string `json:"labels,omitempty"` - VFs []VirtualFunction `json:"vfs,omitempty"` + // VFs are the virtual function devices which belong to the group + VFs []VirtualFunction `json:"vfs,omitempty"` } type VirtualFunction struct { - Minor int32 `json:"minor"` + // Minor represents the Minor number of VirtualFunction, starting from 0, used to identify virtual function. + Minor int32 `json:"minor"` + // BusID is the domain:bus:device.function formatted identifier of PCI/PCIE virtual function device BusID string `json:"busID,omitempty"` } diff --git a/slo/v1alpha1/host_application.go b/slo/v1alpha1/host_application.go new file mode 100644 index 0000000..30871c5 --- /dev/null +++ b/slo/v1alpha1/host_application.go @@ -0,0 +1,61 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1alpha1 + +import ( + "github.com/koordinator-sh/apis/extension" +) + +// HostApplicationSpec describes the QoS management for out-of-band applications on node +type HostApplicationSpec struct { + Name string `json:"name,omitempty"` + // Priority class of the application + Priority extension.PriorityClass `json:"priority,omitempty"` + // QoS class of the application + QoS extension.QoSClass `json:"qos,omitempty"` + // Optional, defines the host cgroup configuration, use default if not specified according to priority and qos + CgroupPath *CgroupPath `json:"cgroupPath,omitempty"` + // QoS Strategy of host application + Strategy *HostApplicationStrategy `json:"strategy,omitempty"` +} + +type HostApplicationStrategy struct { +} + +// CgroupPath describes the cgroup path for out-of-band applications +type CgroupPath struct { + // cgroup base dir, the format is various across cgroup drivers + Base CgroupBaseType `json:"base,omitempty"` + // cgroup parent path under base dir + ParentDir string `json:"parentDir,omitempty"` + // cgroup relative path under parent dir + RelativePath string `json:"relativePath,omitempty"` +} + +// CgroupBaseType defines the cgroup base dir for HostCgroup +type CgroupBaseType string + +const ( + // CgroupBaseTypeRoot is the root dir of cgroup fs on node, e.g. /sys/fs/cgroup/cpu/ + CgroupBaseTypeRoot CgroupBaseType = "CgroupRoot" + // CgroupBaseTypeKubepods is the cgroup dir for k8s pods, e.g. /sys/fs/cgroup/cpu/kubepods/ + CgroupBaseTypeKubepods CgroupBaseType = "Kubepods" + // CgroupBaseTypeKubeBurstable is the cgroup dir for k8s burstable pods, e.g. /sys/fs/cgroup/cpu/kubepods/burstable/ + CgroupBaseTypeKubeBurstable CgroupBaseType = "KubepodsBurstable" + // CgroupBaseTypeKubeBesteffort is the cgroup dir for k8s besteffort pods, e.g. 
/sys/fs/cgroup/cpu/kubepods/besteffort/ + CgroupBaseTypeKubeBesteffort CgroupBaseType = "KubepodsBesteffort" +) diff --git a/slo/v1alpha1/node.go b/slo/v1alpha1/node.go new file mode 100644 index 0000000..dee51d5 --- /dev/null +++ b/slo/v1alpha1/node.go @@ -0,0 +1,153 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + "encoding/json" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + quotav1 "k8s.io/apiserver/pkg/quota/v1" + + apiext "github.com/koordinator-sh/apis/extension" +) + +const ( + // batch resource can be shared with other allocators such as Hadoop YARN + // record origin batch allocatable on node for calculating the batch allocatable of K8s and YARN, e.g. 
+ // k8s_batch_allocatable = origin_batch_allocatable - yarn_batch_requested + // yarn_allocatable = origin_batch_allocatable - k8s_batch_requested + NodeOriginExtendedAllocatableAnnotationKey = "node.koordinator.sh/originExtendedAllocatable" + + // record (batch) allocations of other schedulers such as YARN, which should be excluded before updating node extended resource + NodeThirdPartyAllocationsAnnotationKey = "node.koordinator.sh/thirdPartyAllocations" +) + +type OriginAllocatable struct { + Resources corev1.ResourceList `json:"resources,omitempty"` +} + +func GetOriginExtendedAllocatable(annotations map[string]string) (*OriginAllocatable, error) { + originAllocatableStr, exist := annotations[NodeOriginExtendedAllocatableAnnotationKey] + if !exist { + return nil, nil + } + originAllocatable := &OriginAllocatable{} + if err := json.Unmarshal([]byte(originAllocatableStr), originAllocatable); err != nil { + return nil, err + } + return originAllocatable, nil +} + +func SetOriginExtendedAllocatableRes(annotations map[string]string, extendedAllocatable corev1.ResourceList) error { + old, err := GetOriginExtendedAllocatable(annotations) + if old == nil || err != nil { + old = &OriginAllocatable{} + } + if old.Resources == nil { + old.Resources = map[corev1.ResourceName]resource.Quantity{} + } + for resourceName, value := range extendedAllocatable { + old.Resources[resourceName] = value + } + newStr, err := json.Marshal(old) + if err != nil { + return err + } + if annotations == nil { + annotations = map[string]string{} + } + annotations[NodeOriginExtendedAllocatableAnnotationKey] = string(newStr) + return nil +} + +type ThirdPartyAllocations struct { + Allocations []ThirdPartyAllocation `json:"allocations,omitempty"` +} + +type ThirdPartyAllocation struct { + Name string `json:"name"` + Priority apiext.PriorityClass `json:"priority"` + Resources corev1.ResourceList `json:"resources,omitempty"` +} + +func GetThirdPartyAllocations(annotations map[string]string) 
(*ThirdPartyAllocations, error) { + valueStr, exist := annotations[NodeThirdPartyAllocationsAnnotationKey] + if !exist { + return nil, nil + } + object := &ThirdPartyAllocations{} + if err := json.Unmarshal([]byte(valueStr), object); err != nil { + return nil, err + } + return object, nil +} + +func GetThirdPartyAllocatedResByPriority(annotations map[string]string, priority apiext.PriorityClass) (corev1.ResourceList, error) { + allocations, err := GetThirdPartyAllocations(annotations) + if err != nil || allocations == nil { + return nil, err + } + result := corev1.ResourceList{} + for _, alloc := range allocations.Allocations { + if alloc.Priority == priority { + result = quotav1.Add(result, alloc.Resources) + } + } + return result, nil +} + +func SetThirdPartyAllocation(annotations map[string]string, name string, priority apiext.PriorityClass, + resource corev1.ResourceList) error { + // parse or init old allocations + oldAllocations, err := GetThirdPartyAllocations(annotations) + if oldAllocations == nil || err != nil { + oldAllocations = &ThirdPartyAllocations{} + } + if oldAllocations.Allocations == nil { + oldAllocations.Allocations = make([]ThirdPartyAllocation, 0, 1) + } + + // create or update old alloc + newAlloc := ThirdPartyAllocation{ + Name: name, + Priority: priority, + Resources: resource, + } + exist := false + for i := range oldAllocations.Allocations { + if oldAllocations.Allocations[i].Name == name { + oldAllocations.Allocations[i] = newAlloc + exist = true + break + } + } + if !exist { + oldAllocations.Allocations = append(oldAllocations.Allocations, newAlloc) + } + + // update allocation string + newStr, err := json.Marshal(oldAllocations) + if err != nil { + return err + } + if annotations == nil { + annotations = map[string]string{} + } + annotations[NodeThirdPartyAllocationsAnnotationKey] = string(newStr) + return nil +} diff --git a/slo/v1alpha1/nodemetric_types.go b/slo/v1alpha1/nodemetric_types.go index bd58312..632bfba 100644 --- 
a/slo/v1alpha1/nodemetric_types.go +++ b/slo/v1alpha1/nodemetric_types.go @@ -24,6 +24,17 @@ import ( // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. +// default NodeMemoryCollectPolicy is usageWithoutPageCache +// +kubebuilder:validation:Enum=usageWithHotPageCache;usageWithoutPageCache;usageWithPageCache +type NodeMemoryCollectPolicy string + +const ( + UsageWithoutPageCache NodeMemoryCollectPolicy = "usageWithoutPageCache" + UsageWithHotPageCache NodeMemoryCollectPolicy = "usageWithHotPageCache" + // TODO(BUPT-wxq): implement the UsageWithPageCache policy + UsageWithPageCache NodeMemoryCollectPolicy = "usageWithPageCache" +) + type NodeMetricInfo struct { // NodeUsage is the total resource usage of node NodeUsage ResourceMap `json:"nodeUsage,omitempty"` @@ -45,10 +56,25 @@ type PodMetricInfo struct { Name string `json:"name,omitempty"` Namespace string `json:"namespace,omitempty"` PodUsage ResourceMap `json:"podUsage,omitempty"` + // Priority class of the application + Priority apiext.PriorityClass `json:"priority,omitempty"` + // QoS class of the application + QoS apiext.QoSClass `json:"qos,omitempty"` // Third party extensions for PodMetric Extensions *ExtensionsMap `json:"extensions,omitempty"` } +type HostApplicationMetricInfo struct { + // Name of the host application + Name string `json:"name,omitempty"` + // Resource usage of the host application + Usage ResourceMap `json:"usage,omitempty"` + // Priority class of the application + Priority apiext.PriorityClass `json:"priority,omitempty"` + // QoS class of the application + QoS apiext.QoSClass `json:"qos,omitempty"` +} + // NodeMetricSpec defines the desired state of NodeMetric type NodeMetricSpec struct { // CollectPolicy defines the Metric collection policy @@ -63,6 +89,8 @@ type NodeMetricCollectPolicy struct { ReportIntervalSeconds *int64 `json:"reportIntervalSeconds,omitempty"` // 
 NodeAggregatePolicy represents the target grain of node aggregated usage NodeAggregatePolicy *AggregatePolicy `json:"nodeAggregatePolicy,omitempty"` + // NodeMemoryCollectPolicy represents which method is used to collect node memory usage info + NodeMemoryCollectPolicy *NodeMemoryCollectPolicy `json:"nodeMemoryCollectPolicy,omitempty"` } type AggregatePolicy struct { @@ -86,6 +114,9 @@ type NodeMetricStatus struct { // PodsMetric contains the metrics for pods belong to this node. PodsMetric []*PodMetricInfo `json:"podsMetric,omitempty"` + // HostApplicationMetric contains the metrics of out-of-band applications on node. + HostApplicationMetric []*HostApplicationMetricInfo `json:"hostApplicationMetric,omitempty"` + // ProdReclaimableMetric is the indicator statistics of Prod type resources reclaimable ProdReclaimableMetric *ReclaimableMetric `json:"prodReclaimableMetric,omitempty"` } diff --git a/slo/v1alpha1/nodeslo_types.go b/slo/v1alpha1/nodeslo_types.go index beaa8f0..798f243 100644 --- a/slo/v1alpha1/nodeslo_types.go +++ b/slo/v1alpha1/nodeslo_types.go @@ -17,7 +17,9 @@ limitations under the License. package v1alpha1 import ( + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" ) // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! @@ -26,9 +28,28 @@ import ( // CPUQOS enables cpu qos features. type CPUQOS struct { // group identity value for pods, default = 0 + // NOTE: It takes effect if cpuPolicy = "groupIdentity". GroupIdentity *int64 `json:"groupIdentity,omitempty" validate:"omitempty,min=-1,max=2"` + // cpu.idle value for pods, default = 0. + // `1` means using SCHED_IDLE. + // CGroup Idle (introduced since mainline Linux 5.15): https://lore.kernel.org/lkml/162971078674.25758.15464079371945307825.tip-bot2@tip-bot2/#r + // NOTE: It takes effect if cpuPolicy = "coreSched". 
+ SchedIdle *int64 `json:"schedIdle,omitempty" validate:"omitempty,min=0,max=1"` + // whether pods of the QoS class can expel the cgroup idle pods at the SMT-level. default = false + // If set to true, pods of this QoS will use a dedicated core sched group for noise clean with the SchedIdle pods. + // NOTE: It takes effect if cpuPolicy = "coreSched". + CoreExpeller *bool `json:"coreExpeller,omitempty"` } +type CPUQOSPolicy string + +const ( + // CPUQOSPolicyGroupIdentity indicates the Group Identity is applied to ensure the CPU QoS. + CPUQOSPolicyGroupIdentity CPUQOSPolicy = "groupIdentity" + // CPUQOSPolicyCoreSched indicates the Linux Core Scheduling and CGroup Idle is applied to ensure the CPU QoS. + CPUQOSPolicyCoreSched CPUQOSPolicy = "coreSched" +) + // MemoryQOS enables memory qos features. type MemoryQOS struct { // memcg qos @@ -183,9 +204,51 @@ type ResourceQOS struct { MemoryQOS *MemoryQOSCfg `json:"memoryQOS,omitempty"` BlkIOQOS *BlkIOQOSCfg `json:"blkioQOS,omitempty"` ResctrlQOS *ResctrlQOSCfg `json:"resctrlQOS,omitempty"` + NetworkQOS *NetworkQOSCfg `json:"networkQOS,omitempty"` +} + +type NetworkQOSCfg struct { + Enable *bool `json:"enable,omitempty"` + NetworkQOS `json:",inline"` +} + +type NetworkQOS struct { + // IngressRequest describes the minimum network bandwidth guaranteed in the ingress direction. + // unit: bps(bytes per second), two expressions are supported,int and string, + // int: percentage based on total bandwidth,valid in 0-100 + // string: a specific network bandwidth value, eg: 50M. + // +kubebuilder:default=0 + IngressRequest *intstr.IntOrString `json:"ingressRequest,omitempty"` + // IngressLimit describes the maximum network bandwidth can be used in the ingress direction, + // unit: bps(bytes per second), two expressions are supported,int and string, + // int: percentage based on total bandwidth,valid in 0-100 + // string: a specific network bandwidth value, eg: 50M. 
+ // +kubebuilder:default=100 + IngressLimit *intstr.IntOrString `json:"ingressLimit,omitempty"` + + // EgressRequest describes the minimum network bandwidth guaranteed in the egress direction. + // unit: bps(bytes per second), two expressions are supported,int and string, + // int: percentage based on total bandwidth,valid in 0-100 + // string: a specific network bandwidth value, eg: 50M. + // +kubebuilder:default=0 + EgressRequest *intstr.IntOrString `json:"egressRequest,omitempty"` + // EgressLimit describes the maximum network bandwidth can be used in the egress direction, + // unit: bps(bytes per second), two expressions are supported,int and string, + // int: percentage based on total bandwidth,valid in 0-100 + // string: a specific network bandwidth value, eg: 50M. + // +kubebuilder:default=100 + EgressLimit *intstr.IntOrString `json:"egressLimit,omitempty"` +} + +type ResourceQOSPolicies struct { + // applied policy for the CPU QoS, default = "groupIdentity" + CPUPolicy *CPUQOSPolicy `json:"cpuPolicy,omitempty"` } type ResourceQOSStrategy struct { + // Policies of pod QoS. + Policies *ResourceQOSPolicies `json:"policies,omitempty"` + // ResourceQOS for LSR pods. 
LSRClass *ResourceQOS `json:"lsrClass,omitempty"` @@ -209,6 +272,13 @@ const ( CPUCfsQuotaPolicy CPUSuppressPolicy = "cfsQuota" ) +type CPUEvictPolicy string + +const ( + EvictByRealLimitPolicy CPUEvictPolicy = "evictByRealLimit" + EvictByAllocatablePolicy CPUEvictPolicy = "evictByAllocatable" +) + type ResourceThresholdStrategy struct { // whether the strategy is enabled, default = false Enable *bool `json:"enable,omitempty"` @@ -241,6 +311,9 @@ type ResourceThresholdStrategy struct { // when avg(cpuusage) > CPUEvictThresholdPercent, will start to evict pod by cpu, // and avg(cpuusage) is calculated based on the most recent CPUEvictTimeWindowSeconds data CPUEvictTimeWindowSeconds *int64 `json:"cpuEvictTimeWindowSeconds,omitempty" validate:"omitempty,gt=0"` + // CPUEvictPolicy defines the policy for the BECPUEvict feature. + // Default: `evictByRealLimit`. + CPUEvictPolicy CPUEvictPolicy `json:"cpuEvictPolicy,omitempty"` } // ResctrlQOSCfg stores node-level config of resctrl qos @@ -303,6 +376,9 @@ type SystemStrategy struct { WatermarkScaleFactor *int64 `json:"watermarkScaleFactor,omitempty" validate:"omitempty,gt=0,max=400"` // /sys/kernel/mm/memcg_reaper/reap_background MemcgReapBackGround *int64 `json:"memcgReapBackGround,omitempty" validate:"omitempty,min=0,max=1"` + + // TotalNetworkBandwidth indicates the overall network bandwidth, cluster manager can set this field, and default value taken from /sys/class/net/${NIC_NAME}/speed, unit: Mbps + TotalNetworkBandwidth resource.Quantity `json:"totalNetworkBandwidth,omitempty"` } // NodeSLOSpec defines the desired state of NodeSLO @@ -317,6 +393,8 @@ type NodeSLOSpec struct { SystemStrategy *SystemStrategy `json:"systemStrategy,omitempty"` // Third party extensions for NodeSLO Extensions *ExtensionsMap `json:"extensions,omitempty"` + // QoS management for out-of-band applications + HostApplications []HostApplicationSpec `json:"hostApplications,omitempty"` } // NodeSLOStatus defines the observed state of NodeSLO 
diff --git a/slo/v1alpha1/pod.go b/slo/v1alpha1/pod.go index 8efa11a..99f1ddd 100644 --- a/slo/v1alpha1/pod.go +++ b/slo/v1alpha1/pod.go @@ -20,6 +20,7 @@ import ( "encoding/json" corev1 "k8s.io/api/core/v1" + "k8s.io/utils/pointer" apiext "github.com/koordinator-sh/apis/extension" ) @@ -64,3 +65,40 @@ func GetPodMemoryQoSConfig(pod *corev1.Pod) (*PodMemoryQOSConfig, error) { } return &cfg, nil } + +const ( + // LabelCoreSchedGroupID is the label key of the group ID of the Linux Core Scheduling. + // Value should be a valid UUID or the none value "0". + // When the value is a valid UUID, pods with that group ID and the equal CoreExpelled status on the node will be + // assigned to the same core sched cookie. + // When the value is the none value "0", pod will be reset to the default core sched cookie `0`. + // When the k-v pair is missing but the node-level strategy enables the core sched, the pod will be assigned an + // internal group according to the pod's UID. + // + // Core Sched: https://docs.kernel.org/admin-guide/hw-vuln/core-scheduling.html + // When the Core Sched is enabled, pods with the different core sched group IDs will not be running at the same SMT + // core at the same time, which means they will take different core sched cookies. If a pod sets the core sched + // disabled, it will take the default core sched cookie (0) and will also be force-idled to run on the same SMT core + // concurrently with the core-sched-enabled pods. In addition, the CoreExpelled configured in ResourceQOS also + // enables the individual cookie from pods of other QoS classes via adding a suffix for the group ID. So the pods + // of different QoS will take different cookies when their CoreExpelled status are diverse even if their group ID + // are the same. + LabelCoreSchedGroupID = apiext.DomainPrefix + "core-sched-group-id" + + // CoreSchedGroupIDNone is the none value of the core sched group ID which indicates the core sched is disabled for + // the pod. 
The pod will be reset to the system-default cookie `0`. + CoreSchedGroupIDNone = "0" +) + +// GetCoreSchedGroupID gets the core sched group ID from the pod labels. +// It returns the core sched group ID and whether the pod explicitly disables the core sched. +func GetCoreSchedGroupID(labels map[string]string) (string, *bool) { + if labels == nil { + return "", nil + } + value, ok := labels[LabelCoreSchedGroupID] + if !ok { + return "", nil + } + return value, pointer.Bool(value == CoreSchedGroupIDNone) +} diff --git a/slo/v1alpha1/zz_generated.deepcopy.go b/slo/v1alpha1/zz_generated.deepcopy.go index cee9996..adc936c 100644 --- a/slo/v1alpha1/zz_generated.deepcopy.go +++ b/slo/v1alpha1/zz_generated.deepcopy.go @@ -24,9 +24,10 @@ package v1alpha1 import ( "github.com/koordinator-sh/apis/extension" schedulingv1alpha1 "github.com/koordinator-sh/apis/scheduling/v1alpha1" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/intstr" ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. @@ -34,7 +35,7 @@ func (in *AggregatePolicy) DeepCopyInto(out *AggregatePolicy) { *out = *in if in.Durations != nil { in, out := &in.Durations, &out.Durations - *out = make([]v1.Duration, len(*in)) + *out = make([]metav1.Duration, len(*in)) copy(*out, *in) } } @@ -194,6 +195,16 @@ func (in *CPUQOS) DeepCopyInto(out *CPUQOS) { *out = new(int64) **out = **in } + if in.SchedIdle != nil { + in, out := &in.SchedIdle, &out.SchedIdle + *out = new(int64) + **out = **in + } + if in.CoreExpeller != nil { + in, out := &in.CoreExpeller, &out.CoreExpeller + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CPUQOS. 
@@ -227,6 +238,77 @@ func (in *CPUQOSCfg) DeepCopy() *CPUQOSCfg { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CgroupPath) DeepCopyInto(out *CgroupPath) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CgroupPath. +func (in *CgroupPath) DeepCopy() *CgroupPath { + if in == nil { + return nil + } + out := new(CgroupPath) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostApplicationMetricInfo) DeepCopyInto(out *HostApplicationMetricInfo) { + *out = *in + in.Usage.DeepCopyInto(&out.Usage) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostApplicationMetricInfo. +func (in *HostApplicationMetricInfo) DeepCopy() *HostApplicationMetricInfo { + if in == nil { + return nil + } + out := new(HostApplicationMetricInfo) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostApplicationSpec) DeepCopyInto(out *HostApplicationSpec) { + *out = *in + if in.CgroupPath != nil { + in, out := &in.CgroupPath, &out.CgroupPath + *out = new(CgroupPath) + **out = **in + } + if in.Strategy != nil { + in, out := &in.Strategy, &out.Strategy + *out = new(HostApplicationStrategy) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostApplicationSpec. +func (in *HostApplicationSpec) DeepCopy() *HostApplicationSpec { + if in == nil { + return nil + } + out := new(HostApplicationSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *HostApplicationStrategy) DeepCopyInto(out *HostApplicationStrategy) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostApplicationStrategy. +func (in *HostApplicationStrategy) DeepCopy() *HostApplicationStrategy { + if in == nil { + return nil + } + out := new(HostApplicationStrategy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *IOCfg) DeepCopyInto(out *IOCfg) { *out = *in @@ -358,6 +440,62 @@ func (in *MemoryQOSCfg) DeepCopy() *MemoryQOSCfg { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NetworkQOS) DeepCopyInto(out *NetworkQOS) { + *out = *in + if in.IngressRequest != nil { + in, out := &in.IngressRequest, &out.IngressRequest + *out = new(intstr.IntOrString) + **out = **in + } + if in.IngressLimit != nil { + in, out := &in.IngressLimit, &out.IngressLimit + *out = new(intstr.IntOrString) + **out = **in + } + if in.EgressRequest != nil { + in, out := &in.EgressRequest, &out.EgressRequest + *out = new(intstr.IntOrString) + **out = **in + } + if in.EgressLimit != nil { + in, out := &in.EgressLimit, &out.EgressLimit + *out = new(intstr.IntOrString) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NetworkQOS. +func (in *NetworkQOS) DeepCopy() *NetworkQOS { + if in == nil { + return nil + } + out := new(NetworkQOS) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *NetworkQOSCfg) DeepCopyInto(out *NetworkQOSCfg) { + *out = *in + if in.Enable != nil { + in, out := &in.Enable, &out.Enable + *out = new(bool) + **out = **in + } + in.NetworkQOS.DeepCopyInto(&out.NetworkQOS) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NetworkQOSCfg. +func (in *NetworkQOSCfg) DeepCopy() *NetworkQOSCfg { + if in == nil { + return nil + } + out := new(NetworkQOSCfg) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NodeMetric) DeepCopyInto(out *NodeMetric) { *out = *in @@ -403,6 +541,11 @@ func (in *NodeMetricCollectPolicy) DeepCopyInto(out *NodeMetricCollectPolicy) { *out = new(AggregatePolicy) (*in).DeepCopyInto(*out) } + if in.NodeMemoryCollectPolicy != nil { + in, out := &in.NodeMemoryCollectPolicy, &out.NodeMemoryCollectPolicy + *out = new(NodeMemoryCollectPolicy) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeMetricCollectPolicy. 
@@ -521,6 +664,17 @@ func (in *NodeMetricStatus) DeepCopyInto(out *NodeMetricStatus) { } } } + if in.HostApplicationMetric != nil { + in, out := &in.HostApplicationMetric, &out.HostApplicationMetric + *out = make([]*HostApplicationMetricInfo, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(HostApplicationMetricInfo) + (*in).DeepCopyInto(*out) + } + } + } if in.ProdReclaimableMetric != nil { in, out := &in.ProdReclaimableMetric, &out.ProdReclaimableMetric *out = new(ReclaimableMetric) @@ -624,6 +778,13 @@ func (in *NodeSLOSpec) DeepCopyInto(out *NodeSLOSpec) { in, out := &in.Extensions, &out.Extensions *out = (*in).DeepCopy() } + if in.HostApplications != nil { + in, out := &in.HostApplications, &out.HostApplications + *out = make([]HostApplicationSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeSLOSpec. @@ -651,6 +812,28 @@ func (in *NodeSLOStatus) DeepCopy() *NodeSLOStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OriginAllocatable) DeepCopyInto(out *OriginAllocatable) { + *out = *in + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = make(v1.ResourceList, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OriginAllocatable. +func (in *OriginAllocatable) DeepCopy() *OriginAllocatable { + if in == nil { + return nil + } + out := new(OriginAllocatable) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *PodMemoryQOSConfig) DeepCopyInto(out *PodMemoryQOSConfig) { *out = *in @@ -759,7 +942,7 @@ func (in *ResourceMap) DeepCopyInto(out *ResourceMap) { *out = *in if in.ResourceList != nil { in, out := &in.ResourceList, &out.ResourceList - *out = make(corev1.ResourceList, len(*in)) + *out = make(v1.ResourceList, len(*in)) for key, val := range *in { (*out)[key] = val.DeepCopy() } @@ -806,6 +989,11 @@ func (in *ResourceQOS) DeepCopyInto(out *ResourceQOS) { *out = new(ResctrlQOSCfg) (*in).DeepCopyInto(*out) } + if in.NetworkQOS != nil { + in, out := &in.NetworkQOS, &out.NetworkQOS + *out = new(NetworkQOSCfg) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceQOS. @@ -818,9 +1006,34 @@ func (in *ResourceQOS) DeepCopy() *ResourceQOS { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ResourceQOSPolicies) DeepCopyInto(out *ResourceQOSPolicies) { + *out = *in + if in.CPUPolicy != nil { + in, out := &in.CPUPolicy, &out.CPUPolicy + *out = new(CPUQOSPolicy) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceQOSPolicies. +func (in *ResourceQOSPolicies) DeepCopy() *ResourceQOSPolicies { + if in == nil { + return nil + } + out := new(ResourceQOSPolicies) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ResourceQOSStrategy) DeepCopyInto(out *ResourceQOSStrategy) { *out = *in + if in.Policies != nil { + in, out := &in.Policies, &out.Policies + *out = new(ResourceQOSPolicies) + (*in).DeepCopyInto(*out) + } if in.LSRClass != nil { in, out := &in.LSRClass, &out.LSRClass *out = new(ResourceQOS) @@ -931,6 +1144,7 @@ func (in *SystemStrategy) DeepCopyInto(out *SystemStrategy) { *out = new(int64) **out = **in } + out.TotalNetworkBandwidth = in.TotalNetworkBandwidth.DeepCopy() } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SystemStrategy. @@ -942,3 +1156,47 @@ func (in *SystemStrategy) DeepCopy() *SystemStrategy { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ThirdPartyAllocation) DeepCopyInto(out *ThirdPartyAllocation) { + *out = *in + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = make(v1.ResourceList, len(*in)) + for key, val := range *in { + (*out)[key] = val.DeepCopy() + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ThirdPartyAllocation. +func (in *ThirdPartyAllocation) DeepCopy() *ThirdPartyAllocation { + if in == nil { + return nil + } + out := new(ThirdPartyAllocation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ThirdPartyAllocations) DeepCopyInto(out *ThirdPartyAllocations) { + *out = *in + if in.Allocations != nil { + in, out := &in.Allocations, &out.Allocations + *out = make([]ThirdPartyAllocation, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ThirdPartyAllocations. 
+func (in *ThirdPartyAllocations) DeepCopy() *ThirdPartyAllocations { + if in == nil { + return nil + } + out := new(ThirdPartyAllocations) + in.DeepCopyInto(out) + return out +}