Skip to content

Commit

Permalink
release v1.2.0
Browse files Browse the repository at this point in the history
Signed-off-by: 佑祎 <zzw261520@alibaba-inc.com>
  • Loading branch information
zwzhang0107 committed Apr 12, 2023
1 parent 15e9a59 commit e158fb3
Show file tree
Hide file tree
Showing 13 changed files with 119 additions and 124 deletions.
14 changes: 7 additions & 7 deletions client/clientset/versioned/fake/register.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 7 additions & 7 deletions client/clientset/versioned/scheme/register.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion extension/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import corev1 "k8s.io/api/core/v1"

const (
DomainPrefix = "koordinator.sh/"
// use prefix "kubernetes.io/" for extend resource
// ResourceDomainPrefix is a prefix "kubernetes.io/" used by particular extend resources (e.g. batch resources)
ResourceDomainPrefix = corev1.ResourceDefaultNamespacePrefix
// SchedulingDomainPrefix represents the scheduling domain prefix
SchedulingDomainPrefix = "scheduling.koordinator.sh"
Expand Down
6 changes: 3 additions & 3 deletions extension/elastic_quota.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ import (

// RootQuotaName means quotaTree's root/head.
const (
SystemQuotaName = "system"
RootQuotaName = "root"
DefaultQuotaName = "default"
SystemQuotaName = "koordinator-system-quota"
RootQuotaName = "koordinator-root-quota"
DefaultQuotaName = "koordinator-default-quota"
QuotaKoordinatorPrefix = "quota.scheduling.koordinator.sh"
LabelQuotaIsParent = QuotaKoordinatorPrefix + "/is-parent"
LabelQuotaParent = QuotaKoordinatorPrefix + "/parent"
Expand Down
1 change: 1 addition & 0 deletions extension/priority.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

type PriorityClass string

// https://koordinator.sh/docs/architecture/priority/
const (
PriorityProd PriorityClass = "koord-prod"
PriorityMid PriorityClass = "koord-mid"
Expand Down
18 changes: 1 addition & 17 deletions extension/qos.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ limitations under the License.

package extension

import corev1 "k8s.io/api/core/v1"

type QoSClass string

// https://koordinator.sh/docs/architecture/qos/
const (
QoSLSE QoSClass = "LSE"
QoSLSR QoSClass = "LSR"
Expand All @@ -29,21 +28,6 @@ const (
QoSNone QoSClass = ""
)

func GetPodQoSClass(pod *corev1.Pod) QoSClass {
if pod == nil || pod.Labels == nil {
return QoSNone
}
return GetQoSClassByAttrs(pod.Labels, pod.Annotations)
}

func GetQoSClassByAttrs(labels, annotations map[string]string) QoSClass {
// annotations are for old format adaption reason
if q, exist := labels[LabelPodQoS]; exist {
return GetPodQoSClassByName(q)
}
return QoSNone
}

func GetPodQoSClassByName(qos string) QoSClass {
q := QoSClass(qos)

Expand Down
40 changes: 19 additions & 21 deletions extension/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,25 @@ import (
"encoding/json"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
// Deprecated: because of the limitation of extended resource naming
KoordBatchCPU corev1.ResourceName = DomainPrefix + "batch-cpu"
// Deprecated: because of the limitation of extended resource naming
KoordBatchMemory corev1.ResourceName = DomainPrefix + "batch-memory"

BatchCPU corev1.ResourceName = ResourceDomainPrefix + "batch-cpu"
BatchMemory corev1.ResourceName = ResourceDomainPrefix + "batch-memory"

KoordRDMA corev1.ResourceName = ResourceDomainPrefix + "rdma"
KoordFPGA corev1.ResourceName = ResourceDomainPrefix + "fpga"

KoordGPU corev1.ResourceName = ResourceDomainPrefix + "gpu"
NvidiaGPU corev1.ResourceName = "nvidia.com/gpu"

GPUCore corev1.ResourceName = ResourceDomainPrefix + "gpu-core"
GPUMemory corev1.ResourceName = ResourceDomainPrefix + "gpu-memory"
GPUMemoryRatio corev1.ResourceName = ResourceDomainPrefix + "gpu-memory-ratio"
ResourceNvidiaGPU corev1.ResourceName = "nvidia.com/gpu"
ResourceRDMA corev1.ResourceName = DomainPrefix + "rdma"
ResourceFPGA corev1.ResourceName = DomainPrefix + "fpga"
ResourceGPU corev1.ResourceName = DomainPrefix + "gpu"
ResourceGPUCore corev1.ResourceName = DomainPrefix + "gpu-core"
ResourceGPUMemory corev1.ResourceName = DomainPrefix + "gpu-memory"
ResourceGPUMemoryRatio corev1.ResourceName = DomainPrefix + "gpu-memory-ratio"
)

GPUDriver string = ResourceDomainPrefix + "gpu-driver"
GPUModel string = ResourceDomainPrefix + "gpu-model"
const (
LabelGPUModel string = NodeDomainPrefix + "/gpu-model"
LabelGPUDriverVersion string = NodeDomainPrefix + "/gpu-driver-version"
)

const (
Expand Down Expand Up @@ -160,18 +156,20 @@ func GetResourceStatus(annotations map[string]string) (*ResourceStatus, error) {
return resourceStatus, nil
}

func SetResourceStatus(pod *corev1.Pod, status *ResourceStatus) error {
if pod == nil {
func SetResourceStatus(obj metav1.Object, status *ResourceStatus) error {
if obj == nil {
return nil
}
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
annotations := obj.GetAnnotations()
if annotations == nil {
annotations = map[string]string{}
}
data, err := json.Marshal(status)
if err != nil {
return err
}
pod.Annotations[AnnotationResourceStatus] = string(data)
annotations[AnnotationResourceStatus] = string(data)
obj.SetAnnotations(annotations)
return nil
}

Expand Down
76 changes: 15 additions & 61 deletions extension/scheduling.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,11 @@ import (

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"

schedulingv1alpha1 "github.com/koordinator-sh/apis/scheduling/v1alpha1"
slov1alpha1 "github.com/koordinator-sh/apis/slo/v1alpha1"
)

const (
// LabelReservationOrder controls the preference logic for Reservation.
// Reservation with lower order is preferred to be selected before Reservation with higher order.
// But if it is 0, Reservation will be selected according to the capacity score.
LabelReservationOrder = SchedulingDomainPrefix + "/reservation-order"

// AnnotationReservationAllocated represents the reservation allocated by the pod.
AnnotationReservationAllocated = SchedulingDomainPrefix + "/reservation-allocated"
)

const (
// AnnotationCustomUsageThresholds represents the user-defined resource utilization threshold.
// For specific value definitions, see CustomUsageThresholds
Expand Down Expand Up @@ -78,6 +67,13 @@ const (
GangModeNonStrict = "NonStrict"
)

const (
// Deprecated: kubernetes-sigs/scheduler-plugins/lightweight-coscheduling
LabelLightweightCoschedulingPodGroupName = "pod-group.scheduling.sigs.k8s.io/name"
// Deprecated: kubernetes-sigs/scheduler-plugins/lightweight-coscheduling
LabelLightweightCoschedulingPodGroupMinAvailable = "pod-group.scheduling.sigs.k8s.io/min-available"
)

// CustomUsageThresholds supports user-defined node resource utilization thresholds.
type CustomUsageThresholds struct {
// UsageThresholds indicates the resource utilization threshold of the whole machine.
Expand Down Expand Up @@ -110,51 +106,6 @@ func GetCustomUsageThresholds(node *corev1.Node) (*CustomUsageThresholds, error)
return usageThresholds, nil
}

type ReservationAllocated struct {
Name string `json:"name,omitempty"`
UID types.UID `json:"uid,omitempty"`
}

func GetReservationAllocated(pod *corev1.Pod) (*ReservationAllocated, error) {
if pod.Annotations == nil {
return nil, nil
}
data, ok := pod.Annotations[AnnotationReservationAllocated]
if !ok {
return nil, nil
}
reservationAllocated := &ReservationAllocated{}
err := json.Unmarshal([]byte(data), reservationAllocated)
if err != nil {
return nil, err
}
return reservationAllocated, nil
}

func SetReservationAllocated(pod *corev1.Pod, r *schedulingv1alpha1.Reservation) {
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
}
reservationAllocated := &ReservationAllocated{
Name: r.Name,
UID: r.UID,
}
data, _ := json.Marshal(reservationAllocated) // assert no error
pod.Annotations[AnnotationReservationAllocated] = string(data)
}

func RemoveReservationAllocated(pod *corev1.Pod, r *schedulingv1alpha1.Reservation) (bool, error) {
reservationAllocated, err := GetReservationAllocated(pod)
if err != nil {
return false, err
}
if reservationAllocated != nil && reservationAllocated.Name == r.Name && reservationAllocated.UID == r.UID {
delete(pod.Annotations, AnnotationReservationAllocated)
return true, nil
}
return false, nil
}

// DeviceAllocations would be injected into Pod as form of annotation during Pre-bind stage.
/*
{
Expand Down Expand Up @@ -183,9 +134,10 @@ type DeviceAllocations map[schedulingv1alpha1.DeviceType][]*DeviceAllocation
type DeviceAllocation struct {
Minor int32 `json:"minor"`
Resources corev1.ResourceList `json:"resources"`
Extension json.RawMessage `json:"extension,omitempty"`
}

func GetDeviceAllocations(podAnnotations map[string]string) (DeviceAllocations, error) {
var GetDeviceAllocations = func(podAnnotations map[string]string) (DeviceAllocations, error) {
deviceAllocations := DeviceAllocations{}
data, ok := podAnnotations[AnnotationDeviceAllocated]
if !ok {
Expand All @@ -198,17 +150,19 @@ func GetDeviceAllocations(podAnnotations map[string]string) (DeviceAllocations,
return deviceAllocations, nil
}

func SetDeviceAllocations(pod *corev1.Pod, allocations DeviceAllocations) error {
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
func SetDeviceAllocations(obj metav1.Object, allocations DeviceAllocations) error {
annotations := obj.GetAnnotations()
if annotations == nil {
annotations = map[string]string{}
}

data, err := json.Marshal(allocations)
if err != nil {
return err
}

pod.Annotations[AnnotationDeviceAllocated] = string(data)
annotations[AnnotationDeviceAllocated] = string(data)
obj.SetAnnotations(annotations)
return nil
}

Expand Down
2 changes: 2 additions & 0 deletions scheduling/v1alpha1/pod_migration_job_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ const (
PodMigrationJobConditionEviction PodMigrationJobConditionType = "Eviction"
PodMigrationJobConditionPodScheduled PodMigrationJobConditionType = "PodScheduled"
PodMigrationJobConditionReservationPodBoundReservation PodMigrationJobConditionType = "PodBoundReservation"
PodMigrationJobConditionBoundPodReady PodMigrationJobConditionType = "BoundPodReady"
PodMigrationJobConditionReservationBound PodMigrationJobConditionType = "ReservationBound"
)

Expand All @@ -181,6 +182,7 @@ const (
PodMigrationJobReasonFailedEvict = "FailedEvict"
PodMigrationJobReasonEvictComplete = "EvictComplete"
PodMigrationJobReasonWaitForPodBindReservation = "WaitForPodBindReservation"
PodMigrationJobReasonWaitForBoundPodReady = "WaitForBoundPodReady"
)

type PodMigrationJobConditionStatus string
Expand Down
6 changes: 3 additions & 3 deletions scheduling/v1alpha1/reservation_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ type ReservationSpec struct {
// reservation would be waiting to be available until free resources are sufficient.
// +optional
PreAllocation bool `json:"preAllocation,omitempty"`
// By default, reserved resources are always allocatable as long as the reservation phase is Available. When
// `AllocateOnce` is set, the reserved resources are only available for the first owner who allocates successfully
// and are not allocatable to other owners anymore.
// When `AllocateOnce` is set, the reserved resources are only available for the first owner who allocates successfully
// and are not allocatable to other owners anymore. Defaults to true.
// +kubebuilder:default=true
// +optional
AllocateOnce bool `json:"allocateOnce,omitempty"`
}
Expand Down
2 changes: 2 additions & 0 deletions slo/v1alpha1/nodemetric_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ type PodMetricInfo struct {
Name string `json:"name,omitempty"`
Namespace string `json:"namespace,omitempty"`
PodUsage ResourceMap `json:"podUsage,omitempty"`
// Third party extensions for PodMetric
Extensions *ExtensionsMap `json:"extensions,omitempty"`
}

// NodeMetricSpec defines the desired state of NodeMetric
Expand Down
23 changes: 19 additions & 4 deletions slo/v1alpha1/nodeslo_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,13 +175,17 @@ type ResourceThresholdStrategy struct {
// +kubebuilder:validation:Minimum=0
MemoryEvictLowerPercent *int64 `json:"memoryEvictLowerPercent,omitempty"`

// if be CPU RealLimit/allocatedLimit > CPUEvictBESatisfactionUpperPercent/100, then stop evict BE pods
// be.satisfactionRate = be.CPURealLimit/be.CPURequest
// if be.satisfactionRate > CPUEvictBESatisfactionUpperPercent/100, then stop evicting.
CPUEvictBESatisfactionUpperPercent *int64 `json:"cpuEvictBESatisfactionUpperPercent,omitempty"`
// if be CPU (RealLimit/allocatedLimit < CPUEvictBESatisfactionLowerPercent/100 and usage >= CPUEvictBEUsageThresholdPercent/100) continue CPUEvictTimeWindowSeconds, then start evict
// be.satisfactionRate = be.CPURealLimit/be.CPURequest; be.cpuUsage = be.CPUUsed/be.CPURealLimit
// if be.satisfactionRate < CPUEvictBESatisfactionLowerPercent/100 && be.usage >= CPUEvictBEUsageThresholdPercent/100,
// then start to evict pod, and will evict to ${CPUEvictBESatisfactionUpperPercent}
CPUEvictBESatisfactionLowerPercent *int64 `json:"cpuEvictBESatisfactionLowerPercent,omitempty"`
// if be CPU (RealLimit/allocatedLimit < CPUEvictBESatisfactionLowerPercent/100 and usage >= CPUEvictBEUsageThresholdPercent/100) continue CPUEvictTimeWindowSeconds, then start evict
// if be.cpuUsage >= CPUEvictBEUsageThresholdPercent/100, then start to calculate the resources need to be released.
CPUEvictBEUsageThresholdPercent *int64 `json:"cpuEvictBEUsageThresholdPercent,omitempty"`
// cpu evict start after continue avg(cpuusage) > CPUEvictThresholdPercent in seconds
// when avg(cpuusage) > CPUEvictThresholdPercent, will start to evict pod by cpu,
// and avg(cpuusage) is calculated based on the most recent CPUEvictTimeWindowSeconds data
CPUEvictTimeWindowSeconds *int64 `json:"cpuEvictTimeWindowSeconds,omitempty"`
}

Expand Down Expand Up @@ -238,6 +242,15 @@ type CPUBurstStrategy struct {
SharePoolThresholdPercent *int64 `json:"sharePoolThresholdPercent,omitempty"`
}

type SystemStrategy struct {
// for /proc/sys/vm/min_free_kbytes, min_free_kbytes = minFreeKbytesFactor * nodeTotalMemory /10000
MinFreeKbytesFactor *int64 `json:"minFreeKbytesFactor,omitempty"`
// /proc/sys/vm/watermark_scale_factor
WatermarkScaleFactor *int64 `json:"watermarkScaleFactor,omitempty"`
// /sys/kernel/mm/memcg_reaper/reap_background
MemcgReapBackGround *int64 `json:"memcgReapBackGround,omitempty"`
}

// NodeSLOSpec defines the desired state of NodeSLO
type NodeSLOSpec struct {
// BE pods will be limited if node resource usage overload
Expand All @@ -246,6 +259,8 @@ type NodeSLOSpec struct {
ResourceQOSStrategy *ResourceQOSStrategy `json:"resourceQOSStrategy,omitempty"`
// CPU Burst Strategy
CPUBurstStrategy *CPUBurstStrategy `json:"cpuBurstStrategy,omitempty"`
// node-level global system config
SystemStrategy *SystemStrategy `json:"systemStrategy,omitempty"`
// Third party extensions for NodeSLO
Extensions *ExtensionsMap `json:"extensions,omitempty"`
}
Expand Down
Loading

0 comments on commit e158fb3

Please sign in to comment.