Skip to content

Commit

Permalink
koord-scheduler: support load-aware scheduling (#159)
Browse files Browse the repository at this point in the history
The scheduling plugin filters abnormal nodes and scores them according
to resource usage. The plugin extends the Filter/Score/Reserve/Unreserve
extension points defined in the Kubernetes scheduling framework.

FYI: docs/proposals/scheduling/20220510-load-aware-scheduling.md

Fix #95

Signed-off-by: Tao Li <joseph.t.lee@outlook.com>
  • Loading branch information
eahydra committed May 26, 2022
1 parent 093647a commit 6ef657a
Show file tree
Hide file tree
Showing 36 changed files with 4,121 additions and 28 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust
.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
@hack/generate_client.sh
@hack/update-scheduler-codegen.sh
$(CONTROLLER_GEN) object:headerFile="$(LICENSE_HEADER_GO)" paths="./apis/..."

.PHONY: fmt
Expand Down
8 changes: 8 additions & 0 deletions apis/extension/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,11 @@ var (
},
}
)

// TranslateResourceNameByPriorityClass translates defaultResourceName to extend resourceName by PriorityClass
func TranslateResourceNameByPriorityClass(priorityClass PriorityClass, defaultResourceName corev1.ResourceName) corev1.ResourceName {
if priorityClass == PriorityProd || priorityClass == PriorityNone {
return defaultResourceName
}
return ResourceNameMap[priorityClass][defaultResourceName]
}
47 changes: 47 additions & 0 deletions apis/extension/scheduling.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package extension

import (
"encoding/json"

corev1 "k8s.io/api/core/v1"
)

const (
// AnnotationCustomUsageThresholds represents the user-defined resource utilization threshold.
// For specific value definitions, see CustomUsageThresholds
AnnotationCustomUsageThresholds = "scheduling.koordinator.sh/usage-thresholds"
)

// CustomUsageThresholds supports user-defined node resource utilization thresholds.
type CustomUsageThresholds struct {
UsageThresholds map[corev1.ResourceName]int64 `json:"usageThresholds,omitempty"`
}

func GetCustomUsageThresholds(node *corev1.Node) (*CustomUsageThresholds, error) {
usageThresholds := &CustomUsageThresholds{}
data, ok := node.Annotations[AnnotationCustomUsageThresholds]
if !ok {
return usageThresholds, nil
}
err := json.Unmarshal([]byte(data), usageThresholds)
if err != nil {
return nil, err
}
return usageThresholds, nil
}
48 changes: 48 additions & 0 deletions cmd/koord-scheduler/app/config/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
schedulerappconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config"

koordinatorclientset "github.com/koordinator-sh/koordinator/pkg/client/clientset/versioned"
koordinatorinformers "github.com/koordinator-sh/koordinator/pkg/client/informers/externalversions"
)

// Config has all the context to run a Scheduler
type Config struct {
*schedulerappconfig.Config
KoordinatorClient koordinatorclientset.Interface
KoordinatorSharedInformerFactory koordinatorinformers.SharedInformerFactory
}

type completedConfig struct {
*Config
}

// CompletedConfig same as Config, just to swap private object.
type CompletedConfig struct {
// Embed a private pointer that cannot be instantiated outside of this package.
*completedConfig
}

// Complete fills in any fields not set that are required to have valid data. It's mutating the receiver.
func (c *Config) Complete() CompletedConfig {
c.Config.Complete()
cc := completedConfig{c}
return CompletedConfig{&cc}
}
58 changes: 58 additions & 0 deletions cmd/koord-scheduler/app/options/options.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package options

import (
scheduleroptions "k8s.io/kubernetes/cmd/kube-scheduler/app/options"

schedulerappconfig "github.com/koordinator-sh/koordinator/cmd/koord-scheduler/app/config"
koordinatorclientset "github.com/koordinator-sh/koordinator/pkg/client/clientset/versioned"
koordinatorinformers "github.com/koordinator-sh/koordinator/pkg/client/informers/externalversions"
)

// Options has all the params needed to run a Scheduler
type Options struct {
*scheduleroptions.Options
}

// NewOptions returns default scheduler app options.
func NewOptions() *Options {
return &Options{
Options: scheduleroptions.NewOptions(),
}
}

// Config return a scheduler config object
func (o *Options) Config() (*schedulerappconfig.Config, error) {
config, err := o.Options.Config()
if err != nil {
return nil, err
}

koordinatorClient, err := koordinatorclientset.NewForConfig(config.KubeConfig)
if err != nil {
return nil, err
}

koordinatorSharedInformerFactory := koordinatorinformers.NewSharedInformerFactoryWithOptions(koordinatorClient, 0)

return &schedulerappconfig.Config{
Config: config,
KoordinatorClient: koordinatorClient,
KoordinatorSharedInformerFactory: koordinatorSharedInformerFactory,
}, nil
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright 2014 The Kubernetes Authors.
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -25,7 +25,6 @@ import (
goruntime "runtime"

"github.com/spf13/cobra"

utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apiserver/pkg/authentication/authenticator"
"k8s.io/apiserver/pkg/authorization/authorizer"
Expand All @@ -49,33 +48,35 @@ import (
"k8s.io/component-base/version"
"k8s.io/component-base/version/verflag"
"k8s.io/klog/v2"
schedulerserverconfig "k8s.io/kubernetes/cmd/kube-scheduler/app/config"
"k8s.io/kubernetes/cmd/kube-scheduler/app/options"
scheduleroptions "k8s.io/kubernetes/cmd/kube-scheduler/app/options"
"k8s.io/kubernetes/pkg/scheduler"
kubeschedulerconfig "k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/apis/config/latest"
"k8s.io/kubernetes/pkg/scheduler/framework/runtime"
"k8s.io/kubernetes/pkg/scheduler/metrics/resources"
"k8s.io/kubernetes/pkg/scheduler/profile"

schedulerserverconfig "github.com/koordinator-sh/koordinator/cmd/koord-scheduler/app/config"
"github.com/koordinator-sh/koordinator/cmd/koord-scheduler/app/options"
"github.com/koordinator-sh/koordinator/pkg/scheduler/frameworkext"
)

// Option configures a framework.Registry.
type Option func(runtime.Registry) error
type Option func(frameworkext.ExtendedHandle, runtime.Registry) error

// NewSchedulerCommand creates a *cobra.Command object with default parameters and registryOptions
func NewSchedulerCommand(registryOptions ...Option) *cobra.Command {
opts := options.NewOptions()

cmd := &cobra.Command{
Use: "kube-scheduler",
Long: `The Kubernetes scheduler is a control plane process which assigns
Pods to Nodes. The scheduler determines which Nodes are valid placements for
each Pod in the scheduling queue according to constraints and available
resources. The scheduler then ranks each valid Node and binds the Pod to a
suitable Node. Multiple different schedulers may be used within a cluster;
kube-scheduler is the reference implementation.
See [scheduling](https://kubernetes.io/docs/concepts/scheduling-eviction/)
for more information about scheduling and the kube-scheduler component.`,
Use: "koord-scheduler",
Long: `The Koordinator scheduler is a control plane process which assigns
Pods to Nodes. The scheduler implements based on kubernetes scheduling framework.
On the basis of compatibility with community scheduling capabilities, it provides
richer advanced scheduling capabilities to address scheduling needs in co-located
scenarios,ensuring the runtime quality of different workloads and users' demands
for cost reduction and efficiency enhancement.
`,
Run: func(cmd *cobra.Command, args []string) {
if err := runCommand(cmd, opts, registryOptions...); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
Expand Down Expand Up @@ -132,7 +133,7 @@ func runCommand(cmd *cobra.Command, opts *options.Options, registryOptions ...Op
// Run executes the scheduler based on the given configuration. It only returns on error or when context is done.
func Run(ctx context.Context, cc *schedulerserverconfig.CompletedConfig, sched *scheduler.Scheduler) error {
// To help debugging, immediately log version
klog.V(1).InfoS("Starting Kubernetes Scheduler version", "version", version.Get())
klog.V(1).InfoS("Starting Koordinator Scheduler version", "version", version.Get())

// Configz registration.
if cz, err := configz.New("componentconfig"); err == nil {
Expand Down Expand Up @@ -187,9 +188,11 @@ func Run(ctx context.Context, cc *schedulerserverconfig.CompletedConfig, sched *

// Start all informers.
cc.InformerFactory.Start(ctx.Done())
cc.KoordinatorSharedInformerFactory.Start(ctx.Done())

// Wait for all caches to sync before scheduling.
cc.InformerFactory.WaitForCacheSync(ctx.Done())
cc.KoordinatorSharedInformerFactory.WaitForCacheSync(ctx.Done())

// If leader election is enabled, runCommand via LeaderElector until done and exit.
if cc.LeaderElection != nil {
Expand Down Expand Up @@ -296,8 +299,8 @@ func getRecorderFactory(cc *schedulerserverconfig.CompletedConfig) profile.Recor
// WithPlugin creates an Option based on plugin name and factory. Please don't remove this function: it is used to register out-of-tree plugins,
// hence there are no references to it from the kubernetes scheduler code base.
func WithPlugin(name string, factory runtime.PluginFactory) Option {
return func(registry runtime.Registry) error {
return registry.Register(name, factory)
return func(handle frameworkext.ExtendedHandle, registry runtime.Registry) error {
return registry.Register(name, frameworkext.PluginFactoryProxy(handle, factory))
}
}

Expand All @@ -321,9 +324,13 @@ func Setup(ctx context.Context, opts *options.Options, outOfTreeRegistryOptions
// Get the completed config
cc := c.Complete()

// NOTE(joseph): K8s scheduling framework does not provide extension point for initialization.
// Currently, only by copying the initialization code and implementing custom initialization.
extendedHandle := frameworkext.NewExtendedHandle(cc.KoordinatorClient, cc.KoordinatorSharedInformerFactory)

outOfTreeRegistry := make(runtime.Registry)
for _, option := range outOfTreeRegistryOptions {
if err := option(outOfTreeRegistry); err != nil {
if err := option(extendedHandle, outOfTreeRegistry); err != nil {
return nil, nil, err
}
}
Expand Down Expand Up @@ -353,9 +360,12 @@ func Setup(ctx context.Context, opts *options.Options, outOfTreeRegistryOptions
if err != nil {
return nil, nil, err
}
if err := options.LogOrWriteConfig(opts.WriteConfigTo, &cc.ComponentConfig, completedProfiles); err != nil {
if err := scheduleroptions.LogOrWriteConfig(opts.WriteConfigTo, &cc.ComponentConfig, completedProfiles); err != nil {
return nil, nil, err
}

// TODO(joseph): Some extensions can also be made in the future,
// such as replacing some interfaces in Scheduler to implement custom logic

return &cc, sched, nil
}
11 changes: 9 additions & 2 deletions cmd/koord-scheduler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@ import (
"time"

"k8s.io/component-base/logs"
"k8s.io/kubernetes/cmd/kube-scheduler/app"

"github.com/koordinator-sh/koordinator/cmd/koord-scheduler/app"
"github.com/koordinator-sh/koordinator/pkg/scheduler/plugins/loadaware"

// Ensure scheme package is initialized.
_ "github.com/koordinator-sh/koordinator/pkg/scheduler/apis/config/scheme"
)

func main() {
Expand All @@ -31,7 +36,9 @@ func main() {
// Register custom plugins to the scheduler framework.
// Later they can consist of scheduler profile(s) and hence
// used by various kinds of workloads.
command := app.NewSchedulerCommand()
command := app.NewSchedulerCommand(
app.WithPlugin(loadaware.Name, loadaware.New),
)

logs.InitLogs()
defer logs.FlushLogs()
Expand Down
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ require (
github.com/onsi/gomega v1.15.0
github.com/prashantv/gostub v1.1.0
github.com/prometheus/client_golang v1.11.0
github.com/spf13/cobra v1.1.3
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.7.0
go.uber.org/atomic v1.7.0
Expand All @@ -30,7 +31,8 @@ require (
k8s.io/component-base v0.22.6
k8s.io/cri-api v0.22.6
k8s.io/klog/v2 v2.9.0
k8s.io/kubernetes v0.0.0-00010101000000-000000000000
k8s.io/kube-scheduler v0.22.6
k8s.io/kubernetes v1.22.6
k8s.io/utils v0.0.0-20210819203725-bdf08cb9a70a
sigs.k8s.io/controller-runtime v0.10.2
)
Expand Down Expand Up @@ -92,7 +94,6 @@ require (
github.com/prometheus/common v0.32.1 // indirect
github.com/prometheus/procfs v0.7.3 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/spf13/cobra v1.1.3 // indirect
go.etcd.io/etcd/api/v3 v3.5.0 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.0 // indirect
go.etcd.io/etcd/client/v3 v3.5.0 // indirect
Expand Down Expand Up @@ -133,7 +134,6 @@ require (
k8s.io/csi-translation-lib v0.22.6 // indirect
k8s.io/gengo v0.0.0-20201214224949-b6c5ce23f027 // indirect
k8s.io/kube-openapi v0.0.0-20211109043538-20434351676c // indirect
k8s.io/kube-scheduler v0.0.0 // indirect
k8s.io/mount-utils v0.22.6 // indirect
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.27 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.1 // indirect
Expand Down
20 changes: 20 additions & 0 deletions hack/update-scheduler-codegen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash

set -o errexit
set -o nounset
set -o pipefail

GOPATH=`go env GOPATH`
export GOPATH

SCRIPT_ROOT=$(dirname "${BASH_SOURCE[@]}")/..

CODEGEN_PKG=${CODEGEN_PKG:-$(cd "${SCRIPT_ROOT}"; ls -d -1 ./vendor/k8s.io/code-generator 2>/dev/null || echo ../code-generator)}

bash "${CODEGEN_PKG}"/generate-internal-groups.sh \
"deepcopy,conversion,defaulter" \
github.com/koordinator-sh/koordinator/pkg/scheduler/apis/generated \
github.com/koordinator-sh/koordinator/pkg/scheduler/apis \
github.com/koordinator-sh/koordinator/pkg/scheduler/apis \
"config:v1beta2" \
--go-header-file "${SCRIPT_ROOT}"/hack/boilerplate.go.txt
1 change: 1 addition & 0 deletions pkg/scheduler/OWNERS
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
reviewers:
- eahydra
- allwmh
- zhaoxianyang385
Loading

0 comments on commit 6ef657a

Please sign in to comment.