Skip to content

Commit

Permalink
apis & slo-controller: allow specifying node-wise total bandwidth via…
Browse files Browse the repository at this point in the history
… annotation (#1982)

Signed-off-by: sjtufl <jerryfang555@qq.com>
  • Loading branch information
sjtufl committed Apr 12, 2024
1 parent 252c29d commit 0ea2f6e
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 6 deletions.
45 changes: 45 additions & 0 deletions apis/extension/node_qos.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package extension

import (
"fmt"

"k8s.io/apimachinery/pkg/api/resource"
)

const (
// AnnotationNodeBandwidth specifies the total network bandwidth of the node, which can
// be set by cluster administrator or third party components. The value should be a valid
// resource.Quantity. Unit: bps.
AnnotationNodeBandwidth = NodeDomainPrefix + "/network-bandwidth"
)

func GetNodeTotalBandwidth(annotations map[string]string) (*resource.Quantity, error) {
var (
val string
ok bool
)
if val, ok = annotations[AnnotationNodeBandwidth]; !ok {
return nil, nil
}
if quantity, err := resource.ParseQuantity(val); err != nil {
return nil, fmt.Errorf("failed to parse node bandwidth %v", err)
} else {
return &quantity, nil
}
}
82 changes: 82 additions & 0 deletions apis/extension/node_qos_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package extension

import (
"testing"

"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestGetNodeTotalBandwidth(t *testing.T) {
tests := []struct {
name string
node *corev1.Node
want *resource.Quantity
wantErr bool
}{
{
name: "normal",
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
AnnotationNodeBandwidth: "10M",
},
},
},
want: resource.NewScaledQuantity(10, 6),
wantErr: false,
},
{
name: "node has empty annotation",
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{},
},
},
want: nil,
wantErr: false,
},
{
name: "node has wrong-formatted annotation value",
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
AnnotationNodeBandwidth: "wrong-format",
},
},
},
want: nil,
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, gotErr := GetNodeTotalBandwidth(tt.node.Annotations)
if tt.want == nil {
assert.Nil(t, got)
} else {
assert.NotNil(t, t, got)
assert.Equal(t, tt.want.Value(), got.Value())
}
assert.Equal(t, tt.wantErr, gotErr != nil)
})
}
}
26 changes: 24 additions & 2 deletions pkg/slo-controller/nodeslo/resource_strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"k8s.io/klog/v2"

"github.com/koordinator-sh/koordinator/apis/configuration"
"github.com/koordinator-sh/koordinator/apis/extension"
slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1"
"github.com/koordinator-sh/koordinator/pkg/util"
)
Expand Down Expand Up @@ -79,6 +80,9 @@ func getCPUBurstConfigSpec(node *corev1.Node, cfg *configuration.CPUBurstCfg) (*
}

func getSystemConfigSpec(node *corev1.Node, cfg *configuration.SystemCfg) (*slov1alpha1.SystemStrategy, error) {
var nodeSystemConfig *slov1alpha1.SystemStrategy

// Find strategy that matches current node.
nodeLabels := labels.Set(node.Labels)
for _, nodeStrategy := range cfg.NodeStrategies {
selector, err := metav1.LabelSelectorAsSelector(nodeStrategy.NodeSelector)
Expand All @@ -87,11 +91,29 @@ func getSystemConfigSpec(node *corev1.Node, cfg *configuration.SystemCfg) (*slov
continue
}
if selector.Matches(nodeLabels) {
return nodeStrategy.SystemStrategy.DeepCopy(), nil
nodeSystemConfig = nodeStrategy.SystemStrategy.DeepCopy()
break
}
}

// If there is no matched node strategy, use cluster strategy.
if nodeSystemConfig == nil {
nodeSystemConfig = cfg.ClusterStrategy.DeepCopy()
}
return cfg.ClusterStrategy.DeepCopy(), nil

// Check whether node bandwidth is specified on current node, which takes HIGHER priority
// than ones configured in cluster strategy or node strategy.
// Error is returned if failed to parse node total bandwidth from annotation, which is not
// supposed to happen because we will check the validity of the annotation value in node
// plugins of validating webhook.
if nodeBandwidthQuantity, err := extension.GetNodeTotalBandwidth(node.Annotations); err != nil {
klog.Errorf("failed to get node total bandwidth from annotation, error: %v", err)
return nil, err
} else if nodeBandwidthQuantity != nil {
nodeSystemConfig.TotalNetworkBandwidth = *nodeBandwidthQuantity
}

return nodeSystemConfig, nil
}

func getHostApplicationConfig(node *corev1.Node, cfg *configuration.HostApplicationCfg) ([]slov1alpha1.HostApplicationSpec, error) {
Expand Down
50 changes: 46 additions & 4 deletions pkg/slo-controller/nodeslo/resource_strategy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"k8s.io/utils/pointer"

"github.com/koordinator-sh/koordinator/apis/configuration"
"github.com/koordinator-sh/koordinator/apis/extension"
ext "github.com/koordinator-sh/koordinator/apis/extension"
slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1"
"github.com/koordinator-sh/koordinator/pkg/util/sloconfig"
Expand Down Expand Up @@ -812,12 +813,14 @@ func Test_getSystemConfigSpec(t *testing.T) {
defaultConfig := DefaultSLOCfg()
testingSystemConfig := &configuration.SystemCfg{
ClusterStrategy: &slov1alpha1.SystemStrategy{
MinFreeKbytesFactor: pointer.Int64(150),
MinFreeKbytesFactor: pointer.Int64(150),
TotalNetworkBandwidth: resource.MustParse("10G"),
},
}
testingSystemConfig1 := &configuration.SystemCfg{
ClusterStrategy: &slov1alpha1.SystemStrategy{
MinFreeKbytesFactor: pointer.Int64(150),
MinFreeKbytesFactor: pointer.Int64(150),
TotalNetworkBandwidth: resource.MustParse("100M"),
},
NodeStrategies: []configuration.NodeSystemStrategy{
{
Expand All @@ -829,7 +832,8 @@ func Test_getSystemConfigSpec(t *testing.T) {
},
},
SystemStrategy: &slov1alpha1.SystemStrategy{
MinFreeKbytesFactor: pointer.Int64(120),
MinFreeKbytesFactor: pointer.Int64(120),
TotalNetworkBandwidth: resource.MustParse("10M"),
},
},
{
Expand All @@ -841,11 +845,16 @@ func Test_getSystemConfigSpec(t *testing.T) {
},
},
SystemStrategy: &slov1alpha1.SystemStrategy{
MinFreeKbytesFactor: pointer.Int64(130),
MinFreeKbytesFactor: pointer.Int64(130),
TotalNetworkBandwidth: resource.MustParse("1000M"),
},
},
},
}
injectNodeBandwidth := func(systemStrategy *slov1alpha1.SystemStrategy, bandwidth resource.Quantity) *slov1alpha1.SystemStrategy {
systemStrategy.TotalNetworkBandwidth = bandwidth
return systemStrategy
}
type args struct {
node *corev1.Node
cfg *configuration.SystemCfg
Expand Down Expand Up @@ -907,6 +916,39 @@ func Test_getSystemConfigSpec(t *testing.T) {
},
want: testingSystemConfig1.NodeStrategies[0].SystemStrategy,
},
{
name: "use node-wise bandwidth config while use cluster strategy",
args: args{
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
Annotations: map[string]string{
extension.AnnotationNodeBandwidth: "99M",
},
},
},
cfg: testingSystemConfig,
},
want: injectNodeBandwidth(testingSystemConfig.ClusterStrategy.DeepCopy(), resource.MustParse("99M")),
},
{
name: "use node-wise bandwidth config while use node strategy",
args: args{
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-node",
Labels: map[string]string{
"zzz": "zzz",
},
Annotations: map[string]string{
extension.AnnotationNodeBandwidth: "99M",
},
},
},
cfg: testingSystemConfig1,
},
want: injectNodeBandwidth(testingSystemConfig1.NodeStrategies[1].SystemStrategy.DeepCopy(), resource.MustParse("99M")),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit 0ea2f6e

Please sign in to comment.