Skip to content

Commit

Permalink
add cpu qos and mv nodeslo informer to states informer
Browse files Browse the repository at this point in the history
Signed-off-by: zwzhang0107 <zuoweizhang@outlook.com>
  • Loading branch information
zwzhang0107 committed May 20, 2022
1 parent 0cf1616 commit c409a87
Show file tree
Hide file tree
Showing 27 changed files with 1,213 additions and 800 deletions.
14 changes: 14 additions & 0 deletions apis/slo/v1alpha1/nodeslo_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ import (
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// CPUQoS enables cpu qos features.
type CPUQoS struct {
// group identity value for pods, default = 0
GroupIdentity *int64 `json:"groupIdentity,omitempty"`
}

// MemoryQoS enables memory qos features.
type MemoryQoS struct {
// memcg qos
Expand Down Expand Up @@ -103,6 +109,13 @@ type PodMemoryQoSConfig struct {
MemoryQoS `json:",inline"`
}

// CPUQoSCfg stores node-level config of cpu qos
type CPUQoSCfg struct {
// Enable indicates whether the cpu qos is enabled.
Enable *bool `json:"enable,omitempty"`
CPUQoS `json:",inline"` // embedded: its fields (e.g. groupIdentity) are serialized alongside enable
}

// MemoryQoSCfg stores node-level config of memory qos
type MemoryQoSCfg struct {
// Enable indicates whether the memory qos is enabled (default: false).
Expand All @@ -113,6 +126,7 @@ type MemoryQoSCfg struct {
}

// ResourceQoS groups the cpu, memory and resctrl qos configs.
// Every config is an optional pointer and is omitted from JSON when nil.
type ResourceQoS struct {
CPUQoS *CPUQoSCfg `json:"cpuQoS,omitempty"`
MemoryQoS *MemoryQoSCfg `json:"memoryQoS,omitempty"`
ResctrlQoS *ResctrlQoSCfg `json:"resctrlQoS,omitempty"`
}
Expand Down
46 changes: 46 additions & 0 deletions apis/slo/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 60 additions & 0 deletions config/crd/bases/slo.koordinator.sh_nodeslos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ spec:
be:
description: ResourceQoS for BE pods.
properties:
cpuQoS:
description: CPUQoSCfg stores node-level config of cpu qos
properties:
enable:
description: Enable indicates whether the cpu qos is enabled.
type: boolean
groupIdentity:
description: group identity value for pods, default =
0
format: int64
type: integer
type: object
memoryQoS:
description: MemoryQoSCfg stores node-level config of memory
qos
Expand Down Expand Up @@ -208,6 +220,18 @@ spec:
cgroupRoot:
description: ResourceQoS for root cgroup.
properties:
cpuQoS:
description: CPUQoSCfg stores node-level config of cpu qos
properties:
enable:
description: Enable indicates whether the cpu qos is enabled.
type: boolean
groupIdentity:
description: group identity value for pods, default =
0
format: int64
type: integer
type: object
memoryQoS:
description: MemoryQoSCfg stores node-level config of memory
qos
Expand Down Expand Up @@ -343,6 +367,18 @@ spec:
ls:
description: ResourceQoS for LS pods.
properties:
cpuQoS:
description: CPUQoSCfg stores node-level config of cpu qos
properties:
enable:
description: Enable indicates whether the cpu qos is enabled.
type: boolean
groupIdentity:
description: group identity value for pods, default =
0
format: int64
type: integer
type: object
memoryQoS:
description: MemoryQoSCfg stores node-level config of memory
qos
Expand Down Expand Up @@ -478,6 +514,18 @@ spec:
lsr:
description: ResourceQoS for LSR pods.
properties:
cpuQoS:
description: CPUQoSCfg stores node-level config of cpu qos
properties:
enable:
description: Enable indicates whether the cpu qos is enabled.
type: boolean
groupIdentity:
description: group identity value for pods, default =
0
format: int64
type: integer
type: object
memoryQoS:
description: MemoryQoSCfg stores node-level config of memory
qos
Expand Down Expand Up @@ -613,6 +661,18 @@ spec:
system:
description: ResourceQoS for system pods
properties:
cpuQoS:
description: CPUQoSCfg stores node-level config of cpu qos
properties:
enable:
description: Enable indicates whether the cpu qos is enabled.
type: boolean
groupIdentity:
description: group identity value for pods, default =
0
format: int64
type: integer
type: object
memoryQoS:
description: MemoryQoSCfg stores node-level config of memory
qos
Expand Down
4 changes: 2 additions & 2 deletions pkg/koordlet/koordlet.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func NewDaemon(config *config.Configuration) (Daemon, error) {
return nil, err
}

statesInformer := statesinformer.NewStatesInformer(config.StatesInformerConf, kubeClient, pleg, nodeName)
statesInformer := statesinformer.NewStatesInformer(config.StatesInformerConf, kubeClient, crdClient, pleg, nodeName)
metricCache, err := metriccache.NewMetricCache(config.MetricCacheConf)
if err != nil {
return nil, err
Expand All @@ -117,7 +117,7 @@ func NewDaemon(config *config.Configuration) (Daemon, error) {

resManagerService := resmanager.NewResManager(config.ResManagerConf, scheme, kubeClient, crdClient, nodeName, statesInformer, metricCache, int64(config.CollectorConf.CollectResUsedIntervalSeconds))

runtimeHook, err := runtimehooks.NewRuntimeHook(config.RuntimeHookConf)
runtimeHook, err := runtimehooks.NewRuntimeHook(statesInformer, config.RuntimeHookConf)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/koordlet/resmanager/cpu_burst_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1537,6 +1537,7 @@ func TestCPUBurst_start(t *testing.T) {
ctl := gomock.NewController(t)
mockStatesInformer := mock_statesinformer.NewMockStatesInformer(ctl)
mockStatesInformer.EXPECT().GetAllPods().Return(getPodMetas(tt.fields.pods)).AnyTimes()
mockStatesInformer.EXPECT().GetNodeSLO().Return(tt.fields.nodeSLO).AnyTimes()

mockMetricCache := mock_metriccache.NewMockMetricCache(ctl)
mockMetricCache.EXPECT().GetNodeResourceMetric(gomock.Any()).Return(tt.fields.nodeMetric).AnyTimes()
Expand All @@ -1562,7 +1563,6 @@ func TestCPUBurst_start(t *testing.T) {
metricCache: mockMetricCache,
eventRecorder: fakeRecorder,
kubeClient: client,
nodeSLO: tt.fields.nodeSLO,
}

testHelper := system.NewFileTestUtil(t)
Expand Down
2 changes: 1 addition & 1 deletion pkg/koordlet/resmanager/cpu_suppress_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,7 @@ func Test_cpuSuppress_suppressBECPU(t *testing.T) {
si := mockstatesinformer.NewMockStatesInformer(ctl)
si.EXPECT().GetAllPods().Return(tt.args.podMetas).AnyTimes()
si.EXPECT().GetNode().Return(tt.args.node).AnyTimes()
si.EXPECT().GetNodeSLO().Return(getNodeSLOByThreshold(tt.args.thresholdConfig)).AnyTimes()

// prepareData: mockMetricCache pods node beMetrics(AVG,current)
mockMetricCache := mockmetriccache.NewMockMetricCache(ctl)
Expand Down Expand Up @@ -604,7 +605,6 @@ func Test_cpuSuppress_suppressBECPU(t *testing.T) {
statesInformer: si,
metricCache: mockMetricCache,
config: NewDefaultConfig(),
nodeSLO: getNodeSLOByThreshold(tt.args.thresholdConfig),
collectResUsedIntervalSeconds: 1,
}
cpuSuppress := NewCPUSuppress(r)
Expand Down
9 changes: 8 additions & 1 deletion pkg/koordlet/resmanager/memory_evict_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ func Test_memoryEvict(t *testing.T) {
mockStatesInformer := mock_statesinformer.NewMockStatesInformer(ctl)
mockStatesInformer.EXPECT().GetAllPods().Return(getPodMetas(tt.pods)).AnyTimes()
mockStatesInformer.EXPECT().GetNode().Return(tt.node).AnyTimes()
mockStatesInformer.EXPECT().GetNodeSLO().Return(getNodeSLOByThreshold(tt.thresholdConfig)).AnyTimes()

mockMetricCache := mock_metriccache.NewMockMetricCache(ctl)
mockNodeQueryResult := metriccache.NodeResourceQueryResult{Metric: tt.nodeMetric}
Expand All @@ -385,7 +386,13 @@ func Test_memoryEvict(t *testing.T) {

fakeRecorder := &FakeRecorder{}
client := clientsetfake.NewSimpleClientset()
resmanager := &resmanager{statesInformer: mockStatesInformer, podsEvicted: cache.NewCacheDefault(), eventRecorder: fakeRecorder, metricCache: mockMetricCache, kubeClient: client, nodeSLO: getNodeSLOByThreshold(tt.thresholdConfig), config: NewDefaultConfig()}
resmanager := &resmanager{
statesInformer: mockStatesInformer,
podsEvicted: cache.NewCacheDefault(),
eventRecorder: fakeRecorder,
metricCache: mockMetricCache,
kubeClient: client,
config: NewDefaultConfig()}
stop := make(chan struct{})
_ = resmanager.podsEvicted.Run(stop)
defer func() { stop <- struct{}{} }()
Expand Down
5 changes: 3 additions & 2 deletions pkg/koordlet/resmanager/resctrl_reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1172,12 +1172,12 @@ func TestResctrlReconcile_reconcile(t *testing.T) {
statesInformer := mock_statesinformer.NewMockStatesInformer(ctrl)
metricCache := mock_metriccache.NewMockMetricCache(ctrl)
statesInformer.EXPECT().GetAllPods().Return([]*statesinformer.PodMeta{testingPodMeta}).AnyTimes()
statesInformer.EXPECT().GetNodeSLO().Return(testingNodeSLO).AnyTimes()
metricCache.EXPECT().GetNodeCPUInfo(&metriccache.QueryParam{}).Return(testingNodeCPUInfo, nil).AnyTimes()
rm := &resmanager{
statesInformer: statesInformer,
metricCache: metricCache,
config: NewDefaultConfig(),
nodeSLO: testingNodeSLO,
}

helper := system.NewFileTestUtil(t)
Expand Down Expand Up @@ -1215,7 +1215,8 @@ func TestResctrlReconcile_reconcile(t *testing.T) {
r.reconcile()

// test strategy parse error
r.resManager.nodeSLO.Spec.ResourceQoSStrategy = nil
testingNodeSLO.Spec.ResourceQoSStrategy = nil
statesInformer.EXPECT().GetNodeSLO().Return(testingNodeSLO).AnyTimes()
r.reconcile()

})
Expand Down
Loading

0 comments on commit c409a87

Please sign in to comment.