Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add cpu qos and mv nodeslo informer to states informer #153

Merged
merged 1 commit into from
May 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions apis/slo/v1alpha1/nodeslo_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ import (
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// CPUQoS enables cpu qos features.
type CPUQoS struct {
// group identity value for pods, default = 0
GroupIdentity *int64 `json:"groupIdentity,omitempty"` // pointer + omitempty: a nil value is left out of the serialized JSON
}

// MemoryQoS enables memory qos features.
type MemoryQoS struct {
// memcg qos
Expand Down Expand Up @@ -103,6 +109,13 @@ type PodMemoryQoSConfig struct {
MemoryQoS `json:",inline"`
}

// CPUQoSCfg stores node-level config of cpu qos
type CPUQoSCfg struct {
// Enable indicates whether the cpu qos is enabled.
Enable *bool `json:"enable,omitempty"` // pointer + omitempty: a nil value is left out of the serialized JSON
CPUQoS `json:",inline"` // embedded: CPUQoS fields (groupIdentity) are serialized at the same level as enable
}

// MemoryQoSCfg stores node-level config of memory qos
type MemoryQoSCfg struct {
// Enable indicates whether the memory qos is enabled (default: false).
Expand All @@ -113,6 +126,7 @@ type MemoryQoSCfg struct {
}

// ResourceQoS groups the optional per-resource QoS configs (cpu, memory, resctrl).
// Each field is a pointer tagged omitempty, so a nil config is left out of the
// serialized JSON.
type ResourceQoS struct {
CPUQoS *CPUQoSCfg `json:"cpuQoS,omitempty"`
MemoryQoS *MemoryQoSCfg `json:"memoryQoS,omitempty"`
ResctrlQoS *ResctrlQoSCfg `json:"resctrlQoS,omitempty"`
}
Expand Down
46 changes: 46 additions & 0 deletions apis/slo/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 60 additions & 0 deletions config/crd/bases/slo.koordinator.sh_nodeslos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ spec:
be:
description: ResourceQoS for BE pods.
properties:
cpuQoS:
description: CPUQoSCfg stores node-level config of cpu qos
properties:
enable:
description: Enable indicates whether the cpu qos is enabled.
type: boolean
groupIdentity:
description: group identity value for pods, default =
0
format: int64
type: integer
type: object
memoryQoS:
description: MemoryQoSCfg stores node-level config of memory
qos
Expand Down Expand Up @@ -208,6 +220,18 @@ spec:
cgroupRoot:
description: ResourceQoS for root cgroup.
properties:
cpuQoS:
description: CPUQoSCfg stores node-level config of cpu qos
properties:
enable:
description: Enable indicates whether the cpu qos is enabled.
type: boolean
groupIdentity:
description: group identity value for pods, default =
0
format: int64
type: integer
type: object
memoryQoS:
description: MemoryQoSCfg stores node-level config of memory
qos
Expand Down Expand Up @@ -343,6 +367,18 @@ spec:
ls:
description: ResourceQoS for LS pods.
properties:
cpuQoS:
description: CPUQoSCfg stores node-level config of cpu qos
properties:
enable:
description: Enable indicates whether the cpu qos is enabled.
type: boolean
groupIdentity:
description: group identity value for pods, default =
0
format: int64
type: integer
type: object
memoryQoS:
description: MemoryQoSCfg stores node-level config of memory
qos
Expand Down Expand Up @@ -478,6 +514,18 @@ spec:
lsr:
description: ResourceQoS for LSR pods.
properties:
cpuQoS:
description: CPUQoSCfg stores node-level config of cpu qos
properties:
enable:
description: Enable indicates whether the cpu qos is enabled.
type: boolean
groupIdentity:
description: group identity value for pods, default =
0
format: int64
type: integer
type: object
memoryQoS:
description: MemoryQoSCfg stores node-level config of memory
qos
Expand Down Expand Up @@ -613,6 +661,18 @@ spec:
system:
description: ResourceQoS for system pods
properties:
cpuQoS:
description: CPUQoSCfg stores node-level config of cpu qos
properties:
enable:
description: Enable indicates whether the cpu qos is enabled.
type: boolean
groupIdentity:
description: group identity value for pods, default =
0
format: int64
type: integer
type: object
memoryQoS:
description: MemoryQoSCfg stores node-level config of memory
qos
Expand Down
4 changes: 2 additions & 2 deletions pkg/koordlet/koordlet.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func NewDaemon(config *config.Configuration) (Daemon, error) {
return nil, err
}

statesInformer := statesinformer.NewStatesInformer(config.StatesInformerConf, kubeClient, pleg, nodeName)
statesInformer := statesinformer.NewStatesInformer(config.StatesInformerConf, kubeClient, crdClient, pleg, nodeName)
metricCache, err := metriccache.NewMetricCache(config.MetricCacheConf)
if err != nil {
return nil, err
Expand All @@ -117,7 +117,7 @@ func NewDaemon(config *config.Configuration) (Daemon, error) {

resManagerService := resmanager.NewResManager(config.ResManagerConf, scheme, kubeClient, crdClient, nodeName, statesInformer, metricCache, int64(config.CollectorConf.CollectResUsedIntervalSeconds))

runtimeHook, err := runtimehooks.NewRuntimeHook(config.RuntimeHookConf)
runtimeHook, err := runtimehooks.NewRuntimeHook(statesInformer, config.RuntimeHookConf)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/koordlet/resmanager/cpu_burst_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1537,6 +1537,7 @@ func TestCPUBurst_start(t *testing.T) {
ctl := gomock.NewController(t)
mockStatesInformer := mock_statesinformer.NewMockStatesInformer(ctl)
mockStatesInformer.EXPECT().GetAllPods().Return(getPodMetas(tt.fields.pods)).AnyTimes()
mockStatesInformer.EXPECT().GetNodeSLO().Return(tt.fields.nodeSLO).AnyTimes()

mockMetricCache := mock_metriccache.NewMockMetricCache(ctl)
mockMetricCache.EXPECT().GetNodeResourceMetric(gomock.Any()).Return(tt.fields.nodeMetric).AnyTimes()
Expand All @@ -1562,7 +1563,6 @@ func TestCPUBurst_start(t *testing.T) {
metricCache: mockMetricCache,
eventRecorder: fakeRecorder,
kubeClient: client,
nodeSLO: tt.fields.nodeSLO,
}

testHelper := system.NewFileTestUtil(t)
Expand Down
2 changes: 1 addition & 1 deletion pkg/koordlet/resmanager/cpu_suppress_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,7 @@ func Test_cpuSuppress_suppressBECPU(t *testing.T) {
si := mockstatesinformer.NewMockStatesInformer(ctl)
si.EXPECT().GetAllPods().Return(tt.args.podMetas).AnyTimes()
si.EXPECT().GetNode().Return(tt.args.node).AnyTimes()
si.EXPECT().GetNodeSLO().Return(getNodeSLOByThreshold(tt.args.thresholdConfig)).AnyTimes()

// prepareData: mockMetricCache pods node beMetrics(AVG,current)
mockMetricCache := mockmetriccache.NewMockMetricCache(ctl)
Expand Down Expand Up @@ -604,7 +605,6 @@ func Test_cpuSuppress_suppressBECPU(t *testing.T) {
statesInformer: si,
metricCache: mockMetricCache,
config: NewDefaultConfig(),
nodeSLO: getNodeSLOByThreshold(tt.args.thresholdConfig),
collectResUsedIntervalSeconds: 1,
}
cpuSuppress := NewCPUSuppress(r)
Expand Down
9 changes: 8 additions & 1 deletion pkg/koordlet/resmanager/memory_evict_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ func Test_memoryEvict(t *testing.T) {
mockStatesInformer := mock_statesinformer.NewMockStatesInformer(ctl)
mockStatesInformer.EXPECT().GetAllPods().Return(getPodMetas(tt.pods)).AnyTimes()
mockStatesInformer.EXPECT().GetNode().Return(tt.node).AnyTimes()
mockStatesInformer.EXPECT().GetNodeSLO().Return(getNodeSLOByThreshold(tt.thresholdConfig)).AnyTimes()

mockMetricCache := mock_metriccache.NewMockMetricCache(ctl)
mockNodeQueryResult := metriccache.NodeResourceQueryResult{Metric: tt.nodeMetric}
Expand All @@ -385,7 +386,13 @@ func Test_memoryEvict(t *testing.T) {

fakeRecorder := &FakeRecorder{}
client := clientsetfake.NewSimpleClientset()
resmanager := &resmanager{statesInformer: mockStatesInformer, podsEvicted: cache.NewCacheDefault(), eventRecorder: fakeRecorder, metricCache: mockMetricCache, kubeClient: client, nodeSLO: getNodeSLOByThreshold(tt.thresholdConfig), config: NewDefaultConfig()}
resmanager := &resmanager{
statesInformer: mockStatesInformer,
podsEvicted: cache.NewCacheDefault(),
eventRecorder: fakeRecorder,
metricCache: mockMetricCache,
kubeClient: client,
config: NewDefaultConfig()}
stop := make(chan struct{})
_ = resmanager.podsEvicted.Run(stop)
defer func() { stop <- struct{}{} }()
Expand Down
5 changes: 3 additions & 2 deletions pkg/koordlet/resmanager/resctrl_reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1172,12 +1172,12 @@ func TestResctrlReconcile_reconcile(t *testing.T) {
statesInformer := mock_statesinformer.NewMockStatesInformer(ctrl)
metricCache := mock_metriccache.NewMockMetricCache(ctrl)
statesInformer.EXPECT().GetAllPods().Return([]*statesinformer.PodMeta{testingPodMeta}).AnyTimes()
statesInformer.EXPECT().GetNodeSLO().Return(testingNodeSLO).AnyTimes()
metricCache.EXPECT().GetNodeCPUInfo(&metriccache.QueryParam{}).Return(testingNodeCPUInfo, nil).AnyTimes()
rm := &resmanager{
statesInformer: statesInformer,
metricCache: metricCache,
config: NewDefaultConfig(),
nodeSLO: testingNodeSLO,
}

helper := system.NewFileTestUtil(t)
Expand Down Expand Up @@ -1215,7 +1215,8 @@ func TestResctrlReconcile_reconcile(t *testing.T) {
r.reconcile()

// test strategy parse error
r.resManager.nodeSLO.Spec.ResourceQoSStrategy = nil
testingNodeSLO.Spec.ResourceQoSStrategy = nil
statesInformer.EXPECT().GetNodeSLO().Return(testingNodeSLO).AnyTimes()
r.reconcile()

})
Expand Down
Loading