From bb19d8094ec06a5c5719eb3fd41176adbd88850c Mon Sep 17 00:00:00 2001 From: saintube Date: Thu, 11 Jan 2024 22:13:35 +0800 Subject: [PATCH] koordlet: revise core sched group api Signed-off-by: saintube --- apis/slo/v1alpha1/pod.go | 56 +++-- .../hooks/coresched/core_sched.go | 120 +++++----- .../hooks/coresched/core_sched_test.go | 110 +++++++-- .../runtimehooks/hooks/coresched/helper.go | 30 +-- .../hooks/coresched/helper_test.go | 134 ++++++++--- .../runtimehooks/hooks/coresched/rule.go | 28 ++- .../runtimehooks/hooks/coresched/rule_test.go | 75 +++++- .../runtimehooks/hooks/groupidentity/bvt.go | 76 +++++-- .../hooks/groupidentity/bvt_test.go | 214 +++++++++++++----- .../hooks/groupidentity/interceptor.go | 10 +- .../hooks/groupidentity/interceptor_test.go | 92 ++++++-- .../runtimehooks/hooks/groupidentity/rule.go | 24 ++ .../hooks/groupidentity/rule_test.go | 136 +++++++++-- pkg/koordlet/util/system/core_sched.go | 26 +++ pkg/koordlet/util/system/system_file.go | 14 ++ 15 files changed, 881 insertions(+), 264 deletions(-) diff --git a/apis/slo/v1alpha1/pod.go b/apis/slo/v1alpha1/pod.go index 501779127..e6c1a7e28 100644 --- a/apis/slo/v1alpha1/pod.go +++ b/apis/slo/v1alpha1/pod.go @@ -20,7 +20,6 @@ import ( "encoding/json" corev1 "k8s.io/api/core/v1" - "k8s.io/utils/pointer" apiext "github.com/koordinator-sh/koordinator/apis/extension" ) @@ -68,12 +67,8 @@ func GetPodMemoryQoSConfig(pod *corev1.Pod) (*PodMemoryQOSConfig, error) { const ( // LabelCoreSchedGroupID is the label key of the group ID of the Linux Core Scheduling. - // Value should be a valid UUID or the none value "0". - // When the value is a valid UUID, pods with that group ID and the equal CoreExpelled status on the node will be - // assigned to the same core sched cookie. - // When the value is the none value "0", pod will be reset to the default core sched cookie `0`. - // When the k-v pair is missing but the node-level strategy enables the core sched, the pod will be assigned an - // internal group according to the pod's UID. + // Value should be a valid UUID or empty. If it is empty, the pod is considered to belong to a core sched group "". + // Otherwise, the pod is set its core sched group ID according to the value. // // Core Sched: https://docs.kernel.org/admin-guide/hw-vuln/core-scheduling.html // When the Core Sched is enabled, pods with the different core sched group IDs will not be running at the same SMT @@ -85,20 +80,47 @@ const ( // are the same. LabelCoreSchedGroupID = apiext.DomainPrefix + "core-sched-group-id" - // CoreSchedGroupIDNone is the none value of the core sched group ID which indicates the core sched is disabled for + // LabelCoreSchedPolicy is the label key that indicates the policy of the core scheduling. + // It supports the following values: + // - "" (default): If the core sched is enabled for the node, the pod is set the group ID according to the value + // of the LabelCoreSchedGroupID. + // - "none": The core sched is explicitly disabled for the pod even if the node-level strategy is enabled. + // - "exclusive": If the core sched is enabled for the node, the pod is set the group ID according to the pod UID, + // so that the pod is exclusive to any other pods. + LabelCoreSchedPolicy = apiext.DomainPrefix + "core-sched-policy" +) + +type CoreSchedPolicy string + +const ( + // CoreSchedPolicyDefault is the default policy of the core scheduling which indicates the core sched group ID + // is set according to the value of the LabelCoreSchedGroupID. + CoreSchedPolicyDefault CoreSchedPolicy = "" + // CoreSchedPolicyNone is the none policy of the core scheduling which indicates the core sched is disabled for // the pod. The pod will be reset to the system-default cookie `0`. - CoreSchedGroupIDNone = "0" + CoreSchedPolicyNone CoreSchedPolicy = "none" + // CoreSchedPolicyExclusive is the exclusive policy of the core scheduling which indicates the core sched group ID + // is set the same as the pod's UID + CoreSchedPolicyExclusive CoreSchedPolicy = "exclusive" ) -// GetCoreSchedGroupID gets the core sched group ID from the pod labels. -// It returns the core sched group ID and whether the pod explicitly disables the core sched. -func GetCoreSchedGroupID(labels map[string]string) (string, *bool) { +// GetCoreSchedGroupID gets the core sched group ID for the pod according to the labels. +func GetCoreSchedGroupID(labels map[string]string) string { + if labels != nil { + return labels[LabelCoreSchedGroupID] + } + return "" +} + +// GetCoreSchedPolicy gets the core sched policy for the pod according to the labels. +func GetCoreSchedPolicy(labels map[string]string) CoreSchedPolicy { if labels == nil { - return "", nil + return CoreSchedPolicyDefault } - value, ok := labels[LabelCoreSchedGroupID] - if !ok { - return "", nil + if v := labels[LabelCoreSchedPolicy]; v == string(CoreSchedPolicyNone) { + return CoreSchedPolicyNone + } else if v == string(CoreSchedPolicyExclusive) { + return CoreSchedPolicyExclusive } - return value, pointer.Bool(value == CoreSchedGroupIDNone) + return CoreSchedPolicyDefault } diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go index eafe3df6c..4d1116a2c 100644 --- a/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go +++ b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go @@ -27,7 +27,6 @@ import ( "k8s.io/utils/pointer" "github.com/koordinator-sh/koordinator/apis/extension" - slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1" "github.com/koordinator-sh/koordinator/pkg/koordlet/metrics" "github.com/koordinator-sh/koordinator/pkg/koordlet/resourceexecutor" "github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks" @@ -51,6 +50,8 @@ const ( // ExpellerGroupSuffix is the default suffix of the expeller core sched group. ExpellerGroupSuffix = "-expeller" + // NoneGroupID is the special ID denoting none core sched group. + NoneGroupID = "__0__" ) // SYSTEM QoS is excluded from the cookie mutating. @@ -66,10 +67,9 @@ type Plugin struct { initialized *atomic.Bool // whether the cache has been initialized allPodsSyncOnce sync.Once // sync once for AllPods - sysSupported *bool - supportedMsg string - sysEnabled bool - giSupported *bool + sysSupported *bool + supportedMsg string + giSysctlSupported *bool cookieCache *gocache.Cache // core-sched-group-id -> cookie id, set; if the group has had cookie cookieCacheRWMutex sync.RWMutex @@ -104,10 +104,12 @@ func (p *Plugin) Register(op hooks.Options) { // TODO: hook NRI events RunPodSandbox, PostStartContainer rule.Register(ruleNameForNodeSLO, description, rule.WithParseFunc(statesinformer.RegisterTypeNodeSLOSpec, p.parseRuleForNodeSLO), - rule.WithUpdateCallback(p.ruleUpdateCb)) + rule.WithUpdateCallback(p.ruleUpdateCb), + rule.WithSystemSupported(p.SystemSupported)) rule.Register(ruleNameForAllPods, description, rule.WithParseFunc(statesinformer.RegisterTypeAllPods, p.parseForAllPods), - rule.WithUpdateCallback(p.ruleUpdateCb)) + rule.WithUpdateCallback(p.ruleUpdateCb), + rule.WithSystemSupported(p.SystemSupported)) reconciler.RegisterCgroupReconciler(reconciler.ContainerLevel, sysutil.VirtualCoreSchedCookie, "set core sched cookie to process groups of container specified", p.SetContainerCookie, reconciler.PodQOSFilter(), podQOSConditions...) @@ -126,50 +128,21 @@ func (p *Plugin) Setup(op hooks.Options) { p.cse = sysutil.NewCoreSchedExtended() } -func (p *Plugin) SystemSupported() (bool, string) { +func (p *Plugin) SystemSupported() bool { if p.sysSupported == nil { - isSupported, msg := sysutil.EnableCoreSchedIfSupported() - p.sysSupported = pointer.Bool(isSupported) + sysSupported, msg := sysutil.IsCoreSchedSupported() + p.sysSupported = pointer.Bool(sysSupported) p.supportedMsg = msg - klog.Infof("update system supported info for plugin %s, supported %v, msg %s", - name, *p.sysSupported, p.supportedMsg) + klog.Infof("update system supported info to %v for plugin %v, supported msg %s", + sysSupported, name, msg) } - return *p.sysSupported, p.supportedMsg -} - -func (p *Plugin) InitCache(podMetas []*statesinformer.PodMeta) bool { - if p.initialized.Load() { - return true - } - - synced := p.LoadAllCookies(podMetas) - - p.initialized.Store(synced) - return synced + return *p.sysSupported } func (p *Plugin) IsCacheInited() bool { return p.initialized.Load() } -// TryDisableGroupIdentity tries disabling the group identity via sysctl to safely enable the core sched. -func (p *Plugin) TryDisableGroupIdentity() error { - if p.giSupported == nil { - bvtConfigPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) - p.giSupported = pointer.Bool(sysutil.FileExists(bvtConfigPath)) - } - if !*p.giSupported { // not support either the group identity or the core sched - return nil - } - - err := sysutil.SetSchedGroupIdentity(false) - if err != nil { - return err - } - - return nil -} - func (p *Plugin) SetKubeQOSCPUIdle(proto protocol.HooksProtocol) error { kubeQOSCtx := proto.(*protocol.KubeQOSContext) if kubeQOSCtx == nil { @@ -225,6 +198,11 @@ func (p *Plugin) SetContainerCookie(proto protocol.HooksProtocol) error { return nil } + if !p.SystemSupported() { + klog.V(6).Infof("skipped to set cookie for container %s/%s, core sched is unsupported, msg: %s", + containerCtx.Request.PodMeta.String(), containerCtx.Request.ContainerMeta.Name, p.supportedMsg) + return nil + } if !p.rule.IsInited() || !p.IsCacheInited() { klog.V(5).Infof("plugin %s has not been inited, rule inited %v, cache inited %v, aborted to set cookie for container %s/%s", name, p.rule.IsInited(), p.IsCacheInited(), containerCtx.Request.PodMeta.String(), containerCtx.Request.ContainerMeta.Name) @@ -240,13 +218,6 @@ func (p *Plugin) SetContainerCookie(proto protocol.HooksProtocol) error { // 1. disabled -> enabled: Add or Assign. // 2. keep enabled: Check the differences of cookie, group ID and the PIDs, and do Assign. if isEnabled { - // FIXME(saintube): Currently the kernel feature core scheduling is strictly excluded with the group identity's - // bvt=-1. So we have to check if the GroupIdentity can be disabled before creating core sched cookies. - // This check will be removed when the conflict between kernel features is resolved. - if err := p.TryDisableGroupIdentity(); err != nil { - return fmt.Errorf("failed to disable group identity for core sched, err: %w", err) - } - return p.enableContainerCookie(containerCtx, groupID) } // else pod disables @@ -254,8 +225,19 @@ func (p *Plugin) SetContainerCookie(proto protocol.HooksProtocol) error { return p.disableContainerCookie(containerCtx, groupID) } -// LoadAllCookies syncs the current core sched cookies of all pods into the cookie cache. -func (p *Plugin) LoadAllCookies(podMetas []*statesinformer.PodMeta) bool { +func (p *Plugin) initCache(podMetas []*statesinformer.PodMeta) bool { + if p.initialized.Load() { + return true + } + + synced := p.loadAllCookies(podMetas) + + p.initialized.Store(synced) + return synced +} + +// loadAllCookies syncs the current core sched cookies of all pods into the cookie cache. +func (p *Plugin) loadAllCookies(podMetas []*statesinformer.PodMeta) bool { hasSynced := false p.cookieCacheRWMutex.Lock() defer p.cookieCacheRWMutex.Unlock() @@ -329,6 +311,38 @@ func (p *Plugin) LoadAllCookies(podMetas []*statesinformer.PodMeta) bool { return hasSynced } +func (p *Plugin) initSystem(isEnabled bool) error { + if !isEnabled { // only switch sysctl if enabled + return nil + } + + // NOTE: Currently the kernel feature core scheduling is strictly excluded with the group identity's + // bvt=-1. So we have to check if the GroupIdentity can be disabled before creating core sched cookies. + err := p.tryDisableGroupIdentity() + if err != nil { + return err + } + + sysEnabled, msg := sysutil.EnableCoreSchedIfSupported() + if !sysEnabled { + return fmt.Errorf("failed to enable core sched, msg: %s", msg) + } + + return nil +} + +// tryDisableGroupIdentity tries disabling the group identity via sysctl to safely enable the core sched. +func (p *Plugin) tryDisableGroupIdentity() error { + if p.giSysctlSupported == nil { + p.giSysctlSupported = pointer.Bool(sysutil.IsGroupIdentitySysctlSupported()) + } + if !*p.giSysctlSupported { // not support either the group identity or the core sched + return nil + } + + return sysutil.SetSchedGroupIdentity(false) +} + // enableContainerCookie adds or assigns a core sched cookie for the container. func (p *Plugin) enableContainerCookie(containerCtx *protocol.ContainerContext, groupID string) error { podMetaName := containerCtx.Request.PodMeta.String() @@ -433,7 +447,7 @@ func (p *Plugin) disableContainerCookie(containerCtx *protocol.ContainerContext, // invalid lastGroupID means container not in group cache (container should be cleared or not ever added) // invalid lastCookieEntry means group not in cookie cache (group should be cleared) // let its cached PIDs expire or removed by siblings' Assign - if (len(lastGroupID) <= 0 || lastGroupID == slov1alpha1.CoreSchedGroupIDNone) && lastCookieEntry == nil { + if (len(lastGroupID) <= 0 || lastGroupID == NoneGroupID) && lastCookieEntry == nil { return nil } @@ -474,7 +488,7 @@ func (p *Plugin) getCookieCacheForContainer(groupID, containerUID string) (strin defer p.cookieCacheRWMutex.RUnlock() lastGroupIDIf, containerHasGroup := p.groupCache.Get(containerUID) - lastGroupID := slov1alpha1.CoreSchedGroupIDNone + lastGroupID := NoneGroupID if containerHasGroup { lastGroupID = lastGroupIDIf.(string) } diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go index 6b915fcbc..3cf18b9d4 100644 --- a/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go +++ b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go @@ -65,7 +65,7 @@ func TestPluginSystemSupported(t *testing.T) { name: "plugin unsupported since no sched features file", wants: wants{ systemSupported: false, - supportMsg: "core sched not supported", + supportMsg: "file not exist", }, }, { @@ -78,7 +78,7 @@ func TestPluginSystemSupported(t *testing.T) { }, wants: wants{ systemSupported: false, - supportMsg: "core sched not supported", + supportMsg: "not supported neither by sysctl nor by sched_features", }, }, { @@ -91,7 +91,7 @@ func TestPluginSystemSupported(t *testing.T) { }, wants: wants{ systemSupported: true, - supportMsg: "", + supportMsg: "sysctl supported", }, }, { @@ -104,6 +104,7 @@ func TestPluginSystemSupported(t *testing.T) { }, wants: wants{ systemSupported: true, + supportMsg: "sched_features supported", }, }, { @@ -116,6 +117,7 @@ func TestPluginSystemSupported(t *testing.T) { }, wants: wants{ systemSupported: true, + supportMsg: "sysctl supported", }, }, } @@ -132,14 +134,14 @@ func TestPluginSystemSupported(t *testing.T) { Reader: resourceexecutor.NewCgroupReader(), Executor: resourceexecutor.NewTestResourceExecutor(), }) - sysSupported, supportMsg := p.SystemSupported() + sysSupported := p.SystemSupported() assert.Equal(t, tt.wants.systemSupported, sysSupported) - assert.Equal(t, tt.wants.supportMsg, supportMsg) + assert.Equal(t, tt.wants.supportMsg, p.supportedMsg) }) } } -func TestPluginTryDisableGroupIdentity(t *testing.T) { +func TestPlugin_initSystem(t *testing.T) { type fields struct { prepareFn func(helper *sysutil.FileTestUtil) giSupported *bool @@ -147,66 +149,129 @@ func TestPluginTryDisableGroupIdentity(t *testing.T) { tests := []struct { name string fields fields + arg bool wantErr bool wantField *bool wantExtra func(t *testing.T, helper *sysutil.FileTestUtil) }{ + { + name: "skip to init if rule disabled", + fields: fields{ + giSupported: pointer.Bool(true), + }, + arg: false, + wantErr: false, + wantField: pointer.Bool(true), + }, { name: "system does not support sysctl for group identity", fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteFileContents(sysutil.GetProcSysFilePath(sysutil.KernelSchedCore), "1") + }, giSupported: nil, }, + arg: true, wantErr: false, wantField: pointer.Bool(false), }, { name: "already know system does not support sysctl for group identity", fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteFileContents(sysutil.GetProcSysFilePath(sysutil.KernelSchedCore), "1") + }, giSupported: pointer.Bool(false), }, + arg: true, wantErr: false, wantField: pointer.Bool(false), }, + { + name: "successfully enable core sched and disable group identity", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteFileContents(sysutil.GetProcSysFilePath(sysutil.KernelSchedCore), "0") + bvtConfigPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) + helper.WriteFileContents(bvtConfigPath, "1") + }, + }, + arg: true, + wantErr: false, + wantField: pointer.Bool(true), + wantExtra: func(t *testing.T, helper *sysutil.FileTestUtil) { + bvtConfigPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) + got := helper.ReadFileContents(bvtConfigPath) + assert.Equal(t, "0", got) + got = helper.ReadFileContents(sysutil.GetProcSysFilePath(sysutil.KernelSchedCore)) + assert.Equal(t, "1", got) + }, + }, { name: "successfully disable group identity", fields: fields{ prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteFileContents(sysutil.GetProcSysFilePath(sysutil.KernelSchedCore), "1") bvtConfigPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) helper.WriteFileContents(bvtConfigPath, "1") }, }, + arg: true, wantErr: false, wantField: pointer.Bool(true), wantExtra: func(t *testing.T, helper *sysutil.FileTestUtil) { bvtConfigPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) got := helper.ReadFileContents(bvtConfigPath) assert.Equal(t, "0", got) + got = helper.ReadFileContents(sysutil.GetProcSysFilePath(sysutil.KernelSchedCore)) + assert.Equal(t, "1", got) }, }, { name: "successfully disable group identity for known sysctl support", fields: fields{ prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteFileContents(sysutil.GetProcSysFilePath(sysutil.KernelSchedCore), "1") bvtConfigPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) helper.WriteFileContents(bvtConfigPath, "1") }, giSupported: pointer.Bool(true), }, + arg: true, wantErr: false, wantField: pointer.Bool(true), wantExtra: func(t *testing.T, helper *sysutil.FileTestUtil) { bvtConfigPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) got := helper.ReadFileContents(bvtConfigPath) assert.Equal(t, "0", got) + got = helper.ReadFileContents(sysutil.GetProcSysFilePath(sysutil.KernelSchedCore)) + assert.Equal(t, "1", got) }, }, { name: "failed to disable group identity", fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + helper.WriteFileContents(sysutil.GetProcSysFilePath(sysutil.KernelSchedCore), "0") + }, giSupported: pointer.Bool(true), }, + arg: true, wantErr: true, wantField: pointer.Bool(true), + wantExtra: func(t *testing.T, helper *sysutil.FileTestUtil) { + got := helper.ReadFileContents(sysutil.GetProcSysFilePath(sysutil.KernelSchedCore)) + assert.Equal(t, "0", got) + }, + }, + { + name: "failed to enable core sched", + fields: fields{ + giSupported: pointer.Bool(false), + }, + arg: true, + wantErr: true, + wantField: pointer.Bool(false), }, } for _, tt := range tests { @@ -219,10 +284,10 @@ func TestPluginTryDisableGroupIdentity(t *testing.T) { p := newPlugin() p.Setup(hooks.Options{}) - p.giSupported = tt.fields.giSupported - gotErr := p.TryDisableGroupIdentity() - assert.Equal(t, tt.wantErr, gotErr != nil) - assert.Equal(t, tt.wantField, p.giSupported) + p.giSysctlSupported = tt.fields.giSupported + gotErr := p.initSystem(tt.arg) + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + assert.Equal(t, tt.wantField, p.giSysctlSupported) if tt.wantExtra != nil { tt.wantExtra(t, helper) } @@ -306,11 +371,11 @@ func TestPlugin_SetContainerCookie(t *testing.T) { wantErr: false, }, { - name: "failed to disable group identity", + name: "system does not support core sched", fields: fields{ plugin: testGetEnabledPlugin(), preparePluginFn: func(p *Plugin) { - p.giSupported = pointer.Bool(true) + p.sysSupported = pointer.Bool(false) }, }, arg: &protocol.ContainerContext{ @@ -325,7 +390,7 @@ func TestPlugin_SetContainerCookie(t *testing.T) { CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", }, }, - wantErr: true, + wantErr: false, }, { name: "add cookie for LS container correctly", @@ -738,7 +803,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { PodAnnotations: map[string]string{}, PodLabels: map[string]string{ extension.LabelPodQoS: string(extension.QoSLS), - slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + slov1alpha1.LabelCoreSchedPolicy: string(slov1alpha1.CoreSchedPolicyNone), }, ContainerMeta: protocol.ContainerMeta{ Name: "test-container", @@ -811,7 +877,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { PodAnnotations: map[string]string{}, PodLabels: map[string]string{ extension.LabelPodQoS: string(extension.QoSLSR), - slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + slov1alpha1.LabelCoreSchedPolicy: string(slov1alpha1.CoreSchedPolicyNone), }, ContainerMeta: protocol.ContainerMeta{ Name: "test-container", @@ -879,7 +946,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { PodAnnotations: map[string]string{}, PodLabels: map[string]string{ extension.LabelPodQoS: string(extension.QoSBE), - slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + slov1alpha1.LabelCoreSchedPolicy: string(slov1alpha1.CoreSchedPolicyNone), }, ContainerMeta: protocol.ContainerMeta{ Name: "test-container", @@ -939,7 +1007,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { PodAnnotations: map[string]string{}, PodLabels: map[string]string{ extension.LabelPodQoS: string(extension.QoSLS), - slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + slov1alpha1.LabelCoreSchedPolicy: string(slov1alpha1.CoreSchedPolicyNone), }, ContainerMeta: protocol.ContainerMeta{ Name: "test-container", @@ -999,7 +1068,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { PodAnnotations: map[string]string{}, PodLabels: map[string]string{ extension.LabelPodQoS: string(extension.QoSBE), - slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + slov1alpha1.LabelCoreSchedPolicy: string(slov1alpha1.CoreSchedPolicyNone), }, ContainerMeta: protocol.ContainerMeta{ Name: "test-container", @@ -1131,7 +1201,7 @@ func TestPlugin_SetContainerCookie(t *testing.T) { } } -func TestPlugin_LoadAllCookies(t *testing.T) { +func TestPlugin_loadAllCookies(t *testing.T) { type fields struct { prepareFn func(helper *sysutil.FileTestUtil) plugin *Plugin @@ -1929,7 +1999,7 @@ func TestPlugin_LoadAllCookies(t *testing.T) { tt.fields.preparePluginFn(p) } - got := p.LoadAllCookies(tt.arg) + got := p.loadAllCookies(tt.arg) assert.Equal(t, tt.want, got) for groupID, cookieID := range tt.wantFields.groupToCookie { if cookieID <= 0 { diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/helper.go b/pkg/koordlet/runtimehooks/hooks/coresched/helper.go index 041404555..f98e68ec0 100644 --- a/pkg/koordlet/runtimehooks/hooks/coresched/helper.go +++ b/pkg/koordlet/runtimehooks/hooks/coresched/helper.go @@ -236,38 +236,24 @@ func (p *Plugin) clearCookie(pids []uint32, groupID string, lastCookieID uint64) // getPodEnabledAndGroup gets whether the pod enables the core scheduling and the group ID if it does. func (p *Plugin) getPodEnabledAndGroup(podAnnotations, podLabels map[string]string, podKubeQOS corev1.PodQOSClass, podUID string) (bool, string) { - // if the pod enables/disables the core-sched explicitly - groupID, isPodDisabled := slov1alpha1.GetCoreSchedGroupID(podLabels) - if isPodDisabled != nil && *isPodDisabled { // pod disables - return false, groupID - } - + groupID := slov1alpha1.GetCoreSchedGroupID(podLabels) + policy := slov1alpha1.GetCoreSchedPolicy(podLabels) podQOS := extension.QoSNone if podLabels != nil { podQOS = extension.GetQoSClassByAttrs(podLabels, podAnnotations) } - isQOSEnabled, isExpeller := p.rule.IsPodEnabled(podQOS, podKubeQOS) - groupID = p.getGroupID(groupID, podUID, isExpeller) - - if isPodDisabled != nil { // assert *isPodDisabled == true - return true, groupID - } + isEnabled, isExpeller := p.rule.IsPodEnabled(podQOS, podKubeQOS) - // use the QoS-level rules - return isQOSEnabled, groupID -} - -func (p *Plugin) getGroupID(baseGroupID string, podUID string, isExpeller bool) string { - var groupID string - if len(baseGroupID) > 0 { - groupID = baseGroupID - } else { + if policy == slov1alpha1.CoreSchedPolicyExclusive { groupID = podUID + } else if policy == slov1alpha1.CoreSchedPolicyNone { + isEnabled = false } if isExpeller { groupID += ExpellerGroupSuffix } - return groupID + + return isEnabled, groupID } func (p *Plugin) getContainerUID(podUID string, containerID string) string { diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/helper_test.go b/pkg/koordlet/runtimehooks/hooks/coresched/helper_test.go index ac825158a..d2c52c141 100644 --- a/pkg/koordlet/runtimehooks/hooks/coresched/helper_test.go +++ b/pkg/koordlet/runtimehooks/hooks/coresched/helper_test.go @@ -722,7 +722,7 @@ func Test_isPodEnabled(t *testing.T) { want1 string }{ { - name: "pod enabled on annotation", + name: "pod enabled", field: field{ rule: testGetEnabledRule(), }, @@ -738,67 +738,140 @@ func Test_isPodEnabled(t *testing.T) { want1: "group-xxx-expeller", }, { - name: "pod enabled on annotation 1", + name: "BE pod enabled", field: field{ - rule: testGetDisabledRule(), + rule: testGetEnabledRule(), }, args: args{ podAnnotations: map[string]string{}, podLabels: map[string]string{ - slov1alpha1.LabelCoreSchedGroupID: "", + extension.LabelPodQoS: string(extension.QoSBE), }, podUID: "xxx", }, want: true, - want1: "xxx", + want1: "", }, { - name: "pod disabled on annotation", + name: "burstable pod enabled", field: field{ rule: testGetEnabledRule(), }, args: args{ podAnnotations: map[string]string{}, - podLabels: map[string]string{ - slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, - }, - podUID: "xxx", + podLabels: map[string]string{}, + podKubeQOS: corev1.PodQOSBurstable, + podUID: "xxx", }, - want: false, - want1: slov1alpha1.CoreSchedGroupIDNone, + want: true, + want1: "-expeller", }, { - name: "pod enabled according to nodeSLO", + name: "pod enabled without group id label", field: field{ rule: testGetEnabledRule(), }, args: args{ - podKubeQOS: corev1.PodQOSBurstable, - podUID: "xxx", + podAnnotations: map[string]string{}, + podUID: "xxx", + podKubeQOS: corev1.PodQOSBurstable, }, want: true, - want1: "xxx-expeller", + want1: "-expeller", }, { - name: "pod enabled according to nodeSLO 1", + name: "pod enabled with policy exclusive", field: field{ rule: testGetEnabledRule(), }, args: args{ + podAnnotations: map[string]string{}, podLabels: map[string]string{ - extension.LabelPodQoS: string(extension.QoSLS), + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedPolicy: string(slov1alpha1.CoreSchedPolicyExclusive), }, - podAnnotations: map[string]string{}, - podKubeQOS: corev1.PodQOSGuaranteed, - podUID: "xxx", + podUID: "xxx", }, want: true, want1: "xxx-expeller", }, { - name: "pod enabled according to nodeSLO 2", + name: "pod disabled", + field: field{ + rule: testGetDisabledRule(), + }, + args: args{ + podAnnotations: map[string]string{}, + podLabels: map[string]string{ + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + podUID: "xxx", + }, + want: false, + want1: "group-xxx", + }, + { + name: "BE pod disabled", + field: field{ + rule: testGetDisabledRule(), + }, + args: args{ + podAnnotations: map[string]string{}, + podLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSBE), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + podUID: "xxx", + }, + want: false, + want1: "group-xxx", + }, + { + name: "burstable pod disabled", + field: field{ + rule: testGetDisabledRule(), + }, + args: args{ + podAnnotations: map[string]string{}, + podLabels: map[string]string{}, + podKubeQOS: corev1.PodQOSBurstable, + podUID: "xxx", + }, + want: false, + want1: "", + }, + { + name: "pod disabled without group id label", + field: field{ + rule: testGetDisabledRule(), + }, + args: args{ + podLabels: map[string]string{}, + podKubeQOS: corev1.PodQOSBestEffort, + }, + want: false, + want1: "", + }, + { + name: "pod disabled by policy none", + field: field{ + rule: testGetEnabledRule(), + }, + args: args{ + podLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedPolicy: string(slov1alpha1.CoreSchedPolicyNone), + }, + podKubeQOS: corev1.PodQOSGuaranteed, + }, + want: false, + want1: "-expeller", + }, + { + name: "pod enabled according to nodeSLO", field: field{ rule: &Rule{ + enable: true, podQOSParams: map[extension.QoSClass]Param{ extension.QoSLSE: testGetDisabledRuleParam(), extension.QoSLSR: testGetDisabledRuleParam(), @@ -841,24 +914,13 @@ func Test_isPodEnabled(t *testing.T) { podUID: "xxx", }, want: true, - want1: "xxx", + want1: "", }, { name: "pod disabled according to nodeSLO", - field: field{ - rule: testGetDisabledRule(), - }, - args: args{ - podKubeQOS: corev1.PodQOSBestEffort, - podUID: "xxx", - }, - want: false, - want1: "xxx", - }, - { - name: "pod disabled according to nodeSLO 1", field: field{ rule: &Rule{ + enable: false, podQOSParams: map[extension.QoSClass]Param{ extension.QoSLSE: testGetDisabledRuleParam(), extension.QoSLSR: testGetDisabledRuleParam(), @@ -901,7 +963,7 @@ func Test_isPodEnabled(t *testing.T) { podUID: "xxx", }, want: false, - want1: "xxx", + want1: "", }, } for _, tt := range tests { diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/rule.go b/pkg/koordlet/runtimehooks/hooks/coresched/rule.go index dffb1c05f..d08b17dec 100644 --- a/pkg/koordlet/runtimehooks/hooks/coresched/rule.go +++ b/pkg/koordlet/runtimehooks/hooks/coresched/rule.go @@ -49,12 +49,14 @@ func newParam(qosCfg *slov1alpha1.CPUQOSCfg, policy slov1alpha1.CPUQOSPolicy) Pa type Rule struct { lock sync.RWMutex + enable bool // node-level switch podQOSParams map[extension.QoSClass]Param kubeQOSPodParams map[corev1.PodQOSClass]Param } func newRule() *Rule { return &Rule{ + enable: false, podQOSParams: make(map[extension.QoSClass]Param), kubeQOSPodParams: make(map[corev1.PodQOSClass]Param), } @@ -66,10 +68,19 @@ func (r *Rule) IsInited() bool { return len(r.podQOSParams) > 0 && len(r.kubeQOSPodParams) > 0 } +func (r *Rule) IsEnabled() bool { + r.lock.RLock() + defer r.lock.RUnlock() + return r.enable +} + // IsPodEnabled returns if the pod's core sched is enabled by the rule, and if the QoS-level core expeller is enabled. func (r *Rule) IsPodEnabled(podQoSClass extension.QoSClass, podKubeQOS corev1.PodQOSClass) (bool, bool) { r.lock.RLock() defer r.lock.RUnlock() + if !r.enable { + return false, false + } if val, exist := r.podQOSParams[podQoSClass]; exist { return val.IsPodEnabled, val.IsExpeller } @@ -93,11 +104,13 @@ func (r *Rule) IsKubeQOSCPUIdle(KubeQOS corev1.PodQOSClass) bool { func (r *Rule) Update(ruleNew *Rule) bool { r.lock.Lock() defer r.lock.Unlock() - isEqual := reflect.DeepEqual(r.podQOSParams, ruleNew.podQOSParams) && + isEqual := r.enable == ruleNew.enable && + reflect.DeepEqual(r.podQOSParams, ruleNew.podQOSParams) && reflect.DeepEqual(r.kubeQOSPodParams, ruleNew.kubeQOSPodParams) if isEqual { return false } + r.enable = ruleNew.enable r.podQOSParams = ruleNew.podQOSParams r.kubeQOSPodParams = ruleNew.kubeQOSPodParams return true @@ -127,6 +140,7 @@ func (p *Plugin) parseRuleForNodeSLO(mergedNodeSLOIf interface{}) (bool, error) } ruleNew := &Rule{ + enable: lsrValue.IsPodEnabled || lsValue.IsPodEnabled || beValue.IsPodEnabled, podQOSParams: map[extension.QoSClass]Param{ extension.QoSLSE: lsrValue, extension.QoSLSR: lsrValue, @@ -173,8 +187,8 @@ func (p *Plugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error { } // check the kernel feature and enable if needed - if supported, msg := p.SystemSupported(); !supported { - klog.V(4).Infof("plugin %s is not supported by system, msg: %s", name, msg) + if !p.SystemSupported() { + klog.V(4).Infof("plugin %s is not supported by system, msg: %s", name, p.supportedMsg) return nil } @@ -184,7 +198,13 @@ func (p *Plugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error { return nil } - if !p.InitCache(podMetas) { + if err := p.initSystem(p.rule.IsEnabled()); err != nil { + klog.V(4).Infof("plugin %s failed to initialize system, err: %s", name, err) + return nil + } + klog.V(6).Infof("plugin %s initialize system successfully", name) + + if !p.initCache(podMetas) { klog.V(4).Infof("plugin %s aborted for cookie cache has not been initialized", name) return nil } diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/rule_test.go b/pkg/koordlet/runtimehooks/hooks/coresched/rule_test.go index a7a1e9734..60f9eeb9f 100644 --- a/pkg/koordlet/runtimehooks/hooks/coresched/rule_test.go +++ b/pkg/koordlet/runtimehooks/hooks/coresched/rule_test.go @@ -268,6 +268,7 @@ func Test_parseRuleForNodeSLO(t *testing.T) { want: true, wantErr: false, wantField: &Rule{ + enable: true, podQOSParams: map[extension.QoSClass]Param{ extension.QoSLSE: { IsPodEnabled: false, @@ -313,6 +314,7 @@ func Test_parseRuleForNodeSLO(t *testing.T) { name: "policy enabled on BE", field: field{ rule: &Rule{ + enable: true, podQOSParams: map[extension.QoSClass]Param{ extension.QoSLSE: { IsPodEnabled: true, @@ -384,6 +386,7 @@ func Test_parseRuleForNodeSLO(t *testing.T) { want: true, wantErr: false, wantField: &Rule{ + enable: true, podQOSParams: map[extension.QoSClass]Param{ extension.QoSLSE: { IsPodEnabled: true, @@ -559,6 +562,72 @@ func Test_ruleUpdateCb(t *testing.T) { initialized: false, }, }, + { + name: "failed to init core sched via sysctl", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + }, + plugin: newPlugin(), + preparePluginFn: func(p *Plugin) { + p.rule = testGetEnabledRule() + p.sysSupported = pointer.Bool(true) + }, + }, + arg: &statesinformer.CallbackTarget{ + Pods: []*statesinformer.PodMeta{ + { + CgroupDir: "kubepods.slice/kubepods-podxxxxxx.slice", + Pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + UID: "xxxxxx", + Annotations: map[string]string{}, + Labels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test-container", + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + QOSClass: corev1.PodQOSGuaranteed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "test-container", + ContainerID: "containerd://yyyyyy", + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + }, + }, + }, + wantErr: false, + wantFields: wantFields{ + rule: testGetEnabledRule(), + sysSupported: pointer.Bool(true), + initialized: false, + }, + }, { name: "no cookie has been synced", fields: fields{ @@ -1513,7 +1582,8 @@ func Test_ruleUpdateCb(t *testing.T) { Annotations: map[string]string{}, Labels: map[string]string{ extension.LabelPodQoS: string(extension.QoSLSR), - slov1alpha1.LabelCoreSchedGroupID: slov1alpha1.CoreSchedGroupIDNone, + slov1alpha1.LabelCoreSchedGroupID: "group-nnn", + slov1alpha1.LabelCoreSchedPolicy: string(slov1alpha1.CoreSchedPolicyNone), }, }, Spec: corev1.PodSpec{ @@ -1635,6 +1705,7 @@ func testGetDisabledRuleParam() Param { func testGetEnabledRule() *Rule { // use default CPUQOS return &Rule{ + enable: true, podQOSParams: map[extension.QoSClass]Param{ extension.QoSLSE: { IsPodEnabled: true, @@ -1680,6 +1751,7 @@ func testGetEnabledRule() *Rule { func testGetAllEnabledRule() *Rule { // use default CPUQOS and enable CPU Idle return &Rule{ + enable: true, podQOSParams: map[extension.QoSClass]Param{ extension.QoSLSE: { IsPodEnabled: true, @@ -1724,6 +1796,7 @@ func testGetAllEnabledRule() *Rule { func testGetDisabledRule() *Rule { return &Rule{ + enable: false, podQOSParams: map[extension.QoSClass]Param{ extension.QoSLSE: testGetDisabledRuleParam(), extension.QoSLSR: testGetDisabledRuleParam(), diff --git a/pkg/koordlet/runtimehooks/hooks/groupidentity/bvt.go b/pkg/koordlet/runtimehooks/hooks/groupidentity/bvt.go index 21d6abfe1..37ec1872c 100644 --- a/pkg/koordlet/runtimehooks/hooks/groupidentity/bvt.go +++ b/pkg/koordlet/runtimehooks/hooks/groupidentity/bvt.go @@ -40,12 +40,14 @@ const ( ) type bvtPlugin struct { - rule *bvtRule - ruleRWMutex sync.RWMutex - sysSupported *bool - hasKernelEnabled *bool // whether kernel is configurable for enabling bvt (via `kernel.sched_group_identity_enabled`) - kernelEnabled *bool // if not nil, indicates whether bvt feature is enabled via `kernel.sched_group_identity_enabled` - executor resourceexecutor.ResourceUpdateExecutor + rule *bvtRule + ruleRWMutex sync.RWMutex + + sysSupported *bool + hasKernelEnabled *bool // whether kernel is configurable for enabling bvt (via `kernel.sched_group_identity_enabled`) + coreSchedSysctlSupported *bool // whether core sched is supported by the sysctl + + executor resourceexecutor.ResourceUpdateExecutor } func (b *bvtPlugin) Register(op hooks.Options) { @@ -71,8 +73,8 @@ func (b *bvtPlugin) SystemSupported() bool { if err == nil { isBVTSupported, msg = bvtResource.IsSupported(util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed)) } - bvtConfigPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) - b.sysSupported = pointer.Bool(isBVTSupported || sysutil.FileExists(bvtConfigPath)) + sysSupported := isBVTSupported || sysutil.IsGroupIdentitySysctlSupported() + b.sysSupported = pointer.Bool(sysSupported) klog.Infof("update system supported info to %v for plugin %v, supported msg %s", *b.sysSupported, name, msg) } @@ -81,42 +83,66 @@ func (b *bvtPlugin) SystemSupported() bool { func (b *bvtPlugin) hasKernelEnable() bool { if b.hasKernelEnabled == nil { - bvtConfigPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) - b.hasKernelEnabled = pointer.Bool(sysutil.FileExists(bvtConfigPath)) + b.hasKernelEnabled = pointer.Bool(sysutil.IsGroupIdentitySysctlSupported()) } return *b.hasKernelEnabled } -// initKernelEnable checks and initializes the sysctl configuration for the bvt (group identity). -// It returns any error for the initialization. -func (b *bvtPlugin) initialize() error { - // NOTE: bvt (group identity) is supported and can be initialized in the system if: - // 1. anolis os kernel (<26.4): cgroup cpu.bvt_warp_ns exists but sysctl kernel.sched_group_identity_enabled no exist, - // the bvt feature is enabled by default, no need to set sysctl. - // 2. anolis os kernel (>=26.4): both cgroup cpu.bvt_warp_ns and sysctl kernel.sched_group_identity_enabled exist, - // the bvt feature is enabled when kernel.sched_group_identity_enabled is set as `1`. - if !b.hasKernelEnable() { // skip initialization of kernel does not support bvt sysctl +// tryDisableCoreSched tries disabling the core scheduling via sysctl to safely enable the group identity. +func (b *bvtPlugin) tryDisableCoreSched() error { + if b.coreSchedSysctlSupported == nil { + b.coreSchedSysctlSupported = pointer.Bool(sysutil.IsCoreSchedSysctlSupported()) + } + if !*b.coreSchedSysctlSupported { return nil } - // if cpu qos is enabled/disabled in rule, check if we need to change the sysctl config for bvt (group identity) - if b.kernelEnabled != nil && *b.kernelEnabled { - klog.V(6).Infof("skip initialize plugin %s, no need to change sysctl", name) + return sysutil.SetSchedCore(false) +} + +// initSysctl initializes the sysctl for the group identity. +// It returns whether cgroups need to set, e.g. if the sysctl config is not disabled. +func (b *bvtPlugin) initSysctl() error { + if !b.hasKernelEnable() { // kernel does not support bvt sysctl return nil } + // NOTE: Currently the kernel feature core scheduling is strictly excluded with the group identity's + // bvt=-1. So we have to check if the CoreSched can be disabled before enabling group identity. + err := b.tryDisableCoreSched() + if err != nil { + return err + } + // try to set bvt kernel enabled via sysctl when the sysctl config is disabled or unknown // https://github.com/koordinator-sh/koordinator/pull/1172 - err := sysutil.SetSchedGroupIdentity(true) + err = sysutil.SetSchedGroupIdentity(true) if err != nil { return fmt.Errorf("cannot enable kernel sysctl for bvt, err: %w", err) } - b.kernelEnabled = pointer.Bool(true) - klog.V(4).Infof("hook plugin %s is successfully initialized", name) return nil } +// isSysctlEnabled checks if the sysctl configuration for the bvt (group identity) is enabled. +// It returns whether cgroups need to set, e.g. if the sysctl config is not disabled. +func (b *bvtPlugin) isSysctlEnabled() (bool, error) { + // NOTE: bvt (group identity) is supported and can be initialized in the system if: + // 1. anolis os kernel (<26.4): cgroup cpu.bvt_warp_ns exists but sysctl kernel.sched_group_identity_enabled no exist, + // the bvt feature is enabled by default, no need to set sysctl. + // 2. anolis os kernel (>=26.4): both cgroup cpu.bvt_warp_ns and sysctl kernel.sched_group_identity_enabled exist, + // the bvt feature is enabled when kernel.sched_group_identity_enabled is set as `1`. + if !b.hasKernelEnable() { // consider enabled when kernel does not support bvt sysctl + return true, nil + } + + isSysEnabled, err := sysutil.GetSchedGroupIdentity() + if err != nil { + return true, fmt.Errorf("cannot get sysctl group identity, err: %v", err) + } + return isSysEnabled, nil +} + var singleton *bvtPlugin func Object() *bvtPlugin { diff --git a/pkg/koordlet/runtimehooks/hooks/groupidentity/bvt_test.go b/pkg/koordlet/runtimehooks/hooks/groupidentity/bvt_test.go index 88d034c53..1d072a1ba 100644 --- a/pkg/koordlet/runtimehooks/hooks/groupidentity/bvt_test.go +++ b/pkg/koordlet/runtimehooks/hooks/groupidentity/bvt_test.go @@ -40,11 +40,6 @@ func initKernelGroupIdentity(value int64, helper *system.FileTestUtil) { helper.WriteProcSubFileContents(filepath.Join(system.SysctlSubDir, system.KernelSchedGroupIdentityEnable), strconv.FormatInt(value, 10)) } -func getKernelGroupIdentity(helper *system.FileTestUtil) (int64, error) { - valueStr := helper.ReadProcSubFileContents(filepath.Join(system.SysctlSubDir, system.KernelSchedGroupIdentityEnable)) - return strconv.ParseInt(valueStr, 10, 64) -} - func getPodCPUBvt(podDirWithKube string, helper *system.FileTestUtil) int64 { valueStr := helper.ReadCgroupFileContents(podDirWithKube, system.CPUBVTWarpNs) value, _ := strconv.ParseInt(valueStr, 10, 64) @@ -110,6 +105,15 @@ func Test_bvtPlugin_systemSupported(t *testing.T) { } } +func TestObject(t *testing.T) { + t.Run("test", func(t *testing.T) { + b := Object() + assert.NotNil(t, b) + b1 := Object() + assert.Equal(t, b, b1) + }) +} + func Test_bvtPlugin_Register(t *testing.T) { t.Run("register bvt plugin", func(t *testing.T) { b := &bvtPlugin{} @@ -117,106 +121,208 @@ func Test_bvtPlugin_Register(t *testing.T) { }) } -func Test_bvtPlugin_initialized(t *testing.T) { - kubeRootDir := util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed) +func Test_bvtPlugin_initSysctl(t *testing.T) { type fields struct { - initPath *string - initKernelGroupIdentity bool - initKernelGroupIdentityValue int - rule *bvtRule - hasKernelEnable *bool - kernelEnabled *bool + prepareFn func(helper *system.FileTestUtil) + hasKernelEnabled *bool + coreSchedSysctlSupported *bool } tests := []struct { - name string - fields fields - wantErr bool - wantFields *int64 + name string + fields fields + wantErr bool + wantFn func(t *testing.T, helper *system.FileTestUtil) }{ { - name: "cannot initialize since system not support", - fields: fields{}, + name: "no need to init when sysctl not exist", + fields: fields{ + hasKernelEnabled: nil, + }, wantErr: false, }, { - name: "no need to initialize", + name: "only enable sysctl for group identity", fields: fields{ - hasKernelEnable: pointer.Bool(false), + prepareFn: func(helper *system.FileTestUtil) { + helper.WriteFileContents(system.GetProcSysFilePath(system.KernelSchedGroupIdentityEnable), "0") + }, + hasKernelEnabled: nil, }, wantErr: false, + wantFn: func(t *testing.T, helper *system.FileTestUtil) { + got := helper.ReadFileContents(system.GetProcSysFilePath(system.KernelSchedGroupIdentityEnable)) + assert.Equal(t, "1", got) + }, }, { - name: "failed to initialize", + name: "enable sysctl for group identity and disable core sched", fields: fields{ - hasKernelEnable: pointer.Bool(true), + prepareFn: func(helper *system.FileTestUtil) { + helper.WriteFileContents(system.GetProcSysFilePath(system.KernelSchedGroupIdentityEnable), "0") + helper.WriteFileContents(system.GetProcSysFilePath(system.KernelSchedCore), "1") + }, + hasKernelEnabled: nil, + }, + wantErr: false, + wantFn: func(t *testing.T, helper *system.FileTestUtil) { + got := helper.ReadFileContents(system.GetProcSysFilePath(system.KernelSchedGroupIdentityEnable)) + assert.Equal(t, "1", got) + got = helper.ReadFileContents(system.GetProcSysFilePath(system.KernelSchedCore)) + assert.Equal(t, "0", got) }, - wantErr: true, }, { - name: "initialized since only bvt file exist", + name: "skip enable sysctl for group identity 1", fields: fields{ - initPath: &kubeRootDir, + prepareFn: func(helper *system.FileTestUtil) { + helper.WriteFileContents(system.GetProcSysFilePath(system.KernelSchedGroupIdentityEnable), "1") + helper.WriteFileContents(system.GetProcSysFilePath(system.KernelSchedCore), "0") + }, + hasKernelEnabled: nil, }, wantErr: false, + wantFn: func(t *testing.T, helper *system.FileTestUtil) { + got := helper.ReadFileContents(system.GetProcSysFilePath(system.KernelSchedGroupIdentityEnable)) + assert.Equal(t, "1", got) + got = helper.ReadFileContents(system.GetProcSysFilePath(system.KernelSchedCore)) + assert.Equal(t, "0", got) + }, }, { - name: "initialized since bvt kernel file exist", + name: "failed to disable core sched", fields: fields{ - initKernelGroupIdentity: true, - initKernelGroupIdentityValue: 0, - rule: &bvtRule{ - enable: true, + prepareFn: func(helper *system.FileTestUtil) { + helper.WriteFileContents(system.GetProcSysFilePath(system.KernelSchedGroupIdentityEnable), "0") }, + hasKernelEnabled: pointer.Bool(true), + coreSchedSysctlSupported: pointer.Bool(true), + }, + wantErr: true, + wantFn: func(t *testing.T, helper *system.FileTestUtil) { + got := helper.ReadFileContents(system.GetProcSysFilePath(system.KernelSchedGroupIdentityEnable)) + assert.Equal(t, "0", got) }, - wantFields: pointer.Int64(1), }, { - name: "initialized since bvt kernel file exist 1", + name: "failed to disable sysctl for group identity", fields: fields{ - initKernelGroupIdentity: true, - initKernelGroupIdentityValue: 1, + prepareFn: func(helper *system.FileTestUtil) { + helper.WriteFileContents(system.GetProcSysFilePath(system.KernelSchedCore), "1") + }, + hasKernelEnabled: pointer.Bool(true), + }, + wantErr: true, + wantFn: func(t *testing.T, helper *system.FileTestUtil) { + got := helper.ReadFileContents(system.GetProcSysFilePath(system.KernelSchedCore)) + assert.Equal(t, "0", got) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + helper := system.NewFileTestUtil(t) + defer helper.Cleanup() + if tt.fields.prepareFn != nil { + tt.fields.prepareFn(helper) + } + + b := &bvtPlugin{ + hasKernelEnabled: tt.fields.hasKernelEnabled, + coreSchedSysctlSupported: tt.fields.coreSchedSysctlSupported, + } + gotErr := b.initSysctl() + assert.Equal(t, tt.wantErr, gotErr != nil, gotErr) + if tt.wantFn != nil { + tt.wantFn(t, helper) + } + }) + } +} + +func Test_bvtPlugin_prepare(t *testing.T) { + kubeRootDir := util.GetPodQoSRelativePath(corev1.PodQOSGuaranteed) + type fields struct { + initPath *string + initKernelGroupIdentity bool + initKernelGroupIdentityValue int + rule *bvtRule + hasKernelEnable *bool + sysSupported *bool + } + tests := []struct { + name string + fields fields + want *bvtRule + wantFields *int64 + }{ + { + name: "cannot prepare since system not support", + fields: fields{}, + want: nil, + }, + { + name: "prepare successfully without sysctl", + fields: fields{ + initPath: &kubeRootDir, rule: &bvtRule{ - enable: true, + enable: false, }, + hasKernelEnable: pointer.Bool(false), + }, + want: &bvtRule{ + enable: false, + }, + }, + { + name: "failed to prepare since rule is empty", + fields: fields{ + sysSupported: pointer.Bool(true), + hasKernelEnable: pointer.Bool(true), }, - wantFields: pointer.Int64(1), + want: nil, }, { - name: "initialized since bvt kernel file exist 2", + name: "no need to prepare since rule and sysctl disabled", fields: fields{ - initPath: &kubeRootDir, initKernelGroupIdentity: true, initKernelGroupIdentityValue: 0, rule: &bvtRule{ - enable: true, + enable: false, }, + sysSupported: pointer.Bool(true), + hasKernelEnable: pointer.Bool(true), }, - wantFields: pointer.Int64(1), + want: nil, }, { - name: "not initialize since bvt file not exist and cpu qos disabled", + name: "need to prepare since sysctl enabled", fields: fields{ initKernelGroupIdentity: true, initKernelGroupIdentityValue: 1, rule: &bvtRule{ enable: false, }, + sysSupported: pointer.Bool(true), + hasKernelEnable: pointer.Bool(true), + }, + want: &bvtRule{ + enable: false, }, - wantFields: pointer.Int64(1), }, { - name: "skip sysctl set since bvt kernel enable not changed", + name: "need to prepare since rule enabled", fields: fields{ - initPath: &kubeRootDir, initKernelGroupIdentity: true, - initKernelGroupIdentityValue: 0, + initKernelGroupIdentityValue: 1, rule: &bvtRule{ enable: true, }, + sysSupported: pointer.Bool(true), hasKernelEnable: pointer.Bool(true), - kernelEnabled: pointer.Bool(true), }, - wantFields: pointer.Int64(0), + want: &bvtRule{ + enable: true, + }, }, } for _, tt := range tests { @@ -232,16 +338,10 @@ func Test_bvtPlugin_initialized(t *testing.T) { b := &bvtPlugin{ rule: tt.fields.rule, hasKernelEnabled: tt.fields.hasKernelEnable, - kernelEnabled: tt.fields.kernelEnabled, - } - gotErr := b.initialize() - assert.Equal(t, tt.wantErr, gotErr != nil) - - if tt.wantFields != nil { - got, err := getKernelGroupIdentity(testHelper) - assert.NoError(t, err) - assert.Equal(t, *tt.wantFields, got) + sysSupported: tt.fields.sysSupported, } + got := b.prepare() + assert.Equal(t, tt.want, got) }) } } diff --git a/pkg/koordlet/runtimehooks/hooks/groupidentity/interceptor.go b/pkg/koordlet/runtimehooks/hooks/groupidentity/interceptor.go index a589e8852..868695460 100644 --- a/pkg/koordlet/runtimehooks/hooks/groupidentity/interceptor.go +++ b/pkg/koordlet/runtimehooks/hooks/groupidentity/interceptor.go @@ -73,10 +73,16 @@ func (b *bvtPlugin) prepare() *bvtRule { klog.V(5).Infof("hook plugin rule is nil, nothing to do for plugin %v", name) return nil } - err := b.initialize() + + isSysctlEnabled, err := b.isSysctlEnabled() if err != nil { - klog.V(4).Infof("failed to initialize plugin %s, err: %s", name, err) + klog.V(4).Infof("failed to check sysctl for plugin %s, err: %s", name, err) + return nil + } + if !r.getEnable() && !isSysctlEnabled { // no need to update cgroups if both rule and sysctl disabled + klog.V(5).Infof("rule is disabled for plugin %v, no more to do for resources", name) return nil } + return r } diff --git a/pkg/koordlet/runtimehooks/hooks/groupidentity/interceptor_test.go b/pkg/koordlet/runtimehooks/hooks/groupidentity/interceptor_test.go index 44ee6fa52..6a1b6ab6a 100644 --- a/pkg/koordlet/runtimehooks/hooks/groupidentity/interceptor_test.go +++ b/pkg/koordlet/runtimehooks/hooks/groupidentity/interceptor_test.go @@ -68,6 +68,24 @@ func Test_bvtPlugin_SetPodBvtValue_Proxy(t *testing.T) { corev1.PodQOSBestEffort: 0, }, } + testRule1 := &bvtRule{ + enable: true, + podQOSParams: map[ext.QoSClass]int64{ + ext.QoSLSR: 0, + ext.QoSLS: 0, + ext.QoSBE: -1, + }, + kubeQOSDirParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 0, + corev1.PodQOSBestEffort: -1, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 0, + corev1.PodQOSBestEffort: -1, + }, + } type fields struct { rule *bvtRule systemSupported *bool @@ -186,7 +204,7 @@ func Test_bvtPlugin_SetPodBvtValue_Proxy(t *testing.T) { }, }, { - name: "set guaranteed bvt none while kernel sysctl changed", + name: "no need to set guaranteed bvt none while kernel sysctl disabled", fields: fields{ rule: noneRule, systemSupported: pointer.Bool(true), @@ -202,6 +220,25 @@ func Test_bvtPlugin_SetPodBvtValue_Proxy(t *testing.T) { }, response: &runtimeapi.PodSandboxHookResponse{}, }, + want: want{}, + }, + { + name: "set guaranteed bvt none while kernel sysctl changed", + fields: fields{ + rule: testRule1, + systemSupported: pointer.Bool(true), + initKernelGroupIdentity: true, + initKernelGroupIdentityValue: 1, + }, + args: args{ + request: &runtimeapi.PodSandboxHookRequest{ + Labels: map[string]string{ + ext.LabelPodQoS: string(ext.QoSLS), + }, + CgroupParent: "kubepods/pod-guaranteed-test-uid/", + }, + response: &runtimeapi.PodSandboxHookResponse{}, + }, want: want{ bvtValue: pointer.Int64(0), }, @@ -320,6 +357,24 @@ func Test_bvtPlugin_SetKubeQOSBvtValue_Reconciler(t *testing.T) { corev1.PodQOSBestEffort: 0, }, } + testRule := &bvtRule{ + enable: true, + podQOSParams: map[ext.QoSClass]int64{ + ext.QoSLSR: 0, + ext.QoSLS: 0, + ext.QoSBE: -1, + }, + kubeQOSDirParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 0, + corev1.PodQOSBestEffort: -1, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 0, + corev1.PodQOSBestEffort: -1, + }, + } type fields struct { rule *bvtRule sysSupported *bool @@ -420,7 +475,7 @@ func Test_bvtPlugin_SetKubeQOSBvtValue_Reconciler(t *testing.T) { }, }, { - name: "set guaranteed bvt none while kernel sysctl changed", + name: "no need to set guaranteed bvt none while kernel sysctl disabled", fields: fields{ rule: noneRule, sysSupported: pointer.Bool(true), @@ -430,6 +485,19 @@ func Test_bvtPlugin_SetKubeQOSBvtValue_Reconciler(t *testing.T) { args: args{ kubeQOS: corev1.PodQOSGuaranteed, }, + want: want{}, + }, + { + name: "set guaranteed bvt none while kernel sysctl changed", + fields: fields{ + rule: testRule, + sysSupported: pointer.Bool(true), + initKernelGroupIdentity: true, + initKernelGroupIdentityValue: 1, + }, + args: args{ + kubeQOS: corev1.PodQOSGuaranteed, + }, want: want{ bvtValue: pointer.Int64(0), }, @@ -506,9 +574,8 @@ func Test_bvtPlugin_SetHostAppBvtValue(t *testing.T) { }, } type fields struct { - rule *bvtRule - sysSupported *bool - kernelEnabled *bool + rule *bvtRule + sysSupported *bool } type args struct { qos ext.QoSClass @@ -523,9 +590,8 @@ func Test_bvtPlugin_SetHostAppBvtValue(t *testing.T) { { name: "set bvt value for ls host application", fields: fields{ - rule: defaultRule, - sysSupported: pointer.Bool(true), - kernelEnabled: pointer.Bool(true), + rule: defaultRule, + sysSupported: pointer.Bool(true), }, args: args{ qos: ext.QoSLS, @@ -536,9 +602,8 @@ func Test_bvtPlugin_SetHostAppBvtValue(t *testing.T) { { name: "set bvt value for none host application", fields: fields{ - rule: defaultRule, - sysSupported: pointer.Bool(true), - kernelEnabled: pointer.Bool(true), + rule: defaultRule, + sysSupported: pointer.Bool(true), }, args: args{ qos: ext.QoSNone, @@ -550,9 +615,8 @@ func Test_bvtPlugin_SetHostAppBvtValue(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { b := &bvtPlugin{ - rule: tt.fields.rule, - sysSupported: tt.fields.sysSupported, - kernelEnabled: tt.fields.kernelEnabled, + rule: tt.fields.rule, + sysSupported: tt.fields.sysSupported, } ctx := &protocol.HostAppContext{} diff --git a/pkg/koordlet/runtimehooks/hooks/groupidentity/rule.go b/pkg/koordlet/runtimehooks/hooks/groupidentity/rule.go index a9f50161e..d30c67db2 100644 --- a/pkg/koordlet/runtimehooks/hooks/groupidentity/rule.go +++ b/pkg/koordlet/runtimehooks/hooks/groupidentity/rule.go @@ -156,6 +156,29 @@ func (b *bvtPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error { return nil } + // check sysctl + // Currently, the kernel feature core scheduling is conflict to the group identity. So before we enable the + // group identity, we should check if the GroupIdentity can be enabled via sysctl and the CoreSched can be + // disabled via sysctl. And when we disable the group identity, we can check if the GroupIdentity is already + // disabled which means we do not need to update the cgroups. + isEnabled := r.getEnable() + if isEnabled { + if err := b.initSysctl(); err != nil { + klog.Warningf("failed to initialize system config for plugin %s, err: %s", name, err) + return nil + } + } else { + isSysctlEnabled, err := b.isSysctlEnabled() + if err != nil { + klog.Warningf("failed to check sysctl for plugin %s, err: %s", name, err) + return nil + } + if !r.getEnable() && !isSysctlEnabled { // no need to update cgroups if both rule and sysctl disabled + klog.V(4).Infof("rule is disabled for plugin %v, no more to do for resources", name) + return nil + } + } + qosCgroupMap := map[string]struct{}{} for _, kubeQOS := range []corev1.PodQOSClass{ corev1.PodQOSGuaranteed, corev1.PodQOSBurstable, corev1.PodQOSBestEffort} { @@ -180,6 +203,7 @@ func (b *bvtPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error { // bvt=-1. So we have to check and disable all the BE cgroups' bvt for the GroupIdentity before creating the // core sched cookies. To keep the consistency of the cgroup tree's configuration, we list and update the cgroups // by the level order when the group identity is globally disabled. + // This check should be removed after the kernel provides a more stable interface. podCgroupMap := map[string]int64{} // pod-level for _, kubeQOS := range []corev1.PodQOSClass{corev1.PodQOSGuaranteed, corev1.PodQOSBurstable, corev1.PodQOSBestEffort} { diff --git a/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go b/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go index 9dde22a04..030e6b7fd 100644 --- a/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go +++ b/pkg/koordlet/runtimehooks/hooks/groupidentity/rule_test.go @@ -709,8 +709,9 @@ func Test_bvtPlugin_ruleUpdateCbForPods(t *testing.T) { }, } type fields struct { - rule *bvtRule - prepareFn func(helper *system.FileTestUtil) + rule *bvtRule + hasKernelEnabled *bool + prepareFn func(helper *system.FileTestUtil) } type args struct { pods map[string]*statesinformer.PodMeta @@ -722,7 +723,7 @@ func Test_bvtPlugin_ruleUpdateCbForPods(t *testing.T) { wantKubeDirVal map[corev1.PodQOSClass]int64 wantPodVal map[string]int64 wantErr bool - wantExCheckFn func(t *testing.T, helper *system.FileTestUtil) + wantFn func(t *testing.T, helper *system.FileTestUtil) }{ { name: "callback with ls and be enabled", @@ -839,7 +840,7 @@ func Test_bvtPlugin_ruleUpdateCbForPods(t *testing.T) { "burstable-pod": 0, }, wantErr: false, - wantExCheckFn: func(t *testing.T, helper *system.FileTestUtil) { + wantFn: func(t *testing.T, helper *system.FileTestUtil) { // check dangling pod got := helper.ReadCgroupFileContents(testPodMetaMap["besteffort-pod"].CgroupDir, system.CPUBVTWarpNs) assert.Equal(t, "0", got) @@ -911,13 +912,122 @@ func Test_bvtPlugin_ruleUpdateCbForPods(t *testing.T) { "besteffort-pod": 0, }, wantErr: false, - wantExCheckFn: func(t *testing.T, helper *system.FileTestUtil) { + wantFn: func(t *testing.T, helper *system.FileTestUtil) { // check be container besteffortContainerDir := testPodMetaMap["besteffort-pod"].CgroupDir + "/cri-containerd-xxxxxx.scope" got := helper.ReadCgroupFileContents(besteffortContainerDir, system.CPUBVTWarpNs) assert.Equal(t, "0", got) }, }, + { + name: "callback with ls and be enabled but init sysctl failed", + fields: fields{ + rule: &bvtRule{ + enable: true, + podQOSParams: map[ext.QoSClass]int64{ + ext.QoSLSR: 0, + ext.QoSLS: 2, + ext.QoSBE: -1, + }, + kubeQOSDirParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 2, + corev1.PodQOSBestEffort: -1, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 2, + corev1.PodQOSBurstable: 2, + corev1.PodQOSBestEffort: -1, + }, + }, + hasKernelEnabled: pointer.Bool(true), + prepareFn: func(helper *system.FileTestUtil) { + for _, kubeQoS := range []corev1.PodQOSClass{corev1.PodQOSGuaranteed, corev1.PodQOSBurstable, corev1.PodQOSBestEffort} { + initCPUBvt(util.GetPodQoSRelativePath(kubeQoS), 0, helper) + } + initCPUBvt(testPodMetaMap["lsr-pod"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["ls-pod1"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["ls-pod2"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["be-pod"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["guaranteed-pod"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["burstable-pod"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["besteffort-pod"].CgroupDir, 0, helper) + }, + }, + args: args{ + pods: testPodMetaMap, + }, + wantKubeDirVal: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: int64(0), + corev1.PodQOSBurstable: int64(0), + corev1.PodQOSBestEffort: int64(0), + }, + wantPodVal: map[string]int64{ + "lsr-pod": 0, + "ls-pod1": 0, + "ls-pod2": 0, + "be-pod": 0, + "guaranteed-pod": 0, + "burstable-pod": 0, + "besteffort-pod": 0, + }, + wantErr: false, + }, + { + name: "callback with ls and be disabled and sysctl disabled", + fields: fields{ + rule: &bvtRule{ + enable: false, + podQOSParams: map[ext.QoSClass]int64{ + ext.QoSLSR: 0, + ext.QoSLS: 0, + ext.QoSBE: 0, + }, + kubeQOSDirParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 0, + corev1.PodQOSBestEffort: 0, + }, + kubeQOSPodParams: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: 0, + corev1.PodQOSBurstable: 0, + corev1.PodQOSBestEffort: 0, + }, + }, + hasKernelEnabled: pointer.Bool(true), + prepareFn: func(helper *system.FileTestUtil) { + initKernelGroupIdentity(0, helper) + for _, kubeQoS := range []corev1.PodQOSClass{corev1.PodQOSGuaranteed, corev1.PodQOSBurstable, corev1.PodQOSBestEffort} { + initCPUBvt(util.GetPodQoSRelativePath(kubeQoS), 0, helper) + } + initCPUBvt(testPodMetaMap["lsr-pod"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["ls-pod1"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["ls-pod2"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["be-pod"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["guaranteed-pod"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["burstable-pod"].CgroupDir, 0, helper) + initCPUBvt(testPodMetaMap["besteffort-pod"].CgroupDir, 0, helper) + }, + }, + args: args{ + pods: testPodMetaMap, + }, + wantKubeDirVal: map[corev1.PodQOSClass]int64{ + corev1.PodQOSGuaranteed: int64(0), + corev1.PodQOSBurstable: int64(0), + corev1.PodQOSBestEffort: int64(0), + }, + wantPodVal: map[string]int64{ + "lsr-pod": 0, + "ls-pod1": 0, + "ls-pod2": 0, + "be-pod": 0, + "guaranteed-pod": 0, + "burstable-pod": 0, + "besteffort-pod": 0, + }, + wantErr: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -935,8 +1045,9 @@ func Test_bvtPlugin_ruleUpdateCbForPods(t *testing.T) { Pods: podList, } b := &bvtPlugin{ - rule: tt.fields.rule, - executor: resourceexecutor.NewResourceUpdateExecutor(), + rule: tt.fields.rule, + hasKernelEnabled: tt.fields.hasKernelEnabled, + executor: resourceexecutor.NewResourceUpdateExecutor(), } stop := make(chan struct{}) defer close(stop) @@ -957,8 +1068,8 @@ func Test_bvtPlugin_ruleUpdateCbForPods(t *testing.T) { wantBvt := tt.wantPodVal[podName] assert.Equal(t, wantBvt, gotBvt, "pod %s bvt value not equal", podName) } - if tt.wantExCheckFn != nil { - tt.wantExCheckFn(t, testHelper) + if tt.wantFn != nil { + tt.wantFn(t, testHelper) } }) } @@ -1031,10 +1142,9 @@ func Test_bvtPlugin_ruleUpdateCbForHostApp(t *testing.T) { initCPUBvt(util.GetPodQoSRelativePath(corev1.PodQOSBestEffort), 0, testHelper) b := &bvtPlugin{ - rule: tt.fields.rule, - sysSupported: pointer.Bool(true), - kernelEnabled: pointer.Bool(true), - executor: resourceexecutor.NewResourceUpdateExecutor(), + rule: tt.fields.rule, + sysSupported: pointer.Bool(true), + executor: resourceexecutor.NewResourceUpdateExecutor(), } stop := make(chan struct{}) defer close(stop) diff --git a/pkg/koordlet/util/system/core_sched.go b/pkg/koordlet/util/system/core_sched.go index 900b8e5d5..ca6a7b390 100644 --- a/pkg/koordlet/util/system/core_sched.go +++ b/pkg/koordlet/util/system/core_sched.go @@ -268,6 +268,32 @@ func (f *FakeCoreSchedExtended) Assign(pidTypeFrom CoreSchedScopeType, pidFrom u return nil, nil } +func IsCoreSchedSysctlSupported() bool { + return FileExists(GetProcSysFilePath(KernelSchedCore)) +} + +// IsCoreSchedSupported checks if the kernel supports the core scheduling. +// Currently, it relies on the interfaces provided by the Anolis OS. +func IsCoreSchedSupported() (bool, string) { + // kernel supports if: + // a) sysctl has sched_core, + // b) sched_features has SCHED_CORE/NO_SCHED_CORE + if IsCoreSchedSysctlSupported() { + return true, "sysctl supported" + } + + isSchedFeaturesSuppported, msg := SchedFeatures.IsSupported("") + if !isSchedFeaturesSuppported { // sched_features unavailable + return false, msg + } + _, err := IsCoreSchedFeatureEnabled() + if err == nil { + return true, "sched_features supported" + } + + return false, "not supported neither by sysctl nor by sched_features" +} + // EnableCoreSchedIfSupported checks if the core scheduling feature is enabled in the kernel sched_features. // If kernel supported (available in the latest Anolis OS), it tries to enable the core scheduling feature. // The core sched's kernel feature is known set in two places, if both of them are not found, the system is considered diff --git a/pkg/koordlet/util/system/system_file.go b/pkg/koordlet/util/system/system_file.go index d0e4c8c2d..c28ccb0d1 100644 --- a/pkg/koordlet/util/system/system_file.go +++ b/pkg/koordlet/util/system/system_file.go @@ -147,6 +147,20 @@ func (*ProcSysctl) SetSysctl(sysctl string, newVal int) error { return os.WriteFile(GetProcSysFilePath(sysctl), []byte(strconv.Itoa(newVal)), 0640) } +func IsGroupIdentitySysctlSupported() bool { + return FileExists(GetProcSysFilePath(KernelSchedGroupIdentityEnable)) +} + +func GetSchedGroupIdentity() (bool, error) { + s := NewProcSysctl() + // 0: disabled; 1: enabled + cur, err := s.GetSysctl(KernelSchedGroupIdentityEnable) + if err != nil { + return false, fmt.Errorf("cannot get sysctl group identity, err: %w", err) + } + return cur == 1, nil +} + func SetSchedGroupIdentity(enable bool) error { s := NewProcSysctl() cur, err := s.GetSysctl(KernelSchedGroupIdentityEnable)