From 7f7c869093bd9c62cb3da12c5b678b8e1979e115 Mon Sep 17 00:00:00 2001 From: saintube Date: Mon, 15 Jan 2024 17:30:40 +0800 Subject: [PATCH] koordlet: fix core sched conflicts with group identity Signed-off-by: saintube --- .../hooks/coresched/core_sched.go | 9 ++ .../hooks/coresched/core_sched_test.go | 85 +++++++++++++++++++ .../runtimehooks/hooks/coresched/rule.go | 2 +- 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go index 4d1116a2c..e05dd3ae4 100644 --- a/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go +++ b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched.go @@ -218,6 +218,15 @@ func (p *Plugin) SetContainerCookie(proto protocol.HooksProtocol) error { // 1. disabled -> enabled: Add or Assign. // 2. keep enabled: Check the differences of cookie, group ID and the PIDs, and do Assign. if isEnabled { + // FIXME(saintube): Currently we need to ensure the group identity is disabled via sysctl before enabling + // the core sched cookie in the container reconciler, because the disabling during the rule update might + // fail. This check can be removed once the kernel feature provides a way to disable the group identity. + if err := p.initSystem(true); err != nil { + klog.V(4).Infof("plugin %s failed to initialize system for container %s/%s, err: %s", + name, containerCtx.Request.PodMeta.String(), containerCtx.Request.ContainerMeta.Name, err) + return nil + } + return p.enableContainerCookie(containerCtx, groupID) } // else pod disables diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go index 3cf18b9d4..e544b0c5c 100644 --- a/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go +++ b/pkg/koordlet/runtimehooks/hooks/coresched/core_sched_test.go @@ -396,6 +396,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { name: "add cookie for LS container correctly", fields: fields{ prepareFn: func(helper *sysutil.FileTestUtil) { + sysctlFeaturePath := sysutil.GetProcSysFilePath(sysutil.KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "1\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") }, @@ -455,6 +457,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { name: "failed to add cookie for LS container when core sched add failed", fields: fields{ prepareFn: func(helper *sysutil.FileTestUtil) { + sysctlFeaturePath := sysutil.GetProcSysFilePath(sysutil.KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "1\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") }, @@ -511,6 +515,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { name: "failed to add cookie for BE container when PIDs no longer exist", fields: fields{ prepareFn: func(helper *sysutil.FileTestUtil) { + sysctlFeaturePath := sysutil.GetProcSysFilePath(sysutil.KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "1\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") }, @@ -563,6 +569,10 @@ func TestPlugin_SetContainerCookie(t *testing.T) { name: "assign cookie for LS container correctly", fields: fields{ prepareFn: func(helper *sysutil.FileTestUtil) { + sysctlFeaturePath := sysutil.GetProcSysFilePath(sysutil.KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "0\n") + giSysctlPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) + helper.WriteFileContents(giSysctlPath, "1\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") }, @@ -632,6 +642,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { name: "failed to assign cookie for LS container but fallback to add correctly", fields: fields{ prepareFn: func(helper *sysutil.FileTestUtil) { + sysctlFeaturePath := sysutil.GetProcSysFilePath(sysutil.KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "1\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") }, @@ -699,6 +711,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { name: "failed to assign cookie for LS container neither add", fields: fields{ prepareFn: func(helper *sysutil.FileTestUtil) { + sysctlFeaturePath := sysutil.GetProcSysFilePath(sysutil.KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "1\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") }, @@ -758,6 +772,75 @@ func TestPlugin_SetContainerCookie(t *testing.T) { groupToCookie: map[string]uint64{}, }, }, + { + name: "failed to assign cookie for LS container since system init failed", + fields: fields{ + prepareFn: func(helper *sysutil.FileTestUtil) { + giSysctlPath := sysutil.GetProcSysFilePath(sysutil.KernelSchedGroupIdentityEnable) + helper.WriteFileContents(giSysctlPath, "1\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") + helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") + }, + plugin: testGetEnabledPlugin(), + preparePluginFn: func(p *Plugin) { + f := p.cse.(*sysutil.FakeCoreSchedExtended) + f.SetNextCookieID(2000000) + p.cookieCache.SetDefault("group-xxx-expeller", newCookieCacheEntry(1000000, 1000, 1001, 1002)) + }, + cse: sysutil.NewFakeCoreSchedExtended(map[uint32]uint64{ + 1: 0, + 10: 0, + 1000: 1000000, + 1001: 1000000, + 1002: 1000000, + 12344: 0, + 12345: 0, + 12346: 0, + }, map[uint32]uint32{ + 1: 1, + 1000: 1000, + 1001: 1001, + 1002: 1001, + 12344: 12344, + 12345: 12344, + 12346: 12346, + }, map[uint32]bool{ + 12346: true, + }), + groupID: "group-xxx-expeller", + }, + arg: &protocol.ContainerContext{ + Request: protocol.ContainerRequest{ + PodMeta: protocol.PodMeta{ + Name: "test-pod", + UID: "xxxxxx", + }, + PodAnnotations: map[string]string{}, + PodLabels: map[string]string{ + extension.LabelPodQoS: string(extension.QoSLS), + slov1alpha1.LabelCoreSchedGroupID: "group-xxx", + }, + ContainerMeta: protocol.ContainerMeta{ + Name: "test-container", + ID: "containerd://yyyyyy", + }, + CgroupParent: "kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", + }, + }, + wantErr: false, + wantFields: wantFields{ + cookieToPIDs: map[uint64][]uint32{ + 1000000: { + 1000, + 1001, + 1002, + }, + }, + groupToCookie: map[string]uint64{ + "group-xxx-expeller": 1000000, + }, + }, + }, { name: "clear cookie for LS container correctly", fields: fields{ @@ -1096,6 +1179,8 @@ func TestPlugin_SetContainerCookie(t *testing.T) { name: "add cookie for LS container migrated between groups", fields: fields{ prepareFn: func(helper *sysutil.FileTestUtil) { + sysctlFeaturePath := sysutil.GetProcSysFilePath(sysutil.KernelSchedCore) + helper.WriteFileContents(sysctlFeaturePath, "1\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcs, "12344\n12345\n12346\n") helper.WriteCgroupFileContents("kubepods.slice/kubepods-podxxxxxx.slice/cri-containerd-yyyyyy.scope", sysutil.CPUProcsV2, "12344\n12345\n12346\n") }, diff --git a/pkg/koordlet/runtimehooks/hooks/coresched/rule.go b/pkg/koordlet/runtimehooks/hooks/coresched/rule.go index d08b17dec..036535a21 100644 --- a/pkg/koordlet/runtimehooks/hooks/coresched/rule.go +++ b/pkg/koordlet/runtimehooks/hooks/coresched/rule.go @@ -199,7 +199,7 @@ func (p *Plugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error { } if err := p.initSystem(p.rule.IsEnabled()); err != nil { - klog.V(4).Infof("plugin %s failed to initialize system, err: %s", name, err) + klog.Warningf("plugin %s failed to initialize system, err: %s", name, err) return nil } klog.V(6).Infof("plugin %s initialize system successfully", name)