Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

koordlet: fix recursively disabling bvt #1848

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 42 additions & 28 deletions pkg/koordlet/runtimehooks/hooks/groupidentity/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ func (b *bvtPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error {
bvtUpdater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, kubeQOSCgroupPath, strconv.FormatInt(bvtValue, 10), e)
if err != nil {
klog.Infof("bvt updater create failed, dir %v, error %v", kubeQOSCgroupPath, err)
continue
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update kube qos %v cpu bvt failed, dir %v, error %v", kubeQOS, kubeQOSCgroupPath, err)
Expand All @@ -200,17 +201,19 @@ func (b *bvtPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error {
}

// FIXME(saintube): Currently the kernel feature core scheduling is strictly excluded with the group identity's
// bvt=-1. So we have to check and disable all the BE cgroups' bvt for the GroupIdentity before creating the
// core sched cookies. To keep the consistency of the cgroup tree's configuration, we list and update the cgroups
// by the level order when the group identity is globally disabled.
// This check should be removed after the kernel provides a more stable interface.
// bvt=-1. So we have to check and disable all the BE cgroups' bvt for the GroupIdentity before creating the
// core sched cookies. To keep the consistency of the cgroup tree's configuration, we list and update the cgroups
// by the level order when the group identity is globally disabled.
// This check should be removed after the kernel provides a more stable interface.
podCgroupMap := map[string]int64{}
// pod-level
for _, kubeQOS := range []corev1.PodQOSClass{corev1.PodQOSGuaranteed, corev1.PodQOSBurstable, corev1.PodQOSBestEffort} {
bvtValue := r.getKubeQOSDirBvtValue(kubeQOS)
podCgroupDirs, err := koordletutil.GetCgroupPathsByTargetDepth(kubeQOS, sysutil.CPUBVTWarpNsName, koordletutil.PodCgroupPathRelativeDepth)
kubeQOSParentDir := koordletutil.GetPodQoSRelativePath(kubeQOS)
podCgroupDirs, err := koordletutil.GetCgroupPathsByTargetDepth(sysutil.CPUBVTWarpNsName, kubeQOSParentDir, koordletutil.PodCgroupPathRelativeDepth)
if err != nil {
return fmt.Errorf("get pod cgroup paths failed, qos %s, err: %w", kubeQOS, err)
klog.Infof("get pod cgroup paths failed, qos %s, err: %w", kubeQOS, err)
continue
}
for _, cgroupDir := range podCgroupDirs {
if _, ok := qosCgroupMap[cgroupDir]; ok { // exclude qos cgroup
Expand All @@ -228,12 +231,32 @@ func (b *bvtPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error {
bvtUpdater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, podCgroupPath, strconv.FormatInt(podBvt, 10), e)
if err != nil {
klog.Infof("bvt updater create failed, dir %v, error %v", podCgroupPath, err)
continue
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update pod %s cpu bvt failed, dir %v, error %v",
util.GetPodKey(podMeta.Pod), podCgroupPath, err)
}
delete(podCgroupMap, podCgroupPath)

// container-level
// NOTE: Although we do not set the container's cpu.bvt_warp_ns directly, it is inheritable from the pod-level,
// we have to handle the container-level only when we want to disable the group identity.
containerCgroupDirs, err := koordletutil.GetCgroupPathsByTargetDepth(sysutil.CPUBVTWarpNsName, podCgroupPath, 1)
if err != nil {
klog.Infof("get container cgroup paths failed, dir %s, error %v", podCgroupPath, err)
continue
}
for _, cgroupDir := range containerCgroupDirs {
bvtUpdater, err = resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, cgroupDir, strconv.FormatInt(podBvt, 10), e)
if err != nil {
klog.Infof("bvt updater create failed, dir %v, error %v", cgroupDir, err)
continue
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update container cpu bvt failed, dir %v, error %v", cgroupDir, err)
}
}
}
for _, hostApp := range target.HostApplications {
hostCtx := protocol.HooksProtocolBuilder.HostApp(&hostApp)
Expand All @@ -250,35 +273,26 @@ func (b *bvtPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error {
bvtUpdater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, podCgroupDir, strconv.FormatInt(bvtValue, 10), e)
if err != nil {
klog.Infof("bvt updater create failed, dir %v, error %v", podCgroupDir, err)
continue
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update remaining pod cpu bvt failed, dir %v, error %v", podCgroupDir, err)
}
}

// container-level
// NOTE: Although we do not set the container's cpu.bvt_warp_ns directly, it is inheritable from the pod-level,
// we have to handle the container-level only when we want to disable the group identity.
if !r.getEnable() {
for _, kubeQOS := range []corev1.PodQOSClass{corev1.PodQOSGuaranteed, corev1.PodQOSBurstable, corev1.PodQOSBestEffort} {
bvtValue := r.getKubeQOSDirBvtValue(kubeQOS)
containerCgroupDirs, err := koordletutil.GetCgroupPathsByTargetDepth(kubeQOS, sysutil.CPUBVTWarpNsName, koordletutil.ContainerCgroupPathRelativeDepth)
// container-level
containerCgroupDirs, err := koordletutil.GetCgroupPathsByTargetDepth(sysutil.CPUBVTWarpNsName, podCgroupDir, 1)
if err != nil {
klog.Infof("get container cgroup paths failed, dir %s, error %v", podCgroupDir, err)
continue
}
for _, cgroupDir := range containerCgroupDirs {
bvtUpdater, err = resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, cgroupDir, strconv.FormatInt(bvtValue, 10), e)
if err != nil {
return fmt.Errorf("get container cgroup paths failed, qos %s, err: %w", kubeQOS, err)
klog.Infof("bvt updater create failed, dir %v, error %v", cgroupDir, err)
continue
}
for _, cgroupDir := range containerCgroupDirs {
if _, ok := podCgroupMap[cgroupDir]; ok {
continue
}

e := audit.V(3).Reason(name).Message("set bvt to %v", bvtValue)
bvtUpdater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, cgroupDir, strconv.FormatInt(bvtValue, 10), e)
if err != nil {
klog.Infof("bvt updater create failed, dir %v, error %v", cgroupDir, err)
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update container cpu bvt failed, dir %v, error %v", cgroupDir, err)
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update remaining container cpu bvt failed, dir %v, error %v", cgroupDir, err)
}
}
}
Expand Down
17 changes: 9 additions & 8 deletions pkg/koordlet/util/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,25 +92,25 @@ func GetBECPUSetPathsByMaxDepth(relativeDepth int) ([]string, error) {
return paths, err
}

func GetCgroupPathsByTargetDepth(qos corev1.PodQOSClass, resourceType system.ResourceType, relativeDepth int) ([]string, error) {
rootCgroupParentDir := GetPodQoSRelativePath(qos)
func GetCgroupPathsByTargetDepth(resourceType system.ResourceType, cgroupParent string, relativeDepth int) ([]string, error) {
r, err := system.GetCgroupResource(resourceType)
if err != nil {
return nil, fmt.Errorf("get resource type failed, err: %w", err)
}
cgroupResource := r.(*system.CgroupResource)
rootCgroupPath := filepath.Dir(r.Path(rootCgroupParentDir))
absCgroupParent := filepath.Dir(r.Path(cgroupParent))
rootSubfsPath := system.GetRootCgroupSubfsDir(cgroupResource.Subfs)
_, err = os.Stat(rootCgroupPath)
_, err = os.Stat(absCgroupParent)
if err != nil {
// make sure the rootCgroupPath is available
return nil, err
}
klog.V(6).Infof("get rootCgroupPath, qos %s, resource %s, path: %s", qos, resourceType, rootCgroupPath)
klog.V(6).Infof("get rootCgroupPath, resource %s, parent %s, absolute path: %s",
resourceType, cgroupParent, absCgroupParent)

absDepth := strings.Count(rootCgroupPath, string(os.PathSeparator)) + relativeDepth
absDepth := strings.Count(absCgroupParent, string(os.PathSeparator)) + relativeDepth
var containerPaths []string
err = filepath.WalkDir(rootCgroupPath, func(path string, info os.DirEntry, err error) error {
err = filepath.WalkDir(absCgroupParent, func(path string, info os.DirEntry, err error) error {
if err != nil {
return err
}
Expand All @@ -129,7 +129,8 @@ func GetCgroupPathsByTargetDepth(qos corev1.PodQOSClass, resourceType system.Res

// GetBECPUSetPathsByTargetDepth only gets the be containers' cpuset groups' paths
func GetBECPUSetPathsByTargetDepth(relativeDepth int) ([]string, error) {
return GetCgroupPathsByTargetDepth(corev1.PodQOSBestEffort, system.CPUSetCPUSName, relativeDepth)
beCgroupParentDir := GetPodQoSRelativePath(corev1.PodQOSBestEffort)
return GetCgroupPathsByTargetDepth(system.CPUSetCPUSName, beCgroupParentDir, relativeDepth)
}

// GetCgroupRootBlkIOAbsoluteDir gets the root blkio directory
Expand Down
46 changes: 37 additions & 9 deletions pkg/koordlet/util/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
prepareFn func(helper *system.FileTestUtil)
}
type args struct {
qos corev1.PodQOSClass
resourceType system.ResourceType
parentDir string
relativeDepth int
}
tests := []struct {
Expand All @@ -66,7 +66,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
{
name: "get cgroup resource failed",
args: args{
qos: corev1.PodQOSGuaranteed,
parentDir: GetPodQoSRelativePath(corev1.PodQOSGuaranteed),
resourceType: "unknown_resource_xxx",
relativeDepth: 1,
},
Expand All @@ -75,7 +75,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
{
name: "get root cgroup path failed",
args: args{
qos: corev1.PodQOSGuaranteed,
parentDir: GetPodQoSRelativePath(corev1.PodQOSGuaranteed),
resourceType: system.CPUSetCPUSName,
relativeDepth: 1,
},
Expand All @@ -92,7 +92,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
},
},
args: args{
qos: corev1.PodQOSGuaranteed,
parentDir: GetPodQoSRelativePath(corev1.PodQOSGuaranteed),
resourceType: system.CPUSetCPUSName,
relativeDepth: 0,
},
Expand Down Expand Up @@ -122,7 +122,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
},
},
args: args{
qos: corev1.PodQOSBestEffort,
parentDir: GetPodQoSRelativePath(corev1.PodQOSBestEffort),
resourceType: system.CPUSetCPUSName,
relativeDepth: PodCgroupPathRelativeDepth,
},
Expand Down Expand Up @@ -153,7 +153,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
},
},
args: args{
qos: corev1.PodQOSBestEffort,
parentDir: GetPodQoSRelativePath(corev1.PodQOSBestEffort),
resourceType: system.CPUSetCPUSName,
relativeDepth: ContainerCgroupPathRelativeDepth,
},
Expand Down Expand Up @@ -184,7 +184,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
},
},
args: args{
qos: corev1.PodQOSBestEffort,
parentDir: GetPodQoSRelativePath(corev1.PodQOSBestEffort),
resourceType: system.MemoryLimitName,
relativeDepth: ContainerCgroupPathRelativeDepth,
},
Expand Down Expand Up @@ -215,7 +215,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
},
},
args: args{
qos: corev1.PodQOSGuaranteed,
parentDir: GetPodQoSRelativePath(corev1.PodQOSGuaranteed),
resourceType: system.CPUSetCPUSName,
relativeDepth: PodCgroupPathRelativeDepth,
},
Expand All @@ -226,6 +226,34 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
filepath.Join(GetPodQoSRelativePath(corev1.PodQOSGuaranteed), "/kubepods-test-guaranteed-pod.slice"),
},
},
{
name: "get for custom cgroup parent successfully",
fields: fields{
prepareFn: func(helper *system.FileTestUtil) {
helper.SetCgroupsV2(false)
cpuset, _ := system.GetCgroupResource(system.CPUSetCPUSName)
cgroupParentDir := "test"
helper.WriteCgroupFileContents(cgroupParentDir, cpuset, "0-63")
containerCgroupDir := filepath.Join(cgroupParentDir, "/test-pod-0.slice")
containerCgroupDir1 := filepath.Join(cgroupParentDir, "/test-pod-1.slice")
containerCgroupDir2 := filepath.Join(cgroupParentDir, "/test-pod-2.slice")
helper.WriteCgroupFileContents(containerCgroupDir, cpuset, "0-63")
helper.WriteCgroupFileContents(containerCgroupDir1, cpuset, "0-63")
helper.WriteCgroupFileContents(containerCgroupDir2, cpuset, "0-31")
},
},
args: args{
parentDir: "test",
resourceType: system.CPUSetCPUSName,
relativeDepth: 1,
},
wantErr: false,
want: []string{
"test/test-pod-0.slice",
"test/test-pod-1.slice",
"test/test-pod-2.slice",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -235,7 +263,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
tt.fields.prepareFn(helper)
}

got, gotErr := GetCgroupPathsByTargetDepth(tt.args.qos, tt.args.resourceType, tt.args.relativeDepth)
got, gotErr := GetCgroupPathsByTargetDepth(tt.args.resourceType, tt.args.parentDir, tt.args.relativeDepth)
assert.Equal(t, tt.wantErr, gotErr != nil, gotErr)
assert.Equal(t, tt.want, got)
})
Expand Down