Skip to content

Commit

Permalink
koordlet: fix recursively disabling bvt (#1848)
Browse files Browse the repository at this point in the history
Signed-off-by: saintube <saintube@foxmail.com>
  • Loading branch information
saintube committed Feb 26, 2024
1 parent 8467b8e commit 2064aa9
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 45 deletions.
70 changes: 42 additions & 28 deletions pkg/koordlet/runtimehooks/hooks/groupidentity/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ func (b *bvtPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error {
bvtUpdater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, kubeQOSCgroupPath, strconv.FormatInt(bvtValue, 10), e)
if err != nil {
klog.Infof("bvt updater create failed, dir %v, error %v", kubeQOSCgroupPath, err)
continue
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update kube qos %v cpu bvt failed, dir %v, error %v", kubeQOS, kubeQOSCgroupPath, err)
Expand All @@ -200,17 +201,19 @@ func (b *bvtPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error {
}

// FIXME(saintube): Currently the kernel feature core scheduling is strictly excluded with the group identity's
// bvt=-1. So we have to check and disable all the BE cgroups' bvt for the GroupIdentity before creating the
// core sched cookies. To keep the consistency of the cgroup tree's configuration, we list and update the cgroups
// by the level order when the group identity is globally disabled.
// This check should be removed after the kernel provides a more stable interface.
// bvt=-1. So we have to check and disable all the BE cgroups' bvt for the GroupIdentity before creating the
// core sched cookies. To keep the consistency of the cgroup tree's configuration, we list and update the cgroups
// by the level order when the group identity is globally disabled.
// This check should be removed after the kernel provides a more stable interface.
podCgroupMap := map[string]int64{}
// pod-level
for _, kubeQOS := range []corev1.PodQOSClass{corev1.PodQOSGuaranteed, corev1.PodQOSBurstable, corev1.PodQOSBestEffort} {
bvtValue := r.getKubeQOSDirBvtValue(kubeQOS)
podCgroupDirs, err := koordletutil.GetCgroupPathsByTargetDepth(kubeQOS, sysutil.CPUBVTWarpNsName, koordletutil.PodCgroupPathRelativeDepth)
kubeQOSParentDir := koordletutil.GetPodQoSRelativePath(kubeQOS)
podCgroupDirs, err := koordletutil.GetCgroupPathsByTargetDepth(sysutil.CPUBVTWarpNsName, kubeQOSParentDir, koordletutil.PodCgroupPathRelativeDepth)
if err != nil {
return fmt.Errorf("get pod cgroup paths failed, qos %s, err: %w", kubeQOS, err)
klog.Infof("get pod cgroup paths failed, qos %s, err: %w", kubeQOS, err)
continue
}
for _, cgroupDir := range podCgroupDirs {
if _, ok := qosCgroupMap[cgroupDir]; ok { // exclude qos cgroup
Expand All @@ -228,12 +231,32 @@ func (b *bvtPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error {
bvtUpdater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, podCgroupPath, strconv.FormatInt(podBvt, 10), e)
if err != nil {
klog.Infof("bvt updater create failed, dir %v, error %v", podCgroupPath, err)
continue
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update pod %s cpu bvt failed, dir %v, error %v",
util.GetPodKey(podMeta.Pod), podCgroupPath, err)
}
delete(podCgroupMap, podCgroupPath)

// container-level
// NOTE: Although we do not set the container's cpu.bvt_warp_ns directly, it is inheritable from the pod-level,
// we have to handle the container-level only when we want to disable the group identity.
containerCgroupDirs, err := koordletutil.GetCgroupPathsByTargetDepth(sysutil.CPUBVTWarpNsName, podCgroupPath, 1)
if err != nil {
klog.Infof("get container cgroup paths failed, dir %s, error %v", podCgroupPath, err)
continue
}
for _, cgroupDir := range containerCgroupDirs {
bvtUpdater, err = resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, cgroupDir, strconv.FormatInt(podBvt, 10), e)
if err != nil {
klog.Infof("bvt updater create failed, dir %v, error %v", cgroupDir, err)
continue
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update container cpu bvt failed, dir %v, error %v", cgroupDir, err)
}
}
}
for _, hostApp := range target.HostApplications {
hostCtx := protocol.HooksProtocolBuilder.HostApp(&hostApp)
Expand All @@ -250,35 +273,26 @@ func (b *bvtPlugin) ruleUpdateCb(target *statesinformer.CallbackTarget) error {
bvtUpdater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, podCgroupDir, strconv.FormatInt(bvtValue, 10), e)
if err != nil {
klog.Infof("bvt updater create failed, dir %v, error %v", podCgroupDir, err)
continue
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update remaining pod cpu bvt failed, dir %v, error %v", podCgroupDir, err)
}
}

// container-level
// NOTE: Although we do not set the container's cpu.bvt_warp_ns directly, it is inheritable from the pod-level,
// we have to handle the container-level only when we want to disable the group identity.
if !r.getEnable() {
for _, kubeQOS := range []corev1.PodQOSClass{corev1.PodQOSGuaranteed, corev1.PodQOSBurstable, corev1.PodQOSBestEffort} {
bvtValue := r.getKubeQOSDirBvtValue(kubeQOS)
containerCgroupDirs, err := koordletutil.GetCgroupPathsByTargetDepth(kubeQOS, sysutil.CPUBVTWarpNsName, koordletutil.ContainerCgroupPathRelativeDepth)
// container-level
containerCgroupDirs, err := koordletutil.GetCgroupPathsByTargetDepth(sysutil.CPUBVTWarpNsName, podCgroupDir, 1)
if err != nil {
klog.Infof("get container cgroup paths failed, dir %s, error %v", podCgroupDir, err)
continue
}
for _, cgroupDir := range containerCgroupDirs {
bvtUpdater, err = resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, cgroupDir, strconv.FormatInt(bvtValue, 10), e)
if err != nil {
return fmt.Errorf("get container cgroup paths failed, qos %s, err: %w", kubeQOS, err)
klog.Infof("bvt updater create failed, dir %v, error %v", cgroupDir, err)
continue
}
for _, cgroupDir := range containerCgroupDirs {
if _, ok := podCgroupMap[cgroupDir]; ok {
continue
}

e := audit.V(3).Reason(name).Message("set bvt to %v", bvtValue)
bvtUpdater, err := resourceexecutor.DefaultCgroupUpdaterFactory.New(sysutil.CPUBVTWarpNsName, cgroupDir, strconv.FormatInt(bvtValue, 10), e)
if err != nil {
klog.Infof("bvt updater create failed, dir %v, error %v", cgroupDir, err)
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update container cpu bvt failed, dir %v, error %v", cgroupDir, err)
}
if _, err = b.executor.Update(true, bvtUpdater); err != nil {
klog.Infof("update remaining container cpu bvt failed, dir %v, error %v", cgroupDir, err)
}
}
}
Expand Down
17 changes: 9 additions & 8 deletions pkg/koordlet/util/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,25 +92,25 @@ func GetBECPUSetPathsByMaxDepth(relativeDepth int) ([]string, error) {
return paths, err
}

func GetCgroupPathsByTargetDepth(qos corev1.PodQOSClass, resourceType system.ResourceType, relativeDepth int) ([]string, error) {
rootCgroupParentDir := GetPodQoSRelativePath(qos)
func GetCgroupPathsByTargetDepth(resourceType system.ResourceType, cgroupParent string, relativeDepth int) ([]string, error) {
r, err := system.GetCgroupResource(resourceType)
if err != nil {
return nil, fmt.Errorf("get resource type failed, err: %w", err)
}
cgroupResource := r.(*system.CgroupResource)
rootCgroupPath := filepath.Dir(r.Path(rootCgroupParentDir))
absCgroupParent := filepath.Dir(r.Path(cgroupParent))
rootSubfsPath := system.GetRootCgroupSubfsDir(cgroupResource.Subfs)
_, err = os.Stat(rootCgroupPath)
_, err = os.Stat(absCgroupParent)
if err != nil {
// make sure the rootCgroupPath is available
return nil, err
}
klog.V(6).Infof("get rootCgroupPath, qos %s, resource %s, path: %s", qos, resourceType, rootCgroupPath)
klog.V(6).Infof("get rootCgroupPath, resource %s, parent %s, absolute path: %s",
resourceType, cgroupParent, absCgroupParent)

absDepth := strings.Count(rootCgroupPath, string(os.PathSeparator)) + relativeDepth
absDepth := strings.Count(absCgroupParent, string(os.PathSeparator)) + relativeDepth
var containerPaths []string
err = filepath.WalkDir(rootCgroupPath, func(path string, info os.DirEntry, err error) error {
err = filepath.WalkDir(absCgroupParent, func(path string, info os.DirEntry, err error) error {
if err != nil {
return err
}
Expand All @@ -129,7 +129,8 @@ func GetCgroupPathsByTargetDepth(qos corev1.PodQOSClass, resourceType system.Res

// GetBECPUSetPathsByTargetDepth only gets the be containers' cpuset groups' paths
func GetBECPUSetPathsByTargetDepth(relativeDepth int) ([]string, error) {
return GetCgroupPathsByTargetDepth(corev1.PodQOSBestEffort, system.CPUSetCPUSName, relativeDepth)
beCgroupParentDir := GetPodQoSRelativePath(corev1.PodQOSBestEffort)
return GetCgroupPathsByTargetDepth(system.CPUSetCPUSName, beCgroupParentDir, relativeDepth)
}

// GetCgroupRootBlkIOAbsoluteDir gets the root blkio directory
Expand Down
46 changes: 37 additions & 9 deletions pkg/koordlet/util/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
prepareFn func(helper *system.FileTestUtil)
}
type args struct {
qos corev1.PodQOSClass
resourceType system.ResourceType
parentDir string
relativeDepth int
}
tests := []struct {
Expand All @@ -66,7 +66,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
{
name: "get cgroup resource failed",
args: args{
qos: corev1.PodQOSGuaranteed,
parentDir: GetPodQoSRelativePath(corev1.PodQOSGuaranteed),
resourceType: "unknown_resource_xxx",
relativeDepth: 1,
},
Expand All @@ -75,7 +75,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
{
name: "get root cgroup path failed",
args: args{
qos: corev1.PodQOSGuaranteed,
parentDir: GetPodQoSRelativePath(corev1.PodQOSGuaranteed),
resourceType: system.CPUSetCPUSName,
relativeDepth: 1,
},
Expand All @@ -92,7 +92,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
},
},
args: args{
qos: corev1.PodQOSGuaranteed,
parentDir: GetPodQoSRelativePath(corev1.PodQOSGuaranteed),
resourceType: system.CPUSetCPUSName,
relativeDepth: 0,
},
Expand Down Expand Up @@ -122,7 +122,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
},
},
args: args{
qos: corev1.PodQOSBestEffort,
parentDir: GetPodQoSRelativePath(corev1.PodQOSBestEffort),
resourceType: system.CPUSetCPUSName,
relativeDepth: PodCgroupPathRelativeDepth,
},
Expand Down Expand Up @@ -153,7 +153,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
},
},
args: args{
qos: corev1.PodQOSBestEffort,
parentDir: GetPodQoSRelativePath(corev1.PodQOSBestEffort),
resourceType: system.CPUSetCPUSName,
relativeDepth: ContainerCgroupPathRelativeDepth,
},
Expand Down Expand Up @@ -184,7 +184,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
},
},
args: args{
qos: corev1.PodQOSBestEffort,
parentDir: GetPodQoSRelativePath(corev1.PodQOSBestEffort),
resourceType: system.MemoryLimitName,
relativeDepth: ContainerCgroupPathRelativeDepth,
},
Expand Down Expand Up @@ -215,7 +215,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
},
},
args: args{
qos: corev1.PodQOSGuaranteed,
parentDir: GetPodQoSRelativePath(corev1.PodQOSGuaranteed),
resourceType: system.CPUSetCPUSName,
relativeDepth: PodCgroupPathRelativeDepth,
},
Expand All @@ -226,6 +226,34 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
filepath.Join(GetPodQoSRelativePath(corev1.PodQOSGuaranteed), "/kubepods-test-guaranteed-pod.slice"),
},
},
{
name: "get for custom cgroup parent successfully",
fields: fields{
prepareFn: func(helper *system.FileTestUtil) {
helper.SetCgroupsV2(false)
cpuset, _ := system.GetCgroupResource(system.CPUSetCPUSName)
cgroupParentDir := "test"
helper.WriteCgroupFileContents(cgroupParentDir, cpuset, "0-63")
containerCgroupDir := filepath.Join(cgroupParentDir, "/test-pod-0.slice")
containerCgroupDir1 := filepath.Join(cgroupParentDir, "/test-pod-1.slice")
containerCgroupDir2 := filepath.Join(cgroupParentDir, "/test-pod-2.slice")
helper.WriteCgroupFileContents(containerCgroupDir, cpuset, "0-63")
helper.WriteCgroupFileContents(containerCgroupDir1, cpuset, "0-63")
helper.WriteCgroupFileContents(containerCgroupDir2, cpuset, "0-31")
},
},
args: args{
parentDir: "test",
resourceType: system.CPUSetCPUSName,
relativeDepth: 1,
},
wantErr: false,
want: []string{
"test/test-pod-0.slice",
"test/test-pod-1.slice",
"test/test-pod-2.slice",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -235,7 +263,7 @@ func TestGetCgroupPathsByTargetDepth(t *testing.T) {
tt.fields.prepareFn(helper)
}

got, gotErr := GetCgroupPathsByTargetDepth(tt.args.qos, tt.args.resourceType, tt.args.relativeDepth)
got, gotErr := GetCgroupPathsByTargetDepth(tt.args.resourceType, tt.args.parentDir, tt.args.relativeDepth)
assert.Equal(t, tt.wantErr, gotErr != nil, gotErr)
assert.Equal(t, tt.want, got)
})
Expand Down

0 comments on commit 2064aa9

Please sign in to comment.