Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

koordlet: add pod resctrl #1974

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pkg/koordlet/qosmanager/plugins/resctrl/resctrl_reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,11 @@ func (r *resctrlReconcile) reconcileResctrlGroups(qosStrategy *slov1alpha1.Resou
podsMeta := r.statesInformer.GetAllPods()
for _, podMeta := range podsMeta {
pod := podMeta.Pod

// only QoS-class-level pods are considered; pods carrying their own resctrl annotation are skipped here
if _, ok := pod.Annotations[extension.AnnotationResctrl]; ok {
continue
}
// only Running and Pending pods are considered
if pod.Status.Phase != corev1.PodRunning && pod.Status.Phase != corev1.PodPending {
continue
Expand Down
1 change: 1 addition & 0 deletions pkg/koordlet/resourceexecutor/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import "flag"
const (
ReasonUpdateCgroups = "UpdateCgroups"
ReasonUpdateSystemConfig = "UpdateSystemConfig"
CreateCATGroup = "CreateCATGroup"
ReasonUpdateResctrl = "UpdateResctrl" // update resctrl tasks, schemata

EvictPodByNodeMemoryUsage = "EvictPodByNodeMemoryUsage"
Expand Down
73 changes: 73 additions & 0 deletions pkg/koordlet/resourceexecutor/resctrl_updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,64 @@ func (r *ResctrlSchemataResourceUpdater) Clone() ResourceUpdater {
}
}

// NewResctrlSchemataResource builds a ResourceUpdater targeting the schemata
// file of the given resctrl group.
//
// The requested schemata string is parsed into a schemata raw object; the
// updater value is the concatenation of the L3 and MB parts that pass their
// respective validation, in that order. An empty schemata is rejected with an
// error and a nil updater. A parse failure is logged and returned as the
// error, but the updater is still constructed and returned alongside it.
func NewResctrlSchemataResource(group, schemata string, e *audit.EventHelper) (ResourceUpdater, error) {
	if len(schemata) == 0 {
		return nil, fmt.Errorf("schemata is nil")
	}

	file := sysutil.ResctrlSchemata.Path(group)
	key := sysutil.ResctrlSchemataName + ":" + file

	// The current assumption is that the cache ids obtained through
	// resctrl schemata will not go wrong, hence the error is discarded.
	// TODO: Use the ability of node info to obtain cache ids to replace
	// the current method.
	cacheIDs, _ := sysutil.CacheIdsCacheFunc()
	raw := sysutil.NewResctrlSchemataRaw(cacheIDs).WithL3Num(len(cacheIDs))

	parseErr := raw.ParseResctrlSchemata(schemata, -1)
	if parseErr != nil {
		klog.Errorf("failed to parse %v", parseErr)
	}

	// Only the resource kinds that validate are written; L3 comes before MB.
	var value strings.Builder
	if ok, _ := raw.ValidateL3(); ok {
		value.WriteString(raw.L3String())
	}
	if ok, _ := raw.ValidateMB(); ok {
		value.WriteString(raw.MBString())
	}
	valueStr := value.String()

	klog.V(6).Infof("generate new resctrl schemata resource, file %s, key %s, value %s, schemata %s",
		file, key, valueStr, schemata)

	updater := &ResctrlSchemataResourceUpdater{
		DefaultResourceUpdater: DefaultResourceUpdater{
			key:         key,
			file:        file,
			value:       valueStr,
			updateFunc:  UpdateResctrlSchemataFunc,
			eventHelper: e,
		},
		schemataRaw: raw,
	}
	return updater, parseErr
}

// NewCatGroupResource builds a ResourceUpdater whose update function is
// InitCatGroupFunc. The updater is keyed by the group name, points at the
// group's schemata file and carries an empty value.
//
// An empty group name is rejected with an error and a nil updater.
func NewCatGroupResource(group string, e *audit.EventHelper) (ResourceUpdater, error) {
	if len(group) == 0 {
		return nil, fmt.Errorf("group is nil")
	}

	path := sysutil.ResctrlSchemata.Path(group)
	klog.V(6).Infof("generate new cat group resource, file %s", path)

	updater := &DefaultResourceUpdater{
		key:         group,
		file:        path,
		value:       "",
		updateFunc:  InitCatGroupFunc,
		eventHelper: e,
	}
	return updater, nil
}

func NewResctrlL3SchemataResource(group, schemataDelta string, l3Num int) ResourceUpdater {
schemataFile := sysutil.ResctrlSchemata.Path(group)
l3SchemataKey := sysutil.L3SchemataPrefix + ":" + schemataFile
Expand Down Expand Up @@ -116,6 +174,21 @@ func CalculateResctrlL3TasksResource(group string, taskIds []int32) (ResourceUpd
return NewCommonDefaultUpdaterWithUpdateFunc(tasksPath, tasksPath, builder.String(), UpdateResctrlTasksFunc, eventHelper)
}

// InitCatGroupFunc is the update function installed by NewCatGroupResource.
//
// It expects u to be a *DefaultResourceUpdater and passes the updater's key
// (the group name set by NewCatGroupResource) to
// sysutil.InitCatGroupIfNotExist. On success an audit event with reason
// CreateCATGroup is recorded.
//
// It returns an error when u has an unexpected concrete type or when the
// group initialization fails.
func InitCatGroupFunc(u ResourceUpdater) error {
	r, ok := u.(*DefaultResourceUpdater)
	if !ok {
		// The asserted type is DefaultResourceUpdater; report that type
		// rather than the unrelated ResctrlSchemataResourceUpdater.
		return fmt.Errorf("not a DefaultResourceUpdater")
	}

	if err := sysutil.InitCatGroupIfNotExist(r.key); err != nil {
		return err
	}

	// Auditing is best-effort: its result is intentionally discarded so it
	// cannot fail the group creation.
	_ = audit.V(3).Reason(CreateCATGroup).Message("Create %v to %v", u.Key(), u.Value()).Do()

	return nil
}

func UpdateResctrlSchemataFunc(u ResourceUpdater) error {
r, ok := u.(*ResctrlSchemataResourceUpdater)
if !ok {
Expand Down
44 changes: 44 additions & 0 deletions pkg/koordlet/resourceexecutor/resctrl_updater_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,47 @@ func TestNewResctrlMbSchemataResource(t *testing.T) {
assert.NoError(t, err)
})
}

// TestNewResctrlSchemataResource checks the value generated by
// NewResctrlSchemataResource for the "BE" group and that the resulting updater
// can be applied, for schemata containing both L3 and MB, only L3, and only MB.
func TestNewResctrlSchemataResource(t *testing.T) {
	testCases := []struct {
		name            string
		sysFSRootDir    string
		prepareCbm      string // second argument of testingPrepareResctrlL3CatGroups
		prepareSchemata string // third argument of testingPrepareResctrlL3CatGroups
		schemata        string
		expectValue     string
	}{
		{
			name:            "test_all_schemata",
			sysFSRootDir:    "NewResctrlSchemataResource",
			prepareCbm:      "7ff",
			prepareSchemata: "    L3:0=ff;1=ff\n    MB:0=100;1=100",
			schemata:        "L3:0=f;1=f\nMB:0=60;1=60",
			expectValue:     "L3:0=f;1=f;\nMB:0=60;1=60;\n",
		},
		{
			name:            "test_LLC_resource",
			sysFSRootDir:    "NewResctrlSchemataResourceSingleLLC",
			prepareCbm:      "7ff",
			prepareSchemata: "    L3:0=ff;1=ff",
			schemata:        "L3:0=f;1=f",
			expectValue:     "L3:0=f;1=f;\n",
		},
		{
			name:            "test_MB_resource",
			sysFSRootDir:    "NewResctrlSchemataResourceSingleMB",
			prepareCbm:      "",
			prepareSchemata: "    MB:0=10;1=10",
			schemata:        "MB:0=20;1=20",
			expectValue:     "MB:0=20;1=20;\n",
		},
	}

	for _, tc := range testCases {
		// Subtests run synchronously, so capturing tc in the closure is safe.
		t.Run(tc.name, func(t *testing.T) {
			helper := system.NewFileTestUtil(t)
			defer helper.Cleanup()

			// Point the sysfs root at a per-case directory inside the temp dir.
			helper.MkDirAll(tc.sysFSRootDir)
			system.Conf.SysFSRootDir = filepath.Join(helper.TempDir, tc.sysFSRootDir)
			testingPrepareResctrlL3CatGroups(t, tc.prepareCbm, tc.prepareSchemata)

			updater, _ := NewResctrlSchemataResource("BE", tc.schemata, nil)
			assert.Equal(t, tc.expectValue, updater.Value())

			err := updater.update()
			assert.NoError(t, err)
		})
	}
}
9 changes: 9 additions & 0 deletions pkg/koordlet/runtimehooks/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/cpuset"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/gpu"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/groupidentity"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/resctrl"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/terwayqos"
"github.com/koordinator-sh/koordinator/pkg/koordlet/util/system"
)
Expand Down Expand Up @@ -80,6 +81,12 @@ const (
// owner: @l1b0k
// alpha: v1.5
TerwayQoS featuregate.Feature = "TerwayQoS"

// Resctrl adjusts LLC/MB value for pod.
//
// owner: @kangclzjc @saintube @zwzhang0107
// alpha: v1.5
Resctrl featuregate.Feature = "Resctrl"
)

var (
Expand All @@ -91,6 +98,7 @@ var (
CPUNormalization: {Default: false, PreRelease: featuregate.Alpha},
CoreSched: {Default: false, PreRelease: featuregate.Alpha},
TerwayQoS: {Default: false, PreRelease: featuregate.Alpha},
Resctrl: {Default: false, PreRelease: featuregate.Alpha},
}

runtimeHookPlugins = map[featuregate.Feature]HookPlugin{
Expand All @@ -101,6 +109,7 @@ var (
CPUNormalization: cpunormalization.Object(),
CoreSched: coresched.Object(),
TerwayQoS: terwayqos.Object(),
Resctrl: resctrl.Object(),
}
)

Expand Down
7 changes: 5 additions & 2 deletions pkg/koordlet/runtimehooks/hooks/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/koordinator-sh/koordinator/pkg/koordlet/metrics"
"github.com/koordinator-sh/koordinator/pkg/koordlet/resourceexecutor"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/protocol"
"github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer"
rmconfig "github.com/koordinator-sh/koordinator/pkg/runtimeproxy/config"
)

Expand All @@ -36,8 +37,9 @@ type Hook struct {
}

type Options struct {
Reader resourceexecutor.CgroupReader
Executor resourceexecutor.ResourceUpdateExecutor
Reader resourceexecutor.CgroupReader
Executor resourceexecutor.ResourceUpdateExecutor
StatesInformer statesinformer.StatesInformer
}

type HookFn func(protocol.HooksProtocol) error
Expand Down Expand Up @@ -106,6 +108,7 @@ func init() {
rmconfig.PostStopContainer: make([]*Hook, 0),
rmconfig.PostStopPodSandbox: make([]*Hook, 0),
rmconfig.PreUpdateContainerResources: make([]*Hook, 0),
rmconfig.PreRemoveRunPodSandbox: make([]*Hook, 0),
}
}

Expand Down
Loading
Loading