Skip to content

Commit

Permalink
✨ Set cgroup value for BE pod (#78)
Browse files Browse the repository at this point in the history
* purge dead code

Signed-off-by: Jason Liu <jasonliu747@gmail.com>

* batch cgroup reconciler

Signed-off-by: Jason Liu <jasonliu747@gmail.com>

* reconcile memory limit

Signed-off-by: Jason Liu <jasonliu747@gmail.com>
  • Loading branch information
jasonliu747 committed Apr 18, 2022
1 parent 5600079 commit b84b987
Show file tree
Hide file tree
Showing 166 changed files with 1,718 additions and 21,706 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
</h1>

[![License](https://img.shields.io/badge/License-Apache_2.0-4EB1BA.svg)](https://opensource.org/licenses/Apache-2.0)
[![Slack](https://badgen.net/badge/Slack/Join/pink?icon=slack)](https://join.slack.com/t/koordinator-sh/shared_invite/zt-1756qoub4-Cn4~esfdlfAPsD7cwO2NzA)
[![codecov](https://codecov.io/github/koordinator-sh/koordinator/branch/main/graph/badge.svg?token=CPUKM09WJL)](https://codecov.io/github/koordinator-sh/koordinator)
[![PRs Welcome](https://badgen.net/badge/PRs/Welcome/green?icon=https://api.iconify.design/octicon:git-pull-request.svg?color=white)](CONTRIBUTING.md)
[![Slack](https://badgen.net/badge/Slack/Join/4A154B?icon=slack)](https://join.slack.com/t/koordinator-sh/shared_invite/zt-1756qoub4-Cn4~esfdlfAPsD7cwO2NzA)

## Introduction

Expand Down
3 changes: 0 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,9 @@ require (
github.com/moby/moby v20.10.14+incompatible
github.com/onsi/ginkgo v1.16.4
github.com/onsi/gomega v1.15.0
github.com/prashantv/gostub v1.1.0
github.com/prometheus/client_golang v1.11.0
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.7.0
github.com/vishvananda/netlink v1.1.0
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74
go.uber.org/atomic v1.7.0
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac
google.golang.org/grpc v1.38.0
Expand Down
5 changes: 0 additions & 5 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -510,8 +510,6 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
Expand Down Expand Up @@ -605,12 +603,9 @@ github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGr
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4=
github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0=
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 h1:gga7acRE695APm9hlsSMoOoE65U4/TcqNj90mc69Rlg=
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
github.com/vmware/govmomi v0.20.3/go.mod h1:URlwyTFZX72RmxtxuaFL2Uj3fD1JTvZdx59bHWk6aFU=
github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
Expand Down
7 changes: 6 additions & 1 deletion pkg/features/koordlet_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ const (
// AuditEventsHTTPHandler is used to get recent events from koordlet port
AuditEventsHTTPHandler featuregate.Feature = "AuditEventsHTTPHandler"

// BECgroupReconcile reconciles the CPU and memory limits of best-effort pods
BECgroupReconcile featuregate.Feature = "BECgroupReconcile"

// BECPUSuppress suppresses the CPU usage of best-effort pods
BECPUSuppress featuregate.Feature = "BECPUSuppress"

Expand All @@ -46,6 +49,8 @@ var (
defaultKoordletFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
AuditEvents: {Default: false, PreRelease: featuregate.Alpha},
AuditEventsHTTPHandler: {Default: false, PreRelease: featuregate.Alpha},
BECPUSuppress: {Default: true, PreRelease: featuregate.Alpha},
BECgroupReconcile: {Default: false, PreRelease: featuregate.Alpha},
BECPUSuppress: {Default: false, PreRelease: featuregate.Alpha},
BEMemoryEvict: {Default: false, PreRelease: featuregate.Alpha},
}
)
4 changes: 2 additions & 2 deletions pkg/koordlet/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
"github.com/koordinator-sh/koordinator/pkg/koordlet/reporter"
"github.com/koordinator-sh/koordinator/pkg/koordlet/resmanager"
"github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer"
sysutil "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system"
"github.com/koordinator-sh/koordinator/pkg/util/system"
)

type Configuration struct {
Expand All @@ -57,7 +57,7 @@ func NewConfiguration() *Configuration {
}

func (c *Configuration) InitFlags(fs *flag.FlagSet) {
sysutil.Conf.InitFlags(fs)
system.Conf.InitFlags(fs)
c.StatesInformerConf.InitFlags(fs)
c.ReporterConf.InitFlags(fs)
c.CollectorConf.InitFlags(fs)
Expand Down
16 changes: 8 additions & 8 deletions pkg/koordlet/koordlet.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import (
"github.com/koordinator-sh/koordinator/pkg/koordlet/reporter"
"github.com/koordinator-sh/koordinator/pkg/koordlet/resmanager"
"github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer"
sysutil "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system"
"github.com/koordinator-sh/koordinator/pkg/util/system"
)

var (
Expand Down Expand Up @@ -70,20 +70,20 @@ func NewDaemon(config *config.Configuration) (Daemon, error) {
klog.Infof("NODE_NAME is %v,start time %v", nodeName, float64(time.Now().Unix()))
metrics.RecordKoordletStartTime(nodeName, float64(time.Now().Unix()))

klog.Infof("sysconf: %+v,agentMode:%v", sysutil.Conf, sysutil.AgentMode)
klog.Infof("kernel version INFO : %+v", sysutil.HostSystemInfo)
klog.Infof("sysconf: %+v,agentMode:%v", system.Conf, system.AgentMode)
klog.Infof("kernel version INFO : %+v", system.HostSystemInfo)

// setup cgroup path formatter from cgroup driver type
var detectCgroupDriver sysutil.CgroupDriverType
var detectCgroupDriver system.CgroupDriverType
if pollErr := wait.PollImmediate(time.Second*10, time.Minute, func() (bool, error) {
driver := sysutil.GuessCgroupDriverFromCgroupName()
driver := system.GuessCgroupDriverFromCgroupName()
if driver.Validate() {
detectCgroupDriver = driver
return true, nil
}
klog.Infof("can not detect cgroup driver from 'kubepods' cgroup name")

if driver, err := sysutil.GuessCgroupDriverFromKubelet(); err == nil && driver.Validate() {
if driver, err := system.GuessCgroupDriverFromKubelet(); err == nil && driver.Validate() {
detectCgroupDriver = driver
return true, nil
} else {
Expand All @@ -93,13 +93,13 @@ func NewDaemon(config *config.Configuration) (Daemon, error) {
}); pollErr != nil {
return nil, fmt.Errorf("can not detect kubelet cgroup driver: %v", pollErr)
}
sysutil.SetupCgroupPathFormatter(detectCgroupDriver)
system.SetupCgroupPathFormatter(detectCgroupDriver)
klog.Infof("Node %s use '%s' as cgroup driver", nodeName, string(detectCgroupDriver))

kubeClient := clientset.NewForConfigOrDie(config.KubeRestConf)
crdClient := clientsetbeta1.NewForConfigOrDie(config.KubeRestConf)

pleg, err := pleg.NewPLEG(sysutil.Conf.CgroupRootDir)
pleg, err := pleg.NewPLEG(system.Conf.CgroupRootDir)
if err != nil {
return nil, err
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/koordlet/metriccache/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package metriccache
import (
"k8s.io/apimachinery/pkg/api/resource"

"github.com/koordinator-sh/koordinator/pkg/koordlet/util"
"github.com/koordinator-sh/koordinator/pkg/util"
)

type CPUMetric struct {
Expand Down Expand Up @@ -69,9 +69,9 @@ type ContainerResourceQueryResult struct {
type NodeCPUInfo util.LocalCPUInfo

type BECPUResourceMetric struct {
CPUUsed resource.Quantity //cpuUsed cores for BestEffort Cgroup
CPURealLimit resource.Quantity //suppressCPUQuantity: if suppress by cfs_quota then this value is cfs_quota/cfs_period
CPURequest resource.Quantity //sum(extendResources_Cpu:request) by all qos:BE pod
CPUUsed resource.Quantity // cpuUsed cores for BestEffort Cgroup
CPURealLimit resource.Quantity // suppressCPUQuantity: if suppressed by cfs_quota, this value is cfs_quota/cfs_period
CPURequest resource.Quantity // sum(extendResources_Cpu:request) by all qos:BE pod
}

type BECPUResourceQueryResult struct {
Expand Down
2 changes: 1 addition & 1 deletion pkg/koordlet/metriccache/metric_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (

"k8s.io/apimachinery/pkg/api/resource"

"github.com/koordinator-sh/koordinator/pkg/koordlet/util"
"github.com/koordinator-sh/koordinator/pkg/util"
)

func Test_metricCache_NodeResourceMetric_CRUD(t *testing.T) {
Expand Down
16 changes: 8 additions & 8 deletions pkg/koordlet/metricsadvisor/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ import (
"github.com/koordinator-sh/koordinator/pkg/koordlet/metriccache"
"github.com/koordinator-sh/koordinator/pkg/koordlet/metrics"
"github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer"
"github.com/koordinator-sh/koordinator/pkg/koordlet/util"
sysutil "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system"
"github.com/koordinator-sh/koordinator/pkg/util"
"github.com/koordinator-sh/koordinator/pkg/util/system"
)

const (
Expand Down Expand Up @@ -356,7 +356,7 @@ func (c *collector) collectPodThrottledInfo() {
uid := string(pod.UID) // types.UID
collectTime := time.Now()
cgroupStatPath := util.GetPodCgroupCPUStatPath(meta.CgroupDir)
currentCPUStat, err := sysutil.GetCPUStatRaw(cgroupStatPath)
currentCPUStat, err := system.GetCPUStatRaw(cgroupStatPath)
if err != nil || currentCPUStat == nil {
if pod.Status.Phase == corev1.PodRunning {
// print running pod collection error
Expand All @@ -372,8 +372,8 @@ func (c *collector) collectPodThrottledInfo() {
meta.Pod.Namespace, meta.Pod.Name, meta.Pod.UID)
continue
}
lastCPUThrottled := lastCPUThrottledValue.(*sysutil.CPUStatRaw)
cpuThrottledRatio := sysutil.CalcCPUThrottledRatio(currentCPUStat, lastCPUThrottled)
lastCPUThrottled := lastCPUThrottledValue.(*system.CPUStatRaw)
cpuThrottledRatio := system.CalcCPUThrottledRatio(currentCPUStat, lastCPUThrottled)

klog.V(6).Infof("collect pod %s/%s, uid %s throttled finished, metric %v",
meta.Pod.Namespace, meta.Pod.Name, meta.Pod.UID, cpuThrottledRatio)
Expand Down Expand Up @@ -410,7 +410,7 @@ func (c *collector) collectContainerThrottledInfo(podMeta *statesinformer.PodMet
pod.Namespace, pod.Name, containerStat.Name, err)
continue
}
currentCPUStat, err := sysutil.GetCPUStatRaw(containerCgroupPath)
currentCPUStat, err := system.GetCPUStatRaw(containerCgroupPath)
if err != nil {
klog.Infof("collect container %s/%s/%s cpu throttled failed, err %v, metric %v",
pod.Namespace, pod.Name, containerStat.Name, err, currentCPUStat)
Expand All @@ -423,8 +423,8 @@ func (c *collector) collectContainerThrottledInfo(podMeta *statesinformer.PodMet
pod.Namespace, pod.Name, containerStat.Name)
continue
}
lastCPUThrottled := lastCPUThrottledValue.(*sysutil.CPUStatRaw)
cpuThrottledRatio := sysutil.CalcCPUThrottledRatio(currentCPUStat, lastCPUThrottled)
lastCPUThrottled := lastCPUThrottledValue.(*system.CPUStatRaw)
cpuThrottledRatio := system.CalcCPUThrottledRatio(currentCPUStat, lastCPUThrottled)

containerMetric := &metriccache.ContainerThrottledMetric{
ContainerID: containerStat.ContainerID,
Expand Down
2 changes: 1 addition & 1 deletion pkg/koordlet/pleg/pleg.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"

"github.com/koordinator-sh/koordinator/pkg/koordlet/util"
"github.com/koordinator-sh/koordinator/pkg/util"
)

const (
Expand Down
6 changes: 3 additions & 3 deletions pkg/koordlet/pleg/pleg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (
"github.com/stretchr/testify/assert"
"k8s.io/utils/inotify"

sysutil "github.com/koordinator-sh/koordinator/pkg/koordlet/util/system"
"github.com/koordinator-sh/koordinator/pkg/util/system"
)

func NewTestWatcher() (Watcher, error) {
Expand Down Expand Up @@ -81,7 +81,7 @@ func (h *testHandler) OnContainerDeleted(podID, containerID string) {
}

func TestPlegHandlePodEvents(t *testing.T) {
sysutil.SetupCgroupPathFormatter(sysutil.Cgroupfs)
system.SetupCgroupPathFormatter(system.Cgroupfs)
stopCh := make(chan struct{})
defer close(stopCh)

Expand Down Expand Up @@ -152,7 +152,7 @@ func TestPlegHandlePodEvents(t *testing.T) {
}

func TestPlegHandleContainerEvents(t *testing.T) {
sysutil.SetupCgroupPathFormatter(sysutil.Cgroupfs)
system.SetupCgroupPathFormatter(system.Cgroupfs)
stopCh := make(chan struct{})
defer close(stopCh)

Expand Down
Loading

0 comments on commit b84b987

Please sign in to comment.