Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add uninterruptible metric load d #3555

Merged
merged 2 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions info/v1/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,8 @@ type CpuStats struct {
// Load is smoothed over the last 10 seconds. Instantaneous value can be read
// from LoadStats.NrRunning.
LoadAverage int32 `json:"load_average"`
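// LoadDAverage is the smoothed average of the number of tasks in
// uninterruptible state. Instantaneous value can be read from
// LoadStats.NrUninterruptible.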
LoadDAverage int32 `json:"load_d_average"`
}

type PerDiskStats struct {
Expand Down
14 changes: 14 additions & 0 deletions manager/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ type containerData struct {
loadReader cpuload.CpuLoadReader
summaryReader *summary.StatsSummary
loadAvg float64 // smoothed load average seen so far.
loadDAvg float64 // smoothed load.d average seen so far.
housekeepingInterval time.Duration
maxHousekeepingInterval time.Duration
allowDynamicHousekeeping bool
Expand Down Expand Up @@ -441,6 +442,7 @@ func newContainerData(containerName string, memoryCache *memory.InMemoryCache, h
allowDynamicHousekeeping: allowDynamicHousekeeping,
logUsage: logUsage,
loadAvg: -1.0, // negative value indicates uninitialized.
loadDAvg: -1.0, // negative value indicates uninitialized.
stop: make(chan struct{}),
collectorManager: collectorManager,
onDemandChan: make(chan chan struct{}, 100),
Expand Down Expand Up @@ -633,6 +635,14 @@ func (cd *containerData) updateLoad(newLoad uint64) {
}
}

func (cd *containerData) updateLoadD(newLoad uint64) {
if cd.loadDAvg < 0 {
iwankgb marked this conversation as resolved.
Show resolved Hide resolved
cd.loadDAvg = float64(newLoad) // initialize to the first seen sample for faster stabilization.
} else {
cd.loadDAvg = cd.loadDAvg*cd.loadDecay + float64(newLoad)*(1.0-cd.loadDecay)
}
}

func (cd *containerData) updateStats() error {
stats, statsErr := cd.handler.GetStats()
if statsErr != nil {
Expand All @@ -659,6 +669,10 @@ func (cd *containerData) updateStats() error {
cd.updateLoad(loadStats.NrRunning)
// convert to 'milliLoad' to avoid floats and preserve precision.
stats.Cpu.LoadAverage = int32(cd.loadAvg * 1000)

cd.updateLoadD(loadStats.NrUninterruptible)
// convert to 'milliLoad' to avoid floats and preserve precision.
stats.Cpu.LoadDAverage = int32(cd.loadDAvg * 1000)
}
}
if cd.summaryReader != nil {
Expand Down
7 changes: 7 additions & 0 deletions metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,13 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.Cpu.LoadAverage), timestamp: s.Timestamp}}
},
}, {
name: "container_cpu_load_d_average_10s",
help: "Value of container cpu load.d average over the last 10 seconds.",
valueType: prometheus.GaugeValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.Cpu.LoadDAverage), timestamp: s.Timestamp}}
},
}, {
name: "container_tasks_state",
help: "Number of tasks in given state",
Expand Down
3 changes: 2 additions & 1 deletion metrics/prometheus_fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,8 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req
RunqueueTime: 479424566378,
RunPeriods: 984285,
},
LoadAverage: 2,
LoadAverage: 2,
LoadDAverage: 2,
},
Memory: info.MemoryStats{
Usage: 8,
Expand Down
3 changes: 3 additions & 0 deletions metrics/testdata/prometheus_metrics
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ container_cpu_cfs_throttled_seconds_total{container_env_foo_env="prod",container
# HELP container_cpu_load_average_10s Value of container cpu load average over the last 10 seconds.
# TYPE container_cpu_load_average_10s gauge
container_cpu_load_average_10s{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000
# HELP container_cpu_load_d_average_10s Value of container cpu load.d average over the last 10 seconds.
# TYPE container_cpu_load_d_average_10s gauge
container_cpu_load_d_average_10s{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000
# HELP container_cpu_schedstat_run_periods_total Number of times processes of the cgroup have run on the cpu
# TYPE container_cpu_schedstat_run_periods_total counter
container_cpu_schedstat_run_periods_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 984285 1395066363000
Expand Down
3 changes: 3 additions & 0 deletions metrics/testdata/prometheus_metrics_whitelist_filtered
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ container_cpu_cfs_throttled_seconds_total{container_env_foo_env="prod",id="testc
# HELP container_cpu_load_average_10s Value of container cpu load average over the last 10 seconds.
# TYPE container_cpu_load_average_10s gauge
container_cpu_load_average_10s{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000
# HELP container_cpu_load_d_average_10s Value of container cpu load.d average over the last 10 seconds.
# TYPE container_cpu_load_d_average_10s gauge
container_cpu_load_d_average_10s{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000
# HELP container_cpu_schedstat_run_periods_total Number of times processes of the cgroup have run on the cpu
# TYPE container_cpu_schedstat_run_periods_total counter
container_cpu_schedstat_run_periods_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 984285 1395066363000
Expand Down
Loading