Skip to content

Commit

Permalink
Merge pull request kubernetes#118865 from iholder101/kubelet/add-swap…
Browse files Browse the repository at this point in the history
…-to-summary-stats

Add swap stats to the Summary API and Prometheus endpoints (`/stats/summary` and `/metrics/resource`)
  • Loading branch information
k8s-ci-robot committed Jul 18, 2023
2 parents da2fdf8 + 4cb5547 commit b4d793c
Show file tree
Hide file tree
Showing 14 changed files with 1,049 additions and 443 deletions.
56 changes: 56 additions & 0 deletions pkg/kubelet/metrics/collectors/resource_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ var (
metrics.ALPHA,
"")

nodeSwapUsageDesc = metrics.NewDesc("node_swap_usage_bytes",
"Current swap usage of the node in bytes. Reported only on non-windows systems",
nil,
nil,
metrics.ALPHA,
"")

containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds_total",
"Cumulative cpu time consumed by the container in core-seconds",
[]string{"container", "pod", "namespace"},
Expand All @@ -55,6 +62,13 @@ var (
metrics.ALPHA,
"")

containerSwapUsageDesc = metrics.NewDesc("container_swap_usage_bytes",
"Current amount of the container swap usage in bytes. Reported only on non-windows systems",
[]string{"container", "pod", "namespace"},
nil,
metrics.ALPHA,
"")

podCPUUsageDesc = metrics.NewDesc("pod_cpu_usage_seconds_total",
"Cumulative cpu time consumed by the pod in core-seconds",
[]string{"pod", "namespace"},
Expand All @@ -69,6 +83,13 @@ var (
metrics.ALPHA,
"")

podSwapUsageDesc = metrics.NewDesc("pod_swap_usage_bytes",
"Current amount of the pod swap usage in bytes. Reported only on non-windows systems",
[]string{"pod", "namespace"},
nil,
metrics.ALPHA,
"")

resourceScrapeResultDesc = metrics.NewDesc("scrape_error",
"1 if there was an error while getting container metrics, 0 otherwise",
nil,
Expand Down Expand Up @@ -104,11 +125,14 @@ var _ metrics.StableCollector = &resourceMetricsCollector{}
// DescribeWithStability sends the descriptor of every metric this collector
// can emit (node, container and pod level, plus the scrape result) on ch.
func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
	// Order is kept stable: node, container, pod, then the scrape result.
	descs := []*metrics.Desc{
		nodeCPUUsageDesc,
		nodeMemoryUsageDesc,
		nodeSwapUsageDesc,
		containerStartTimeDesc,
		containerCPUUsageDesc,
		containerMemoryUsageDesc,
		containerSwapUsageDesc,
		podCPUUsageDesc,
		podMemoryUsageDesc,
		podSwapUsageDesc,
		resourceScrapeResultDesc,
	}
	for _, d := range descs {
		ch <- d
	}
}

Expand All @@ -131,15 +155,18 @@ func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metri

rc.collectNodeCPUMetrics(ch, statsSummary.Node)
rc.collectNodeMemoryMetrics(ch, statsSummary.Node)
rc.collectNodeSwapMetrics(ch, statsSummary.Node)

for _, pod := range statsSummary.Pods {
for _, container := range pod.Containers {
rc.collectContainerStartTime(ch, pod, container)
rc.collectContainerCPUMetrics(ch, pod, container)
rc.collectContainerMemoryMetrics(ch, pod, container)
rc.collectContainerSwapMetrics(ch, pod, container)
}
rc.collectPodCPUMetrics(ch, pod)
rc.collectPodMemoryMetrics(ch, pod)
rc.collectPodSwapMetrics(ch, pod)
}
}

Expand All @@ -161,6 +188,15 @@ func (rc *resourceMetricsCollector) collectNodeMemoryMetrics(ch chan<- metrics.M
metrics.NewLazyConstMetric(nodeMemoryUsageDesc, metrics.GaugeValue, float64(*s.Memory.WorkingSetBytes)))
}

// collectNodeSwapMetrics emits the node-level swap usage gauge
// (node_swap_usage_bytes) on ch. Nothing is emitted when the node stats carry
// no swap sample or the sample has no usage value.
func (rc *resourceMetricsCollector) collectNodeSwapMetrics(ch chan<- metrics.Metric, s summary.NodeStats) {
	if s.Swap == nil || s.Swap.SwapUsageBytes == nil {
		return
	}

	// Timestamp the metric with the swap sample's own time. s.Memory is not
	// nil-checked in this function, so reading the time from it could panic,
	// and it describes a different sample anyway.
	ch <- metrics.NewLazyMetricWithTimestamp(s.Swap.Time.Time,
		metrics.NewLazyConstMetric(nodeSwapUsageDesc, metrics.GaugeValue, float64(*s.Swap.SwapUsageBytes)))
}

func (rc *resourceMetricsCollector) collectContainerStartTime(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
if s.StartTime.Unix() <= 0 {
return
Expand Down Expand Up @@ -190,6 +226,16 @@ func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- metr
float64(*s.Memory.WorkingSetBytes), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
}

// collectContainerSwapMetrics emits the container swap usage gauge
// (container_swap_usage_bytes), labelled with the container name, pod name and
// namespace. Containers without a swap sample or usage value are skipped.
func (rc *resourceMetricsCollector) collectContainerSwapMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
	swap := s.Swap
	if swap == nil || swap.SwapUsageBytes == nil {
		return
	}

	usage := metrics.NewLazyConstMetric(
		containerSwapUsageDesc,
		metrics.GaugeValue,
		float64(*swap.SwapUsageBytes),
		s.Name, pod.PodRef.Name, pod.PodRef.Namespace,
	)
	// The sample is reported with the time recorded on the swap stats.
	ch <- metrics.NewLazyMetricWithTimestamp(swap.Time.Time, usage)
}

func (rc *resourceMetricsCollector) collectPodCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats) {
if pod.CPU == nil || pod.CPU.UsageCoreNanoSeconds == nil {
return
Expand All @@ -209,3 +255,13 @@ func (rc *resourceMetricsCollector) collectPodMemoryMetrics(ch chan<- metrics.Me
metrics.NewLazyConstMetric(podMemoryUsageDesc, metrics.GaugeValue,
float64(*pod.Memory.WorkingSetBytes), pod.PodRef.Name, pod.PodRef.Namespace))
}

// collectPodSwapMetrics emits the pod swap usage gauge (pod_swap_usage_bytes),
// labelled with the pod name and namespace. Pods without a swap sample or
// usage value are skipped.
func (rc *resourceMetricsCollector) collectPodSwapMetrics(ch chan<- metrics.Metric, pod summary.PodStats) {
	if swap := pod.Swap; swap != nil && swap.SwapUsageBytes != nil {
		gauge := metrics.NewLazyConstMetric(podSwapUsageDesc, metrics.GaugeValue,
			float64(*swap.SwapUsageBytes), pod.PodRef.Name, pod.PodRef.Namespace)
		// The sample is reported with the time recorded on the swap stats.
		ch <- metrics.NewLazyMetricWithTimestamp(swap.Time.Time, gauge)
	}
}
24 changes: 24 additions & 0 deletions pkg/kubelet/metrics/collectors/resource_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,14 @@ func TestCollectResourceMetrics(t *testing.T) {
"scrape_error",
"node_cpu_usage_seconds_total",
"node_memory_working_set_bytes",
"node_swap_usage_bytes",
"container_cpu_usage_seconds_total",
"container_memory_working_set_bytes",
"container_swap_usage_bytes",
"container_start_time_seconds",
"pod_cpu_usage_seconds_total",
"pod_memory_working_set_bytes",
"pod_swap_usage_bytes",
}
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
Expand Down Expand Up @@ -75,6 +78,10 @@ func TestCollectResourceMetrics(t *testing.T) {
Time: testTime,
WorkingSetBytes: uint64Ptr(1000),
},
Swap: &statsapi.SwapStats{
Time: testTime,
SwapUsageBytes: uint64Ptr(500),
},
},
},
summaryErr: nil,
Expand All @@ -85,6 +92,9 @@ func TestCollectResourceMetrics(t *testing.T) {
# HELP node_memory_working_set_bytes [ALPHA] Current working set of the node in bytes
# TYPE node_memory_working_set_bytes gauge
node_memory_working_set_bytes 1000 1624396278302
# HELP node_swap_usage_bytes [ALPHA] Current swap usage of the node in bytes. Reported only on non-windows systems
# TYPE node_swap_usage_bytes gauge
node_swap_usage_bytes 500 1624396278302
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
# TYPE scrape_error gauge
scrape_error 0
Expand Down Expand Up @@ -132,6 +142,10 @@ func TestCollectResourceMetrics(t *testing.T) {
Time: testTime,
WorkingSetBytes: uint64Ptr(1000),
},
Swap: &statsapi.SwapStats{
Time: testTime,
SwapUsageBytes: uint64Ptr(1000),
},
},
{
Name: "container_b",
Expand Down Expand Up @@ -189,6 +203,9 @@ func TestCollectResourceMetrics(t *testing.T) {
container_start_time_seconds{container="container_a",namespace="namespace_a",pod="pod_a"} 1.6243962483020916e+09 1624396248302
container_start_time_seconds{container="container_a",namespace="namespace_b",pod="pod_b"} 1.6243956783020916e+09 1624395678302
container_start_time_seconds{container="container_b",namespace="namespace_a",pod="pod_a"} 1.6243961583020916e+09 1624396158302
# HELP container_swap_usage_bytes [ALPHA] Current amount of the container swap usage in bytes. Reported only on non-windows systems
# TYPE container_swap_usage_bytes gauge
container_swap_usage_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 1624396278302
`,
},
{
Expand Down Expand Up @@ -310,6 +327,10 @@ func TestCollectResourceMetrics(t *testing.T) {
Time: testTime,
WorkingSetBytes: uint64Ptr(1000),
},
Swap: &statsapi.SwapStats{
Time: testTime,
SwapUsageBytes: uint64Ptr(5000),
},
},
},
},
Expand All @@ -324,6 +345,9 @@ func TestCollectResourceMetrics(t *testing.T) {
# HELP pod_memory_working_set_bytes [ALPHA] Current working set of the pod in bytes
# TYPE pod_memory_working_set_bytes gauge
pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 1624396278302
# HELP pod_swap_usage_bytes [ALPHA] Current amount of the pod swap usage in bytes. Reported only on non-windows systems
# TYPE pod_swap_usage_bytes gauge
pod_swap_usage_bytes{namespace="namespace_a",pod="pod_a"} 5000 1624396278302
`,
},
{
Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/server/stats/summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ func (sp *summaryProviderImpl) Get(ctx context.Context, updateStats bool) (*stat
NodeName: node.Name,
CPU: rootStats.CPU,
Memory: rootStats.Memory,
Swap: rootStats.Swap,
Network: networkStats,
StartTime: sp.systemBootTime,
Fs: rootFsStats,
Expand Down Expand Up @@ -141,6 +142,7 @@ func (sp *summaryProviderImpl) GetCPUAndMemoryStats(ctx context.Context) (*stats
NodeName: node.Name,
CPU: rootStats.CPU,
Memory: rootStats.Memory,
Swap: rootStats.Swap,
StartTime: rootStats.StartTime,
SystemContainers: sp.GetSystemContainersCPUAndMemoryStats(nodeConfig, podStats, false),
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/kubelet/server/stats/summary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ func TestSummaryProviderGetStats(t *testing.T) {
assert.Equal(summary.Node.StartTime, systemBootTime)
assert.Equal(summary.Node.CPU, cgroupStatsMap["/"].cs.CPU)
assert.Equal(summary.Node.Memory, cgroupStatsMap["/"].cs.Memory)
assert.Equal(summary.Node.Swap, cgroupStatsMap["/"].cs.Swap)
assert.Equal(summary.Node.Network, cgroupStatsMap["/"].ns)
assert.Equal(summary.Node.Fs, rootFsStats)
assert.Equal(summary.Node.Runtime, &statsapi.RuntimeStats{ImageFs: imageFsStats})
Expand All @@ -112,6 +113,7 @@ func TestSummaryProviderGetStats(t *testing.T) {
Memory: cgroupStatsMap["/kubelet"].cs.Memory,
Accelerators: cgroupStatsMap["/kubelet"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/kubelet"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/kubelet"].cs.Swap,
})
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "misc",
Expand All @@ -120,6 +122,7 @@ func TestSummaryProviderGetStats(t *testing.T) {
Memory: cgroupStatsMap["/misc"].cs.Memory,
Accelerators: cgroupStatsMap["/misc"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/misc"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/misc"].cs.Swap,
})
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "runtime",
Expand All @@ -128,6 +131,7 @@ func TestSummaryProviderGetStats(t *testing.T) {
Memory: cgroupStatsMap["/runtime"].cs.Memory,
Accelerators: cgroupStatsMap["/runtime"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/runtime"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/runtime"].cs.Swap,
})
assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
Name: "pods",
Expand All @@ -136,6 +140,7 @@ func TestSummaryProviderGetStats(t *testing.T) {
Memory: cgroupStatsMap["/pods"].cs.Memory,
Accelerators: cgroupStatsMap["/pods"].cs.Accelerators,
UserDefinedMetrics: cgroupStatsMap["/pods"].cs.UserDefinedMetrics,
Swap: cgroupStatsMap["/pods"].cs.Swap,
})
assert.Equal(summary.Pods, podStats)
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/stats/cadvisor_stats_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ func (p *cadvisorStatsProvider) ListPodStats(_ context.Context) ([]statsapi.PodS
cpu, memory := cadvisorInfoToCPUandMemoryStats(podInfo)
podStats.CPU = cpu
podStats.Memory = memory
podStats.Swap = cadvisorInfoToSwapStats(podInfo)
podStats.ProcessStats = cadvisorInfoToProcessStats(podInfo)
}

Expand Down Expand Up @@ -227,6 +228,7 @@ func (p *cadvisorStatsProvider) ListPodCPUAndMemoryStats(_ context.Context) ([]s
cpu, memory := cadvisorInfoToCPUandMemoryStats(podInfo)
podStats.CPU = cpu
podStats.Memory = memory
podStats.Swap = cadvisorInfoToSwapStats(podInfo)
}
result = append(result, *podStats)
}
Expand Down
8 changes: 8 additions & 0 deletions pkg/kubelet/stats/cadvisor_stats_provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,13 @@ func TestCadvisorListPodStats(t *testing.T) {
assert.EqualValues(t, testTime(creationTime, seedPod0Container0).Unix(), con.StartTime.Time.Unix())
checkCPUStats(t, "Pod0Container0", seedPod0Container0, con.CPU)
checkMemoryStats(t, "Pod0Conainer0", seedPod0Container0, infos["/pod0-c0"], con.Memory)
checkSwapStats(t, "Pod0Conainer0", seedPod0Container0, infos["/pod0-c0"], con.Swap)

con = indexCon[cName01]
assert.EqualValues(t, testTime(creationTime, seedPod0Container1).Unix(), con.StartTime.Time.Unix())
checkCPUStats(t, "Pod0Container1", seedPod0Container1, con.CPU)
checkMemoryStats(t, "Pod0Container1", seedPod0Container1, infos["/pod0-c1"], con.Memory)
checkSwapStats(t, "Pod0Container1", seedPod0Container1, infos["/pod0-c1"], con.Swap)

assert.EqualValues(t, p0Time.Unix(), ps.StartTime.Time.Unix())
checkNetworkStats(t, "Pod0", seedPod0Infra, ps.Network)
Expand All @@ -309,6 +311,9 @@ func TestCadvisorListPodStats(t *testing.T) {
if ps.Memory != nil {
checkMemoryStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Memory)
}
if ps.Swap != nil {
checkSwapStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Swap)
}

// Validate Pod1 Results
ps, found = indexPods[prf1]
Expand All @@ -318,6 +323,7 @@ func TestCadvisorListPodStats(t *testing.T) {
assert.Equal(t, cName10, con.Name)
checkCPUStats(t, "Pod1Container0", seedPod1Container, con.CPU)
checkMemoryStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Memory)
checkSwapStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Swap)
checkNetworkStats(t, "Pod1", seedPod1Infra, ps.Network)

// Validate Pod2 Results
Expand All @@ -328,6 +334,7 @@ func TestCadvisorListPodStats(t *testing.T) {
assert.Equal(t, cName20, con.Name)
checkCPUStats(t, "Pod2Container0", seedPod2Container, con.CPU)
checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory)
checkSwapStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Swap)
checkNetworkStats(t, "Pod2", seedPod2Infra, ps.Network)

// Validate Pod3 Results
Expand All @@ -344,6 +351,7 @@ func TestCadvisorListPodStats(t *testing.T) {
assert.Equal(t, cName31, con.Name)
checkCPUStats(t, "Pod3Container1", seedPod3Container1, con.CPU)
checkMemoryStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Memory)
checkSwapStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Swap)
}

func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
Expand Down
40 changes: 40 additions & 0 deletions pkg/kubelet/stats/cri_stats_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ func (p *criStatsProvider) listPodStatsPartiallyFromCRI(ctx context.Context, upd
cs := p.makeContainerStats(stats, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata(), updateCPUNanoCoreUsage)
p.addPodNetworkStats(ps, podSandboxID, caInfos, cs, containerNetworkStats[podSandboxID])
p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)
p.addSwapStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)
p.addProcessStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)

// If cadvisor stats is available for the container, use it to populate
Expand Down Expand Up @@ -548,6 +549,31 @@ func (p *criStatsProvider) addPodCPUMemoryStats(
}
}

// addSwapStats populates ps.Swap for the pod identified by podUID.
//
// If allInfos contains cadvisor info for the pod, the pod's swap stats are
// taken from it directly. Otherwise the swap values of the container stats cs
// are added to whatever ps.Swap already holds, so calling this once per
// container accumulates a pod-level total.
func (p *criStatsProvider) addSwapStats(
	ps *statsapi.PodStats,
	podUID types.UID,
	allInfos map[string]cadvisorapiv2.ContainerInfo,
	cs *statsapi.ContainerStats,
) {
	// Prefer the pod-level swap stats reported by cadvisor when available.
	if podInfo := getCadvisorPodInfoFromPodUID(podUID, allInfos); podInfo != nil {
		ps.Swap = cadvisorInfoToSwapStats(podInfo)
		return
	}

	// No cadvisor info for the pod: fall back to summing container swap stats.
	containerSwap := cs.Swap
	if containerSwap == nil {
		return
	}
	if ps.Swap == nil {
		// The first container seen supplies the timestamp of the pod sample.
		ps.Swap = &statsapi.SwapStats{Time: containerSwap.Time}
	}

	availableTotal := getUint64Value(containerSwap.SwapAvailableBytes) + getUint64Value(ps.Swap.SwapAvailableBytes)
	usageTotal := getUint64Value(containerSwap.SwapUsageBytes) + getUint64Value(ps.Swap.SwapUsageBytes)
	ps.Swap.SwapAvailableBytes = &availableTotal
	ps.Swap.SwapUsageBytes = &usageTotal
}

func (p *criStatsProvider) addProcessStats(
ps *statsapi.PodStats,
podUID types.UID,
Expand Down Expand Up @@ -577,6 +603,7 @@ func (p *criStatsProvider) makeContainerStats(
CPU: &statsapi.CPUStats{},
Memory: &statsapi.MemoryStats{},
Rootfs: &statsapi.FsStats{},
Swap: &statsapi.SwapStats{},
// UserDefinedMetrics is not supported by CRI.
}
if stats.Cpu != nil {
Expand Down Expand Up @@ -607,6 +634,19 @@ func (p *criStatsProvider) makeContainerStats(
result.Memory.Time = metav1.NewTime(time.Unix(0, time.Now().UnixNano()))
result.Memory.WorkingSetBytes = uint64Ptr(0)
}
if stats.Swap != nil {
result.Swap.Time = metav1.NewTime(time.Unix(0, stats.Swap.Timestamp))
if stats.Swap.SwapUsageBytes != nil {
result.Swap.SwapUsageBytes = &stats.Swap.SwapUsageBytes.Value
}
if stats.Swap.SwapAvailableBytes != nil {
result.Swap.SwapAvailableBytes = &stats.Swap.SwapAvailableBytes.Value
}
} else {
result.Swap.Time = metav1.NewTime(time.Unix(0, time.Now().UnixNano()))
result.Swap.SwapUsageBytes = uint64Ptr(0)
result.Swap.SwapAvailableBytes = uint64Ptr(0)
}
if stats.WritableLayer != nil {
result.Rootfs.Time = metav1.NewTime(time.Unix(0, stats.WritableLayer.Timestamp))
if stats.WritableLayer.UsedBytes != nil {
Expand Down
Loading

0 comments on commit b4d793c

Please sign in to comment.