Skip to content

Commit

Permalink
Merge pull request #2621 from katarzyna-z/kk-memory-numa-stats
Browse files Browse the repository at this point in the history
Memory numa stats
  • Loading branch information
dashpole authored Aug 13, 2020
2 parents a6e4fcb + 6cb346e commit 90f391f
Show file tree
Hide file tree
Showing 9 changed files with 105 additions and 4 deletions.
4 changes: 3 additions & 1 deletion cmd/cadvisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ var (
// Metrics to be ignored.
// Tcp metrics are ignored by default.
ignoreMetrics metricSetValue = metricSetValue{container.MetricSet{
container.MemoryNumaMetrics: struct{}{},
container.NetworkTcpUsageMetrics: struct{}{},
container.NetworkUdpUsageMetrics: struct{}{},
container.NetworkAdvancedTcpUsageMetrics: struct{}{},
Expand All @@ -97,6 +98,7 @@ var (
container.AcceleratorUsageMetrics: struct{}{},
container.DiskUsageMetrics: struct{}{},
container.DiskIOMetrics: struct{}{},
container.MemoryNumaMetrics: struct{}{},
container.NetworkUsageMetrics: struct{}{},
container.NetworkTcpUsageMetrics: struct{}{},
container.NetworkAdvancedTcpUsageMetrics: struct{}{},
Expand Down Expand Up @@ -139,7 +141,7 @@ func (ml *metricSetValue) Set(value string) error {
}

func init() {
flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'accelerator', 'cpu_topology','disk', 'diskIO', 'network', 'tcp', 'udp', 'percpu', 'sched', 'process', 'hugetlb', 'referenced_memory', 'resctrl'.")
flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'accelerator', 'cpu_topology','disk', 'diskIO', 'memory_numa', 'network', 'tcp', 'udp', 'percpu', 'sched', 'process', 'hugetlb', 'referenced_memory', 'resctrl'.")

// Default logging verbosity to V(2)
flag.Set("v", "2")
Expand Down
7 changes: 7 additions & 0 deletions cmd/cadvisor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ func TestCPUTopologyMetricsAreDisabledByDefault(t *testing.T) {
assert.True(t, ignoreMetrics.Has(container.CPUTopologyMetrics))
}

func TestMemoryNumaMetricsAreDisabledByDefault(t *testing.T) {
assert.True(t, ignoreMetrics.Has(container.MemoryNumaMetrics))
flag.Parse()
assert.True(t, ignoreMetrics.Has(container.MemoryNumaMetrics))
}

func TestIgnoreMetrics(t *testing.T) {
tests := []struct {
value string
Expand Down Expand Up @@ -86,6 +92,7 @@ func TestToIncludedMetrics(t *testing.T) {
container.ProcessSchedulerMetrics: struct{}{},
container.PerCpuUsageMetrics: struct{}{},
container.MemoryUsageMetrics: struct{}{},
container.MemoryNumaMetrics: struct{}{},
container.CpuLoadMetrics: struct{}{},
container.DiskIOMetrics: struct{}{},
container.AcceleratorUsageMetrics: struct{}{},
Expand Down
2 changes: 2 additions & 0 deletions container/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const (
ProcessSchedulerMetrics MetricKind = "sched"
PerCpuUsageMetrics MetricKind = "percpu"
MemoryUsageMetrics MetricKind = "memory"
MemoryNumaMetrics MetricKind = "memory_numa"
CpuLoadMetrics MetricKind = "cpuLoad"
DiskIOMetrics MetricKind = "diskIO"
DiskUsageMetrics MetricKind = "disk"
Expand All @@ -70,6 +71,7 @@ var AllMetrics = MetricSet{
ProcessSchedulerMetrics: struct{}{},
PerCpuUsageMetrics: struct{}{},
MemoryUsageMetrics: struct{}{},
MemoryNumaMetrics: struct{}{},
CpuLoadMetrics: struct{}{},
DiskIOMetrics: struct{}{},
AcceleratorUsageMetrics: struct{}{},
Expand Down
21 changes: 21 additions & 0 deletions container/libcontainer/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,24 @@ func setMemoryStats(s *cgroups.Stats, ret *info.ContainerStats) {
ret.Memory.WorkingSet = workingSet
}

// getNumaStats returns a newly allocated copy of memoryStats.
//
// The result is always a non-nil map (empty when memoryStats is nil or empty)
// and shares no storage with the argument.
func getNumaStats(memoryStats map[uint8]uint64) map[uint8]uint64 {
	perNode := make(map[uint8]uint64, len(memoryStats))
	for nodeID := range memoryStats {
		perNode[nodeID] = memoryStats[nodeID]
	}
	return perNode
}

// setMemoryNumaStats copies the per-NUMA-node page usage reported in s into
// ret: the file, anon and unevictable node maps go to
// ret.Memory.ContainerData.NumaStats, and their Hierarchical counterparts go
// to ret.Memory.HierarchicalData.NumaStats. Every map is duplicated through
// getNumaStats, so ret does not alias the maps held by s.
func setMemoryNumaStats(s *cgroups.Stats, ret *info.ContainerStats) {
	usage := s.MemoryStats.PageUsageByNUMA
	ret.Memory.ContainerData.NumaStats = info.MemoryNumaStats{
		File:        getNumaStats(usage.File.Nodes),
		Anon:        getNumaStats(usage.Anon.Nodes),
		Unevictable: getNumaStats(usage.Unevictable.Nodes),
	}

	hierarchical := usage.Hierarchical
	ret.Memory.HierarchicalData.NumaStats = info.MemoryNumaStats{
		File:        getNumaStats(hierarchical.File.Nodes),
		Anon:        getNumaStats(hierarchical.Anon.Nodes),
		Unevictable: getNumaStats(hierarchical.Unevictable.Nodes),
	}
}

func setHugepageStats(s *cgroups.Stats, ret *info.ContainerStats) {
ret.Hugetlb = make(map[string]info.HugetlbStats)
for k, v := range s.HugetlbStats {
Expand Down Expand Up @@ -923,6 +941,9 @@ func newContainerStats(libcontainerStats *libcontainer.Stats, includedMetrics co
setDiskIoStats(s, ret)
}
setMemoryStats(s, ret)
if includedMetrics.Has(container.MemoryNumaMetrics) {
setMemoryNumaStats(s, ret)
}
if includedMetrics.Has(container.HugetlbUsageMetrics) {
setHugepageStats(s, ret)
}
Expand Down
1 change: 1 addition & 0 deletions docs/storage/prometheus.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ Metric name | Type | Description | Unit (where applicable) | -disable_metrics pa
`container_memory_cache` | Gauge | Total page cache memory | bytes | |
`container_memory_failcnt` | Counter | Number of memory usage hits limits | | |
`container_memory_failures_total` | Counter | Cumulative count of memory allocation failures | | |
`container_memory_numa_pages` | Gauge | Number of used pages per NUMA node | | memory_numa |
`container_memory_max_usage_bytes` | Gauge | Maximum memory usage recorded | bytes | |
`container_memory_rss` | Gauge | Size of RSS | bytes | |
`container_memory_swap` | Gauge | Container swap usage | bytes | |
Expand Down
11 changes: 9 additions & 2 deletions info/v1/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,9 +399,16 @@ type MemoryStats struct {
HierarchicalData MemoryStatsMemoryData `json:"hierarchical_data,omitempty"`
}

// MemoryNumaStats holds page counts broken down by NUMA node for three page
// categories. Each map is keyed by node ID and maps to a number of pages.
// A category is left out of the JSON encoding when its map is empty.
type MemoryNumaStats struct {
// File is the per-node page count for the "file" category.
File map[uint8]uint64 `json:"file,omitempty"`
// Anon is the per-node page count for the "anon" category.
Anon map[uint8]uint64 `json:"anon,omitempty"`
// Unevictable is the per-node page count for the "unevictable" category.
Unevictable map[uint8]uint64 `json:"unevictable,omitempty"`
}

// MemoryStatsMemoryData carries the pgfault and pgmajfault counters together
// with the per-NUMA-node page usage.
type MemoryStatsMemoryData struct {
Pgfault uint64 `json:"pgfault"`
Pgmajfault uint64 `json:"pgmajfault"`
Pgfault uint64 `json:"pgfault"`
Pgmajfault uint64 `json:"pgmajfault"`
// NOTE(review): encoding/json never treats a struct value as empty, so
// omitempty has no effect on this field itself; only the maps inside
// MemoryNumaStats are dropped when empty.
NumaStats MemoryNumaStats `json:"numa_stats,omitempty"`
}

type InterfaceStats struct {
Expand Down
39 changes: 38 additions & 1 deletion metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,8 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.Memory.WorkingSet), timestamp: s.Timestamp}}
},
}, {
},
{
name: "container_memory_failures_total",
help: "Cumulative count of memory allocation failures.",
valueType: prometheus.CounterValue,
Expand Down Expand Up @@ -454,6 +455,33 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
},
}...)
}
if includedMetrics.Has(container.MemoryNumaMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_memory_numa_pages",
help: "Number of used pages per NUMA node",
valueType: prometheus.GaugeValue,
extraLabels: []string{"type", "scope", "node"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0)
values = append(values, getNumaStatsPerNode(s.Memory.ContainerData.NumaStats.File,
[]string{"file", "container"}, s.Timestamp)...)
values = append(values, getNumaStatsPerNode(s.Memory.ContainerData.NumaStats.Anon,
[]string{"anon", "container"}, s.Timestamp)...)
values = append(values, getNumaStatsPerNode(s.Memory.ContainerData.NumaStats.Unevictable,
[]string{"unevictable", "container"}, s.Timestamp)...)

values = append(values, getNumaStatsPerNode(s.Memory.HierarchicalData.NumaStats.File,
[]string{"file", "hierarchy"}, s.Timestamp)...)
values = append(values, getNumaStatsPerNode(s.Memory.HierarchicalData.NumaStats.Anon,
[]string{"anon", "hierarchy"}, s.Timestamp)...)
values = append(values, getNumaStatsPerNode(s.Memory.HierarchicalData.NumaStats.Unevictable,
[]string{"unevictable", "hierarchy"}, s.Timestamp)...)
return values
},
},
}...)
}
if includedMetrics.Has(container.AcceleratorUsageMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
Expand Down Expand Up @@ -1903,3 +1931,12 @@ var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
func sanitizeLabelName(name string) string {
return invalidNameCharRE.ReplaceAllString(name, "_")
}

// getNumaStatsPerNode turns one per-NUMA-node page map into metric values.
//
// For every entry of nodeStats it emits a metricValue whose value is the page
// count, whose timestamp is timestamp, and whose labels are the given labels
// followed by the node ID in decimal. The order of the returned values follows
// map iteration and is therefore unspecified. A nil or empty nodeStats yields
// an empty result.
//
// The labels argument is never written to, and each returned value owns its
// own label slice.
func getNumaStatsPerNode(nodeStats map[uint8]uint64, labels []string, timestamp time.Time) metricValues {
	mValues := make(metricValues, 0, len(nodeStats))
	for node, stat := range nodeStats {
		// Build a fresh slice per node. Appending straight onto labels would
		// reuse its backing array whenever it has spare capacity, making all
		// emitted values share (and overwrite) a single node label.
		nodeLabels := make([]string, 0, len(labels)+1)
		nodeLabels = append(nodeLabels, labels...)
		nodeLabels = append(nodeLabels, strconv.FormatUint(uint64(node), 10))
		mValues = append(mValues, metricValue{value: float64(stat), labels: nodeLabels, timestamp: timestamp})
	}
	return mValues
}
10 changes: 10 additions & 0 deletions metrics/prometheus_fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,10 +327,20 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req
ContainerData: info.MemoryStatsMemoryData{
Pgfault: 10,
Pgmajfault: 11,
NumaStats: info.MemoryNumaStats{
File: map[uint8]uint64{0: 16649, 1: 10000},
Anon: map[uint8]uint64{0: 10000, 1: 7109},
Unevictable: map[uint8]uint64{0: 8900, 1: 10000},
},
},
HierarchicalData: info.MemoryStatsMemoryData{
Pgfault: 12,
Pgmajfault: 13,
NumaStats: info.MemoryNumaStats{
File: map[uint8]uint64{0: 36649, 1: 10000},
Anon: map[uint8]uint64{0: 20000, 1: 7109},
Unevictable: map[uint8]uint64{0: 8900, 1: 20000},
},
},
Cache: 14,
RSS: 15,
Expand Down
14 changes: 14 additions & 0 deletions metrics/testdata/prometheus_metrics
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,20 @@ container_memory_mapped_file{container_env_foo_env="prod",container_label_foo_la
# HELP container_memory_max_usage_bytes Maximum memory usage recorded in bytes
# TYPE container_memory_max_usage_bytes gauge
container_memory_max_usage_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 8 1395066363000
# HELP container_memory_numa_pages Number of used pages per NUMA node
# TYPE container_memory_numa_pages gauge
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="0",scope="container",type="anon",zone_name="hello"} 10000 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="0",scope="container",type="file",zone_name="hello"} 16649 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="0",scope="container",type="unevictable",zone_name="hello"} 8900 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="0",scope="hierarchy",type="anon",zone_name="hello"} 20000 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="0",scope="hierarchy",type="file",zone_name="hello"} 36649 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="0",scope="hierarchy",type="unevictable",zone_name="hello"} 8900 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="1",scope="container",type="anon",zone_name="hello"} 7109 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="1",scope="container",type="file",zone_name="hello"} 10000 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="1",scope="container",type="unevictable",zone_name="hello"} 10000 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="1",scope="hierarchy",type="anon",zone_name="hello"} 7109 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="1",scope="hierarchy",type="file",zone_name="hello"} 10000 1395066363000
container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="1",scope="hierarchy",type="unevictable",zone_name="hello"} 20000 1395066363000
# HELP container_memory_rss Size of RSS in bytes.
# TYPE container_memory_rss gauge
container_memory_rss{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 15 1395066363000
Expand Down

0 comments on commit 90f391f

Please sign in to comment.