diff --git a/config/net-cgroup-system-stats-monitor.json b/config/net-cgroup-system-stats-monitor.json new file mode 100644 index 000000000..ec479186a --- /dev/null +++ b/config/net-cgroup-system-stats-monitor.json @@ -0,0 +1,55 @@ +{ + "net": { + "metricsConfigs": { + "net/rx_bytes": { + "displayName": "net/rx_bytes" + }, + "net/rx_packets": { + "displayName": "net/rx_packets" + }, + "net/rx_errors": { + "displayName": "net/rx_errors" + }, + "net/rx_dropped": { + "displayName": "net/rx_dropped" + }, + "net/rx_fifo": { + "displayName": "net/rx_fifo" + }, + "net/rx_frame": { + "displayName": "net/rx_frame" + }, + "net/rx_compressed": { + "displayName": "net/rx_compressed" + }, + "net/rx_multicast": { + "displayName": "net/rx_multicast" + }, + "net/tx_bytes": { + "displayName": "net/tx_bytes" + }, + "net/tx_packets": { + "displayName": "net/tx_packets" + }, + "net/tx_errors": { + "displayName": "net/tx_errors" + }, + "net/tx_dropped": { + "displayName": "net/tx_dropped" + }, + "net/tx_fifo": { + "displayName": "net/tx_fifo" + }, + "net/tx_collisions": { + "displayName": "net/tx_collisions" + }, + "net/tx_carrier": { + "displayName": "net/tx_carrier" + }, + "net/tx_compressed": { + "displayName": "net/tx_compressed" + } + } + }, + "invokeInterval": "120s" +} diff --git a/config/systemd/node-problem-detector-metric-only.service b/config/systemd/node-problem-detector-metric-only.service index 2503bb139..b84206b6d 100644 --- a/config/systemd/node-problem-detector-metric-only.service +++ b/config/systemd/node-problem-detector-metric-only.service @@ -10,7 +10,7 @@ ExecStart=/home/kubernetes/bin/node-problem-detector --v=2 --logtostderr --enabl --exporter.stackdriver=/home/kubernetes/node-problem-detector/config/exporter/stackdriver-exporter.json \ --config.system-log-monitor=/home/kubernetes/node-problem-detector/config/kernel-monitor.json,/home/kubernetes/node-problem-detector/config/docker-monitor.json,/home/kubernetes/node-problem-detector/config/systemd-monitor.json \ 
--config.custom-plugin-monitor=/home/kubernetes/node-problem-detector/config/kernel-monitor-counter.json,/home/kubernetes/node-problem-detector/config/systemd-monitor-counter.json \ - --config.system-stats-monitor=/home/kubernetes/node-problem-detector/config/system-stats-monitor.json + --config.system-stats-monitor=/home/kubernetes/node-problem-detector/config/system-stats-monitor.json,/home/kubernetes/node-problem-detector/config/net-cgroup-system-stats-monitor.json [Install] WantedBy=multi-user.target diff --git a/pkg/exporters/stackdriver/stackdriver_exporter.go b/pkg/exporters/stackdriver/stackdriver_exporter.go index 1e01c57ec..50a52289c 100644 --- a/pkg/exporters/stackdriver/stackdriver_exporter.go +++ b/pkg/exporters/stackdriver/stackdriver_exporter.go @@ -73,6 +73,22 @@ var NPDMetricToSDMetric = map[metrics.MetricID]string{ metrics.SystemProcsRunning: "kubernetes.io/internal/node/guest/system/procs_running", metrics.SystemProcsBlocked: "kubernetes.io/internal/node/guest/system/procs_blocked", metrics.SystemInterruptsTotal: "kubernetes.io/internal/node/guest/system/interrupts_total", + metrics.NetDevRxBytes: "kubernetes.io/internal/node/guest/net/rx_bytes", + metrics.NetDevRxPackets: "kubernetes.io/internal/node/guest/net/rx_packets", + metrics.NetDevRxErrors: "kubernetes.io/internal/node/guest/net/rx_errors", + metrics.NetDevRxDropped: "kubernetes.io/internal/node/guest/net/rx_dropped", + metrics.NetDevRxFifo: "kubernetes.io/internal/node/guest/net/rx_fifo", + metrics.NetDevRxFrame: "kubernetes.io/internal/node/guest/net/rx_frame", + metrics.NetDevRxCompressed: "kubernetes.io/internal/node/guest/net/rx_compressed", + metrics.NetDevRxMulticast: "kubernetes.io/internal/node/guest/net/rx_multicast", + metrics.NetDevTxBytes: "kubernetes.io/internal/node/guest/net/tx_bytes", + metrics.NetDevTxPackets: "kubernetes.io/internal/node/guest/net/tx_packets", + metrics.NetDevTxErrors: "kubernetes.io/internal/node/guest/net/tx_errors", + metrics.NetDevTxDropped: 
"kubernetes.io/internal/node/guest/net/tx_dropped", + metrics.NetDevTxFifo: "kubernetes.io/internal/node/guest/net/tx_fifo", + metrics.NetDevTxCollisions: "kubernetes.io/internal/node/guest/net/tx_collisions", + metrics.NetDevTxCarrier: "kubernetes.io/internal/node/guest/net/tx_carrier", + metrics.NetDevTxCompressed: "kubernetes.io/internal/node/guest/net/tx_compressed", } func getMetricTypeConversionFunction(customMetricPrefix string) func(*view.View) string { diff --git a/pkg/systemstatsmonitor/README.md b/pkg/systemstatsmonitor/README.md index 791b950d3..2153009e5 100644 --- a/pkg/systemstatsmonitor/README.md +++ b/pkg/systemstatsmonitor/README.md @@ -77,9 +77,9 @@ Below metrics are collected from `memory` component: * `memory_unevictable_used`: [Unevictable memory][/proc doc] usage, in Bytes. * `memory_dirty_used`: Dirty pages usage, in Bytes. Memory usage state is reported under the `state` metric label (e.g. `dirty`, `writeback`). `dirty` means the memory is waiting to be written back to disk, and `writeback` means the memory is actively being written back to disk. -### OS features +### OS features -The guest OS features such as KTD kernel, GPU support are collected. Below are the OS +The guest OS features such as KTD kernel, GPU support are collected. Below are the OS features collected: * `KTD`: Enabled, if KTD feature is enabled on OS @@ -87,8 +87,31 @@ features collected: * `KernelModuleIntegrity`: Enabled, if load pin security is enabled and modules are signed. * `GPUSupport`: Enabled, if OS has GPU drivers installed like nvidia. * `UnknownModules`: Enabled, if the OS has third party kernel modules installed. -UnknownModules are derived from the /proc/modules compared with the known-modules.json. +UnknownModules are derived from the /proc/modules compared with the known-modules.json. And an option: -`knownModulesConfigPath`: The path to the file that contains the known modules(default -modules) can be set. 
By default, the path is set to `known-modules.json` \ No newline at end of file +`knownModulesConfigPath`: The path to the file that contains the known modules (default +modules) can be set. By default, the path is set to `known-modules.json` + +### IP Stats (Net Dev) + +Below metrics are collected from `net` component: + +* `net/rx_bytes`: Cumulative count of bytes received. +* `net/rx_packets`: Cumulative count of packets received. +* `net/rx_errors`: Cumulative count of receive errors encountered. +* `net/rx_dropped`: Cumulative count of packets dropped while receiving. +* `net/rx_fifo`: Cumulative count of receive FIFO buffer errors. +* `net/rx_frame`: Cumulative count of packet framing errors. +* `net/rx_compressed`: Cumulative count of compressed packets received by the device driver. +* `net/rx_multicast`: Cumulative count of multicast frames received by the device driver. +* `net/tx_bytes`: Cumulative count of bytes transmitted. +* `net/tx_packets`: Cumulative count of packets transmitted. +* `net/tx_errors`: Cumulative count of transmit errors encountered. +* `net/tx_dropped`: Cumulative count of packets dropped while transmitting. +* `net/tx_fifo`: Cumulative count of transmit FIFO buffer errors. +* `net/tx_collisions`: Cumulative count of collisions detected on the interface. +* `net/tx_carrier`: Cumulative count of carrier losses detected by the device driver. +* `net/tx_compressed`: Cumulative count of compressed packets transmitted by the device driver. + +All of the above metrics have an `interface_name` label that identifies the network interface. 
diff --git a/pkg/systemstatsmonitor/labels.go b/pkg/systemstatsmonitor/labels.go
index e34ad3d0a..bf82a90ff 100644
--- a/pkg/systemstatsmonitor/labels.go
+++ b/pkg/systemstatsmonitor/labels.go
@@ -42,3 +42,6 @@ const osVersionLabel = "os_version"
 
 // osVersionLabel labels the kernel version
 const kernelVersionLabel = "kernel_version"
+
+// interfaceNameLabel labels the network interface name
+const interfaceNameLabel = "interface_name"
diff --git a/pkg/systemstatsmonitor/net_collector.go b/pkg/systemstatsmonitor/net_collector.go
new file mode 100644
index 000000000..d9ed2a4a1
--- /dev/null
+++ b/pkg/systemstatsmonitor/net_collector.go
@@ -0,0 +1,172 @@
+/*
+Copyright 2020 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package systemstatsmonitor
+
+import (
+	"github.com/golang/glog"
+	"github.com/prometheus/procfs"
+	"github.com/shirou/gopsutil/host"
+	ssmtypes "k8s.io/node-problem-detector/pkg/systemstatsmonitor/types"
+	"k8s.io/node-problem-detector/pkg/util"
+	"k8s.io/node-problem-detector/pkg/util/metrics"
+)
+
+// netCollector records the per-interface net dev counters read through procfs.
+type netCollector struct {
+	// tags holds the label values attached to every recorded measurement.
+	tags map[string]string
+
+	mNetDevRxBytes      *metrics.Int64Metric
+	mNetDevRxPackets    *metrics.Int64Metric
+	mNetDevRxErrors     *metrics.Int64Metric
+	mNetDevRxDropped    *metrics.Int64Metric
+	mNetDevRxFifo       *metrics.Int64Metric
+	mNetDevRxFrame      *metrics.Int64Metric
+	mNetDevRxCompressed *metrics.Int64Metric
+	mNetDevRxMulticast  *metrics.Int64Metric
+	mNetDevTxBytes      *metrics.Int64Metric
+	mNetDevTxPackets    *metrics.Int64Metric
+	mNetDevTxErrors     *metrics.Int64Metric
+	mNetDevTxDropped    *metrics.Int64Metric
+	mNetDevTxFifo       *metrics.Int64Metric
+	mNetDevTxCollisions *metrics.Int64Metric
+	mNetDevTxCarrier    *metrics.Int64Metric
+	mNetDevTxCompressed *metrics.Int64Metric
+
+	config *ssmtypes.NetStatsConfig
+}
+
+// NewNetCollectorOrDie builds a netCollector and registers every net/* metric
+// under the display name configured in netConfig. It exits the process through
+// glog.Fatalf when the kernel version, the OS version, or a metric cannot be
+// initialized.
+func NewNetCollectorOrDie(netConfig *ssmtypes.NetStatsConfig) *netCollector {
+	nc := netCollector{tags: map[string]string{}, config: netConfig}
+
+	kernelVersion, err := host.KernelVersion()
+	if err != nil {
+		glog.Fatalf("Failed to retrieve kernel version: %v", err)
+	}
+	nc.tags[kernelVersionLabel] = kernelVersion
+
+	osVersion, err := util.GetOSVersion()
+	if err != nil {
+		glog.Fatalf("Failed to retrieve OS version: %v", err)
+	}
+	nc.tags[osVersionLabel] = osVersion
+
+	// All metrics share the same aggregation and label keys, so they are
+	// registered from a table rather than from sixteen copies of one stanza.
+	specs := []struct {
+		target      **metrics.Int64Metric
+		id          metrics.MetricID
+		description string
+		unit        string
+	}{
+		{&nc.mNetDevRxBytes, metrics.NetDevRxBytes, "Cumulative count of bytes received.", "Byte"},
+		{&nc.mNetDevRxPackets, metrics.NetDevRxPackets, "Cumulative count of packets received.", "1"},
+		{&nc.mNetDevRxErrors, metrics.NetDevRxErrors, "Cumulative count of receive errors encountered.", "1"},
+		{&nc.mNetDevRxDropped, metrics.NetDevRxDropped, "Cumulative count of packets dropped while receiving.", "1"},
+		{&nc.mNetDevRxFifo, metrics.NetDevRxFifo, "Cumulative count of FIFO buffer errors.", "1"},
+		{&nc.mNetDevRxFrame, metrics.NetDevRxFrame, "Cumulative count of packet framing errors.", "1"},
+		{&nc.mNetDevRxCompressed, metrics.NetDevRxCompressed, "Cumulative count of compressed packets received by the device driver.", "1"},
+		{&nc.mNetDevRxMulticast, metrics.NetDevRxMulticast, "Cumulative count of multicast frames received by the device driver.", "1"},
+		{&nc.mNetDevTxBytes, metrics.NetDevTxBytes, "Cumulative count of bytes transmitted.", "Byte"},
+		{&nc.mNetDevTxPackets, metrics.NetDevTxPackets, "Cumulative count of packets transmitted.", "1"},
+		{&nc.mNetDevTxErrors, metrics.NetDevTxErrors, "Cumulative count of transmit errors encountered.", "1"},
+		{&nc.mNetDevTxDropped, metrics.NetDevTxDropped, "Cumulative count of packets dropped while transmitting.", "1"},
+		{&nc.mNetDevTxFifo, metrics.NetDevTxFifo, "Cumulative count of FIFO buffer errors.", "1"},
+		{&nc.mNetDevTxCollisions, metrics.NetDevTxCollisions, "Cumulative count of collisions detected on the interface.", "1"},
+		{&nc.mNetDevTxCarrier, metrics.NetDevTxCarrier, "Cumulative count of carrier losses detected by the device driver.", "1"},
+		{&nc.mNetDevTxCompressed, metrics.NetDevTxCompressed, "Cumulative count of compressed packets transmitted by the device driver.", "1"},
+	}
+	for _, spec := range specs {
+		*spec.target, err = metrics.NewInt64Metric(
+			spec.id,
+			netConfig.MetricsConfigs[string(spec.id)].DisplayName,
+			spec.description,
+			spec.unit,
+			metrics.Sum,
+			[]string{osVersionLabel, kernelVersionLabel, interfaceNameLabel})
+		if err != nil {
+			glog.Fatalf("Error initializing metric for %q: %v", spec.id, err)
+		}
+	}
+
+	return &nc
+}
+
+// recordNetDev reads the net dev statistics through procfs mounted at /proc and
+// records every counter tagged with its interface name. Nothing is recorded
+// unless all sixteen metrics are non-nil.
+func (nc *netCollector) recordNetDev() {
+	required := []*metrics.Int64Metric{
+		nc.mNetDevRxBytes, nc.mNetDevRxPackets, nc.mNetDevRxErrors, nc.mNetDevRxDropped,
+		nc.mNetDevRxFifo, nc.mNetDevRxFrame, nc.mNetDevRxCompressed, nc.mNetDevRxMulticast,
+		nc.mNetDevTxBytes, nc.mNetDevTxPackets, nc.mNetDevTxErrors, nc.mNetDevTxDropped,
+		nc.mNetDevTxFifo, nc.mNetDevTxCollisions, nc.mNetDevTxCarrier, nc.mNetDevTxCompressed,
+	}
+	for _, m := range required {
+		if m == nil {
+			return
+		}
+	}
+
+	fs, err := procfs.NewFS("/proc")
+	if err != nil {
+		// Do not call NetDev on an FS handle that failed to initialize.
+		glog.Errorf("Failed to open procfs: %v", err)
+		return
+	}
+	stats, err := fs.NetDev()
+	if err != nil {
+		glog.Errorf("Failed to retrieve net dev stat: %v", err)
+		return
+	}
+
+	for iface, ifaceStats := range stats {
+		// tags is reused across iterations; only the interface name changes.
+		nc.tags[interfaceNameLabel] = iface
+
+		nc.mNetDevRxBytes.Record(nc.tags, int64(ifaceStats.RxBytes))
+		nc.mNetDevRxPackets.Record(nc.tags, int64(ifaceStats.RxPackets))
+		nc.mNetDevRxErrors.Record(nc.tags, int64(ifaceStats.RxErrors))
+		nc.mNetDevRxDropped.Record(nc.tags, int64(ifaceStats.RxDropped))
+		nc.mNetDevRxFifo.Record(nc.tags, int64(ifaceStats.RxFIFO))
+		nc.mNetDevRxFrame.Record(nc.tags, int64(ifaceStats.RxFrame))
+		nc.mNetDevRxCompressed.Record(nc.tags, int64(ifaceStats.RxCompressed))
+		nc.mNetDevRxMulticast.Record(nc.tags, int64(ifaceStats.RxMulticast))
+		nc.mNetDevTxBytes.Record(nc.tags, int64(ifaceStats.TxBytes))
+		nc.mNetDevTxPackets.Record(nc.tags, int64(ifaceStats.TxPackets))
+		nc.mNetDevTxErrors.Record(nc.tags, int64(ifaceStats.TxErrors))
+		nc.mNetDevTxDropped.Record(nc.tags, int64(ifaceStats.TxDropped))
+		nc.mNetDevTxFifo.Record(nc.tags, int64(ifaceStats.TxFIFO))
+		nc.mNetDevTxCollisions.Record(nc.tags, int64(ifaceStats.TxCollisions))
+		nc.mNetDevTxCarrier.Record(nc.tags, int64(ifaceStats.TxCarrier))
+		nc.mNetDevTxCompressed.Record(nc.tags, int64(ifaceStats.TxCompressed))
+	}
+}
+
+// collect records one round of network interface statistics. It is a no-op on
+// a nil collector, which is the case when the net component is not configured.
+func (nc *netCollector) collect() {
+	if nc == nil {
+		return
+	}
+
+	nc.recordNetDev()
+}
diff --git a/pkg/systemstatsmonitor/system_stats_monitor.go b/pkg/systemstatsmonitor/system_stats_monitor.go
index 648c6dd27..717e9617a 100644
--- a/pkg/systemstatsmonitor/system_stats_monitor.go
+++ b/pkg/systemstatsmonitor/system_stats_monitor.go
@@ -44,6 +44,7 @@ type systemStatsMonitor struct {
 	diskCollector      *diskCollector
 	hostCollector      *hostCollector
 	memoryCollector    *memoryCollector
+	netCollector       *netCollector
 	osFeatureCollector *osFeatureCollector
 	tomb               *tomb.Tomb
 }
@@ -90,6 +91,9 @@ func NewSystemStatsMonitorOrDie(configPath string) types.Monitor {
 	if len(ssm.config.OsFeatureConfig.MetricsConfigs) > 0 {
 		ssm.osFeatureCollector = NewOsFeatureCollectorOrDie(&ssm.config.OsFeatureConfig)
 	}
+	if len(ssm.config.NetConfig.MetricsConfigs) > 0 {
+		ssm.netCollector = NewNetCollectorOrDie(&ssm.config.NetConfig)
+	}
 	return &ssm
 }
 
@@ -115,6 +119,7 @@ func (ssm *systemStatsMonitor) monitorLoop() {
 		ssm.hostCollector.collect()
 		ssm.memoryCollector.collect()
 		ssm.osFeatureCollector.collect()
+		ssm.netCollector.collect()
 	}
 
 	for {
@@ -125,6 +130,7 @@ func (ssm *systemStatsMonitor) monitorLoop() {
 			ssm.hostCollector.collect()
 			ssm.memoryCollector.collect()
 			ssm.osFeatureCollector.collect()
+			ssm.netCollector.collect()
 		case <-ssm.tomb.Stopping():
 			glog.Infof("System stats monitor stopped: %s", ssm.configPath)
 			return
diff --git a/pkg/systemstatsmonitor/types/config.go b/pkg/systemstatsmonitor/types/config.go
index 7d0e338af..54594f724 100644
--- a/pkg/systemstatsmonitor/types/config.go
+++ b/pkg/systemstatsmonitor/types/config.go
@@ -56,12 +56,17 @@ type OSFeatureStatsConfig struct {
 	KnownModulesConfigPath string `json:"knownModulesConfigPath"`
 }
 
+type NetStatsConfig struct {
+	MetricsConfigs map[string]MetricConfig
`json:"metricsConfigs"` +} + type SystemStatsConfig struct { CPUConfig CPUStatsConfig `json:"cpu"` DiskConfig DiskStatsConfig `json:"disk"` HostConfig HostStatsConfig `json:"host"` MemoryConfig MemoryStatsConfig `json:"memory"` OsFeatureConfig OSFeatureStatsConfig `json:"osFeature"` + NetConfig NetStatsConfig `json:"net"` InvokeIntervalString string `json:"invokeInterval"` InvokeInterval time.Duration `json:"-"` } diff --git a/pkg/util/metrics/metric.go b/pkg/util/metrics/metric.go index 4d1ae8477..3aa6d2aa9 100644 --- a/pkg/util/metrics/metric.go +++ b/pkg/util/metrics/metric.go @@ -46,6 +46,22 @@ const ( SystemProcsRunning MetricID = "system/procs_running" SystemProcsBlocked MetricID = "system/procs_blocked" SystemInterruptsTotal MetricID = "system/interrupts_total" + NetDevRxBytes MetricID = "net/rx_bytes" + NetDevRxPackets MetricID = "net/rx_packets" + NetDevRxErrors MetricID = "net/rx_errors" + NetDevRxDropped MetricID = "net/rx_dropped" + NetDevRxFifo MetricID = "net/rx_fifo" + NetDevRxFrame MetricID = "net/rx_frame" + NetDevRxCompressed MetricID = "net/rx_compressed" + NetDevRxMulticast MetricID = "net/rx_multicast" + NetDevTxBytes MetricID = "net/tx_bytes" + NetDevTxPackets MetricID = "net/tx_packets" + NetDevTxErrors MetricID = "net/tx_errors" + NetDevTxDropped MetricID = "net/tx_dropped" + NetDevTxFifo MetricID = "net/tx_fifo" + NetDevTxCollisions MetricID = "net/tx_collisions" + NetDevTxCarrier MetricID = "net/tx_carrier" + NetDevTxCompressed MetricID = "net/tx_compressed" ) var MetricMap MetricMapping diff --git a/test/e2e-install.sh b/test/e2e-install.sh index a789ca46f..d6c4639ee 100755 --- a/test/e2e-install.sh +++ b/test/e2e-install.sh @@ -47,13 +47,16 @@ function install-npd() { readonly workdir=$(mktemp -d) tar -xf "${TARBALL}" --directory "${workdir}" - + echo "Preparing NPD binary directory." mkdir -p "${BIN_DIR}" mount --bind "${BIN_DIR}" "${BIN_DIR}" # Below remount is to work around COS's noexec mount on /home. 
mount -o remount,exec "${BIN_DIR}" + echo "Stopping NPD" + systemctl stop node-problem-detector.service || true + echo "Installing NPD binary." cp "${workdir}"/bin/node-problem-detector "${BIN_DIR}" @@ -75,7 +78,6 @@ function install-npd() { # Start systemd service. echo "Starting NPD systemd service." systemctl daemon-reload - systemctl stop node-problem-detector.service || true systemctl start node-problem-detector.service } @@ -97,4 +99,4 @@ done shift "$((OPTIND-1))" -main "${@}" \ No newline at end of file +main "${@}"