From 9e218010a1022992e42139d68e387ae3702b5905 Mon Sep 17 00:00:00 2001
From: Andrea Panattoni
Date: Wed, 22 May 2024 16:09:15 +0200
Subject: [PATCH] e2e: Verify metrics-exporter expose netdevice metrics

Exposed metrics can be verified by scraping the prometheus
endpoint on the `sriov-network-metrics-exporter` pod.

Add a test that spawns an SR-IOV consuming pod and verifies
its receiving counter increase when the interface is pinged from outside.

Signed-off-by: Andrea Panattoni
---
 go.mod                                        |   4 +-
 .../tests/test_exporter_metrics.go            | 217 ++++++++++++++++++
 2 files changed, 219 insertions(+), 2 deletions(-)
 create mode 100644 test/conformance/tests/test_exporter_metrics.go

diff --git a/go.mod b/go.mod
index fca66390c..18e1f9f6a 100644
--- a/go.mod
+++ b/go.mod
@@ -22,6 +22,8 @@ require (
 	github.com/openshift/client-go v0.0.0-20220831193253-4950ae70c8ea
 	github.com/openshift/machine-config-operator v0.0.1-0.20230118083703-fc27a2bdaa85
 	github.com/pkg/errors v0.9.1
+	github.com/prometheus/client_model v0.5.0
+	github.com/prometheus/common v0.45.0
 	github.com/safchain/ethtool v0.3.0
 	github.com/spf13/cobra v1.7.0
 	github.com/stretchr/testify v1.8.4
@@ -110,8 +112,6 @@ require (
 	github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/prometheus/client_golang v1.17.0 // indirect
-	github.com/prometheus/client_model v0.5.0 // indirect
-	github.com/prometheus/common v0.45.0 // indirect
 	github.com/prometheus/procfs v0.12.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/shopspring/decimal v1.2.0 // indirect
diff --git a/test/conformance/tests/test_exporter_metrics.go b/test/conformance/tests/test_exporter_metrics.go
new file mode 100644
index 000000000..720c685d3
--- /dev/null
+++ b/test/conformance/tests/test_exporter_metrics.go
@@ -0,0 +1,217 @@
+package tests
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster"
+	"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/discovery"
+	"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces"
+	"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/network"
+	"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/pod"
+
+	dto "github.com/prometheus/client_model/go"
+	"github.com/prometheus/common/expfmt"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// The specs below enable the `metricsExporter` feature flag and check the
+// counters scraped from the sriov-network-metrics-exporter pod.
+var _ = Describe("[sriov] Metrics Exporter", Ordered, func() {
+
+	BeforeAll(func() {
+		err := namespaces.Create(namespaces.Test, clients)
+		Expect(err).ToNot(HaveOccurred())
+
+		err = namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled())
+		Expect(err).ToNot(HaveOccurred())
+
+		// Remember the flag value so the cluster is left as it was found.
+		featureFlagInitialValue := isFeatureFlagEnabled("metricsExporter")
+		DeferCleanup(func() {
+			By("Restoring initial feature flag value")
+			setFeatureFlag("metricsExporter", featureFlagInitialValue)
+		})
+
+		By("Enabling `metricsExporter` feature flag")
+		setFeatureFlag("metricsExporter", true)
+
+		WaitForSRIOVStable()
+	})
+
+	// Plain It, not FIt: a focused spec would skip every other spec in the suite.
+	It("collects metrics regarding receiving traffic via VF", func() {
+		sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace)
+		Expect(err).ToNot(HaveOccurred())
+
+		node, nic, err := sriovInfos.FindOneSriovNodeAndDevice()
+		Expect(err).ToNot(HaveOccurred())
+		By("Using device " + nic.Name + " on node " + node)
+
+		_, err = network.CreateSriovPolicy(clients, "test-me-policy-", operatorNamespace, nic.Name, node, 2, "metricsResource", "netdevice")
+		Expect(err).ToNot(HaveOccurred())
+
+		err = network.CreateSriovNetwork(clients, nic, "test-me-network", namespaces.Test, operatorNamespace, "metricsResource", ipamIpv4)
+		Expect(err).ToNot(HaveOccurred())
+		waitForNetAttachDef("test-me-network", namespaces.Test)
+
+		// Named testPod so the imported `pod` package is not shadowed.
+		testPod := createTestPod(node, []string{"test-me-network"})
+
+		ips, err := network.GetSriovNicIPs(testPod, "net1")
+		Expect(err).ToNot(HaveOccurred())
+		Expect(ips).NotTo(BeNil(), "No sriov network interface found.")
+		Expect(ips).To(HaveLen(1))
+
+		initialMetrics := getMetricsForNode(node)
+		initialRxBytes := getCounterForPod(initialMetrics, testPod, "sriov_vf_rx_bytes")
+		initialRxPackets := getCounterForPod(initialMetrics, testPod, "sriov_vf_rx_packets")
+
+		for _, ip := range ips {
+			pingPod(ip, node, "test-me-network")
+		}
+
+		finalMetrics := getMetricsForNode(node)
+		finalRxBytes := getCounterForPod(finalMetrics, testPod, "sriov_vf_rx_bytes")
+		finalRxPackets := getCounterForPod(finalMetrics, testPod, "sriov_vf_rx_packets")
+
+		Expect(finalRxBytes).Should(BeNumerically(">", initialRxBytes))
+		Expect(finalRxPackets).Should(BeNumerically(">", initialRxPackets+3))
+	})
+
+})
+
+// getMetricsForNode scrapes the metrics endpoint from inside the
+// sriov-network-metrics-exporter pod scheduled on nodeName and returns the
+// parsed metric families keyed by metric name. It fails the running spec if
+// no exporter pod is found, the scrape command fails or the output cannot
+// be parsed.
+func getMetricsForNode(nodeName string) map[string]*dto.MetricFamily {
+	metricsExporterPods, err := clients.Pods(operatorNamespace).List(context.Background(), metav1.ListOptions{
+		LabelSelector: "app=sriov-network-metrics-exporter",
+		FieldSelector: "spec.nodeName=" + nodeName,
+	})
+	ExpectWithOffset(1, err).ToNot(HaveOccurred())
+	ExpectWithOffset(1, metricsExporterPods.Items).ToNot(BeEmpty(),
+		"At least one metrics exporter pod expected on node %s", nodeName)
+
+	metricsExporterPod := metricsExporterPods.Items[0]
+
+	command := []string{"wget", "-qO-", "http://127.0.0.1:9110/metrics"}
+	stdout, stderr, err := pod.ExecCommand(clients, &metricsExporterPod, command...)
+	ExpectWithOffset(1, err).ToNot(HaveOccurred(),
+		"pod: [%s/%s] command: [%v]\nstdout: %s\nstderr: %s", metricsExporterPod.Namespace, metricsExporterPod.Name, command, stdout, stderr)
+
+	// Clean the scraped output from carriage returns
+	stdout = strings.ReplaceAll(stdout, "\r", "")
+
+	var parser expfmt.TextParser
+	mf, err := parser.TextToMetricFamilies(strings.NewReader(stdout))
+	ExpectWithOffset(1, err).ToNot(HaveOccurred())
+
+	return mf
+}
+
+// getCounterForPod returns the value of counter metricName for the VF
+// attached to pod p, resolving the VF PCI address through the
+// sriov_kubepoddevice metric.
+func getCounterForPod(mf map[string]*dto.MetricFamily, p *corev1.Pod, metricName string) float64 {
+	pciAddress := findPciAddressForPod(mf, p)
+	return findCounterForPciAddr(mf, pciAddress, metricName)
+}
+
+// findPciAddressForPod returns the value of the pciAddr label of the
+// sriov_kubepoddevice metric belonging to pod p, failing the spec if the
+// label is missing.
+func findPciAddressForPod(mf map[string]*dto.MetricFamily, p *corev1.Pod) string {
+	kubePodDeviceMetric := findKubePodDeviceMetric(mf, p)
+	for _, labelPair := range kubePodDeviceMetric.Label {
+		if labelPair.GetName() == "pciAddr" {
+			// GetValue is nil-safe, unlike dereferencing Value directly.
+			return labelPair.GetValue()
+		}
+	}
+
+	Fail(fmt.Sprintf("Can't find PCI Address for pod [%s/%s] in metrics %+v", p.Namespace, p.Name, mf))
+	return ""
+}
+
+// findKubePodDeviceMetric returns the sriov_kubepoddevice sample whose pod
+// and namespace labels identify the given pod, failing the spec if the
+// metric family or the sample is missing.
+func findKubePodDeviceMetric(mf map[string]*dto.MetricFamily, pod *corev1.Pod) *dto.Metric {
+	metricFamily, ok := mf["sriov_kubepoddevice"]
+	Expect(ok).To(BeTrue(), "sriov_kubepoddevice metric not found: %+v", mf)
+
+	kubePodDeviceMetric := findMetricForPod(metricFamily.Metric, pod)
+	Expect(kubePodDeviceMetric).ToNot(BeNil(), "sriov_kubepoddevice metric for pod [%s/%s] not found: %+v", pod.Namespace, pod.Name, mf)
+
+	return kubePodDeviceMetric
+}
+
+// findCounterForPciAddr returns the value of counter metricName for the
+// sample labelled with the given pciAddr, failing the spec if the metric
+// family or the sample is missing, or if the sample is not a counter.
+func findCounterForPciAddr(mf map[string]*dto.MetricFamily, pciAddress string, metricName string) float64 {
+	metricFamily, ok := mf[metricName]
+	Expect(ok).To(BeTrue(), "metric %s not found: %+v", metricName, mf)
+
+	metric := findMetricFor(metricFamily.Metric, map[string]string{
+		"pciAddr": pciAddress,
+	})
+	Expect(metric).ToNot(BeNil(), "metric %s for pciAddr %s not found: %+v", metricName, pciAddress, mf)
+
+	counter := metric.GetCounter()
+	Expect(counter).ToNot(BeNil(), "metric %s for pciAddr %s is not a counter: %+v", metricName, pciAddress, metric)
+
+	return counter.GetValue()
+}
+
+// findMetricForPod returns the first sample whose pod and namespace labels
+// match the given pod, or nil when there is none.
+func findMetricForPod(metrics []*dto.Metric, pod *corev1.Pod) *dto.Metric {
+	return findMetricFor(metrics, map[string]string{
+		"pod":       pod.Name,
+		"namespace": pod.Namespace,
+	})
+}
+
+// findMetricFor returns the first sample carrying every label in
+// labelsToMatch with the requested value, or nil when there is none.
+func findMetricFor(metrics []*dto.Metric, labelsToMatch map[string]string) *dto.Metric {
+	for _, metric := range metrics {
+		if areLabelsMatching(metric.Label, labelsToMatch) {
+			return metric
+		}
+	}
+
+	return nil
+}
+
+// areLabelsMatching reports whether labels contains every name in
+// labelsToMatch with the requested value. Labels not named in labelsToMatch
+// are ignored.
+func areLabelsMatching(labels []*dto.LabelPair, labelsToMatch map[string]string) bool {
+	// Getters are nil-safe, unlike dereferencing the Name/Value pointers.
+	actual := make(map[string]string, len(labels))
+	for _, labelPair := range labels {
+		actual[labelPair.GetName()] = labelPair.GetValue()
+	}
+
+	// A requested label that is absent must not count as a match, otherwise
+	// an unrelated sample could be returned.
+	for name, valueToMatch := range labelsToMatch {
+		value, ok := actual[name]
+		if !ok || value != valueToMatch {
+			return false
+		}
+	}
+
+	return true
+}