measurement: introduce WatchListLatencyPrometheus measurement

kubernetes · Sep 11, 2023 · b9e3f4f · b9e3f4f
1 parent 1f0a1bd
commit b9e3f4f
Show file tree

Hide file tree

Showing 4 changed files with 301 additions and 0 deletions.
diff --git a/clusterloader2/pkg/measurement/common/testdata/watch_list_latency_prometheus/sample.yaml b/clusterloader2/pkg/measurement/common/testdata/watch_list_latency_prometheus/sample.yaml
@@ -0,0 +1,36 @@
+interval: 1m
+input_series:
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="1"}
+    values: 0 0 0 0 0 0 1 991 991 991 991
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="5"}
+    values: 0 0 0 0 0 0 1 1001 1001 1001 1001
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="+Inf"}
+    values: 0 0 0 0 0 0 1 1001 1001 1001 1001
+
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="1"}
+    values: 0 0 0 0 0 0 1 991 991 991 991
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="5"}
+    values: 0 0 0 0 0 0 1 1001 1001 1001 1001
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="+Inf"}
+    values: 0 0 0 0 0 0 1 1001 1001 1001 1001
+
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="1"}
+    values: 0 0 0 0 0 0 1 991 991 991 991
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="5"}
+    values: 0 0 0 0 0 0 1 1001 1001 1001 1001
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="+Inf"}
+    values: 0 0 0 0 0 0 1 1001 1001 1001 1001
+
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="1"}
+    values: 0 0 0 0 0 0 1 701 701 701 701 701
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="1.5"}
+    values: 0 0 0 0 0 0 1 1001 1001 1001 1001
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="+Inf"}
+    values: 0 0 0 0 0 0 1 1001 1001 1001 1001
+
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="5"}
+    values: 0 0 0 0 0 0 1 901 901 901 901 901
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="10"}
+    values: 0 0 0 0 0 0 1 1001 1001 1001 1001
+  - series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="+Inf"}
+    values: 0 0 0 0 0 0 1 1001 1001 1001 1001
diff --git a/clusterloader2/pkg/measurement/common/testdata/watch_list_latency_prometheus/sample.yaml.golden b/clusterloader2/pkg/measurement/common/testdata/watch_list_latency_prometheus/sample.yaml.golden
@@ -0,0 +1,18 @@
+{
+  "version": "v1",
+  "dataItems": [
+    {
+      "data": {
+        "Perc50": 713.980028,
+        "Perc90": 1333.166666,
+        "Perc99": 1483.316666
+      },
+      "unit": "ms",
+      "labels": {
+        "Group": "storage.k8s.io",
+        "Resource": "pod",
+        "Scope": "namespace"
+      }
+    }
+  ]
+}
diff --git a/clusterloader2/pkg/measurement/common/watch_list_latency_prometheus.go b/clusterloader2/pkg/measurement/common/watch_list_latency_prometheus.go
@@ -0,0 +1,168 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package common
+
+import (
+	"fmt"
+	"sort"
+	"strconv"
+	"time"
+
+	"github.com/prometheus/common/model"
+
+	"k8s.io/klog/v2"
+	"k8s.io/perf-tests/clusterloader2/pkg/measurement"
+	measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util"
+	"k8s.io/perf-tests/clusterloader2/pkg/util"
+)
+
+const (
+	// watchListLatencyPrometheusMeasurementName is the name under which the
+	// measurement is registered; it also serves as the default summary name.
+	watchListLatencyPrometheusMeasurementName = "WatchListLatencyPrometheus"
+
+	// watchListLatencyQuery is the PromQL template that computes a latency
+	// quantile of apiserver_watch_cache_watch_list_duration_seconds, aggregated
+	// by (group, resource, scope).
+	// Its placeholders must be replaced with (1) the quantile and (2) the query window size.
+	watchListLatencyQuery = "histogram_quantile(%.2f, sum(rate(apiserver_watch_cache_watch_list_duration_seconds{}[%v])) by (group, resource, scope, le))"
+)
+
+// init registers the WatchListLatencyPrometheus measurement and logs a fatal
+// error if the registration fails.
+func init() {
+	err := measurement.Register(watchListLatencyPrometheusMeasurementName, func() measurement.Measurement {
+		return CreatePrometheusMeasurement(&watchListLatencyGatherer{})
+	})
+	if err != nil {
+		klog.Fatalf("Cannot register %s: %v", watchListLatencyPrometheusMeasurementName, err)
+	}
+}
+
+// watchListLatencyMetric holds the latency quantiles recorded for a single
+// (group, resource, scope) combination.
+type watchListLatencyMetric struct {
+	// Group, Resource and Scope identify the entry; in this file they are
+	// filled from the sample labels of the same (lower-case) names.
+	Group    string                        `json:"group"`
+	Resource string                        `json:"resource"`
+	Scope    string                        `json:"scope"`
+	// Latency accumulates the quantile values set through SetQuantile.
+	Latency  measurementutil.LatencyMetric `json:"latency"`
+}
+
+// watchListLatencyMetrics indexes watchListLatencyMetric entries by a
+// "group|resource|scope" key.
+type watchListLatencyMetrics map[string]*watchListLatencyMetric
+
+// SetLatency records latency as the given quantile of the entry identified by
+// group, resource and scope, creating that entry on first use.
+func (m watchListLatencyMetrics) SetLatency(group, resource, scope string, quantile float64, latency time.Duration) {
+	id := group + "|" + resource + "|" + scope
+	metric, found := m[id]
+	if !found {
+		metric = &watchListLatencyMetric{Group: group, Resource: resource, Scope: scope}
+		m[id] = metric
+	}
+	metric.Latency.SetQuantile(quantile, latency)
+}
+
+// watchListLatencyGatherer produces the WatchListLatencyPrometheus summary
+// from Prometheus query results.
+type watchListLatencyGatherer struct{}
+
+// Gather queries the watch-list latency quantiles for the window between
+// startTime and endTime and returns them as a single JSON summary. The summary
+// name is taken from the "summaryName" param and defaults to m.String().
+func (m *watchListLatencyGatherer) Gather(executor QueryExecutor, startTime, endTime time.Time, config *measurement.Config) ([]measurement.Summary, error) {
+	samples, err := gatherWatchListLatencyPrometheusSamples(executor, startTime, endTime)
+	if err != nil {
+		return nil, err
+	}
+
+	metrics, err := convertWatchListPrometheusSamplesToWatchListLatencyMetrics(samples)
+	if err != nil {
+		return nil, err
+	}
+
+	perfData := convertWatchListLatencyMetricsToPerfData(metrics)
+	content, err := util.PrettyPrintJSON(perfData)
+	if err != nil {
+		return nil, err
+	}
+
+	name, err := util.GetStringOrDefault(config.Params, "summaryName", m.String())
+	if err != nil {
+		return nil, err
+	}
+
+	return []measurement.Summary{measurement.CreateSummary(name, "json", content)}, nil
+}
+
+// Configure is a no-op: the gatherer has nothing to configure.
+func (m *watchListLatencyGatherer) Configure(_ *measurement.Config) error {
+	return nil
+}
+
+// IsEnabled always reports true.
+func (m *watchListLatencyGatherer) IsEnabled(_ *measurement.Config) bool {
+	return true
+}
+
+// String returns the name of the measurement.
+func (m *watchListLatencyGatherer) String() string {
+	return watchListLatencyPrometheusMeasurementName
+}
+
+// gatherWatchListLatencyPrometheusSamples runs watchListLatencyQuery once per
+// supported quantile, using endTime-startTime as the query window and endTime
+// as the evaluation time. Every returned sample is tagged with a "quantile"
+// label so the quantile it belongs to can be recovered later.
+func gatherWatchListLatencyPrometheusSamples(executor QueryExecutor, startTime, endTime time.Time) ([]*model.Sample, error) {
+	window := measurementutil.ToPrometheusTime(endTime.Sub(startTime))
+
+	var collected []*model.Sample
+	// LatencyMetric is what gets collected, so only the 0.5, 0.9 and 0.99
+	// quantiles are supported.
+	for _, quantile := range []float64{0.5, 0.9, 0.99} {
+		result, err := executor.Query(fmt.Sprintf(watchListLatencyQuery, quantile, window), endTime)
+		if err != nil {
+			return nil, err
+		}
+		label := model.LabelValue(fmt.Sprintf("%.2f", quantile))
+		for i := range result {
+			result[i].Metric["quantile"] = label
+		}
+		collected = append(collected, result...)
+	}
+
+	return collected, nil
+}
+
+// convertWatchListPrometheusSamplesToWatchListLatencyMetrics groups the given
+// samples by their "group", "resource" and "scope" labels. Each sample value
+// is interpreted as seconds and stored under the quantile parsed from the
+// sample's "quantile" label. An unparsable quantile label aborts the
+// conversion with the parse error.
+func convertWatchListPrometheusSamplesToWatchListLatencyMetrics(latencySamples []*model.Sample) (watchListLatencyMetrics, error) {
+	result := watchListLatencyMetrics{}
+
+	for _, s := range latencySamples {
+		q, err := strconv.ParseFloat(string(s.Metric["quantile"]), 64)
+		if err != nil {
+			return nil, err
+		}
+
+		seconds := float64(s.Value)
+		result.SetLatency(
+			string(s.Metric["group"]),
+			string(s.Metric["resource"]),
+			string(s.Metric["scope"]),
+			q,
+			time.Duration(seconds*float64(time.Second)),
+		)
+	}
+
+	return result, nil
+}
+
+func convertWatchListLatencyMetricsToPerfData(watchListMetrics watchListLatencyMetrics) *measurementutil.PerfData {
+	var watchListMetricsSlice []*watchListLatencyMetric
+	for _, v := range watchListMetrics {
+		watchListMetricsSlice = append(watchListMetricsSlice, v)
+	}
+	sort.Slice(watchListMetricsSlice, func(i, j int) bool {
+		return watchListMetricsSlice[i].Latency.Perc99 > watchListMetricsSlice[j].Latency.Perc99
+	})
+
+	perfData := &measurementutil.PerfData{Version: "v1"}
+	for _, watchListMetric := range watchListMetricsSlice {
+		item := measurementutil.DataItem{
+			Data: map[string]float64{
+				"Perc50": float64(watchListMetric.Latency.Perc50) / 1000000,
+				"Perc90": float64(watchListMetric.Latency.Perc90) / 1000000,
+				"Perc99": float64(watchListMetric.Latency.Perc99) / 1000000,
+			},
+			Unit: "ms",
+			Labels: map[string]string{
+				"Group":    watchListMetric.Group,
+				"Resource": watchListMetric.Resource,
+				"Scope":    watchListMetric.Scope,
+			},
+		}
+		perfData.DataItems = append(perfData.DataItems, item)
+	}
+	return perfData
+}
diff --git a/clusterloader2/pkg/measurement/common/watch_list_latency_prometheus_test.go b/clusterloader2/pkg/measurement/common/watch_list_latency_prometheus_test.go
@@ -0,0 +1,79 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package common
+
+import (
+	"fmt"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/google/go-cmp/cmp"
+
+	"k8s.io/perf-tests/clusterloader2/pkg/measurement"
+	"k8s.io/perf-tests/clusterloader2/pkg/measurement/common/executors"
+)
+
+// TestWatchListLatencyGather feeds the gatherer with series loaded from a
+// testdata file and compares the produced summary with the golden file stored
+// next to the input (same path with a ".golden" suffix).
+func TestWatchListLatencyGather(t *testing.T) {
+	scenarios := []struct {
+		name          string
+		inputFileName string
+
+		duration time.Duration
+	}{
+		{
+			name:          "smoke test: make sure the output matches the static golden file",
+			inputFileName: "sample.yaml",
+			duration:      10 * time.Minute,
+		},
+	}
+
+	for _, scenario := range scenarios {
+		t.Run(scenario.name, func(t *testing.T) {
+			inputFilePath := fmt.Sprintf("testdata/watch_list_latency_prometheus/%s", scenario.inputFileName)
+			// The golden file is read below and (optionally) regenerated at
+			// the very same path, so keep it in one place.
+			goldenFilePath := inputFilePath + ".golden"
+
+			executor, err := executors.NewPromqlExecutor(inputFilePath)
+			if err != nil {
+				t.Fatalf("failed to create PromQL executor: %v", err)
+			}
+			defer executor.Close()
+
+			emptyConfig := &measurement.Config{Params: map[string]interface{}{}}
+			target := &watchListLatencyGatherer{}
+			start := time.Unix(0, 0).UTC()
+			end := start.Add(scenario.duration)
+			output, err := target.Gather(executor, start, end, emptyConfig)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if len(output) != 1 {
+				t.Fatalf("expected only one summary, got: %d", len(output))
+			}
+
+			rawGoldenFile, err := os.ReadFile(goldenFilePath)
+			if err != nil {
+				t.Fatalf("unable to read the golden file, err: %v", err)
+			}
+			if diff := cmp.Diff(string(rawGoldenFile), output[0].SummaryContent()); diff != "" {
+				t.Errorf("unexpected output (-want +got):\n%s", diff)
+			}
+			// for simplicity, you can uncomment the following line to
+			// generate a new golden file for a failed test case.
+			// Note that it must write to the golden file, not to the input file.
+			//
+			//os.WriteFile(goldenFilePath, []byte(output[0].SummaryContent()), 0644)
+		})
+	}
+}