Skip to content

Commit

Permalink
measurement: introduce WatchListLatencyPrometheus measurement
Browse files Browse the repository at this point in the history
  • Loading branch information
p0lyn0mial committed Sep 11, 2023
1 parent 1f0a1bd commit b9e3f4f
Show file tree
Hide file tree
Showing 4 changed files with 301 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Test input: time series sampled at the interval below, each carrying a
# histogram upper-bound label (le). All series share the same group, resource,
# scope and version labels.
# NOTE(review): the first three groups are textually identical; confirm how the
# loader treats repeated series before relying on them being summed.
interval: 1m
input_series:
# Group 1: le bounds 1 / 5 / +Inf.
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="1"}
values: 0 0 0 0 0 0 1 991 991 991 991
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="5"}
values: 0 0 0 0 0 0 1 1001 1001 1001 1001
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="+Inf"}
values: 0 0 0 0 0 0 1 1001 1001 1001 1001

# Group 2: same series and values as group 1.
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="1"}
values: 0 0 0 0 0 0 1 991 991 991 991
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="5"}
values: 0 0 0 0 0 0 1 1001 1001 1001 1001
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="+Inf"}
values: 0 0 0 0 0 0 1 1001 1001 1001 1001

# Group 3: same series and values as group 1.
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="1"}
values: 0 0 0 0 0 0 1 991 991 991 991
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="5"}
values: 0 0 0 0 0 0 1 1001 1001 1001 1001
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="+Inf"}
values: 0 0 0 0 0 0 1 1001 1001 1001 1001

# Group 4: le bounds 1 / 1.5 / +Inf. The le="1" series lists 12 values while
# the other two list 11.
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="1"}
values: 0 0 0 0 0 0 1 701 701 701 701 701
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="1.5"}
values: 0 0 0 0 0 0 1 1001 1001 1001 1001
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="+Inf"}
values: 0 0 0 0 0 0 1 1001 1001 1001 1001

# Group 5: le bounds 5 / 10 / +Inf. The le="5" series lists 12 values while
# the other two list 11.
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="5"}
values: 0 0 0 0 0 0 1 901 901 901 901 901
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="10"}
values: 0 0 0 0 0 0 1 1001 1001 1001 1001
- series: apiserver_watch_cache_watch_list_duration_seconds{component="apiserver",group="storage.k8s.io",resource="pod",scope="namespace",version="v1",le="+Inf"}
values: 0 0 0 0 0 0 1 1001 1001 1001 1001
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"version": "v1",
"dataItems": [
{
"data": {
"Perc50": 713.980028,
"Perc90": 1333.166666,
"Perc99": 1483.316666
},
"unit": "ms",
"labels": {
"Group": "storage.k8s.io",
"Resource": "pod",
"Scope": "namespace"
}
}
]
}
168 changes: 168 additions & 0 deletions clusterloader2/pkg/measurement/common/watch_list_latency_prometheus.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common

import (
"fmt"
"sort"
"strconv"
"time"

"github.com/prometheus/common/model"

"k8s.io/klog/v2"
"k8s.io/perf-tests/clusterloader2/pkg/measurement"
measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util"
"k8s.io/perf-tests/clusterloader2/pkg/util"
)

const (
// watchListLatencyPrometheusMeasurementName is the name this measurement is
// registered under; it is also the default name of the summary it produces.
watchListLatencyPrometheusMeasurementName = "WatchListLatencyPrometheus"

// watchListLatencyQuery is a fmt template for the PromQL query. Its placeholders
// must be replaced with (1) the quantile (%.2f) and (2) the query window size (%v).
// The rate is summed by group, resource, scope and le before the quantile is
// computed, so one result is produced per (group, resource, scope) combination.
// NOTE(review): Prometheus normally exposes histogram buckets under a
// "_bucket"-suffixed metric name; this query selects the bare name. Confirm
// against the metric actually exported by the apiserver.
watchListLatencyQuery = "histogram_quantile(%.2f, sum(rate(apiserver_watch_cache_watch_list_duration_seconds{}[%v])) by (group, resource, scope, le))"
)

// init registers the WatchListLatencyPrometheus measurement. A registration
// failure is reported through klog.Fatalf.
func init() {
	factory := func() measurement.Measurement {
		return CreatePrometheusMeasurement(&watchListLatencyGatherer{})
	}
	err := measurement.Register(watchListLatencyPrometheusMeasurementName, factory)
	if err != nil {
		klog.Fatalf("Cannot register %s: %v", watchListLatencyPrometheusMeasurementName, err)
	}
}

// watchListLatencyMetric holds the latency quantiles collected for a single
// (group, resource, scope) combination.
type watchListLatencyMetric struct {
// Group, Resource and Scope identify the entry; they are filled from the
// Prometheus sample labels "group", "resource" and "scope".
Group string `json:"group"`
Resource string `json:"resource"`
Scope string `json:"scope"`
// Latency accumulates the quantiles set through SetLatency.
Latency measurementutil.LatencyMetric `json:"latency"`
}

// watchListLatencyMetrics indexes watchListLatencyMetric entries by a
// "group|resource|scope" key.
type watchListLatencyMetrics map[string]*watchListLatencyMetric

// SetLatency records latency as the given quantile of the entry identified by
// group, resource and scope. The entry is created the first time the
// combination is seen.
func (m watchListLatencyMetrics) SetLatency(group, resource, scope string, quantile float64, latency time.Duration) {
	id := group + "|" + resource + "|" + scope
	if _, found := m[id]; !found {
		m[id] = &watchListLatencyMetric{Group: group, Resource: resource, Scope: scope}
	}
	m[id].Latency.SetQuantile(quantile, latency)
}

// watchListLatencyGatherer is the stateless gatherer behind the
// WatchListLatencyPrometheus measurement; init wraps it with
// CreatePrometheusMeasurement.
type watchListLatencyGatherer struct{}

// Gather queries the watch-list latency quantiles for the window between
// startTime and endTime through executor and returns them as a single JSON
// summary. The summary name is taken from the "summaryName" parameter of
// config and defaults to the measurement name.
func (m *watchListLatencyGatherer) Gather(executor QueryExecutor, startTime, endTime time.Time, config *measurement.Config) ([]measurement.Summary, error) {
	samples, err := gatherWatchListLatencyPrometheusSamples(executor, startTime, endTime)
	if err != nil {
		return nil, err
	}

	metrics, err := convertWatchListPrometheusSamplesToWatchListLatencyMetrics(samples)
	if err != nil {
		return nil, err
	}

	perfData := convertWatchListLatencyMetricsToPerfData(metrics)
	content, err := util.PrettyPrintJSON(perfData)
	if err != nil {
		return nil, err
	}

	name, err := util.GetStringOrDefault(config.Params, "summaryName", m.String())
	if err != nil {
		return nil, err
	}

	return []measurement.Summary{measurement.CreateSummary(name, "json", content)}, nil
}

// Configure is a no-op: the gatherer takes no configuration.
func (m *watchListLatencyGatherer) Configure(_ *measurement.Config) error {
	return nil
}
// IsEnabled always reports true, regardless of the supplied config.
func (m *watchListLatencyGatherer) IsEnabled(_ *measurement.Config) bool {
	return true
}
// String returns the name of the measurement.
func (m *watchListLatencyGatherer) String() string {
	return watchListLatencyPrometheusMeasurementName
}

// gatherWatchListLatencyPrometheusSamples runs watchListLatencyQuery once per
// quantile, evaluated at endTime over a window spanning startTime..endTime.
// Every returned sample is tagged with a "quantile" label (formatted with two
// decimals) so the quantile can be recovered later. The first query error
// aborts the collection.
func gatherWatchListLatencyPrometheusSamples(executor QueryExecutor, startTime, endTime time.Time) ([]*model.Sample, error) {
	// Results end up in a LatencyMetric, so only the 0.5, 0.9 and 0.99
	// quantiles are supported.
	quantiles := [...]float64{0.5, 0.9, 0.99}
	window := measurementutil.ToPrometheusTime(endTime.Sub(startTime))

	var collected []*model.Sample
	for _, quantile := range quantiles {
		result, err := executor.Query(fmt.Sprintf(watchListLatencyQuery, quantile, window), endTime)
		if err != nil {
			return nil, err
		}
		for i := range result {
			result[i].Metric["quantile"] = model.LabelValue(fmt.Sprintf("%.2f", quantile))
		}
		collected = append(collected, result...)
	}
	return collected, nil
}

// convertWatchListPrometheusSamplesToWatchListLatencyMetrics groups the given
// samples by their "group", "resource" and "scope" labels and stores each
// sample value, interpreted as seconds, under the quantile named by the
// sample's "quantile" label.
//
// Samples whose value is NaN are skipped: histogram_quantile yields NaN when
// there were no observations in the window, and converting NaN to a
// time.Duration gives an implementation-dependent result.
//
// An error is returned when a "quantile" label cannot be parsed as a float.
func convertWatchListPrometheusSamplesToWatchListLatencyMetrics(latencySamples []*model.Sample) (watchListLatencyMetrics, error) {
	latencyMetrics := make(watchListLatencyMetrics)

	for _, sample := range latencySamples {
		rawQuantile := string(sample.Metric["quantile"])
		quantile, err := strconv.ParseFloat(rawQuantile, 64)
		if err != nil {
			return nil, fmt.Errorf("parsing quantile label %q: %w", rawQuantile, err)
		}

		seconds := float64(sample.Value)
		// NaN is the only floating-point value that is not equal to itself.
		if seconds != seconds {
			continue
		}

		latencyMetrics.SetLatency(
			string(sample.Metric["group"]),
			string(sample.Metric["resource"]),
			string(sample.Metric["scope"]),
			quantile,
			time.Duration(seconds*float64(time.Second)),
		)
	}

	return latencyMetrics, nil
}

// convertWatchListLatencyMetricsToPerfData converts the collected metrics into
// PerfData (version "v1") with one data item per entry. Latencies are reported
// in milliseconds.
//
// Items are ordered by descending 99th percentile. Ties are broken by group,
// resource and scope so the output does not depend on map iteration order.
func convertWatchListLatencyMetricsToPerfData(watchListMetrics watchListLatencyMetrics) *measurementutil.PerfData {
	entries := make([]*watchListLatencyMetric, 0, len(watchListMetrics))
	for _, entry := range watchListMetrics {
		entries = append(entries, entry)
	}
	sort.Slice(entries, func(i, j int) bool {
		a, b := entries[i], entries[j]
		if a.Latency.Perc99 != b.Latency.Perc99 {
			return a.Latency.Perc99 > b.Latency.Perc99
		}
		// sort.Slice is not stable and the entries come from a map, so a
		// total order is needed to keep the output deterministic.
		if a.Group != b.Group {
			return a.Group < b.Group
		}
		if a.Resource != b.Resource {
			return a.Resource < b.Resource
		}
		return a.Scope < b.Scope
	})

	toMillis := func(d time.Duration) float64 {
		return float64(d) / float64(time.Millisecond)
	}

	perfData := &measurementutil.PerfData{Version: "v1"}
	for _, entry := range entries {
		item := measurementutil.DataItem{
			Data: map[string]float64{
				"Perc50": toMillis(entry.Latency.Perc50),
				"Perc90": toMillis(entry.Latency.Perc90),
				"Perc99": toMillis(entry.Latency.Perc99),
			},
			Unit: "ms",
			Labels: map[string]string{
				"Group":    entry.Group,
				"Resource": entry.Resource,
				"Scope":    entry.Scope,
			},
		}
		perfData.DataItems = append(perfData.DataItems, item)
	}
	return perfData
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common

import (
"fmt"
"os"
"testing"
"time"

"github.com/google/go-cmp/cmp"

"k8s.io/perf-tests/clusterloader2/pkg/measurement"
"k8s.io/perf-tests/clusterloader2/pkg/measurement/common/executors"
)

// TestWatchListLatencyGather runs the gatherer against series loaded from a
// test data file and compares the produced summary with the golden file stored
// next to the input (same path with a ".golden" suffix).
func TestWatchListLatencyGather(t *testing.T) {
	scenarios := []struct {
		name          string
		inputFileName string

		duration time.Duration
	}{
		{
			name:          "smoke test: make sure the output matches the static golden file",
			inputFileName: "sample.yaml",
			duration:      10 * time.Minute,
		},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			inputFilePath := fmt.Sprintf("testdata/watch_list_latency_prometheus/%s", scenario.inputFileName)
			goldenFilePath := inputFilePath + ".golden"

			executor, err := executors.NewPromqlExecutor(inputFilePath)
			if err != nil {
				t.Fatalf("failed to create PromQL executor: %v", err)
			}
			defer executor.Close()

			emptyConfig := &measurement.Config{Params: map[string]interface{}{}}
			target := &watchListLatencyGatherer{}
			start := time.Unix(0, 0).UTC()
			end := start.Add(scenario.duration)
			output, err := target.Gather(executor, start, end, emptyConfig)
			if err != nil {
				t.Fatal(err)
			}
			if len(output) != 1 {
				t.Fatalf("expected only one summary, got: %d", len(output))
			}

			rawGoldenFile, err := os.ReadFile(goldenFilePath)
			if err != nil {
				t.Fatalf("unable to read the golden file, err: %v", err)
			}
			if diff := cmp.Diff(string(rawGoldenFile), output[0].SummaryContent()); diff != "" {
				t.Errorf("unexpected output (-want +got):\n%s", diff)
			}
			// For simplicity, you can uncomment the following line to
			// generate a new golden file for a failed test case. It must
			// write to the golden file path, not to the input file.
			//
			// os.WriteFile(goldenFilePath, []byte(output[0].SummaryContent()), 0644)
		})
	}
}

0 comments on commit b9e3f4f

Please sign in to comment.