Skip to content

Commit

Permalink
Merge pull request kubernetes#123639 from liggitt/authz-metrics
Browse files Browse the repository at this point in the history
Add authorization webhook duration/count/failopen metrics
  • Loading branch information
k8s-ci-robot committed Mar 4, 2024
2 parents 320e288 + 79b344d commit 46a2137
Show file tree
Hide file tree
Showing 8 changed files with 426 additions and 23 deletions.
4 changes: 3 additions & 1 deletion pkg/kubeapiserver/authorizer/reload.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ func (r *reloadableAuthorizerResolver) newForConfig(authzConfig *authzconfig.Aut
decisionOnError,
configuredAuthorizer.Webhook.MatchConditions,
configuredAuthorizer.Name,
kubeapiserverWebhookMetrics{MatcherMetrics: cel.NewMatcherMetrics()},
kubeapiserverWebhookMetrics{WebhookMetrics: webhookmetrics.NewWebhookMetrics(), MatcherMetrics: cel.NewMatcherMetrics()},
)
if err != nil {
return nil, nil, err
Expand All @@ -169,6 +169,8 @@ func (r *reloadableAuthorizerResolver) newForConfig(authzConfig *authzconfig.Aut
// kubeapiserverWebhookMetrics is the metrics bundle constructed in newForConfig
// for a configured webhook authorizer. Request metrics are no-ops; webhook and
// matchCondition metrics are supplied by the embedded interface values.
type kubeapiserverWebhookMetrics struct {
// kube-apiserver doesn't report request metrics
webhookmetrics.NoopRequestMetrics
// kube-apiserver does report webhook metrics
webhookmetrics.WebhookMetrics
// kube-apiserver does report matchCondition metrics
cel.MatcherMetrics
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ var (
// Compile-time check that delegatingAuthorizerMetrics satisfies
// webhookmetrics.AuthorizerMetrics.
var _ webhookmetrics.AuthorizerMetrics = delegatingAuthorizerMetrics{}

// delegatingAuthorizerMetrics is the AuthorizerMetrics implementation used for
// delegating authorization.
type delegatingAuthorizerMetrics struct {
	// Webhook metrics are no-ops for now: delegating authorization reports
	// the original total/latency metrics.
	webhookmetrics.NoopWebhookMetrics
	// matchCondition metrics are no-ops for now: delegating authorization
	// doesn't configure match conditions.
	celmetrics.NoopMatcherMetrics
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,26 @@ package metrics

import (
"context"
"sync"

"k8s.io/apiserver/pkg/authorization/cel"
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)

// AuthorizerMetrics specifies a set of methods that are used to register various metrics for the webhook authorizer.
// It is composed of the RequestMetrics, WebhookMetrics, and cel.MatcherMetrics interfaces.
type AuthorizerMetrics interface {
// Request total and latency metrics
RequestMetrics
// Webhook count, latency, and fail open metrics
WebhookMetrics
// match condition metrics
cel.MatcherMetrics
}

// NoopAuthorizerMetrics combines the no-op request, webhook, and matchCondition
// metrics types, so every method promoted from them records nothing.
// NOTE(review): presumably satisfies AuthorizerMetrics; cel.NoopMatcherMetrics is defined elsewhere — confirm.
type NoopAuthorizerMetrics struct {
NoopRequestMetrics
NoopWebhookMetrics
cel.NoopMatcherMetrics
}

Expand All @@ -47,3 +53,114 @@ type NoopRequestMetrics struct{}

// RecordRequestTotal is a no-op; its arguments are ignored.
func (NoopRequestMetrics) RecordRequestTotal(context.Context, string) {}
// RecordRequestLatency is a no-op; its arguments are ignored.
func (NoopRequestMetrics) RecordRequestLatency(context.Context, string, float64) {}

// WebhookMetrics specifies the methods used to record per-webhook evaluation
// counts, round-trip durations, and fail-open occurrences for a webhook authorizer.
// In every method, name identifies the webhook and result is one of the
// string values listed on that method.
type WebhookMetrics interface {
// RecordWebhookEvaluation increments with each round-trip of a webhook authorizer.
// result is one of:
// - canceled: the call invoking the webhook request was canceled
// - timeout: the webhook request timed out
// - error: the webhook response completed and was invalid
// - success: the webhook response completed and was well-formed
RecordWebhookEvaluation(ctx context.Context, name, result string)
// RecordWebhookDuration records latency for each round-trip of a webhook authorizer.
// result is one of:
// - canceled: the call invoking the webhook request was canceled
// - timeout: the webhook request timed out
// - error: the webhook response completed and was invalid
// - success: the webhook response completed and was well-formed
RecordWebhookDuration(ctx context.Context, name, result string, duration float64)
// RecordWebhookFailOpen increments when a webhook timeout or error results in a fail open
// of a request which has not been canceled.
// result is one of:
// - timeout: the webhook request timed out
// - error: the webhook response completed and was invalid
RecordWebhookFailOpen(ctx context.Context, name, result string)
}

// NoopWebhookMetrics provides the WebhookMetrics method set with empty
// implementations, for callers that do not report webhook metrics.
type NoopWebhookMetrics struct{}

// RecordWebhookEvaluation is a no-op; its arguments are ignored.
func (NoopWebhookMetrics) RecordWebhookEvaluation(context.Context, string, string) {}

// RecordWebhookDuration is a no-op; its arguments are ignored.
func (NoopWebhookMetrics) RecordWebhookDuration(context.Context, string, string, float64) {}

// RecordWebhookFailOpen is a no-op; its arguments are ignored.
func (NoopWebhookMetrics) RecordWebhookFailOpen(context.Context, string, string) {}

// registerWebhookMetrics guards registration so it happens at most once per process.
var registerWebhookMetrics sync.Once

// RegisterWebhookMetrics registers the webhook evaluation, duration, and
// fail-open metrics with the legacy registry. It is safe to call repeatedly;
// only the first call performs the registration.
func RegisterWebhookMetrics() {
	registerWebhookMetrics.Do(func() {
		legacyregistry.MustRegister(
			webhookEvaluations,
			webhookDuration,
			webhookFailOpen,
		)
	})
}

func ResetMetricsForTest() {
webhookEvaluations.Reset()
webhookDuration.Reset()
webhookFailOpen.Reset()
}

// namespace and subsystem are set on every metric defined in this file; the
// resulting metric names carry the "apiserver_authorization_" prefix.
const (
namespace = "apiserver"
subsystem = "authorization"
)

// Metric vectors recorded by webhookMetrics. Each is ALPHA stability and is
// labeled by webhook "name" and "result".
var (
// webhookEvaluations counts round-trips to authorization webhooks
// (apiserver_authorization_webhook_evaluations_total).
webhookEvaluations = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_evaluations_total",
Help: "Round-trips to authorization webhooks.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"name", "result"},
)

// webhookDuration is a histogram of observed durations in seconds, using the
// default buckets (apiserver_authorization_webhook_duration_seconds).
webhookDuration = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_duration_seconds",
Help: "Request latency in seconds.",
Buckets: compbasemetrics.DefBuckets,
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"name", "result"},
)

// webhookFailOpen counts NoOpinion results caused by a webhook timeout or error
// (apiserver_authorization_webhook_evaluations_fail_open_total).
webhookFailOpen = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_evaluations_fail_open_total",
Help: "NoOpinion results due to webhook timeout or error.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"name", "result"},
)
)

// webhookMetrics is the WebhookMetrics implementation that records to the
// package-level metric vectors.
type webhookMetrics struct{}

// NewWebhookMetrics ensures the webhook metrics are registered (via
// RegisterWebhookMetrics) and returns a WebhookMetrics that records to them.
func NewWebhookMetrics() WebhookMetrics {
RegisterWebhookMetrics()
return webhookMetrics{}
}

// ResetWebhookMetricsForTest resets the webhook evaluation, duration, and
// fail-open metric vectors, in that order, so a test starts from a clean slate.
func ResetWebhookMetricsForTest() {
	resettables := []interface{ Reset() }{
		webhookEvaluations,
		webhookDuration,
		webhookFailOpen,
	}
	for _, vec := range resettables {
		vec.Reset()
	}
}

// RecordWebhookEvaluation increments the webhookEvaluations counter for the
// given webhook name and result.
func (webhookMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) {
	counter := webhookEvaluations.WithContext(ctx).WithLabelValues(name, result)
	counter.Inc()
}
// RecordWebhookDuration observes duration on the webhookDuration histogram for
// the given webhook name and result.
func (webhookMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) {
	observer := webhookDuration.WithContext(ctx).WithLabelValues(name, result)
	observer.Observe(duration)
}
// RecordWebhookFailOpen increments the webhookFailOpen counter for the given
// webhook name and result.
func (webhookMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) {
	counter := webhookFailOpen.WithContext(ctx).WithLabelValues(name, result)
	counter.Inc()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"context"
"strings"
"testing"

"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"
)

// TestRecordWebhookMetrics records one duration, one evaluation, and one
// fail-open event through the WebhookMetrics returned by NewWebhookMetrics and
// checks the gathered output of the listed metrics against the expected
// exposition text.
func TestRecordWebhookMetrics(t *testing.T) {
testCases := []struct {
// desc is the subtest name.
desc string
// metrics lists the metric names passed to GatherAndCompare.
metrics []string
// name and result are the label values recorded on every metric.
name string
result string
// duration is the value observed on the duration histogram.
duration float64
// want is the expected metrics text for the names in metrics.
want string
}{
{
desc: "evaluation failure total",
metrics: []string{
"apiserver_authorization_webhook_duration_seconds",
"apiserver_authorization_webhook_evaluations_total",
"apiserver_authorization_webhook_evaluations_fail_open_total",
},
name: "wh1.example.com",
result: "timeout",
duration: 1.5,
want: `
# HELP apiserver_authorization_webhook_duration_seconds [ALPHA] Request latency in seconds.
# TYPE apiserver_authorization_webhook_duration_seconds histogram
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.005"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.01"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.025"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.05"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.1"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.25"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.5"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="1"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="2.5"} 1
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="5"} 1
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="10"} 1
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="+Inf"} 1
apiserver_authorization_webhook_duration_seconds_sum{name="wh1.example.com",result="timeout"} 1.5
apiserver_authorization_webhook_duration_seconds_count{name="wh1.example.com",result="timeout"} 1
# HELP apiserver_authorization_webhook_evaluations_fail_open_total [ALPHA] NoOpinion results due to webhook timeout or error.
# TYPE apiserver_authorization_webhook_evaluations_fail_open_total counter
apiserver_authorization_webhook_evaluations_fail_open_total{name="wh1.example.com",result="timeout"} 1
# HELP apiserver_authorization_webhook_evaluations_total [ALPHA] Round-trips to authorization webhooks.
# TYPE apiserver_authorization_webhook_evaluations_total counter
apiserver_authorization_webhook_evaluations_total{name="wh1.example.com",result="timeout"} 1
`,
},
}

for _, tt := range testCases {
t.Run(tt.desc, func(t *testing.T) {
// Reset first so values recorded earlier in the process do not affect the expected counts.
ResetWebhookMetricsForTest()
// NewWebhookMetrics also registers the metrics with the legacy registry.
m := NewWebhookMetrics()
m.RecordWebhookDuration(context.Background(), tt.name, tt.result, tt.duration)
m.RecordWebhookEvaluation(context.Background(), tt.name, tt.result)
m.RecordWebhookFailOpen(context.Background(), tt.name, tt.result)
// Compare what the legacy registry's default gatherer reports for tt.metrics with tt.want.
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.want), tt.metrics...); err != nil {
t.Fatal(err)
}
})
}
}
Loading

0 comments on commit 46a2137

Please sign in to comment.