From 1c5692882e4a98cc39eec82da08e988c18c9e473 Mon Sep 17 00:00:00 2001 From: Bryce Palmer Date: Fri, 8 Sep 2023 12:48:47 -0400 Subject: [PATCH] add metrics to catalogd http server that can be used for calculating the Apdex Score and assess the health of the http server that is serving catalog contents to clients Signed-off-by: Bryce Palmer --- cmd/manager/main.go | 44 ++++++++++++++++++++++++------------------- go.mod | 2 +- pkg/server/metrics.go | 40 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 20 deletions(-) create mode 100644 pkg/server/metrics.go diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 1b16a6c0..0a3316d6 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -33,6 +33,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics" "github.com/spf13/pflag" @@ -42,6 +43,7 @@ import ( corecontrollers "github.com/operator-framework/catalogd/pkg/controllers/core" "github.com/operator-framework/catalogd/pkg/features" "github.com/operator-framework/catalogd/pkg/profile" + catalogdserver "github.com/operator-framework/catalogd/pkg/server" "github.com/operator-framework/catalogd/pkg/storage" //+kubebuilder:scaffold:imports @@ -124,24 +126,29 @@ func main() { os.Exit(1) } - if err := os.MkdirAll(storageDir, 0700); err != nil { - setupLog.Error(err, "unable to create storage directory for catalogs") - } - localStorage := storage.LocalDir{RootDir: storageDir} - shutdownTimeout := 30 * time.Second - catalogServer := server.Server{ - Kind: "catalogs", - Server: &http.Server{ - Addr: catalogServerAddr, - Handler: localStorage.StorageServerHandler(), - ReadTimeout: 5 * time.Second, - WriteTimeout: 10 * time.Second, - }, - ShutdownTimeout: &shutdownTimeout, - } - if err := mgr.Add(&catalogServer); err != nil { - setupLog.Error(err, "unable to start catalog server") - os.Exit(1) + var localStorage storage.Instance + if features.CatalogdFeatureGate.Enabled(features.HTTPServer) { + metrics.Registry.MustRegister(catalogdserver.RequestDurationMetric) + + if err := os.MkdirAll(storageDir, 0700); err != nil { + setupLog.Error(err, "unable to create storage directory for catalogs") + } + localStorage = storage.LocalDir{RootDir: storageDir} + shutdownTimeout := 30 * time.Second + catalogServer := server.Server{ + Kind: "catalogs", + Server: &http.Server{ + Addr: catalogServerAddr, + Handler: catalogdserver.AddMetricsToHandler(localStorage.StorageServerHandler()), + ReadTimeout: 5 * time.Second, + WriteTimeout: 10 * time.Second, + }, + ShutdownTimeout: &shutdownTimeout, + } + if err := mgr.Add(&catalogServer); err != nil { + setupLog.Error(err, "unable to start catalog server") + os.Exit(1) + } } if err = (&corecontrollers.CatalogReconciler{ @@ -170,7 +177,6 @@ func main() { os.Exit(1) } } - setupLog.Info("starting manager") if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "problem running manager") diff --git a/go.mod b/go.mod index 97a9969b..a6e9782a 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/operator-framework/operator-registry v1.27.1 + github.com/prometheus/client_golang v1.14.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.1 k8s.io/api v0.26.1 @@ -59,7 +60,6 @@ require ( github.com/operator-framework/api v0.17.4-0.20230223191600-0131a6301e42 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_golang v1.14.0 // indirect github.com/prometheus/client_model v0.3.0 // indirect github.com/prometheus/common v0.37.0 // indirect github.com/prometheus/procfs v0.8.0 // indirect diff --git a/pkg/server/metrics.go b/pkg/server/metrics.go new file mode 100644 index 00000000..00e5d9ab --- /dev/null +++ b/pkg/server/metrics.go @@ -0,0 +1,40 @@ +package server + +import ( + "net/http" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +const ( + RequestDurationMetricName = "catalogd_http_request_duration_seconds" +) + +// Sets up the necessary metrics for calculating the Apdex Score +// If using Grafana for visualization connected to a Prometheus data +// source that is scraping these metrics, you can create a panel that +// uses the following queries + expressions for calculating the Apdex Score where T = 0.5: +// Query A: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="0.5"}) +// Query B: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="2"}) +// Query C: sum(catalogd_http_request_duration_seconds_count) +// Expression for Apdex Score: ($A + (($B - $A) / 2)) / $C +var ( + RequestDurationMetric = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: RequestDurationMetricName, + Help: "Histogram of request duration in seconds", + // create a bucket for each 100 ms up to 1s and ensure it multiplied by 4 also exists. + // Include a 10s bucket to capture very long running requests. This allows us to easily + // calculate Apdex Scores up to a T of 1 second, but using various mathmatical formulas we + // should be able to estimate Apdex Scores up to a T of 2.5. Having a larger range of buckets + // will allow us to more easily calculate health indicators other than the Apdex Score. + Buckets: []float64{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.6, 2, 2.4, 2.8, 3.2, 3.6, 4, 10}, + }, + []string{"code"}, + ) +) + +func AddMetricsToHandler(handler http.Handler) http.Handler { + return promhttp.InstrumentHandlerDuration(RequestDurationMetric, handler) +}