From a1663ec2b64f560330ce57f78a8018357d400656 Mon Sep 17 00:00:00 2001 From: Bryce Palmer Date: Fri, 8 Sep 2023 13:18:32 -0400 Subject: [PATCH] Add metrics to the catalog server (#156) * add metrics to catalogd http server that can be used for calculating the Apdex Score and assess the health of the http server that is serving catalog contents to clients Signed-off-by: Bryce Palmer * quick fixes from review comments Signed-off-by: Bryce Palmer * rename package from server --> metrics Signed-off-by: Bryce Palmer * rename package from server --> metrics Signed-off-by: Bryce Palmer --------- Signed-off-by: Bryce Palmer --- cmd/manager/main.go | 46 +++++++++++++++++++++++++----------------- go.mod | 2 +- pkg/metrics/metrics.go | 40 ++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 19 deletions(-) create mode 100644 pkg/metrics/metrics.go diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 1b16a6c0..f4b0f2da 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -33,6 +33,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics" "github.com/spf13/pflag" @@ -41,6 +42,7 @@ import ( "github.com/operator-framework/catalogd/internal/version" corecontrollers "github.com/operator-framework/catalogd/pkg/controllers/core" "github.com/operator-framework/catalogd/pkg/features" + catalogdmetrics "github.com/operator-framework/catalogd/pkg/metrics" "github.com/operator-framework/catalogd/pkg/profile" "github.com/operator-framework/catalogd/pkg/storage" @@ -124,24 +126,32 @@ func main() { os.Exit(1) } - if err := os.MkdirAll(storageDir, 0700); err != nil { - setupLog.Error(err, "unable to create storage directory for catalogs") - } - localStorage := storage.LocalDir{RootDir: storageDir} - shutdownTimeout := 30 * time.Second - catalogServer := server.Server{ - Kind: "catalogs", - Server: &http.Server{ - Addr: catalogServerAddr, - Handler: localStorage.StorageServerHandler(), - ReadTimeout: 5 * time.Second, - WriteTimeout: 10 * time.Second, - }, - ShutdownTimeout: &shutdownTimeout, - } - if err := mgr.Add(&catalogServer); err != nil { - setupLog.Error(err, "unable to start catalog server") - os.Exit(1) + var localStorage storage.Instance + if features.CatalogdFeatureGate.Enabled(features.HTTPServer) { + metrics.Registry.MustRegister(catalogdmetrics.RequestDurationMetric) + + if err := os.MkdirAll(storageDir, 0700); err != nil { + setupLog.Error(err, "unable to create storage directory for catalogs") + os.Exit(1) + } + + localStorage = storage.LocalDir{RootDir: storageDir} + shutdownTimeout := 30 * time.Second + catalogServer := server.Server{ + Kind: "catalogs", + Server: &http.Server{ + Addr: catalogServerAddr, + Handler: catalogdmetrics.AddMetricsToHandler(localStorage.StorageServerHandler()), + ReadTimeout: 5 * time.Second, + WriteTimeout: 10 * time.Second, + }, + ShutdownTimeout: &shutdownTimeout, + } + + if err := mgr.Add(&catalogServer); err != nil { + setupLog.Error(err, "unable to start catalog server") + os.Exit(1) + } } if err = (&corecontrollers.CatalogReconciler{ diff --git a/go.mod b/go.mod index 97a9969b..a6e9782a 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/operator-framework/operator-registry v1.27.1 + github.com/prometheus/client_golang v1.14.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.1 k8s.io/api v0.26.1 @@ -59,7 +60,6 @@ require ( github.com/operator-framework/api v0.17.4-0.20230223191600-0131a6301e42 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_golang v1.14.0 // indirect github.com/prometheus/client_model v0.3.0 // indirect github.com/prometheus/common v0.37.0 // indirect github.com/prometheus/procfs v0.8.0 // indirect diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go new file mode 100644 index 00000000..c30aed58 --- /dev/null +++ b/pkg/metrics/metrics.go @@ -0,0 +1,40 @@ +package metrics + +import ( + "net/http" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +const ( + RequestDurationMetricName = "catalogd_http_request_duration_seconds" +) + +// Sets up the necessary metrics for calculating the Apdex Score +// If using Grafana for visualization connected to a Prometheus data +// source that is scraping these metrics, you can create a panel that +// uses the following queries + expressions for calculating the Apdex Score where T = 0.5: +// Query A: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="0.5"}) +// Query B: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="2"}) +// Query C: sum(catalogd_http_request_duration_seconds_count) +// Expression for Apdex Score: ($A + (($B - $A) / 2)) / $C +var ( + RequestDurationMetric = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: RequestDurationMetricName, + Help: "Histogram of request duration in seconds", + // create a bucket for each 100 ms up to 1s and ensure it multiplied by 4 also exists. + // Include a 10s bucket to capture very long running requests. This allows us to easily + // calculate Apdex Scores up to a T of 1 second, but using various mathmatical formulas we + // should be able to estimate Apdex Scores up to a T of 2.5. Having a larger range of buckets + // will allow us to more easily calculate health indicators other than the Apdex Score. + Buckets: []float64{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.6, 2, 2.4, 2.8, 3.2, 3.6, 4, 10}, + }, + []string{"code"}, + ) +) + +func AddMetricsToHandler(handler http.Handler) http.Handler { + return promhttp.InstrumentHandlerDuration(RequestDurationMetric, handler) +}