From 1c5692882e4a98cc39eec82da08e988c18c9e473 Mon Sep 17 00:00:00 2001 From: Bryce Palmer Date: Fri, 8 Sep 2023 12:48:47 -0400 Subject: [PATCH 1/4] add metrics to catalogd http server that can be used for calculating the Apdex Score and assess the health of the http server that is serving catalog contents to clients Signed-off-by: Bryce Palmer --- cmd/manager/main.go | 44 ++++++++++++++++++++++++------------------- go.mod | 2 +- pkg/server/metrics.go | 40 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 20 deletions(-) create mode 100644 pkg/server/metrics.go diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 1b16a6c0..0a3316d6 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -33,6 +33,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics" "github.com/spf13/pflag" @@ -42,6 +43,7 @@ import ( corecontrollers "github.com/operator-framework/catalogd/pkg/controllers/core" "github.com/operator-framework/catalogd/pkg/features" "github.com/operator-framework/catalogd/pkg/profile" + catalogdserver "github.com/operator-framework/catalogd/pkg/server" "github.com/operator-framework/catalogd/pkg/storage" //+kubebuilder:scaffold:imports @@ -124,24 +126,29 @@ func main() { os.Exit(1) } - if err := os.MkdirAll(storageDir, 0700); err != nil { - setupLog.Error(err, "unable to create storage directory for catalogs") - } - localStorage := storage.LocalDir{RootDir: storageDir} - shutdownTimeout := 30 * time.Second - catalogServer := server.Server{ - Kind: "catalogs", - Server: &http.Server{ - Addr: catalogServerAddr, - Handler: localStorage.StorageServerHandler(), - ReadTimeout: 5 * time.Second, - WriteTimeout: 10 * time.Second, - }, - ShutdownTimeout: &shutdownTimeout, - } - if err := mgr.Add(&catalogServer); err != nil { - setupLog.Error(err, "unable to start catalog server") - os.Exit(1) + var localStorage storage.Instance + if features.CatalogdFeatureGate.Enabled(features.HTTPServer) { + metrics.Registry.MustRegister(catalogdserver.RequestDurationMetric) + + if err := os.MkdirAll(storageDir, 0700); err != nil { + setupLog.Error(err, "unable to create storage directory for catalogs") + } + localStorage = storage.LocalDir{RootDir: storageDir} + shutdownTimeout := 30 * time.Second + catalogServer := server.Server{ + Kind: "catalogs", + Server: &http.Server{ + Addr: catalogServerAddr, + Handler: catalogdserver.AddMetricsToHandler(localStorage.StorageServerHandler()), + ReadTimeout: 5 * time.Second, + WriteTimeout: 10 * time.Second, + }, + ShutdownTimeout: &shutdownTimeout, + } + if err := mgr.Add(&catalogServer); err != nil { + setupLog.Error(err, "unable to start catalog server") + os.Exit(1) + } } if err = (&corecontrollers.CatalogReconciler{ @@ -170,7 +177,6 @@ func main() { os.Exit(1) } } - setupLog.Info("starting manager") if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "problem running manager") diff --git a/go.mod b/go.mod index 97a9969b..a6e9782a 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/onsi/ginkgo/v2 v2.9.7 github.com/onsi/gomega v1.27.7 github.com/operator-framework/operator-registry v1.27.1 + github.com/prometheus/client_golang v1.14.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.1 k8s.io/api v0.26.1 @@ -59,7 +60,6 @@ require ( github.com/operator-framework/api v0.17.4-0.20230223191600-0131a6301e42 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_golang v1.14.0 // indirect github.com/prometheus/client_model v0.3.0 // indirect github.com/prometheus/common v0.37.0 // indirect github.com/prometheus/procfs v0.8.0 // indirect diff --git a/pkg/server/metrics.go b/pkg/server/metrics.go new file mode 100644 index 00000000..00e5d9ab --- /dev/null +++ b/pkg/server/metrics.go @@ -0,0 +1,40 @@ +package server + +import ( + "net/http" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +const ( + RequestDurationMetricName = "catalogd_http_request_duration_seconds" +) + +// Sets up the necessary metrics for calculating the Apdex Score +// If using Grafana for visualization connected to a Prometheus data +// source that is scraping these metrics, you can create a panel that +// uses the following queries + expressions for calculating the Apdex Score where T = 0.5: +// Query A: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="0.5"}) +// Query B: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="2"}) +// Query C: sum(catalogd_http_request_duration_seconds_count) +// Expression for Apdex Score: ($A + (($B - $A) / 2)) / $C +var ( + RequestDurationMetric = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: RequestDurationMetricName, + Help: "Histogram of request duration in seconds", + // create a bucket for each 100 ms up to 1s and ensure it multiplied by 4 also exists. + // Include a 10s bucket to capture very long running requests. This allows us to easily + // calculate Apdex Scores up to a T of 1 second, but using various mathmatical formulas we + // should be able to estimate Apdex Scores up to a T of 2.5. Having a larger range of buckets + // will allow us to more easily calculate health indicators other than the Apdex Score. + Buckets: []float64{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.6, 2, 2.4, 2.8, 3.2, 3.6, 4, 10}, + }, + []string{"code"}, + ) +) + +func AddMetricsToHandler(handler http.Handler) http.Handler { + return promhttp.InstrumentHandlerDuration(RequestDurationMetric, handler) +} From 87b10f2cc593c12fbaf87657593fdeec56bd03ba Mon Sep 17 00:00:00 2001 From: Bryce Palmer Date: Fri, 8 Sep 2023 12:57:41 -0400 Subject: [PATCH 2/4] quick fixes from review comments Signed-off-by: Bryce Palmer --- cmd/manager/main.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 0a3316d6..6f28a54b 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -132,7 +132,9 @@ func main() { if err := os.MkdirAll(storageDir, 0700); err != nil { setupLog.Error(err, "unable to create storage directory for catalogs") + os.Exit(1) } + localStorage = storage.LocalDir{RootDir: storageDir} shutdownTimeout := 30 * time.Second catalogServer := server.Server{ @@ -145,6 +147,7 @@ func main() { }, ShutdownTimeout: &shutdownTimeout, } + if err := mgr.Add(&catalogServer); err != nil { setupLog.Error(err, "unable to start catalog server") os.Exit(1) @@ -177,6 +180,7 @@ func main() { os.Exit(1) } } + setupLog.Info("starting manager") if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "problem running manager") From eb9e187fb493e9de59ada7e464d9558f0041d0b1 Mon Sep 17 00:00:00 2001 From: Bryce Palmer Date: Fri, 8 Sep 2023 12:59:57 -0400 Subject: [PATCH 3/4] rename package from server --> metrics Signed-off-by: Bryce Palmer --- cmd/manager/main.go | 6 +++--- pkg/server/metrics.go | 40 ---------------------------------------- 2 files changed, 3 insertions(+), 43 deletions(-) delete mode 100644 pkg/server/metrics.go diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 6f28a54b..f4b0f2da 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -42,8 +42,8 @@ import ( "github.com/operator-framework/catalogd/internal/version" corecontrollers "github.com/operator-framework/catalogd/pkg/controllers/core" "github.com/operator-framework/catalogd/pkg/features" + catalogdmetrics "github.com/operator-framework/catalogd/pkg/metrics" "github.com/operator-framework/catalogd/pkg/profile" - catalogdserver "github.com/operator-framework/catalogd/pkg/server" "github.com/operator-framework/catalogd/pkg/storage" //+kubebuilder:scaffold:imports @@ -128,7 +128,7 @@ func main() { var localStorage storage.Instance if features.CatalogdFeatureGate.Enabled(features.HTTPServer) { - metrics.Registry.MustRegister(catalogdserver.RequestDurationMetric) + metrics.Registry.MustRegister(catalogdmetrics.RequestDurationMetric) if err := os.MkdirAll(storageDir, 0700); err != nil { setupLog.Error(err, "unable to create storage directory for catalogs") @@ -141,7 +141,7 @@ func main() { Kind: "catalogs", Server: &http.Server{ Addr: catalogServerAddr, - Handler: catalogdserver.AddMetricsToHandler(localStorage.StorageServerHandler()), + Handler: catalogdmetrics.AddMetricsToHandler(localStorage.StorageServerHandler()), ReadTimeout: 5 * time.Second, WriteTimeout: 10 * time.Second, }, diff --git a/pkg/server/metrics.go b/pkg/server/metrics.go deleted file mode 100644 index 00e5d9ab..00000000 --- a/pkg/server/metrics.go +++ /dev/null @@ -1,40 +0,0 @@ -package server - -import ( - "net/http" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" -) - -const ( - RequestDurationMetricName = "catalogd_http_request_duration_seconds" -) - -// Sets up the necessary metrics for calculating the Apdex Score -// If using Grafana for visualization connected to a Prometheus data -// source that is scraping these metrics, you can create a panel that -// uses the following queries + expressions for calculating the Apdex Score where T = 0.5: -// Query A: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="0.5"}) -// Query B: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="2"}) -// Query C: sum(catalogd_http_request_duration_seconds_count) -// Expression for Apdex Score: ($A + (($B - $A) / 2)) / $C -var ( - RequestDurationMetric = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Name: RequestDurationMetricName, - Help: "Histogram of request duration in seconds", - // create a bucket for each 100 ms up to 1s and ensure it multiplied by 4 also exists. - // Include a 10s bucket to capture very long running requests. This allows us to easily - // calculate Apdex Scores up to a T of 1 second, but using various mathmatical formulas we - // should be able to estimate Apdex Scores up to a T of 2.5. Having a larger range of buckets - // will allow us to more easily calculate health indicators other than the Apdex Score. - Buckets: []float64{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.6, 2, 2.4, 2.8, 3.2, 3.6, 4, 10}, - }, - []string{"code"}, - ) -) - -func AddMetricsToHandler(handler http.Handler) http.Handler { - return promhttp.InstrumentHandlerDuration(RequestDurationMetric, handler) -} From 484acfde07957272a11bd1c55e1c0d9f381d6664 Mon Sep 17 00:00:00 2001 From: Bryce Palmer Date: Fri, 8 Sep 2023 13:00:02 -0400 Subject: [PATCH 4/4] rename package from server --> metrics Signed-off-by: Bryce Palmer --- pkg/metrics/metrics.go | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 pkg/metrics/metrics.go diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go new file mode 100644 index 00000000..c30aed58 --- /dev/null +++ b/pkg/metrics/metrics.go @@ -0,0 +1,40 @@ +package metrics + +import ( + "net/http" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +const ( + RequestDurationMetricName = "catalogd_http_request_duration_seconds" +) + +// Sets up the necessary metrics for calculating the Apdex Score +// If using Grafana for visualization connected to a Prometheus data +// source that is scraping these metrics, you can create a panel that +// uses the following queries + expressions for calculating the Apdex Score where T = 0.5: +// Query A: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="0.5"}) +// Query B: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="2"}) +// Query C: sum(catalogd_http_request_duration_seconds_count) +// Expression for Apdex Score: ($A + (($B - $A) / 2)) / $C +var ( + RequestDurationMetric = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: RequestDurationMetricName, + Help: "Histogram of request duration in seconds", + // create a bucket for each 100 ms up to 1s and ensure it multiplied by 4 also exists. + // Include a 10s bucket to capture very long running requests. This allows us to easily + // calculate Apdex Scores up to a T of 1 second, but using various mathmatical formulas we + // should be able to estimate Apdex Scores up to a T of 2.5. Having a larger range of buckets + // will allow us to more easily calculate health indicators other than the Apdex Score. + Buckets: []float64{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.6, 2, 2.4, 2.8, 3.2, 3.6, 4, 10}, + }, + []string{"code"}, + ) +) + +func AddMetricsToHandler(handler http.Handler) http.Handler { + return promhttp.InstrumentHandlerDuration(RequestDurationMetric, handler) +}