From dbb9540452ee2c3c3c399f6e765daeabed117051 Mon Sep 17 00:00:00 2001
From: Bryce Palmer <bpalmer@redhat.com>
Date: Tue, 5 Sep 2023 12:35:59 -0400
Subject: [PATCH] remove apdex target gauge, increase histogram buckets

Signed-off-by: Bryce Palmer <bpalmer@redhat.com>
---
 cmd/manager/main.go   |  2 +-
 pkg/server/metrics.go | 30 +++++++++++-------------------
 2 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/cmd/manager/main.go b/cmd/manager/main.go
index c4bb5c44..fc1dda5c 100644
--- a/cmd/manager/main.go
+++ b/cmd/manager/main.go
@@ -126,7 +126,7 @@ func main() {
 	}
 
 	if features.CatalogdFeatureGate.Enabled(features.HTTPServer) {
-		metrics.Registry.MustRegister(server.ApdexTargetMetric, server.RequestDurationMetric)
+		metrics.Registry.MustRegister(server.RequestDurationMetric)
 		srv := server.Instance{StorageDir: storageDir}
 		mgr.AddMetricsExtraHandler("/catalogs/", server.AddMetricsToHandler(srv.CatalogServerHandler()))
 	}
diff --git a/pkg/server/metrics.go b/pkg/server/metrics.go
index 96d2779e..00e5d9ab 100644
--- a/pkg/server/metrics.go
+++ b/pkg/server/metrics.go
@@ -8,41 +8,33 @@ import (
 )
 
 const (
-	// target response time in seconds
-	ApdexTarget = 0.5
-
-	ApdexTargetMetricName     = "catalogd_http_request_apdex_target_seconds"
 	RequestDurationMetricName = "catalogd_http_request_duration_seconds"
 )
 
 // Sets up the necessary metrics for calculating the Apdex Score
 // If using Grafana for visualization connected to a Prometheus data
 // source that is scraping these metrics, you can create a panel that
-// uses the following queries + expressions for calculating the Apdex Score:
+// uses the following queries + expressions for calculating the Apdex Score where T = 0.5:
 // Query A: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="0.5"})
-// Query B: (sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="2"}) - on(code) sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="0.5"}))
+// Query B: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="2"})
 // Query C: sum(catalogd_http_request_duration_seconds_count)
-// Expression for Apdex Score: ($A - ($B / 2) / $C
+// Expression for Apdex Score: ($A + (($B - $A) / 2)) / $C
 var (
-	ApdexTargetMetric = prometheus.NewGauge(prometheus.GaugeOpts{
-		Name: ApdexTargetMetricName,
-		Help: "The apdex target in seconds",
-	})
-
 	RequestDurationMetric = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Name:    RequestDurationMetricName,
-			Help:    "Histogram of request duration in seconds",
-			Buckets: []float64{ApdexTarget, ApdexTarget * 4, ApdexTarget * 8},
+			Name: RequestDurationMetricName,
+			Help: "Histogram of request duration in seconds",
+			// Create a bucket for each 100 ms up to 1s and ensure each of those values multiplied by 4 also exists.
+			// Include a 10s bucket to capture very long running requests. This allows us to easily
+			// calculate Apdex Scores up to a T of 1 second, but using various mathematical formulas we
+			// should be able to estimate Apdex Scores up to a T of 2.5. Having a larger range of buckets
+			// will allow us to more easily calculate health indicators other than the Apdex Score.
+			Buckets: []float64{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.6, 2, 2.4, 2.8, 3.2, 3.6, 4, 10},
 		},
 		[]string{"code"},
 	)
 )
 
-func init() {
-	ApdexTargetMetric.Set(ApdexTarget)
-}
-
 func AddMetricsToHandler(handler http.Handler) http.Handler {
 	return promhttp.InstrumentHandlerDuration(RequestDurationMetric, handler)
 }