diff --git a/build/grafana/dashboard-gameservers.yaml b/build/grafana/dashboard-gameservers.yaml index 2c1172678c..925c688e6e 100644 --- a/build/grafana/dashboard-gameservers.yaml +++ b/build/grafana/dashboard-gameservers.yaml @@ -401,42 +401,42 @@ data: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(agones_gameservers_node_count_bucket[5m])) by (le))", + "expr": "histogram_quantile(1, sum(rate(agones_gameservers_node_count_bucket[1m])) by (le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "max", "refId": "F" }, { - "expr": "histogram_quantile(0.99, sum(rate(agones_gameservers_node_count_bucket[5m])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(agones_gameservers_node_count_bucket[1m])) by (le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "99th", "refId": "A" }, { - "expr": "histogram_quantile(0.90, sum(rate(agones_gameservers_node_count_bucket[5m])) by (le))", + "expr": "histogram_quantile(0.90, sum(rate(agones_gameservers_node_count_bucket[1m])) by (le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "90th", "refId": "B" }, { - "expr": "histogram_quantile(0.50, sum(rate(agones_gameservers_node_count_bucket[5m])) by (le))", + "expr": "histogram_quantile(0.50, sum(rate(agones_gameservers_node_count_bucket[1m])) by (le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "50th", "refId": "C" }, { - "expr": "histogram_quantile(0, sum(rate(agones_gameservers_node_count_bucket[5m])) by (le))", + "expr": "histogram_quantile(0, sum(rate(agones_gameservers_node_count_bucket[1m])) by (le))", "format": "time_series", "intervalFactor": 1, "legendFormat": "min", "refId": "E" }, { - "expr": " agones_gameservers_node_count_sum /\n agones_gameservers_node_count_count", + "expr": "avg(delta(agones_gameservers_node_count_sum[1m]) / delta(agones_gameservers_node_count_count[1m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "avg", diff --git a/pkg/metrics/controller.go b/pkg/metrics/controller.go index ebb76fad7f..c8f12a9bb7 100644 --- a/pkg/metrics/controller.go +++ b/pkg/metrics/controller.go @@ -17,9 +17,11 @@ package metrics import ( "context" "strconv" + "strings" "sync" "time" + corev1 "k8s.io/api/core/v1" v1 "k8s.io/client-go/listers/core/v1" stablev1alpha1 "agones.dev/agones/pkg/apis/stable/v1alpha1" @@ -371,6 +373,8 @@ func (c *Controller) collectNodeCounts() { c.logger.WithError(err).Warn("failed listing gameservers") return } + + nodes = removeSystemNodes(nodes) recordWithTags(context.Background(), []tag.Mutator{tag.Insert(keyEmpty, "true")}, nodesCountStats.M(int64(len(nodes)-len(gsPerNodes)))) recordWithTags(context.Background(), []tag.Mutator{tag.Insert(keyEmpty, "false")}, @@ -381,3 +385,26 @@ func (c *Controller) collectNodeCounts() { } } + +func removeSystemNodes(nodes []*corev1.Node) []*corev1.Node { + var result []*corev1.Node + + for _, n := range nodes { + if !isSystemNode(n) { + result = append(result, n) + } + } + + return result +} + +// isSystemNode determines if a node is a system node, by checking if it has any taints starting with "stable.agones.dev/" +func isSystemNode(n *corev1.Node) bool { + for _, t := range n.Spec.Taints { + if strings.HasPrefix(t.Key, "stable.agones.dev/") { + return true + } + } + + return false +} diff --git a/pkg/metrics/controller_metrics.go b/pkg/metrics/controller_metrics.go index b2de06320b..9df2c3055e 100644 --- a/pkg/metrics/controller_metrics.go +++ b/pkg/metrics/controller_metrics.go @@ -124,7 +124,7 @@ var ( Name: "gameservers_node_count", Measure: gsPerNodesCountStats, Description: "The count of gameservers per node in the cluster", - Aggregation: view.Distribution(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 40, 50, 60, 70, 80, 90, 100, 110, 120), + Aggregation: view.Distribution(0.00001, 1.00001, 2.00001, 3.00001, 4.00001, 5.00001, 6.00001, 7.00001, 8.00001, 9.00001, 10.00001, 11.00001, 12.00001, 13.00001, 14.00001, 15.00001, 16.00001, 32.00001, 40.00001, 50.00001, 60.00001, 70.00001, 80.00001, 90.00001, 100.00001, 110.00001, 120.00001), }, } ) diff --git a/pkg/metrics/controller_test.go b/pkg/metrics/controller_test.go index b47cc1c22f..4a60cc73f5 100644 --- a/pkg/metrics/controller_test.go +++ b/pkg/metrics/controller_test.go @@ -243,6 +243,7 @@ func TestControllerGameServersNodeState(t *testing.T) { c.collect() report() - assert.Nil(t, testutil.GatherAndCompare(registry, strings.NewReader(nodeCountExpected), "agones_nodes_count", "agones_gameservers_node_count")) - + if err := testutil.GatherAndCompare(registry, strings.NewReader(nodeCountExpected), "agones_nodes_count", "agones_gameservers_node_count"); err != nil { + t.Fatal(err) + } } diff --git a/pkg/metrics/util_test.go b/pkg/metrics/util_test.go index 4ce46241be..391b752de0 100644 --- a/pkg/metrics/util_test.go +++ b/pkg/metrics/util_test.go @@ -290,32 +290,33 @@ agones_fleet_autoscalers_limited{fleet_name="deleted-fleet",name="deleted"} 0 var nodeCountExpected = `# HELP agones_gameservers_node_count The count of gameservers per node in the cluster # TYPE agones_gameservers_node_count histogram -agones_gameservers_node_count_bucket{le="1"} 1 -agones_gameservers_node_count_bucket{le="2"} 2 -agones_gameservers_node_count_bucket{le="3"} 3 -agones_gameservers_node_count_bucket{le="4"} 3 -agones_gameservers_node_count_bucket{le="5"} 3 -agones_gameservers_node_count_bucket{le="6"} 3 -agones_gameservers_node_count_bucket{le="7"} 3 -agones_gameservers_node_count_bucket{le="8"} 3 -agones_gameservers_node_count_bucket{le="9"} 3 -agones_gameservers_node_count_bucket{le="10"} 3 -agones_gameservers_node_count_bucket{le="11"} 3 -agones_gameservers_node_count_bucket{le="12"} 3 -agones_gameservers_node_count_bucket{le="13"} 3 -agones_gameservers_node_count_bucket{le="14"} 3 -agones_gameservers_node_count_bucket{le="15"} 3 -agones_gameservers_node_count_bucket{le="16"} 3 -agones_gameservers_node_count_bucket{le="32"} 3 -agones_gameservers_node_count_bucket{le="40"} 3 -agones_gameservers_node_count_bucket{le="50"} 3 -agones_gameservers_node_count_bucket{le="60"} 3 -agones_gameservers_node_count_bucket{le="70"} 3 -agones_gameservers_node_count_bucket{le="80"} 3 -agones_gameservers_node_count_bucket{le="90"} 3 -agones_gameservers_node_count_bucket{le="100"} 3 -agones_gameservers_node_count_bucket{le="110"} 3 -agones_gameservers_node_count_bucket{le="120"} 3 +agones_gameservers_node_count_bucket{le="1e-05"} 1 +agones_gameservers_node_count_bucket{le="1.00001"} 2 +agones_gameservers_node_count_bucket{le="2.00001"} 3 +agones_gameservers_node_count_bucket{le="3.00001"} 3 +agones_gameservers_node_count_bucket{le="4.00001"} 3 +agones_gameservers_node_count_bucket{le="5.00001"} 3 +agones_gameservers_node_count_bucket{le="6.00001"} 3 +agones_gameservers_node_count_bucket{le="7.00001"} 3 +agones_gameservers_node_count_bucket{le="8.00001"} 3 +agones_gameservers_node_count_bucket{le="9.00001"} 3 +agones_gameservers_node_count_bucket{le="10.00001"} 3 +agones_gameservers_node_count_bucket{le="11.00001"} 3 +agones_gameservers_node_count_bucket{le="12.00001"} 3 +agones_gameservers_node_count_bucket{le="13.00001"} 3 +agones_gameservers_node_count_bucket{le="14.00001"} 3 +agones_gameservers_node_count_bucket{le="15.00001"} 3 +agones_gameservers_node_count_bucket{le="16.00001"} 3 +agones_gameservers_node_count_bucket{le="32.00001"} 3 +agones_gameservers_node_count_bucket{le="40.00001"} 3 +agones_gameservers_node_count_bucket{le="50.00001"} 3 +agones_gameservers_node_count_bucket{le="60.00001"} 3 +agones_gameservers_node_count_bucket{le="70.00001"} 3 +agones_gameservers_node_count_bucket{le="80.00001"} 3 +agones_gameservers_node_count_bucket{le="90.00001"} 3 +agones_gameservers_node_count_bucket{le="100.00001"} 3 +agones_gameservers_node_count_bucket{le="110.00001"} 3 +agones_gameservers_node_count_bucket{le="120.00001"} 3 agones_gameservers_node_count_bucket{le="+Inf"} 3 agones_gameservers_node_count_sum 3 agones_gameservers_node_count_count 3