From 9bd1fe734c982ab01439ff4c3541c00e62c58714 Mon Sep 17 00:00:00 2001 From: Alexey Date: Sat, 30 May 2020 00:56:42 +0300 Subject: [PATCH] Metrics: add namespace to distinguish fleets with the same name (#1585) * Added namespace to recordFleetReplicas Co-authored-by: Alexander Apalikov Co-authored-by: Mark Mandel --- pkg/metrics/controller.go | 18 ++++--- pkg/metrics/controller_metrics.go | 18 +++---- pkg/metrics/controller_test.go | 81 +++++++++++++++---------------- pkg/metrics/gameservers_count.go | 35 +++++++++---- pkg/metrics/util.go | 1 + 5 files changed, 85 insertions(+), 68 deletions(-) diff --git a/pkg/metrics/controller.go b/pkg/metrics/controller.go index 7f0358cf9d..904b82b4d8 100644 --- a/pkg/metrics/controller.go +++ b/pkg/metrics/controller.go @@ -41,6 +41,8 @@ import ( "k8s.io/client-go/tools/cache" ) +const noneValue = "none" + var ( // MetricResyncPeriod is the interval to re-synchronize metrics based on indexed cache. MetricResyncPeriod = time.Second * 15 @@ -140,7 +142,7 @@ func (c *Controller) recordFleetAutoScalerChanges(old, next interface{}) { } ctx, _ := tag.New(context.Background(), tag.Upsert(keyName, fas.Name), - tag.Upsert(keyFleetName, fas.Spec.FleetName)) + tag.Upsert(keyFleetName, fas.Spec.FleetName), tag.Upsert(keyNamespace, fas.Namespace)) ableToScale := 0 limited := 0 @@ -189,7 +191,7 @@ func (c *Controller) recordFleetAutoScalerDeletion(obj interface{}) { return } ctx, _ := tag.New(context.Background(), tag.Upsert(keyName, fas.Name), - tag.Upsert(keyFleetName, fas.Spec.FleetName)) + tag.Upsert(keyFleetName, fas.Spec.FleetName), tag.Upsert(keyNamespace, fas.Namespace)) // recording status stats.Record(ctx, @@ -211,7 +213,7 @@ func (c *Controller) recordFleetChanges(obj interface{}) { return } - c.recordFleetReplicas(f.Name, f.Status.Replicas, f.Status.AllocatedReplicas, + c.recordFleetReplicas(f.Name, f.Namespace, f.Status.Replicas, f.Status.AllocatedReplicas, f.Status.ReadyReplicas, f.Spec.Replicas) } @@ -221,12 +223,12 @@ func (c *Controller) recordFleetDeletion(obj interface{}) { return } - c.recordFleetReplicas(f.Name, 0, 0, 0, 0) + c.recordFleetReplicas(f.Name, f.Namespace, 0, 0, 0, 0) } -func (c *Controller) recordFleetReplicas(fleetName string, total, allocated, ready, desired int32) { +func (c *Controller) recordFleetReplicas(fleetName, fleetNamespace string, total, allocated, ready, desired int32) { - ctx, _ := tag.New(context.Background(), tag.Upsert(keyName, fleetName)) + ctx, _ := tag.New(context.Background(), tag.Upsert(keyName, fleetName), tag.Upsert(keyNamespace, fleetNamespace)) recordWithTags(ctx, []tag.Mutator{tag.Upsert(keyType, "total")}, fleetsReplicasCountStats.M(int64(total))) @@ -258,10 +260,10 @@ func (c *Controller) recordGameServerStatusChanges(old, next interface{}) { if newGs.Status.State != oldGs.Status.State { fleetName := newGs.Labels[agonesv1.FleetNameLabel] if fleetName == "" { - fleetName = "none" + fleetName = noneValue } recordWithTags(context.Background(), []tag.Mutator{tag.Upsert(keyType, string(newGs.Status.State)), - tag.Upsert(keyFleetName, fleetName)}, gameServerTotalStats.M(1)) + tag.Upsert(keyFleetName, fleetName), tag.Upsert(keyNamespace, newGs.GetNamespace())}, gameServerTotalStats.M(1)) } } diff --git a/pkg/metrics/controller_metrics.go b/pkg/metrics/controller_metrics.go index 46cb5ff56d..8226d32635 100644 --- a/pkg/metrics/controller_metrics.go +++ b/pkg/metrics/controller_metrics.go @@ -39,63 +39,63 @@ var ( Measure: fleetsReplicasCountStats, Description: "The number of replicas per fleet", Aggregation: view.LastValue(), - TagKeys: []tag.Key{keyName, keyType}, + TagKeys: []tag.Key{keyName, keyType, keyNamespace}, }, &view.View{ Name: "fleet_autoscalers_buffer_limits", Measure: fasBufferLimitsCountStats, Description: "The limits of buffer based fleet autoscalers", Aggregation: view.LastValue(), - TagKeys: []tag.Key{keyName, keyType, keyFleetName}, + TagKeys: []tag.Key{keyName, keyType, keyFleetName, keyNamespace}, }, &view.View{ Name: "fleet_autoscalers_buffer_size", Measure: fasBufferSizeStats, Description: "The buffer size of fleet autoscalers", Aggregation: view.LastValue(), - TagKeys: []tag.Key{keyName, keyType, keyFleetName}, + TagKeys: []tag.Key{keyName, keyType, keyFleetName, keyNamespace}, }, &view.View{ Name: "fleet_autoscalers_current_replicas_count", Measure: fasCurrentReplicasStats, Description: "The current replicas count as seen by autoscalers", Aggregation: view.LastValue(), - TagKeys: []tag.Key{keyName, keyFleetName}, + TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace}, }, &view.View{ Name: "fleet_autoscalers_desired_replicas_count", Measure: fasDesiredReplicasStats, Description: "The desired replicas count as seen by autoscalers", Aggregation: view.LastValue(), - TagKeys: []tag.Key{keyName, keyFleetName}, + TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace}, }, &view.View{ Name: "fleet_autoscalers_able_to_scale", Measure: fasAbleToScaleStats, Description: "The fleet autoscaler can access the fleet to scale", Aggregation: view.LastValue(), - TagKeys: []tag.Key{keyName, keyFleetName}, + TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace}, }, &view.View{ Name: "fleet_autoscalers_limited", Measure: fasLimitedStats, Description: "The fleet autoscaler is capped", Aggregation: view.LastValue(), - TagKeys: []tag.Key{keyName, keyFleetName}, + TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace}, }, &view.View{ Name: "gameservers_count", Measure: gameServerCountStats, Description: "The number of gameservers", Aggregation: view.LastValue(), - TagKeys: []tag.Key{keyType, keyFleetName}, + TagKeys: []tag.Key{keyType, keyFleetName, keyNamespace}, }, &view.View{ Name: "gameservers_total", Measure: gameServerTotalStats, Description: "The total of gameservers", Aggregation: view.Count(), - TagKeys: []tag.Key{keyType, keyFleetName}, + TagKeys: []tag.Key{keyType, keyFleetName, keyNamespace}, }, &view.View{ Name: "nodes_count", diff --git a/pkg/metrics/controller_test.go b/pkg/metrics/controller_test.go index 2099ceb613..535b7db727 100644 --- a/pkg/metrics/controller_test.go +++ b/pkg/metrics/controller_test.go @@ -69,7 +69,6 @@ func assertMetricData(t *testing.T, exporter *metricExporter, metricName string, } e, ok := expectedValuesAsMap[serialize(actualLabelValues)] assert.True(t, ok, "no TimeSeries found with labels: %v", actualLabelValues) - assert.Equal(t, actualLabelValues, e.labels, "label values don't match") assert.Equal(t, len(tsd.Points), 1, "assertMetricDataValues can only handle a single Point in a TimeSeries") assert.Equal(t, tsd.Points[0].Value, e.val, "metric: %s, tags: %v, values don't match; got: %v, want: %v", metricName, tsd.LabelValues, tsd.Points[0].Value, e.val) @@ -110,9 +109,9 @@ func TestControllerGameServerCount(t *testing.T) { c.collect() reader.ReadAndExport(exporter) assertMetricData(t, exporter, "gameservers_count", []expectedMetricData{ - {labels: []string{"test-fleet", "Ready"}, val: int64(0)}, - {labels: []string{"test-fleet", "Shutdown"}, val: int64(1)}, - {labels: []string{"none", "PortAllocation"}, val: int64(2)}, + {labels: []string{"test-fleet", "default", "Ready"}, val: int64(0)}, + {labels: []string{"test-fleet", "default", "Shutdown"}, val: int64(1)}, + {labels: []string{"none", "default", "PortAllocation"}, val: int64(2)}, }) } @@ -141,14 +140,14 @@ func TestControllerGameServersTotal(t *testing.T) { c.sync() reader.ReadAndExport(exporter) assertMetricData(t, exporter, "gameservers_total", []expectedMetricData{ - {labels: []string{"test", "Creating"}, val: int64(16)}, - {labels: []string{"test", "Scheduled"}, val: int64(15)}, - {labels: []string{"test", "Starting"}, val: int64(10)}, - {labels: []string{"test", "Unhealthy"}, val: int64(1)}, - {labels: []string{"none", "Creating"}, val: int64(19)}, - {labels: []string{"none", "Scheduled"}, val: int64(18)}, - {labels: []string{"none", "Starting"}, val: int64(16)}, - {labels: []string{"none", "Unhealthy"}, val: int64(1)}, + {labels: []string{"test", "default", "Creating"}, val: int64(16)}, + {labels: []string{"test", "default", "Scheduled"}, val: int64(15)}, + {labels: []string{"test", "default", "Starting"}, val: int64(10)}, + {labels: []string{"test", "default", "Unhealthy"}, val: int64(1)}, + {labels: []string{"none", "default", "Creating"}, val: int64(19)}, + {labels: []string{"none", "default", "Scheduled"}, val: int64(18)}, + {labels: []string{"none", "default", "Starting"}, val: int64(16)}, + {labels: []string{"none", "default", "Unhealthy"}, val: int64(1)}, }) } @@ -175,14 +174,14 @@ func TestControllerFleetReplicasCount(t *testing.T) { reader.ReadAndExport(exporter) assertMetricData(t, exporter, "fleets_replicas_count", []expectedMetricData{ - {labels: []string{"fleet-deleted", "allocated"}, val: int64(0)}, - {labels: []string{"fleet-deleted", "desired"}, val: int64(0)}, - {labels: []string{"fleet-deleted", "ready"}, val: int64(0)}, - {labels: []string{"fleet-deleted", "total"}, val: int64(0)}, - {labels: []string{"fleet-test", "allocated"}, val: int64(2)}, - {labels: []string{"fleet-test", "desired"}, val: int64(5)}, - {labels: []string{"fleet-test", "ready"}, val: int64(1)}, - {labels: []string{"fleet-test", "total"}, val: int64(8)}, + {labels: []string{"fleet-deleted", "default", "allocated"}, val: int64(0)}, + {labels: []string{"fleet-deleted", "default", "desired"}, val: int64(0)}, + {labels: []string{"fleet-deleted", "default", "ready"}, val: int64(0)}, + {labels: []string{"fleet-deleted", "default", "total"}, val: int64(0)}, + {labels: []string{"fleet-test", "default", "allocated"}, val: int64(2)}, + {labels: []string{"fleet-test", "default", "desired"}, val: int64(5)}, + {labels: []string{"fleet-test", "default", "ready"}, val: int64(1)}, + {labels: []string{"fleet-test", "default", "total"}, val: int64(8)}, }) } @@ -220,37 +219,37 @@ func TestControllerFleetAutoScalerState(t *testing.T) { reader.ReadAndExport(exporter) assertMetricData(t, exporter, "fleet_autoscalers_able_to_scale", []expectedMetricData{ - {labels: []string{"first-fleet", "name-switch"}, val: int64(0)}, - {labels: []string{"second-fleet", "name-switch"}, val: int64(1)}, - {labels: []string{"deleted-fleet", "deleted"}, val: int64(0)}, + {labels: []string{"first-fleet", "name-switch", "default"}, val: int64(0)}, + {labels: []string{"second-fleet", "name-switch", "default"}, val: int64(1)}, + {labels: []string{"deleted-fleet", "deleted", "default"}, val: int64(0)}, }) assertMetricData(t, exporter, "fleet_autoscalers_buffer_limits", []expectedMetricData{ - {labels: []string{"first-fleet", "name-switch", "max"}, val: int64(50)}, - {labels: []string{"first-fleet", "name-switch", "min"}, val: int64(10)}, - {labels: []string{"second-fleet", "name-switch", "max"}, val: int64(50)}, - {labels: []string{"second-fleet", "name-switch", "min"}, val: int64(10)}, - {labels: []string{"deleted-fleet", "deleted", "max"}, val: int64(150)}, - {labels: []string{"deleted-fleet", "deleted", "min"}, val: int64(15)}, + {labels: []string{"first-fleet", "name-switch", "default", "max"}, val: int64(50)}, + {labels: []string{"first-fleet", "name-switch", "default", "min"}, val: int64(10)}, + {labels: []string{"second-fleet", "name-switch", "default", "max"}, val: int64(50)}, + {labels: []string{"second-fleet", "name-switch", "default", "min"}, val: int64(10)}, + {labels: []string{"deleted-fleet", "deleted", "default", "max"}, val: int64(150)}, + {labels: []string{"deleted-fleet", "deleted", "default", "min"}, val: int64(15)}, }) assertMetricData(t, exporter, "fleet_autoscalers_buffer_size", []expectedMetricData{ - {labels: []string{"first-fleet", "name-switch", "count"}, val: int64(10)}, - {labels: []string{"second-fleet", "name-switch", "count"}, val: int64(10)}, - {labels: []string{"deleted-fleet", "deleted", "percentage"}, val: int64(50)}, + {labels: []string{"first-fleet", "name-switch", "default", "count"}, val: int64(10)}, + {labels: []string{"second-fleet", "name-switch", "default", "count"}, val: int64(10)}, + {labels: []string{"deleted-fleet", "deleted", "default", "percentage"}, val: int64(50)}, }) assertMetricData(t, exporter, "fleet_autoscalers_current_replicas_count", []expectedMetricData{ - {labels: []string{"first-fleet", "name-switch"}, val: int64(0)}, - {labels: []string{"second-fleet", "name-switch"}, val: int64(20)}, - {labels: []string{"deleted-fleet", "deleted"}, val: int64(0)}, + {labels: []string{"first-fleet", "name-switch", "default"}, val: int64(0)}, + {labels: []string{"second-fleet", "name-switch", "default"}, val: int64(20)}, + {labels: []string{"deleted-fleet", "deleted", "default"}, val: int64(0)}, }) assertMetricData(t, exporter, "fleet_autoscalers_desired_replicas_count", []expectedMetricData{ - {labels: []string{"first-fleet", "name-switch"}, val: int64(0)}, - {labels: []string{"second-fleet", "name-switch"}, val: int64(10)}, - {labels: []string{"deleted-fleet", "deleted"}, val: int64(0)}, + {labels: []string{"first-fleet", "name-switch", "default"}, val: int64(0)}, + {labels: []string{"second-fleet", "name-switch", "default"}, val: int64(10)}, + {labels: []string{"deleted-fleet", "deleted", "default"}, val: int64(0)}, }) assertMetricData(t, exporter, "fleet_autoscalers_limited", []expectedMetricData{ - {labels: []string{"first-fleet", "name-switch"}, val: int64(0)}, - {labels: []string{"second-fleet", "name-switch"}, val: int64(1)}, - {labels: []string{"deleted-fleet", "deleted"}, val: int64(0)}, + {labels: []string{"first-fleet", "name-switch", "default"}, val: int64(0)}, + {labels: []string{"second-fleet", "name-switch", "default"}, val: int64(1)}, + {labels: []string{"deleted-fleet", "deleted", "default"}, val: int64(0)}, }) } diff --git a/pkg/metrics/gameservers_count.go b/pkg/metrics/gameservers_count.go index 3bb27ce86a..72340626f4 100644 --- a/pkg/metrics/gameservers_count.go +++ b/pkg/metrics/gameservers_count.go @@ -24,16 +24,23 @@ import ( ) // GameServerCount is the count of gameserver per current state and per fleet name -type GameServerCount map[agonesv1.GameServerState]map[string]int64 +type GameServerCount map[agonesv1.GameServerState]map[fleetKey]int64 + +type fleetKey struct { + name string + namespace string +} // increment adds the count of gameservers for a given fleetName and state -func (c GameServerCount) increment(fleetName string, state agonesv1.GameServerState) { +func (c GameServerCount) increment(fleetName, fleetNamespace string, state agonesv1.GameServerState) { + key := fleetKey{name: fleetName, namespace: fleetNamespace} + fleets, ok := c[state] if !ok { - fleets = map[string]int64{} + fleets = map[fleetKey]int64{} c[state] = fleets } - fleets[fleetName]++ + fleets[key]++ } // reset sets zero to the whole metrics set @@ -52,18 +59,26 @@ func (c GameServerCount) record(gameservers []*agonesv1.GameServer) error { // Otherwise OpenCensus will write the last value recorded to the prom endpoint. // TL;DR we can't remove a gauge c.reset() - // counts gameserver per state and fleet - for _, g := range gameservers { - c.increment(g.Labels[agonesv1.FleetNameLabel], g.Status.State) + + if len(gameservers) != 0 { + // counts gameserver per state and fleet + for _, g := range gameservers { + c.increment(g.Labels[agonesv1.FleetNameLabel], g.GetNamespace(), g.Status.State) + } } + errs := []error{} for state, fleets := range c { for fleet, count := range fleets { - if fleet == "" { - fleet = "none" + if fleet.name == "" { + fleet.name = noneValue } + if fleet.namespace == "" { + fleet.namespace = noneValue + } + if err := stats.RecordWithTags(context.Background(), []tag.Mutator{tag.Upsert(keyType, string(state)), - tag.Upsert(keyFleetName, fleet)}, gameServerCountStats.M(count)); err != nil { + tag.Upsert(keyFleetName, fleet.name), tag.Upsert(keyNamespace, fleet.namespace)}, gameServerCountStats.M(count)); err != nil { errs = append(errs, err) } } diff --git a/pkg/metrics/util.go b/pkg/metrics/util.go index 189eace637..e3b4211189 100644 --- a/pkg/metrics/util.go +++ b/pkg/metrics/util.go @@ -31,6 +31,7 @@ var ( logger = runtime.NewLoggerWithSource("metrics") keyName = MustTagKey("name") + keyNamespace = MustTagKey("namespace") keyFleetName = MustTagKey("fleet_name") keyType = MustTagKey("type") keyStatusCode = MustTagKey("status_code")