Skip to content

Commit

Permalink
Metrics: add namespace to distinguish fleets with the same name (goog…
Browse files Browse the repository at this point in the history
…leforgames#1585)

* Added namespace to recordFleetReplicas

Co-authored-by: Alexander Apalikov <alexander.apalikov@globant.com>
Co-authored-by: Mark Mandel <markmandel@google.com>
  • Loading branch information
3 people authored and ilkercelikyilmaz committed Oct 23, 2020
1 parent 082cc01 commit 9bd1fe7
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 68 deletions.
18 changes: 10 additions & 8 deletions pkg/metrics/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ import (
"k8s.io/client-go/tools/cache"
)

const noneValue = "none"

var (
// MetricResyncPeriod is the interval to re-synchronize metrics based on indexed cache.
MetricResyncPeriod = time.Second * 15
Expand Down Expand Up @@ -140,7 +142,7 @@ func (c *Controller) recordFleetAutoScalerChanges(old, next interface{}) {
}

ctx, _ := tag.New(context.Background(), tag.Upsert(keyName, fas.Name),
tag.Upsert(keyFleetName, fas.Spec.FleetName))
tag.Upsert(keyFleetName, fas.Spec.FleetName), tag.Upsert(keyNamespace, fas.Namespace))

ableToScale := 0
limited := 0
Expand Down Expand Up @@ -189,7 +191,7 @@ func (c *Controller) recordFleetAutoScalerDeletion(obj interface{}) {
return
}
ctx, _ := tag.New(context.Background(), tag.Upsert(keyName, fas.Name),
tag.Upsert(keyFleetName, fas.Spec.FleetName))
tag.Upsert(keyFleetName, fas.Spec.FleetName), tag.Upsert(keyNamespace, fas.Namespace))

// recording status
stats.Record(ctx,
Expand All @@ -211,7 +213,7 @@ func (c *Controller) recordFleetChanges(obj interface{}) {
return
}

c.recordFleetReplicas(f.Name, f.Status.Replicas, f.Status.AllocatedReplicas,
c.recordFleetReplicas(f.Name, f.Namespace, f.Status.Replicas, f.Status.AllocatedReplicas,
f.Status.ReadyReplicas, f.Spec.Replicas)
}

Expand All @@ -221,12 +223,12 @@ func (c *Controller) recordFleetDeletion(obj interface{}) {
return
}

c.recordFleetReplicas(f.Name, 0, 0, 0, 0)
c.recordFleetReplicas(f.Name, f.Namespace, 0, 0, 0, 0)
}

func (c *Controller) recordFleetReplicas(fleetName string, total, allocated, ready, desired int32) {
func (c *Controller) recordFleetReplicas(fleetName, fleetNamespace string, total, allocated, ready, desired int32) {

ctx, _ := tag.New(context.Background(), tag.Upsert(keyName, fleetName))
ctx, _ := tag.New(context.Background(), tag.Upsert(keyName, fleetName), tag.Upsert(keyNamespace, fleetNamespace))

recordWithTags(ctx, []tag.Mutator{tag.Upsert(keyType, "total")},
fleetsReplicasCountStats.M(int64(total)))
Expand Down Expand Up @@ -258,10 +260,10 @@ func (c *Controller) recordGameServerStatusChanges(old, next interface{}) {
if newGs.Status.State != oldGs.Status.State {
fleetName := newGs.Labels[agonesv1.FleetNameLabel]
if fleetName == "" {
fleetName = "none"
fleetName = noneValue
}
recordWithTags(context.Background(), []tag.Mutator{tag.Upsert(keyType, string(newGs.Status.State)),
tag.Upsert(keyFleetName, fleetName)}, gameServerTotalStats.M(1))
tag.Upsert(keyFleetName, fleetName), tag.Upsert(keyNamespace, newGs.GetNamespace())}, gameServerTotalStats.M(1))
}
}

Expand Down
18 changes: 9 additions & 9 deletions pkg/metrics/controller_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,63 +39,63 @@ var (
Measure: fleetsReplicasCountStats,
Description: "The number of replicas per fleet",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyType},
TagKeys: []tag.Key{keyName, keyType, keyNamespace},
},
&view.View{
Name: "fleet_autoscalers_buffer_limits",
Measure: fasBufferLimitsCountStats,
Description: "The limits of buffer based fleet autoscalers",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyType, keyFleetName},
TagKeys: []tag.Key{keyName, keyType, keyFleetName, keyNamespace},
},
&view.View{
Name: "fleet_autoscalers_buffer_size",
Measure: fasBufferSizeStats,
Description: "The buffer size of fleet autoscalers",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyType, keyFleetName},
TagKeys: []tag.Key{keyName, keyType, keyFleetName, keyNamespace},
},
&view.View{
Name: "fleet_autoscalers_current_replicas_count",
Measure: fasCurrentReplicasStats,
Description: "The current replicas count as seen by autoscalers",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName},
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
&view.View{
Name: "fleet_autoscalers_desired_replicas_count",
Measure: fasDesiredReplicasStats,
Description: "The desired replicas count as seen by autoscalers",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName},
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
&view.View{
Name: "fleet_autoscalers_able_to_scale",
Measure: fasAbleToScaleStats,
Description: "The fleet autoscaler can access the fleet to scale",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName},
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
&view.View{
Name: "fleet_autoscalers_limited",
Measure: fasLimitedStats,
Description: "The fleet autoscaler is capped",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyFleetName},
TagKeys: []tag.Key{keyName, keyFleetName, keyNamespace},
},
&view.View{
Name: "gameservers_count",
Measure: gameServerCountStats,
Description: "The number of gameservers",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyType, keyFleetName},
TagKeys: []tag.Key{keyType, keyFleetName, keyNamespace},
},
&view.View{
Name: "gameservers_total",
Measure: gameServerTotalStats,
Description: "The total of gameservers",
Aggregation: view.Count(),
TagKeys: []tag.Key{keyType, keyFleetName},
TagKeys: []tag.Key{keyType, keyFleetName, keyNamespace},
},
&view.View{
Name: "nodes_count",
Expand Down
81 changes: 40 additions & 41 deletions pkg/metrics/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ func assertMetricData(t *testing.T, exporter *metricExporter, metricName string,
}
e, ok := expectedValuesAsMap[serialize(actualLabelValues)]
assert.True(t, ok, "no TimeSeries found with labels: %v", actualLabelValues)

assert.Equal(t, actualLabelValues, e.labels, "label values don't match")
assert.Equal(t, len(tsd.Points), 1, "assertMetricDataValues can only handle a single Point in a TimeSeries")
assert.Equal(t, tsd.Points[0].Value, e.val, "metric: %s, tags: %v, values don't match; got: %v, want: %v", metricName, tsd.LabelValues, tsd.Points[0].Value, e.val)
Expand Down Expand Up @@ -110,9 +109,9 @@ func TestControllerGameServerCount(t *testing.T) {
c.collect()
reader.ReadAndExport(exporter)
assertMetricData(t, exporter, "gameservers_count", []expectedMetricData{
{labels: []string{"test-fleet", "Ready"}, val: int64(0)},
{labels: []string{"test-fleet", "Shutdown"}, val: int64(1)},
{labels: []string{"none", "PortAllocation"}, val: int64(2)},
{labels: []string{"test-fleet", "default", "Ready"}, val: int64(0)},
{labels: []string{"test-fleet", "default", "Shutdown"}, val: int64(1)},
{labels: []string{"none", "default", "PortAllocation"}, val: int64(2)},
})
}

Expand Down Expand Up @@ -141,14 +140,14 @@ func TestControllerGameServersTotal(t *testing.T) {
c.sync()
reader.ReadAndExport(exporter)
assertMetricData(t, exporter, "gameservers_total", []expectedMetricData{
{labels: []string{"test", "Creating"}, val: int64(16)},
{labels: []string{"test", "Scheduled"}, val: int64(15)},
{labels: []string{"test", "Starting"}, val: int64(10)},
{labels: []string{"test", "Unhealthy"}, val: int64(1)},
{labels: []string{"none", "Creating"}, val: int64(19)},
{labels: []string{"none", "Scheduled"}, val: int64(18)},
{labels: []string{"none", "Starting"}, val: int64(16)},
{labels: []string{"none", "Unhealthy"}, val: int64(1)},
{labels: []string{"test", "default", "Creating"}, val: int64(16)},
{labels: []string{"test", "default", "Scheduled"}, val: int64(15)},
{labels: []string{"test", "default", "Starting"}, val: int64(10)},
{labels: []string{"test", "default", "Unhealthy"}, val: int64(1)},
{labels: []string{"none", "default", "Creating"}, val: int64(19)},
{labels: []string{"none", "default", "Scheduled"}, val: int64(18)},
{labels: []string{"none", "default", "Starting"}, val: int64(16)},
{labels: []string{"none", "default", "Unhealthy"}, val: int64(1)},
})
}

Expand All @@ -175,14 +174,14 @@ func TestControllerFleetReplicasCount(t *testing.T) {

reader.ReadAndExport(exporter)
assertMetricData(t, exporter, "fleets_replicas_count", []expectedMetricData{
{labels: []string{"fleet-deleted", "allocated"}, val: int64(0)},
{labels: []string{"fleet-deleted", "desired"}, val: int64(0)},
{labels: []string{"fleet-deleted", "ready"}, val: int64(0)},
{labels: []string{"fleet-deleted", "total"}, val: int64(0)},
{labels: []string{"fleet-test", "allocated"}, val: int64(2)},
{labels: []string{"fleet-test", "desired"}, val: int64(5)},
{labels: []string{"fleet-test", "ready"}, val: int64(1)},
{labels: []string{"fleet-test", "total"}, val: int64(8)},
{labels: []string{"fleet-deleted", "default", "allocated"}, val: int64(0)},
{labels: []string{"fleet-deleted", "default", "desired"}, val: int64(0)},
{labels: []string{"fleet-deleted", "default", "ready"}, val: int64(0)},
{labels: []string{"fleet-deleted", "default", "total"}, val: int64(0)},
{labels: []string{"fleet-test", "default", "allocated"}, val: int64(2)},
{labels: []string{"fleet-test", "default", "desired"}, val: int64(5)},
{labels: []string{"fleet-test", "default", "ready"}, val: int64(1)},
{labels: []string{"fleet-test", "default", "total"}, val: int64(8)},
})
}

Expand Down Expand Up @@ -220,37 +219,37 @@ func TestControllerFleetAutoScalerState(t *testing.T) {

reader.ReadAndExport(exporter)
assertMetricData(t, exporter, "fleet_autoscalers_able_to_scale", []expectedMetricData{
{labels: []string{"first-fleet", "name-switch"}, val: int64(0)},
{labels: []string{"second-fleet", "name-switch"}, val: int64(1)},
{labels: []string{"deleted-fleet", "deleted"}, val: int64(0)},
{labels: []string{"first-fleet", "name-switch", "default"}, val: int64(0)},
{labels: []string{"second-fleet", "name-switch", "default"}, val: int64(1)},
{labels: []string{"deleted-fleet", "deleted", "default"}, val: int64(0)},
})
assertMetricData(t, exporter, "fleet_autoscalers_buffer_limits", []expectedMetricData{
{labels: []string{"first-fleet", "name-switch", "max"}, val: int64(50)},
{labels: []string{"first-fleet", "name-switch", "min"}, val: int64(10)},
{labels: []string{"second-fleet", "name-switch", "max"}, val: int64(50)},
{labels: []string{"second-fleet", "name-switch", "min"}, val: int64(10)},
{labels: []string{"deleted-fleet", "deleted", "max"}, val: int64(150)},
{labels: []string{"deleted-fleet", "deleted", "min"}, val: int64(15)},
{labels: []string{"first-fleet", "name-switch", "default", "max"}, val: int64(50)},
{labels: []string{"first-fleet", "name-switch", "default", "min"}, val: int64(10)},
{labels: []string{"second-fleet", "name-switch", "default", "max"}, val: int64(50)},
{labels: []string{"second-fleet", "name-switch", "default", "min"}, val: int64(10)},
{labels: []string{"deleted-fleet", "deleted", "default", "max"}, val: int64(150)},
{labels: []string{"deleted-fleet", "deleted", "default", "min"}, val: int64(15)},
})
assertMetricData(t, exporter, "fleet_autoscalers_buffer_size", []expectedMetricData{
{labels: []string{"first-fleet", "name-switch", "count"}, val: int64(10)},
{labels: []string{"second-fleet", "name-switch", "count"}, val: int64(10)},
{labels: []string{"deleted-fleet", "deleted", "percentage"}, val: int64(50)},
{labels: []string{"first-fleet", "name-switch", "default", "count"}, val: int64(10)},
{labels: []string{"second-fleet", "name-switch", "default", "count"}, val: int64(10)},
{labels: []string{"deleted-fleet", "deleted", "default", "percentage"}, val: int64(50)},
})
assertMetricData(t, exporter, "fleet_autoscalers_current_replicas_count", []expectedMetricData{
{labels: []string{"first-fleet", "name-switch"}, val: int64(0)},
{labels: []string{"second-fleet", "name-switch"}, val: int64(20)},
{labels: []string{"deleted-fleet", "deleted"}, val: int64(0)},
{labels: []string{"first-fleet", "name-switch", "default"}, val: int64(0)},
{labels: []string{"second-fleet", "name-switch", "default"}, val: int64(20)},
{labels: []string{"deleted-fleet", "deleted", "default"}, val: int64(0)},
})
assertMetricData(t, exporter, "fleet_autoscalers_desired_replicas_count", []expectedMetricData{
{labels: []string{"first-fleet", "name-switch"}, val: int64(0)},
{labels: []string{"second-fleet", "name-switch"}, val: int64(10)},
{labels: []string{"deleted-fleet", "deleted"}, val: int64(0)},
{labels: []string{"first-fleet", "name-switch", "default"}, val: int64(0)},
{labels: []string{"second-fleet", "name-switch", "default"}, val: int64(10)},
{labels: []string{"deleted-fleet", "deleted", "default"}, val: int64(0)},
})
assertMetricData(t, exporter, "fleet_autoscalers_limited", []expectedMetricData{
{labels: []string{"first-fleet", "name-switch"}, val: int64(0)},
{labels: []string{"second-fleet", "name-switch"}, val: int64(1)},
{labels: []string{"deleted-fleet", "deleted"}, val: int64(0)},
{labels: []string{"first-fleet", "name-switch", "default"}, val: int64(0)},
{labels: []string{"second-fleet", "name-switch", "default"}, val: int64(1)},
{labels: []string{"deleted-fleet", "deleted", "default"}, val: int64(0)},
})
}

Expand Down
35 changes: 25 additions & 10 deletions pkg/metrics/gameservers_count.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,23 @@ import (
)

// GameServerCount is the count of gameserver per current state and per fleet name
type GameServerCount map[agonesv1.GameServerState]map[string]int64
type GameServerCount map[agonesv1.GameServerState]map[fleetKey]int64

type fleetKey struct {
name string
namespace string
}

// increment adds the count of gameservers for a given fleetName and state
func (c GameServerCount) increment(fleetName string, state agonesv1.GameServerState) {
func (c GameServerCount) increment(fleetName, fleetNamespace string, state agonesv1.GameServerState) {
key := fleetKey{name: fleetName, namespace: fleetNamespace}

fleets, ok := c[state]
if !ok {
fleets = map[string]int64{}
fleets = map[fleetKey]int64{}
c[state] = fleets
}
fleets[fleetName]++
fleets[key]++
}

// reset sets zero to the whole metrics set
Expand All @@ -52,18 +59,26 @@ func (c GameServerCount) record(gameservers []*agonesv1.GameServer) error {
// Otherwise OpenCensus will write the last value recorded to the prom endpoint.
// TL;DR we can't remove a gauge
c.reset()
// counts gameserver per state and fleet
for _, g := range gameservers {
c.increment(g.Labels[agonesv1.FleetNameLabel], g.Status.State)

if len(gameservers) != 0 {
// counts gameserver per state and fleet
for _, g := range gameservers {
c.increment(g.Labels[agonesv1.FleetNameLabel], g.GetNamespace(), g.Status.State)
}
}

errs := []error{}
for state, fleets := range c {
for fleet, count := range fleets {
if fleet == "" {
fleet = "none"
if fleet.name == "" {
fleet.name = noneValue
}
if fleet.namespace == "" {
fleet.namespace = noneValue
}

if err := stats.RecordWithTags(context.Background(), []tag.Mutator{tag.Upsert(keyType, string(state)),
tag.Upsert(keyFleetName, fleet)}, gameServerCountStats.M(count)); err != nil {
tag.Upsert(keyFleetName, fleet.name), tag.Upsert(keyNamespace, fleet.namespace)}, gameServerCountStats.M(count)); err != nil {
errs = append(errs, err)
}
}
Expand Down
1 change: 1 addition & 0 deletions pkg/metrics/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ var (
logger = runtime.NewLoggerWithSource("metrics")

keyName = MustTagKey("name")
keyNamespace = MustTagKey("namespace")
keyFleetName = MustTagKey("fleet_name")
keyType = MustTagKey("type")
keyStatusCode = MustTagKey("status_code")
Expand Down

0 comments on commit 9bd1fe7

Please sign in to comment.