Skip to content

Commit

Permalink
Merge pull request #7027 from damikag/refactor-mig-fetch
Browse files Browse the repository at this point in the history
Add function-duration metrics for listing GCE instances, and a metric for the number of MIGs with inconsistent instance counts when GCE bulk instance listing is enabled
  • Loading branch information
k8s-ci-robot committed Jul 12, 2024
2 parents a5d04db + 9eb7d2f commit 5dc4ec4
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
10 changes: 10 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/mig_info_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (

gce "google.golang.org/api/compute/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/metrics"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
)
Expand Down Expand Up @@ -196,6 +197,8 @@ func (c *cachingMigInfoProvider) listInstancesInAllZonesWithMigs() ([]GceInstanc
var allInstances []GceInstance
errors := make([]error, len(zones))
zoneInstances := make([][]GceInstance, len(zones))
defer metrics.UpdateDurationFromStart(metrics.BulkListAllGceInstances, time.Now())

workqueue.ParallelizeUntil(context.Background(), c.concurrentGceRefreshes, len(zones), func(piece int) {
zoneInstances[piece], errors[piece] = c.gceClient.FetchAllInstances(c.projectId, zones[piece], "")
}, workqueue.WithChunkSize(c.concurrentGceRefreshes))
Expand Down Expand Up @@ -245,6 +248,12 @@ func (c *cachingMigInfoProvider) isMigCreatingOrDeletingInstances(mig Mig) bool

// updateMigInstancesCache updates the mig instances for each mig
func (c *cachingMigInfoProvider) updateMigInstancesCache(migToInstances map[GceRef][]GceInstance) error {
defer metrics.UpdateDurationFromStart(metrics.BulkListMigInstances, time.Now())
inconsistentInstancesMigsCount := 0
defer func() {
klog.Warningf("Inconsistent instances migs count: %v", inconsistentInstancesMigsCount)
metrics.UpdateInconsistentInstancesMigsCount(inconsistentInstancesMigsCount)
}()
var errors []error
for _, mig := range c.migLister.GetMigs() {
migRef := mig.GceRef()
Expand All @@ -257,6 +266,7 @@ func (c *cachingMigInfoProvider) updateMigInstancesCache(migToInstances map[GceR
if err := c.fillMigInstances(migRef); err != nil {
errors = append(errors, err)
}
inconsistentInstancesMigsCount += 1
continue
}

Expand Down
18 changes: 18 additions & 0 deletions cluster-autoscaler/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ const (
Reconfigure FunctionLabel = "reconfigure"
Autoscaling FunctionLabel = "autoscaling"
LoopWait FunctionLabel = "loopWait"
BulkListAllGceInstances FunctionLabel = "bulkListInstances:listAllInstances"
BulkListMigInstances FunctionLabel = "bulkListInstances:listMigInstances"
)

var (
Expand Down Expand Up @@ -417,6 +419,14 @@ var (
},
[]string{"type"},
)

inconsistentInstancesMigsCount = k8smetrics.NewGauge(
&k8smetrics.GaugeOpts{
Namespace: caNamespace,
Name: "inconsistent_instances_migs_count",
Help: "Number of migs where instance count according to InstanceGroupManagers.List() differs from the results of Instances.List(). This can happen when some instances are abandoned or a user edits instance 'created-by' metadata.",
},
)
)

// RegisterAll registers all metrics.
Expand Down Expand Up @@ -452,6 +462,7 @@ func RegisterAll(emitPerNodeGroupMetrics bool) {
legacyregistry.MustRegister(nodeGroupDeletionCount)
legacyregistry.MustRegister(pendingNodeDeletions)
legacyregistry.MustRegister(nodeTaintsCount)
legacyregistry.MustRegister(inconsistentInstancesMigsCount)

if emitPerNodeGroupMetrics {
legacyregistry.MustRegister(nodesGroupMinNodes)
Expand Down Expand Up @@ -715,3 +726,10 @@ func ObservePendingNodeDeletions(value int) {
func ObserveNodeTaintsCount(taintType string, count float64) {
	// Resolve the child metric labelled with this taint type first,
	// then overwrite its current value with the supplied count.
	byTaintType := nodeTaintsCount.WithLabelValues(taintType)
	byTaintType.Set(count)
}

// UpdateInconsistentInstancesMigsCount sets the inconsistent-instances gauge to migCount,
// i.e. the number of migs whose instance count according to InstanceGroupManagers.List()
// disagrees with the results of Instances.List(). Such a mismatch can appear when some
// instances are abandoned or when a user edits an instance's 'created-by' metadata.
func UpdateInconsistentInstancesMigsCount(migCount int) {
	// Set is called with a float64, so convert the integer count before storing it.
	observed := float64(migCount)
	inconsistentInstancesMigsCount.Set(observed)
}

0 comments on commit 5dc4ec4

Please sign in to comment.