Skip to content

Commit

Permalink
Merge pull request kubernetes#119241 from cartermckinnon/ccm-node-taint-delay-metric
Browse files Browse the repository at this point in the history

Add metrics for cloud controller manager initial node sync
  • Loading branch information
k8s-ci-robot committed Jul 17, 2023
2 parents 1fef8fd + 77cbd86 commit 58b3ae9
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
59 changes: 59 additions & 0 deletions staging/src/k8s.io/cloud-provider/controllers/node/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cloud

import (
"sync"

"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
)

// metricsSubsystem is the subsystem value set on every metric declared in
// this file.
const metricsSubsystem = "node_controller"

// removeCloudProviderTaintDelay is a histogram of the number of seconds
// elapsed since a node's creation at the moment the controller has removed
// the node's cloud-provider taint.
var removeCloudProviderTaintDelay = metrics.NewHistogram(
	&metrics.HistogramOpts{
		Subsystem:      metricsSubsystem,
		Name:           "cloud_provider_taint_removal_delay_seconds",
		Help:           "Number of seconds after node creation when NodeController removed the cloud-provider taint of a single node.",
		Buckets:        metrics.ExponentialBuckets(1, 4, 6), // 1s -> ~17m
		StabilityLevel: metrics.ALPHA,
	},
)

// initialNodeSyncDelay is a histogram of the number of seconds elapsed since
// a node's creation at the moment the controller has finished the node's
// initial synchronization.
var initialNodeSyncDelay = metrics.NewHistogram(
	&metrics.HistogramOpts{
		Subsystem:      metricsSubsystem,
		Name:           "initial_node_sync_delay_seconds",
		Help:           "Number of seconds after node creation when NodeController finished the initial synchronization of a single node.",
		Buckets:        metrics.ExponentialBuckets(1, 4, 6), // 1s -> ~17m
		StabilityLevel: metrics.ALPHA,
	},
)

var metricRegistration sync.Once

// registerMetrics registers the metrics that are to be monitored.
func registerMetrics() {
metricRegistration.Do(func() {
legacyregistry.MustRegister(removeCloudProviderTaintDelay)
legacyregistry.MustRegister(initialNodeSyncDelay)
})
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ import (
"k8s.io/klog/v2"
)

// init registers this package's metrics when the package is initialized by
// calling registerMetrics.
func init() {
registerMetrics()
}

// labelReconcileInfo lists Node labels to reconcile, and how to reconcile them.
// primaryKey and secondaryKey are keys of labels to reconcile.
// - If both keys exist, but their values don't match. Use the value from the
Expand Down Expand Up @@ -489,6 +493,8 @@ func (cnc *CloudNodeController) syncNode(ctx context.Context, nodeName string) e
return err
}

removeCloudProviderTaintDelay.Observe(time.Since(newNode.ObjectMeta.CreationTimestamp.Time).Seconds())

// After adding, call UpdateNodeAddress to set the CloudProvider provided IPAddresses
// So that users do not see any significant delay in IP addresses being filled into the node
cnc.updateNodeAddress(ctx, newNode, instanceMetadata)
Expand All @@ -501,6 +507,7 @@ func (cnc *CloudNodeController) syncNode(ctx context.Context, nodeName string) e
}

cnc.recorder.Event(copyNode, v1.EventTypeNormal, "Synced", "Node synced successfully")
initialNodeSyncDelay.Observe(time.Since(curNode.ObjectMeta.CreationTimestamp.Time).Seconds())
return nil
}

Expand Down

0 comments on commit 58b3ae9

Please sign in to comment.