Skip to content

Commit

Permalink
controller: fix error retry and add more metrics
Browse files Browse the repository at this point in the history
Signed-off-by: nolouch <nolouch@gmail.com>
  • Loading branch information
nolouch committed May 27, 2024
1 parent f004df1 commit b43293c
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 8 deletions.
12 changes: 6 additions & 6 deletions client/resource_group/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,7 @@ type groupCostController struct {
handleRespFunc func(*rmpb.TokenBucketResponse)

// metrics
metrics *metricsCollection
metrics *groupMetricsCollection
mu struct {
sync.Mutex
consumption *rmpb.Consumption
Expand Down Expand Up @@ -681,7 +681,7 @@ type groupCostController struct {
tombstone bool
}

type metricsCollection struct {
type groupMetricsCollection struct {
successfulRequestDuration prometheus.Observer
failedLimitReserveDuration prometheus.Observer
requestRetryCounter prometheus.Counter
Expand All @@ -690,12 +690,12 @@ type metricsCollection struct {
tokenRequestCounter prometheus.Counter
}

func initMetrics(oldName, name string) *metricsCollection {
func initMetrics(oldName, name string) *groupMetricsCollection {
const (
otherType = "others"
throttledType = "throttled"
)
return &metricsCollection{
return &groupMetricsCollection{
successfulRequestDuration: successfulRequestDuration.WithLabelValues(oldName, name),
failedLimitReserveDuration: failedLimitReserveDuration.WithLabelValues(oldName, name),
failedRequestCounterWithOthers: failedRequestCounter.WithLabelValues(oldName, name, otherType),
Expand Down Expand Up @@ -806,7 +806,7 @@ func (gc *groupCostController) initRunState() {
case rmpb.GroupMode_RUMode:
gc.run.requestUnitTokens = make(map[rmpb.RequestUnitType]*tokenCounter)
for typ := range requestUnitLimitTypeList {
limiter := NewLimiterWithCfg(now, cfgFunc(getRUTokenBucketSetting(gc.meta, typ)), gc.lowRUNotifyChan)
limiter := NewLimiterWithCfg(now, cfgFunc(getRUTokenBucketSetting(gc.meta, typ)), gc.lowRUNotifyChan).SetName(gc.name).SetupMetrics()
counter := &tokenCounter{
limiter: limiter,
avgRUPerSec: 0,
Expand All @@ -820,7 +820,7 @@ func (gc *groupCostController) initRunState() {
case rmpb.GroupMode_RawMode:
gc.run.resourceTokens = make(map[rmpb.RawResourceType]*tokenCounter)
for typ := range requestResourceLimitTypeList {
limiter := NewLimiterWithCfg(now, cfgFunc(getRawResourceTokenBucketSetting(gc.meta, typ)), gc.lowRUNotifyChan)
limiter := NewLimiterWithCfg(now, cfgFunc(getRawResourceTokenBucketSetting(gc.meta, typ)), gc.lowRUNotifyChan).SetName(gc.name).SetupMetrics()
counter := &tokenCounter{
limiter: limiter,
avgRUPerSec: 0,
Expand Down
30 changes: 30 additions & 0 deletions client/resource_group/controller/limiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"time"

"github.com/pingcap/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/tikv/pd/client/errs"
"go.uber.org/zap"
)
Expand Down Expand Up @@ -81,6 +82,15 @@ type Limiter struct {
isLowProcess bool
// remainingNotifyTimes is used to limit notify when the speed limit is already set.
remainingNotifyTimes int
name string

// metrics
metrics *limiterMetricsCollection
}

// limiterMetricsCollection is a collection of metrics for a limiter.
// It is created by SetupMetrics; a Limiter whose metrics field is nil
// records nothing.
type limiterMetricsCollection struct {
// lowTokenNotifyCounter is incremented each time notify succeeds in sending
// a signal on the limiter's low-token notification channel.
lowTokenNotifyCounter prometheus.Counter
}

// Limit returns the maximum overall event rate.
Expand Down Expand Up @@ -224,6 +234,23 @@ func (lim *Limiter) SetupNotificationThreshold(threshold float64) {
lim.notifyThreshold = threshold
}

// SetName records name as the limiter's name while holding the limiter's
// mutex. The receiver is returned so that calls can be chained.
func (lim *Limiter) SetName(name string) *Limiter {
	lim.mu.Lock()
	lim.name = name
	lim.mu.Unlock()
	return lim
}

// SetupMetrics builds the limiter's metrics collection. The low-token notify
// counter is labelled with the limiter's current name, so SetName should be
// called beforehand. The receiver is returned so that calls can be chained.
func (lim *Limiter) SetupMetrics() *Limiter {
	lim.mu.Lock()
	defer lim.mu.Unlock()

	notifyCounter := lowTokenRequestNotifyCounter.WithLabelValues(lim.name)
	lim.metrics = &limiterMetricsCollection{lowTokenNotifyCounter: notifyCounter}
	return lim
}

// notify tries to send a non-blocking notification on notifyCh and disables
// further notifications (until the next Reconfigure or StartNotification).
func (lim *Limiter) notify() {
Expand All @@ -234,6 +261,9 @@ func (lim *Limiter) notify() {
lim.isLowProcess = true
select {
case lim.lowTokensNotifyChan <- struct{}{}:
if lim.metrics != nil {
lim.metrics.lowTokenNotifyCounter.Inc()
}
default:
}
}
Expand Down
13 changes: 11 additions & 2 deletions client/resource_group/controller/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ var (
Namespace: namespace,
Subsystem: requestSubsystem,
Name: "success",
Buckets: []float64{.005, .01, .05, .1, .5, 1, 5, 10, 20, 25, 30}, // 0.005 ~ 30
Buckets: []float64{0.0005, .005, .01, .05, .1, .5, 1, 5, 10, 20, 25, 30, 60, 600, 1800, 3600}, // 0.0005 ~ 1h
Help: "Bucketed histogram of wait duration of successful request.",
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})

Expand All @@ -51,7 +51,7 @@ var (
Namespace: namespace,
Subsystem: requestSubsystem,
Name: "limit_reserve_time_failed",
Buckets: []float64{.005, .01, .05, .1, .5, 1, 5, 10, 20, 25, 30}, // 0.005 ~ 30
Buckets: []float64{0.0005, .01, .05, .1, .5, 1, 5, 10, 20, 25, 30, 60, 600, 1800, 3600, 86400}, // 0.0005 ~ 24h
Help: "Bucketed histogram of wait duration of failed request.",
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})

Expand All @@ -75,6 +75,7 @@ var (
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: tokenRequestSubsystem,
Buckets: prometheus.ExponentialBuckets(0.001, 2, 13), // 1ms ~ 4.096s
Name: "duration",
Help: "Bucketed histogram of latency(s) of token request.",
}, []string{"type"})
Expand All @@ -86,6 +87,14 @@ var (
Name: "resource_group",
Help: "Counter of token request by every resource group.",
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})

lowTokenRequestNotifyCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: tokenRequestSubsystem,
Name: "low_token_notified",
Help: "Counter of low token request.",
}, []string{newResourceGroupNameLabel})
)

var (
Expand Down

0 comments on commit b43293c

Please sign in to comment.