Skip to content

Commit

Permalink
separate the lock metrics
Browse files Browse the repository at this point in the history
Signed-off-by: nolouch <nolouch@gmail.com>
  • Loading branch information
nolouch committed Mar 4, 2024
1 parent e36b9c1 commit bd05e2d
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 17 deletions.
26 changes: 24 additions & 2 deletions pkg/core/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,28 @@ var (
Help: "Bucketed histogram of processing count of handle the heartbeat stage.",
}, []string{"name"})

waitLockDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("WaitLock")
waitLockCount = HeartbeatBreakdownHandleCount.WithLabelValues("WaitLock")
AcquireRegionsLockWaitDurationSum = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "core",
Name: "acquire_regions_lock_wait_duration_seconds_sum",
Help: "Bucketed histogram of processing time (s) of waiting for acquiring regions lock.",
}, []string{"type"})
AcquireRegionsLockWaitCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "pd",
Subsystem: "core",
Name: "acquire_regions_lock_wait_duration_seconds_count",
Help: "Bucketed histogram of processing count of waiting for acquiring regions lock.",
}, []string{"type"})

// lock statistics
waitRegionsLockDurationSum = AcquireRegionsLockWaitDurationSum.WithLabelValues("WaitRegionsLock")
waitRegionsLockCount = AcquireRegionsLockWaitCount.WithLabelValues("WaitRegionsLock")
waitSubRegionsLockDurationSum = AcquireRegionsLockWaitDurationSum.WithLabelValues("WaitSubRegionsLock")
waitSubRegionsLockCount = AcquireRegionsLockWaitCount.WithLabelValues("WaitSubRegionsLock")

// heartbeat breakdown statistics
preCheckDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("PreCheck")
preCheckCount = HeartbeatBreakdownHandleCount.WithLabelValues("PreCheck")
asyncHotStatsDurationSum = HeartbeatBreakdownHandleDurationSum.WithLabelValues("AsyncHotStatsDuration")
Expand All @@ -63,6 +83,8 @@ var (
// init registers the heartbeat-breakdown and regions-lock-wait counter
// vectors with the default Prometheus registry; registration panics if any
// collector cannot be registered.
func init() {
	prometheus.MustRegister(
		HeartbeatBreakdownHandleDurationSum,
		HeartbeatBreakdownHandleCount,
		AcquireRegionsLockWaitDurationSum,
		AcquireRegionsLockWaitCount,
	)
}

type saveCacheStats struct {
Expand Down
41 changes: 26 additions & 15 deletions pkg/core/region.go
Original file line number Diff line number Diff line change
Expand Up @@ -1699,23 +1699,34 @@ const magicCount = 15 * time.Second

// CollectWaitLockMetrics collects the metrics of waiting time for lock
func (r *RegionsInfo) CollectWaitLockMetrics() {
sTotalTime := atomic.LoadInt64(&r.t.totalWaitTime)
stTotalTime := atomic.LoadInt64(&r.st.totalWaitTime)
sLockCount := atomic.LoadInt64(&r.t.lockCount)
stLockCount := atomic.LoadInt64(&r.st.lockCount)
lastTotalWaitTime := atomic.LoadInt64(&r.t.lastTotalWaitTime) + atomic.LoadInt64(&r.st.lastTotalWaitTime)
lastLockCount := atomic.LoadInt64(&r.t.lastLockCount) + atomic.LoadInt64(&r.st.lastLockCount)
totalLockCount := sLockCount + stLockCount
totalWaitTime := sTotalTime + stTotalTime
atomic.StoreInt64(&r.t.lastTotalWaitTime, sTotalTime)
atomic.StoreInt64(&r.t.lastLockCount, sLockCount)
atomic.StoreInt64(&r.st.lastTotalWaitTime, stTotalTime)
atomic.StoreInt64(&r.st.lastLockCount, stLockCount)
if lastTotalWaitTime == 0 || lastLockCount == 0 || totalLockCount-lastLockCount < 0 || totalLockCount-lastLockCount > int64(magicCount) {
regionsLockTotalWaitTime := atomic.LoadInt64(&r.t.totalWaitTime)
regionsLockCount := atomic.LoadInt64(&r.t.lockCount)

subRegionsLockTotalWaitTime := atomic.LoadInt64(&r.st.totalWaitTime)
subRegionsLockCount := atomic.LoadInt64(&r.st.lockCount)

lastRegionsLockTotalWaitTime := atomic.LoadInt64(&r.t.lastTotalWaitTime)
lastsRegionsLockCount := atomic.LoadInt64(&r.t.lastLockCount)

lastSubRegionsLockTotalWaitTime := atomic.LoadInt64(&r.st.lastTotalWaitTime)
lastSubRegionsLockCount := atomic.LoadInt64(&r.st.lastLockCount)

// skip invalid situation like initial status
if lastRegionsLockTotalWaitTime == 0 || lastsRegionsLockCount == 0 || lastSubRegionsLockTotalWaitTime == 0 || lastSubRegionsLockCount == 0 ||
regionsLockTotalWaitTime-lastRegionsLockTotalWaitTime < 0 || regionsLockCount-lastsRegionsLockCount > int64(magicCount) ||
subRegionsLockTotalWaitTime-lastSubRegionsLockTotalWaitTime < 0 || subRegionsLockCount-lastSubRegionsLockCount > int64(magicCount) {
return
}
waitLockDurationSum.Add(time.Duration(totalWaitTime - lastTotalWaitTime).Seconds())
waitLockCount.Add(float64(totalLockCount - lastLockCount))

waitRegionsLockDurationSum.Add(float64(regionsLockTotalWaitTime - lastRegionsLockTotalWaitTime))
waitRegionsLockCount.Add(float64(regionsLockCount - lastsRegionsLockCount))
waitSubRegionsLockDurationSum.Add(float64(subRegionsLockTotalWaitTime - lastSubRegionsLockTotalWaitTime))
waitSubRegionsLockCount.Add(float64(subRegionsLockCount - lastSubRegionsLockCount))

atomic.StoreInt64(&r.t.lastTotalWaitTime, regionsLockTotalWaitTime)
atomic.StoreInt64(&r.t.lastLockCount, regionsLockCount)
atomic.StoreInt64(&r.st.lastTotalWaitTime, subRegionsLockTotalWaitTime)
atomic.StoreInt64(&r.st.lastLockCount, subRegionsLockCount)
}

// GetAdjacentRegions returns region's info that is adjacent with specific region
Expand Down

0 comments on commit bd05e2d

Please sign in to comment.