Skip to content

Commit

Permalink
Merge branch 'master' into fix_coordinator
Browse files Browse the repository at this point in the history
  • Loading branch information
HuSharp authored Oct 10, 2023
2 parents d2eb2af + 2266c94 commit 9574d70
Show file tree
Hide file tree
Showing 10 changed files with 227 additions and 239 deletions.
255 changes: 124 additions & 131 deletions metrics/grafana/pd.json

Large diffs are not rendered by default.

66 changes: 65 additions & 1 deletion pkg/mcs/scheduling/server/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,12 +425,76 @@ func (c *Cluster) runCoordinator() {
c.coordinator.RunUntilStop()
}

func (c *Cluster) runMetricsCollectionJob() {
defer logutil.LogPanic()
defer c.wg.Done()

ticker := time.NewTicker(10 * time.Second)
defer ticker.Stop()

for {
select {
case <-c.ctx.Done():
log.Info("metrics are reset")
c.resetMetrics()
log.Info("metrics collection job has been stopped")
return
case <-ticker.C:
c.collectMetrics()
}
}
}

func (c *Cluster) collectMetrics() {
statsMap := statistics.NewStoreStatisticsMap(c.persistConfig)
stores := c.GetStores()
for _, s := range stores {
statsMap.Observe(s)
statsMap.ObserveHotStat(s, c.hotStat.StoresStats)
}
statsMap.Collect()

c.coordinator.GetSchedulersController().CollectSchedulerMetrics()
c.coordinator.CollectHotSpotMetrics()
c.collectClusterMetrics()
}

func (c *Cluster) collectClusterMetrics() {
if c.regionStats == nil {
return
}
c.regionStats.Collect()
c.labelStats.Collect()
// collect hot cache metrics
c.hotStat.CollectMetrics()
}

func (c *Cluster) resetMetrics() {
statsMap := statistics.NewStoreStatisticsMap(c.persistConfig)
statsMap.Reset()

c.coordinator.GetSchedulersController().ResetSchedulerMetrics()
c.coordinator.ResetHotSpotMetrics()
c.resetClusterMetrics()
}

func (c *Cluster) resetClusterMetrics() {
if c.regionStats == nil {
return
}
c.regionStats.Reset()
c.labelStats.Reset()
// reset hot cache metrics
c.hotStat.ResetMetrics()
}

// StartBackgroundJobs starts background jobs.
func (c *Cluster) StartBackgroundJobs() {
c.wg.Add(3)
c.wg.Add(4)
go c.updateScheduler()
go c.runUpdateStoreStats()
go c.runCoordinator()
go c.runMetricsCollectionJob()
c.running.Store(true)
}

Expand Down
17 changes: 16 additions & 1 deletion pkg/mcs/scheduling/server/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,11 @@ func (o *PersistConfig) IsSchedulingHalted() bool {
return o.GetScheduleConfig().HaltScheduling
}

// GetStoresLimit gets the stores' limit.
func (o *PersistConfig) GetStoresLimit() map[uint64]sc.StoreLimitConfig {
return o.GetScheduleConfig().StoreLimit
}

// GetStoreLimitByType returns the limit of a store with a given type.
func (o *PersistConfig) GetStoreLimitByType(storeID uint64, typ storelimit.Type) (returned float64) {
limit := o.GetStoreLimit(storeID)
Expand Down Expand Up @@ -620,11 +625,21 @@ func (o *PersistConfig) GetRegionMaxSize() uint64 {
return o.GetStoreConfig().GetRegionMaxSize()
}

// GetRegionMaxKeys returns the region split keys
// GetRegionMaxKeys returns the max region keys
func (o *PersistConfig) GetRegionMaxKeys() uint64 {
return o.GetStoreConfig().GetRegionMaxKeys()
}

// GetRegionSplitSize returns the region split size in MB
func (o *PersistConfig) GetRegionSplitSize() uint64 {
return o.GetStoreConfig().GetRegionSplitSize()
}

// GetRegionSplitKeys returns the region split keys
func (o *PersistConfig) GetRegionSplitKeys() uint64 {
return o.GetStoreConfig().GetRegionSplitKeys()
}

// IsEnableRegionBucket return true if the region bucket is enabled.
func (o *PersistConfig) IsEnableRegionBucket() bool {
return o.GetStoreConfig().IsEnableRegionBucket()
Expand Down
3 changes: 3 additions & 0 deletions pkg/schedule/config/config_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ type SchedulerConfigProvider interface {
SharedConfigProvider

IsSchedulingHalted() bool
GetStoresLimit() map[uint64]StoreLimitConfig

IsSchedulerDisabled(string) bool
AddSchedulerCfg(string, []string)
Expand Down Expand Up @@ -137,6 +138,8 @@ type ConfProvider interface {
type StoreConfigProvider interface {
GetRegionMaxSize() uint64
GetRegionMaxKeys() uint64
GetRegionSplitSize() uint64
GetRegionSplitKeys() uint64
CheckRegionSize(uint64, uint64) error
CheckRegionKeys(uint64, uint64) error
IsEnableRegionBucket() bool
Expand Down
1 change: 1 addition & 0 deletions pkg/schedule/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ func (c *Coordinator) PatrolRegions() {
// Note: we reset the ticker here to support updating configuration dynamically.
ticker.Reset(c.cluster.GetCheckerConfig().GetPatrolRegionInterval())
case <-c.ctx.Done():
patrolCheckRegionsGauge.Set(0)
log.Info("patrol regions has been stopped")
return
}
Expand Down
19 changes: 9 additions & 10 deletions pkg/statistics/store_collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import (
"github.com/tikv/pd/pkg/core"
"github.com/tikv/pd/pkg/core/constant"
"github.com/tikv/pd/pkg/core/storelimit"
"github.com/tikv/pd/pkg/schedule/config"
"github.com/tikv/pd/pkg/statistics/utils"
"github.com/tikv/pd/server/config"
)

const (
Expand All @@ -32,7 +32,7 @@ const (
)

type storeStatistics struct {
opt *config.PersistOptions
opt config.ConfProvider
Up int
Disconnect int
Unhealthy int
Expand All @@ -54,7 +54,7 @@ type storeStatistics struct {
Removed int
}

func newStoreStatistics(opt *config.PersistOptions) *storeStatistics {
func newStoreStatistics(opt config.ConfProvider) *storeStatistics {
return &storeStatistics{
opt: opt,
LabelCounter: make(map[string]int),
Expand Down Expand Up @@ -222,11 +222,10 @@ func (s *storeStatistics) Collect() {
configs["max-snapshot-count"] = float64(s.opt.GetMaxSnapshotCount())
configs["max-merge-region-size"] = float64(s.opt.GetMaxMergeRegionSize())
configs["max-merge-region-keys"] = float64(s.opt.GetMaxMergeRegionKeys())
storeConfig := s.opt.GetStoreConfig()
configs["region-max-size"] = float64(storeConfig.GetRegionMaxSize())
configs["region-split-size"] = float64(storeConfig.GetRegionSplitSize())
configs["region-split-keys"] = float64(storeConfig.GetRegionSplitKeys())
configs["region-max-keys"] = float64(storeConfig.GetRegionMaxKeys())
configs["region-max-size"] = float64(s.opt.GetRegionMaxSize())
configs["region-split-size"] = float64(s.opt.GetRegionSplitSize())
configs["region-split-keys"] = float64(s.opt.GetRegionSplitKeys())
configs["region-max-keys"] = float64(s.opt.GetRegionMaxKeys())

var enableMakeUpReplica, enableRemoveDownReplica, enableRemoveExtraReplica, enableReplaceOfflineReplica float64
if s.opt.IsMakeUpReplicaEnabled() {
Expand Down Expand Up @@ -290,12 +289,12 @@ func (s *storeStatistics) resetStoreStatistics(storeAddress string, id string) {
}

type storeStatisticsMap struct {
opt *config.PersistOptions
opt config.ConfProvider
stats *storeStatistics
}

// NewStoreStatisticsMap creates a new storeStatisticsMap.
func NewStoreStatisticsMap(opt *config.PersistOptions) *storeStatisticsMap {
func NewStoreStatisticsMap(opt config.ConfProvider) *storeStatisticsMap {
return &storeStatisticsMap{
opt: opt,
stats: newStoreStatistics(opt),
Expand Down
15 changes: 6 additions & 9 deletions server/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -2153,17 +2153,14 @@ func (c *RaftCluster) deleteStore(store *core.StoreInfo) error {
}

func (c *RaftCluster) collectMetrics() {
statsMap := statistics.NewStoreStatisticsMap(c.opt)
stores := c.GetStores()
for _, s := range stores {
statsMap.Observe(s)
if !c.isAPIServiceMode {
if !c.isAPIServiceMode {
statsMap := statistics.NewStoreStatisticsMap(c.opt)
stores := c.GetStores()
for _, s := range stores {
statsMap.Observe(s)
statsMap.ObserveHotStat(s, c.hotStat.StoresStats)
}
}
statsMap.Collect()

if !c.isAPIServiceMode {
statsMap.Collect()
c.coordinator.GetSchedulersController().CollectSchedulerMetrics()
c.coordinator.CollectHotSpotMetrics()
c.collectClusterMetrics()
Expand Down
2 changes: 1 addition & 1 deletion server/config/persist_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -1000,7 +1000,7 @@ func (o *PersistOptions) GetRegionMaxSize() uint64 {
return o.GetStoreConfig().GetRegionMaxSize()
}

// GetRegionMaxKeys returns the region split keys
// GetRegionMaxKeys returns the max region keys
func (o *PersistOptions) GetRegionMaxKeys() uint64 {
return o.GetStoreConfig().GetRegionMaxKeys()
}
Expand Down
13 changes: 2 additions & 11 deletions tools/pd-api-bench/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -37,28 +37,27 @@ require (
github.com/elliotchance/pie/v2 v2.1.0 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/gin-gonic/gin v1.8.1 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-playground/locales v0.14.0 // indirect
github.com/go-playground/universal-translator v0.18.0 // indirect
github.com/go-playground/validator/v10 v10.10.0 // indirect
github.com/goccy/go-json v0.9.7 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang-jwt/jwt v3.2.1+incompatible // indirect
github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/btree v1.1.2 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/gorilla/mux v1.7.4 // indirect
github.com/gorilla/websocket v1.4.2 // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/jonboulle/clockwork v0.2.2 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect
github.com/leodido/go-urn v1.2.1 // indirect
github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
Expand All @@ -72,30 +71,22 @@ require (
github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 // indirect
github.com/pingcap/kvproto v0.0.0-20230920042517-db656f45023b // indirect
github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 // indirect
github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b // indirect
github.com/prometheus/client_golang v1.11.1 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.26.0 // indirect
github.com/prometheus/procfs v0.6.0 // indirect
github.com/sasha-s/go-deadlock v0.2.0 // indirect
github.com/shirou/gopsutil/v3 v3.23.3 // indirect
github.com/shoenig/go-m1cpu v0.1.5 // indirect
github.com/sirupsen/logrus v1.6.0 // indirect
github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072 // indirect
github.com/soheilhy/cmux v0.1.4 // indirect
github.com/spf13/cobra v1.0.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.8.2 // indirect
github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 // indirect
github.com/tklauser/go-sysconf v0.3.11 // indirect
github.com/tklauser/numcpus v0.6.0 // indirect
github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect
github.com/ugorji/go/codec v1.2.7 // indirect
github.com/unrolled/render v1.0.1 // indirect
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
go.etcd.io/bbolt v1.3.6 // indirect
go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 // indirect
go.uber.org/atomic v1.10.0 // indirect
Expand Down
Loading

0 comments on commit 9574d70

Please sign in to comment.