Skip to content

Commit

Permalink
metrics: refine metrics of infoschema (#46612)
Browse files: browse the repository at this point in the history
ref #46524
  • Loading branch information
crazycs520 authored Sep 4, 2023
1 parent 28f91c8 commit ee76cd8
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 14 deletions.
1 change: 1 addition & 0 deletions domain/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ go_library(
"//domain/resourcegroup",
"//errno",
"//infoschema",
"//infoschema/metrics",
"//infoschema/perfschema",
"//keyspace",
"//kv",
Expand Down
9 changes: 8 additions & 1 deletion domain/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
"github.com/pingcap/tidb/domain/resourcegroup"
"github.com/pingcap/tidb/errno"
"github.com/pingcap/tidb/infoschema"
infoschema_metrics "github.com/pingcap/tidb/infoschema/metrics"
"github.com/pingcap/tidb/infoschema/perfschema"
"github.com/pingcap/tidb/keyspace"
"github.com/pingcap/tidb/kv"
Expand Down Expand Up @@ -216,6 +217,10 @@ func (do *Domain) EtcdClient() *clientv3.Client {
// 4. the changed table IDs if it is not a full load
// 5. an error if any
func (do *Domain) loadInfoSchema(startTS uint64) (infoschema.InfoSchema, bool, int64, *transaction.RelatedSchemaChange, error) {
beginTime := time.Now()
defer func() {
infoschema_metrics.LoadSchemaDurationTotal.Observe(time.Since(beginTime).Seconds())
}()
snapshot := do.store.GetSnapshot(kv.NewVersion(startTS))
m := meta.NewSnapshotMeta(snapshot)
neededSchemaVersion, err := m.GetSchemaVersionWithNonEmptyDiff()
Expand Down Expand Up @@ -252,6 +257,7 @@ func (do *Domain) loadInfoSchema(startTS uint64) (infoschema.InfoSchema, bool, i
if currentSchemaVersion != 0 && neededSchemaVersion > currentSchemaVersion && neededSchemaVersion-currentSchemaVersion < LoadSchemaDiffVersionGapThreshold {
is, relatedChanges, err := do.tryLoadSchemaDiffs(m, currentSchemaVersion, neededSchemaVersion)
if err == nil {
infoschema_metrics.LoadSchemaDurationLoadDiff.Observe(time.Since(startTime).Seconds())
do.infoCache.Insert(is, uint64(schemaTs))
logutil.BgLogger().Info("diff load InfoSchema success",
zap.Int64("currentSchemaVersion", currentSchemaVersion),
Expand Down Expand Up @@ -285,6 +291,7 @@ func (do *Domain) loadInfoSchema(startTS uint64) (infoschema.InfoSchema, bool, i
if err != nil {
return nil, false, currentSchemaVersion, nil, err
}
infoschema_metrics.LoadSchemaDurationLoadAll.Observe(time.Since(startTime).Seconds())
logutil.BgLogger().Info("full load InfoSchema success",
zap.Int64("currentSchemaVersion", currentSchemaVersion),
zap.Int64("neededSchemaVersion", neededSchemaVersion),
Expand Down Expand Up @@ -477,6 +484,7 @@ func (do *Domain) GetSnapshotInfoSchema(snapshotTS uint64) (infoschema.InfoSchem
return is, nil
}
is, _, _, _, err := do.loadInfoSchema(snapshotTS)
infoschema_metrics.LoadSchemaCounterSnapshot.Inc()
return is, err
}

Expand Down Expand Up @@ -578,7 +586,6 @@ func (do *Domain) Reload() error {
is, hitCache, oldSchemaVersion, changes, err = do.loadInfoSchema(version)
}
}
metrics.LoadSchemaDuration.Observe(time.Since(startTime).Seconds())
if err != nil {
metrics.LoadSchemaCounter.WithLabelValues("failed").Inc()
return err
Expand Down
12 changes: 12 additions & 0 deletions infoschema/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ var (
HitLatestCounter prometheus.Counter
HitTSCounter prometheus.Counter
HitVersionCounter prometheus.Counter

LoadSchemaCounterSnapshot prometheus.Counter

LoadSchemaDurationTotal prometheus.Observer
LoadSchemaDurationLoadDiff prometheus.Observer
LoadSchemaDurationLoadAll prometheus.Observer
)

func init() {
Expand All @@ -43,4 +49,10 @@ func InitMetricsVars() {
HitLatestCounter = metrics.InfoCacheCounters.WithLabelValues("hit", "latest")
HitTSCounter = metrics.InfoCacheCounters.WithLabelValues("hit", "ts")
HitVersionCounter = metrics.InfoCacheCounters.WithLabelValues("hit", "version")

LoadSchemaCounterSnapshot = metrics.LoadSchemaCounter.WithLabelValues("snapshot")

LoadSchemaDurationTotal = metrics.LoadSchemaDuration.WithLabelValues("total")
LoadSchemaDurationLoadDiff = metrics.LoadSchemaDuration.WithLabelValues("load-diff")
LoadSchemaDurationLoadAll = metrics.LoadSchemaDuration.WithLabelValues("load-all")
}
6 changes: 3 additions & 3 deletions metrics/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ var (
LoadSchemaCounter *prometheus.CounterVec

// LoadSchemaDuration records the duration of loading the schema.
LoadSchemaDuration prometheus.Histogram
LoadSchemaDuration *prometheus.HistogramVec

// InfoCacheCounters are the counters of get/hit.
InfoCacheCounters *prometheus.CounterVec
Expand Down Expand Up @@ -59,14 +59,14 @@ func InitDomainMetrics() {
Help: "Counter of load schema",
}, []string{LblType})

LoadSchemaDuration = NewHistogram(
LoadSchemaDuration = NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "tidb",
Subsystem: "domain",
Name: "load_schema_duration_seconds",
Help: "Bucketed histogram of processing time (s) in load schema.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20), // 1ms ~ 524s
})
}, []string{LblAction})

InfoCacheCounters = NewCounterVec(
prometheus.CounterOpts{
Expand Down
132 changes: 122 additions & 10 deletions metrics/grafana/tidb.json
Original file line number Diff line number Diff line change
Expand Up @@ -12091,13 +12091,15 @@
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": false
"values": true
},
"lines": true,
"linewidth": 1,
Expand All @@ -12117,10 +12119,10 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(tidb_domain_load_schema_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))",
"expr": "histogram_quantile(0.99, sum(rate(tidb_domain_load_schema_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, action))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
"legendFormat": "{{action}}",
"metric": "",
"refId": "A",
"step": 10
Expand All @@ -12130,7 +12132,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Load Schema Duration",
"title": "Load Schema Action Duration",
"tooltip": {
"msResolution": false,
"shared": true,
Expand Down Expand Up @@ -12302,13 +12304,15 @@
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": false
"values": true
},
"lines": true,
"linewidth": 1,
Expand Down Expand Up @@ -12365,7 +12369,7 @@
{
"format": "short",
"label": null,
"logBase": 10,
"logBase": 1,
"max": null,
"min": null,
"show": true
Expand Down Expand Up @@ -12496,6 +12500,114 @@
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"description": "TiDB schema cache operations per second.",
"editable": true,
"error": false,
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"grid": {},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 24
},
"hiddenSeries": false,
"id": 314,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.11",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(tidb_domain_infocache_counters{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (action,type)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{action}}-{{type}}",
"metric": "",
"refId": "A",
"step": 10
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Schema Cache OPS",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"repeat": null,
Expand Down

0 comments on commit ee76cd8

Please sign in to comment.