From f820c1d99d9875de10eceebe4f8f9d571d549725 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Sat, 20 Feb 2021 15:25:47 +0800 Subject: [PATCH] cherry pick #1438 to release-2.0 Signed-off-by: ti-srebot --- .gitignore | 2 + dm/dm-ansible/conf/dm_worker.rules.yml | 15 +- dm/master/metrics/metrics.go | 13 +- dm/master/server.go | 9 +- monitoring/dashboards/dm.json | 6065 ------------------------ monitoring/rules/dm_worker.rules.yml | 173 - tests/_utils/check_metric | 6 +- tests/dmctl_basic/run.sh | 2 + 8 files changed, 24 insertions(+), 6261 deletions(-) delete mode 100644 monitoring/dashboards/dm.json delete mode 100644 monitoring/rules/dm_worker.rules.yml diff --git a/.gitignore b/.gitignore index c474aaad1a..b6679237de 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ relay_log/* vendor */*.DS_Store tidb-slow.log +/monitoring/dashboards/dm.json +/monitoring/rules/dm_worker.rules.yml diff --git a/dm/dm-ansible/conf/dm_worker.rules.yml b/dm/dm-ansible/conf/dm_worker.rules.yml index 8436c6e40a..db4d6c0357 100644 --- a/dm/dm-ansible/conf/dm_worker.rules.yml +++ b/dm/dm-ansible/conf/dm_worker.rules.yml @@ -1,6 +1,17 @@ groups: - name: alert.rules rules: + - alert: DM_master_all_down + expr: up{job="dm_master"} == 0 + labels: + env: ENV_LABELS_ENV + level: critical + expr: up{job="dm_master"} == 0 + annotations: + description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values: {{ $value }}' + value: '{{ $value }}' + summary: DM master all down, metrics not relyable + - alert: DM_remain_storage_of_relay_log expr: dm_relay_space{type="available"} < 10*1024*1024*1024 labels: @@ -126,12 +137,12 @@ groups: summary: dm syncer binlog file not catch up master server exceed 10 min - alert: DM_binlog_file_gap_between_relay_syncer - expr: dm_relay_binlog_file{node="relay"} - ON(instance, job) dm_syncer_binlog_file{node="syncer"} > 1 + expr: dm_relay_binlog_file{node="relay"} - ON(instance, job) group_right dm_syncer_binlog_file{node="syncer"} > 1 for: 10m labels: env: ENV_LABELS_ENV level: critical - expr: dm_relay_binlog_file{node="relay"} - ON(instance, job) dm_syncer_binlog_file{node="syncer"} > 1 + expr: dm_relay_binlog_file{node="relay"} - ON(instance, job) group_right dm_syncer_binlog_file{node="syncer"} > 1 annotations: description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' value: '{{ $value }}' diff --git a/dm/master/metrics/metrics.go b/dm/master/metrics/metrics.go index d7050a47ea..478e51c755 100644 --- a/dm/master/metrics/metrics.go +++ b/dm/master/metrics/metrics.go @@ -15,12 +15,10 @@ package metrics import ( "context" - "net/http" "time" cpu "github.com/pingcap/tidb-tools/pkg/utils" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/pingcap/dm/pkg/metricsproxy" ) @@ -121,9 +119,7 @@ func RunBackgroundJob(ctx context.Context) { // RegistryMetrics registries metrics for worker func RegistryMetrics() { - registry := prometheus.NewRegistry() - registry.MustRegister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{})) - registry.MustRegister(prometheus.NewGoCollector()) + registry := prometheus.DefaultRegisterer registry.MustRegister(workerState) registry.MustRegister(cpuUsageGauge) @@ -131,13 +127,6 @@ func RegistryMetrics() { registry.MustRegister(ddlErrCounter) registry.MustRegister(workerEventErrCounter) registry.MustRegister(startLeaderCounter) - - prometheus.DefaultGatherer = registry -} - -// GetMetricsHandler returns prometheus HTTP Handler -func GetMetricsHandler() http.Handler { - return promhttp.Handler() } // ReportWorkerStage is a setter for workerState diff --git a/dm/master/server.go b/dm/master/server.go index ddb5e67bb6..4645349842 100644 --- a/dm/master/server.go +++ b/dm/master/server.go @@ -179,7 +179,7 @@ func (s *Server) Start(ctx context.Context) (err error) { registerOnce.Do(metrics.RegistryMetrics) - // HTTP handlers on etcd's client IP:port + // HTTP handlers on etcd's client IP:port. etcd will add a builtin `/metrics` route // NOTE: after received any HTTP request from chrome browser, // the server may be blocked when closing sometime. // And any request to etcd's builtin handler has the same problem. @@ -187,10 +187,9 @@ func (s *Server) Start(ctx context.Context) (err error) { // But I haven't figured it out. // (maybe more requests are sent from chrome or its extensions). userHandles := map[string]http.Handler{ - "/apis/": apiHandler, - "/status": getStatusHandle(), - "/debug/": getDebugHandler(), - "/metrics": metrics.GetMetricsHandler(), + "/apis/": apiHandler, + "/status": getStatusHandle(), + "/debug/": getDebugHandler(), } // gRPC API server diff --git a/monitoring/dashboards/dm.json b/monitoring/dashboards/dm.json deleted file mode 100644 index 9e6b247b1f..0000000000 --- a/monitoring/dashboards/dm.json +++ /dev/null @@ -1,6065 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "dm-cluster", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1582881408361, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 53, - "panels": [], - "repeat": null, - "title": "overview (multiple instances)", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The current state of subtasks in instances.\n\n0: Invalid\n\n1: New\n\n2: Running\n\n3: Paused\n\n4: Stopped\n\n5: Finished", - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 1 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_worker_task_state{task=~\"$task\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "task state", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": "5", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The storage capacity of the disk occupied by the relay log", - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 1 - }, - "id": 47, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_relay_space{type=\"capacity\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "storage capacity", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The remaining storage capacity of the disk occupied by the relay log", - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 1 - }, - "id": 48, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_relay_space{type=\"available\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "storage remain", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of binlog files in the relay log that are behind the upstream master", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 8 - }, - "id": 49, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_relay_binlog_file{node=\"master\"} - ON(instance, job) dm_relay_binlog_file{node=\"relay\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "binlog file gap between master and relay", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The data import process percentage of Loader. The value range is 0% ~ 100%", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 8 - }, - "id": 50, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_loader_progress{task=~\"$task\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "load progress", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": "1", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of binlog files in binlog replication unit that are behind the master", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 8 - }, - "id": 51, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_syncer_binlog_file{task=~\"$task\", node=\"master\"} - ON(instance, task, job) dm_syncer_binlog_file{task=~\"$task\", node=\"syncer\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "binlog file gap between master and syncer", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "Is waiting shard DDL lock to be resolved, >0 means waiting", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 8 - }, - "id": 52, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_syncer_shard_lock_resolving{task=~\"$task\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "shard lock resolving", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": "1", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 15 - }, - "id": 67, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of error happens before operate", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 16 - }, - "id": 71, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_worker_operate_error{type=\"BeforeAnyOp\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "before any operate error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of error happens in operate source bound", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 16 - }, - "id": 72, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_worker_operate_error{type=\"SourceBound\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "source bound error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of error happens in operate start", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 16 - }, - "id": 76, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_worker_operate_error{type=\"Start\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "start error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of error happens in operate pause", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 16 - }, - "id": 74, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_worker_operate_error{type=\"Pause\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "pause error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of error happens in operate resume", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 23 - }, - "id": 78, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_worker_operate_error{type=\"Resume\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "resume error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of error happens in operate auto-resume", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 16 - }, - "id": 69, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_worker_operate_error{type=\"AutoResume\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "auto-resume error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of error happens in operate update", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 23 - }, - "id": 77, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_worker_operate_error{type=\"Update\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "update error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of error happens in operate stop", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 23 - }, - "id": 75, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_worker_operate_error{type=\"Stop\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "stop error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "operate error", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 54, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The current state of the subtask in the instance", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 2 - }, - "id": 40, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "dm_worker_task_state{task=~\"$task\",source_id=~\"$source\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "task state", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "invalid", - "value": "0" - }, - { - "op": "=", - "text": "new", - "value": "1" - }, - { - "op": "=", - "text": "running", - "value": "2" - }, - { - "op": "=", - "text": "paused", - "value": "3" - }, - { - "op": "=", - "text": "stopped", - "value": "4" - }, - { - "op": "=", - "text": "finished", - "value": "5" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "title": "task", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 100 - }, - "id": 55, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The storage capacity of the disk occupied by the relay log", - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 3 - }, - "id": 20, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "dm_relay_space{instance=~\"$instance\", type=\"capacity\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "storage capacity", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The remaining storage capacity of the disk occupied by the relay log", - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 3 - }, - "id": 21, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "dm_relay_space{instance=~\"$instance\", type=\"available\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "storage remain", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The relay log encounters an error within the DM-worker and exits", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 3 - }, - "id": 30, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "changes(dm_relay_exit_with_error_count{instance=~\"$instance\"}[30m])", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "process exits with error", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The number of corrupted relay log files", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 3 - }, - "id": 31, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "changes(dm_relay_data_corruption{instance=~\"$instance\"}[30m])", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "relay log data corruption", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The number of errors encountered when the relay log reads the binlog from the upstream MySQL", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 10 - }, - "id": 28, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "changes(dm_relay_read_error_count{instance=~\"$instance\"}[30m])", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "fail to read binlog from master", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The number of errors encountered when the relay log writes the binlog to disks", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 10 - }, - "id": 29, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "changes(dm_relay_write_error_count{instance=~\"$instance\"}[30m])", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "fail to write relay log", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The largest index number of relay log files. E.g. \"value = 1\" indicates \"mysql-bin.000001\"", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 10 - }, - "id": 5, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_relay_binlog_file{instance=~\"$instance\", node=\"master\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "master", - "refId": "A" - }, - { - "expr": "dm_relay_binlog_file{instance=~\"$instance\", node=\"relay\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "relay", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "binlog file index", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of binlog files in the relay log that are behind the upstream master", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 10 - }, - "id": 42, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_relay_binlog_file{instance=~\"$instance\", node=\"master\"} - ON(instance, job) dm_relay_binlog_file{instance=~\"$instance\", node=\"relay\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "binlog file gap between master and relay", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The write offset of the latest relay log file", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 17 - }, - "id": 6, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_relay_binlog_pos{instance=~\"$instance\", node=\"master\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "master", - "refId": "A" - }, - { - "expr": "dm_relay_binlog_pos{instance=~\"$instance\", node=\"relay\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "relay", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "binlog pos", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The duration that the relay log reads binlog event from the upstream MySQL (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 17 - }, - "id": 9, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_relay_read_binlog_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_relay_read_binlog_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_relay_read_binlog_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "read binlog event duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The duration that the relay log writes binlog event into the disks each time (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 17 - }, - "id": 12, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_relay_write_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_relay_write_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_relay_write_duration_bucket{instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "write relay log duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The size of a single binlog event that the relay log writes into the disks", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 17 - }, - "id": 25, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_relay_write_size_bucket{instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_relay_write_size_bucket{instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_relay_write_size_bucket{instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "binlog event size", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "relay log", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 56, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The data import process percentage of Loader. The value range is 0% ~ 100%", - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 4 - }, - "id": 15, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "dm_loader_progress{task=~\"$task\",source_id=~\"$source\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "load progress", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The total size of the data files in the full data imported by Loader (including the `INSERT INTO` statement)", - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 4 - }, - "id": 16, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "dm_loader_data_size_gauge{task=~\"$task\",source_id=~\"$source\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "data file size", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "Dumper encounters an error within the DM-worker and exits", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 4 - }, - "id": 32, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "changes(dm_dumpling_exit_with_error_count{task=~\"$task\",source_id=~\"$source\"}[30m])", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "dump process exits with error", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "Loader encounters an error within the DM-worker and exits", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 4 - }, - "id": 33, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "changes(dm_loader_exit_with_error_count{task=~\"$task\",source_id=~\"$source\"}[30m])", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "load process exits with error", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The total number of tables in the full data imported by Loader", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 11 - }, - "id": 23, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "dm_loader_table_gauge{task=~\"$task\",source_id=~\"$source\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "table count", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The total number of data files in the full data imported by Loader (including the `INSERT INTO` statement)", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 11 - }, - "id": 17, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "dm_loader_data_file_gauge{task=~\"$task\",source_id=~\"$source\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "data file count", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The duration that Loader executes a transaction (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 11 - }, - "id": 18, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_loader_txn_duration_time_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_loader_txn_duration_time_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_loader_txn_duration_time_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "transaction execution latency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The time it takes loader to execute every statement to the downstream (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 11 - }, - "id": 34, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_loader_stmt_duration_time_bucket{task=~\"$task\", type=\"begin\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "begin", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.90, sum(rate(dm_loader_stmt_duration_time_bucket{task=~\"$task\", type=\"stmt\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "stmt", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.90, sum(rate(dm_loader_stmt_duration_time_bucket{task=~\"$task\", type=\"commit\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "commit", - "refId": "C" - }, - { - "expr": "histogram_quantile(0.90, sum(rate(dm_loader_stmt_duration_time_bucket{task=~\"$task\", type=\"rollback\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "rollback", - "refId": "D" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "statement execution latency - 90", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "load dump files", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 57, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The predicted remaining time it takes Syncer to be completely synchronized with the master (in minutes)", - "format": "m", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 5 - }, - "id": 37, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "dm_syncer_remaining_time{task=~\"$task\", source_id=~\"$source\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "remaining time to sync", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The latency time it takes to replicate the binlog from master to Syncer (in seconds)", - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 5 - }, - "id": 38, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "dm_syncer_replication_lag{task=~\"$task\", instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "replicate lag", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "dm-cluster", - "description": "The binlog replication unit process encounters an error within the DM-worker and exits", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 5 - }, - "id": 39, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "changes(dm_syncer_exit_with_error_count{task=~\"$task\", source_id=~\"$source\"}[30m])", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "process exits with error", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of binlog files in binlog replication unit that are behind the master", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 12 - }, - "id": 43, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_syncer_binlog_file{source_id=~\"$source\", task=~\"$task\", node=\"master\"} - ON(source, task, job) dm_syncer_binlog_file{source_id=~\"$source\", task=~\"$task\", node=\"syncer\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "binlog file gap between master and syncer", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of binlog files in binlog replication unit that are behind relay", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 12 - }, - "id": 44, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_relay_binlog_file{instance=~\"$instance\", node=\"relay\"} - ON(instance, job) dm_syncer_binlog_file{instance=~\"$instance\", task=~\"$task\", node=\"syncer\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "binlog file gap between relay and syncer", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of binlog events received per unit of time (this number does not include the events that need to be skipped)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 12 - }, - "id": 4, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(dm_syncer_binlog_transform_cost_count{task=~\"$task\", source_id=~\"$source\", type=\"write_rows\"}[1m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "insert", - "refId": "A" - }, - { - "expr": "rate(dm_syncer_binlog_transform_cost_count{task=~\"$task\", source_id=~\"$source\", type=\"update_rows\"}[1m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "update", - "refId": "B" - }, - { - "expr": "rate(dm_syncer_binlog_transform_cost_count{task=~\"$task\", source_id=~\"$source\", type=\"delete_rows\"}[1m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "delete", - "refId": "C" - }, - { - "expr": "rate(dm_syncer_binlog_transform_cost_count{task=~\"$task\", source_id=~\"$source\", type=\"query\"}[1m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "query", - "refId": "D" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "binlog event QPS", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of binlog events received per unit of time that needs to be skipped", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 12 - }, - "id": 35, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(dm_syncer_skip_binlog_duration_count{task=~\"$task\", source_id=~\"$source\", type=\"rows\"}[1m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "rows", - "refId": "A" - }, - { - "expr": "rate(dm_syncer_skip_binlog_duration_count{task=~\"$task\", source_id=~\"$source\", type=\"query\"}[1m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "query", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "skipped binlog event QPS", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The duration that the binlog replication unit reads binlog event from the relay log or upstream MySQL (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 19 - }, - "id": 36, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_read_binlog_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_syncer_read_binlog_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_syncer_read_binlog_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "read binlog event duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The time it takes binlog replication unit to parse and transform the binlog into SQL statements (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 19 - }, - "id": 58, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_binlog_transform_cost_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_syncer_binlog_transform_cost_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_syncer_binlog_transform_cost_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "transform binlog event duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The time it takes binlog replication unit to dispatch a binlog event (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 19 - }, - "id": 59, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_dispatch_binlog_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_syncer_dispatch_binlog_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_syncer_dispatch_binlog_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "dispatch binlog event duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The time it takes binlog replication unit to execute the transaction to the downstream (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 19 - }, - "id": 60, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_txn_duration_time_bucket{task=~\"$task\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_syncer_txn_duration_time_bucket{task=~\"$task\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_syncer_txn_duration_time_bucket{task=~\"$task\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "transaction execution latency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The size of a single binlog event that the binlog replication unit reads from relay log or upstream master", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 26 - }, - "id": 61, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_binlog_event_size_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_syncer_binlog_event_size_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_syncer_binlog_event_size_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "binlog event size", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The remain length of DML job queues", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 26 - }, - "id": 62, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_syncer_queue_size{task=~\"$task\", source_id=~\"$source\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{queueNo}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "DML queue remain length", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of newly added jobs per unit of time", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 26 - }, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(dm_syncer_added_jobs_total{task=~\"$task\", source_id=~\"$source\"}[1m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{queueNo}}-{{type}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "total SQL jobs", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of finished jobs per unit of time", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 26 - }, - "id": 2, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(dm_syncer_finished_jobs_total{task=~\"$task\", source_id=~\"$source\"}[1m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{queueNo}}-{{type}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "finished SQL jobs", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The time it takes binlog replication unit to execute every statement to the downstream (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 33 - }, - "id": 1, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_stmt_duration_time_bucket{task=~\"$task\", type=\"begin\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "begin", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_stmt_duration_time_bucket{task=~\"$task\", type=\"stmt\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "stmt", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_stmt_duration_time_bucket{task=~\"$task\", type=\"commit\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "commit", - "refId": "C" - }, - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_stmt_duration_time_bucket{task=~\"$task\", type=\"rollback\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "rollback", - "refId": "D" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "statement execution latency - 90", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The time it takes binlog replication unit to add a job to the queue (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 33 - }, - "id": 63, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_add_job_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_syncer_add_job_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_syncer_add_job_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "add job duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The time it takes binlog replication unit to detect conflicts between DMLs (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 33 - }, - "id": 64, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_conflict_detect_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_syncer_conflict_detect_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_syncer_conflict_detect_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "DML conflict detect duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The time it takes binlog replication unit to skip a binlog event (in seconds)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 33 - }, - "id": 65, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(dm_syncer_skip_binlog_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(dm_syncer_skip_binlog_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(dm_syncer_skip_binlog_duration_bucket{task=~\"$task\", source_id=~\"$source\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "skip event duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "The number of unsynced tables in the subtask", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 40 - }, - "id": 45, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_syncer_unsynced_table_number{source_id=~\"$source\", task=~\"$task\"}", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "unsynced tables", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "Is waiting shard DDL lock to be resolved, >0 means waiting", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 40 - }, - "id": 46, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_syncer_shard_lock_resolving{source_id=~\"$source\", task=~\"$task\"}", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "shard lock resolving", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": "", - "logBase": 1, - "max": "1", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "Number of error happens when update heartbeat", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 47 - }, - "id": 90, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_syncer_heartbeat_update_error", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "heartbeat update error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": "", - "logBase": 1, - "max": "1", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "binlog replication", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 80, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "number of worker in different states, -1 - unrecognized, 0 - offline, 1 - free, 2 - bound", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 48 - }, - "id": 83, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "count_values(\"state\", dm_master_worker_state)", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "number of workers in different state", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "state of worker, -1 - unrecognized, 0 - offline, 1 - free, 2 - bound", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 48 - }, - "id": 81, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (worker) (dm_master_worker_state)", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "workers' state", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "number of shard DDL lock/operation error in one minute", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 55 - }, - "id": 82, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (task, type) (increase(dm_master_shard_ddl_error[1m]))", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "shard ddl error per minute", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "number of pending shard DDL in different states, Un-synced (waiting all upstream), Synced (all upstream finished, waiting all downstream)", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 55 - }, - "id": 84, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (task, type) (dm_master_ddl_state_number)", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "number of pending shard ddl", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "number of worker event error", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 48 - }, - "id": 85, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (type) (dm_master_worker_event_error)", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "number of worker event error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "dm-cluster", - "description": "number of dm-masters try to start leader components per minute", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 48 - }, - "id": 86, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "dm_master_start_leader_counter", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "number of dm-masters start leader components per minute", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "transparent": true, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "HA", - "type": "row" - } - ], - "refresh": "30s", - "schemaVersion": 18, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": "dm-cluster", - "definition": "", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "task", - "options": [], - "query": "label_values(dm_worker_task_state, task)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "dm-cluster", - "definition": "", - "hide": 0, - "includeAll": false, - "label": null, - "multi": true, - "name": "source", - "options": [], - "query": "label_values(dm_worker_task_state, source_id)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "dm-cluster", - "definition": "", - "hide": 0, - "includeAll": false, - "label": null, - "multi": true, - "name": "instance", - "options": [], - "query": "label_values(dm_worker_task_state{task=~\"$task\"}, instance)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Test-Cluster-DM", - "uid": "wkbFDNlZz", - "version": 1 -} \ No newline at end of file diff --git a/monitoring/rules/dm_worker.rules.yml b/monitoring/rules/dm_worker.rules.yml deleted file mode 100644 index 8436c6e40a..0000000000 --- a/monitoring/rules/dm_worker.rules.yml +++ /dev/null @@ -1,173 +0,0 @@ -groups: -- name: alert.rules - rules: - - alert: DM_remain_storage_of_relay_log - expr: dm_relay_space{type="available"} < 10*1024*1024*1024 - labels: - env: ENV_LABELS_ENV - level: critical - expr: dm_relay_space{type="available"} < 10*1024*1024*1024 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values: {{ $value }}' - value: '{{ $value }}' - summary: DM remain storage of relay log - - - alert: DM_relay_process_exits_with_error - expr: changes(dm_relay_exit_with_error_count[1m]) > 0 - labels: - env: ENV_LABELS_ENV - level: critical - expr: changes(dm_relay_exit_with_error_count[1m]) > 0 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values: {{ $value }}' - value: '{{ $value }}' - summary: DM relay process exits with error - - - alert: DM_relay_log_data_corruption - expr: changes(dm_relay_data_corruption[1m]) > 0 - labels: - env: ENV_LABELS_ENV - level: emergency - expr: changes(dm_relay_data_corruption[1m]) > 0 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values: {{ $value }}' - value: '{{ $value }}' - summary: DM relay log data corruption - - - alert: DM_fail_to_read_binlog_from_master - expr: changes(dm_relay_read_error_count[1m]) > 0 - labels: - env: ENV_LABELS_ENV - level: critical - expr: changes(dm_relay_read_error_count[1m]) > 0 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values: {{ $value }}' - value: '{{ $value }}' - summary: DM fail to read binlog from master - - - alert: DM_fail_to_write_relay_log - expr: changes(dm_relay_write_error_count[1m]) > 0 - labels: - env: ENV_LABELS_ENV - level: critical - expr: changes(dm_relay_write_error_count[1m]) > 0 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values: {{ $value }}' - value: '{{ $value }}' - summary: DM fail to write relay log - - - alert: DM_dump_process_exists_with_error - expr: changes(dm_mydumper_exit_with_error_count[1m]) > 0 - labels: - env: ENV_LABELS_ENV - level: critical - expr: changes(dm_mydumper_exit_with_error_count[1m]) > 0 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' - value: '{{ $value }}' - summary: DM dump process exists with error - - - alert: DM_load_process_exists_with_error - expr: changes(dm_loader_exit_with_error_count[1m]) > 0 - labels: - env: ENV_LABELS_ENV - level: critical - expr: changes(dm_loader_exit_with_error_count[1m]) > 0 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' - value: '{{ $value }}' - summary: DM load process exists with error - - - alert: DM_sync_process_exists_with_error - expr: changes(dm_syncer_exit_with_error_count[1m]) > 0 - labels: - env: ENV_LABELS_ENV - level: critical - expr: changes(dm_syncer_exit_with_error_count[1m]) > 0 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' - value: '{{ $value }}' - summary: DM sync process exists with error - - - alert: DM_task_state - expr: dm_worker_task_state == 3 - for: 20m - labels: - env: ENV_LABELS_ENV - level: critical - expr: dm_worker_task_state == 3 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' - value: '{{ $value }}' - summary: dm worker paused exceed 20 min - - - alert: DM_binlog_file_gap_between_master_relay - expr: dm_relay_binlog_file{node="master"} - ON(instance, job) dm_relay_binlog_file{node="relay"} > 1 - for: 10m - labels: - env: ENV_LABELS_ENV - level: critical - expr: dm_relay_binlog_file{node="master"} - ON(instance, job) dm_relay_binlog_file{node="relay"} > 1 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' - value: '{{ $value }}' - summary: dm relay binlog file not catch up master server exceed 10 min - - - alert: DM_binlog_file_gap_between_master_syncer - expr: dm_syncer_binlog_file{node="master"} - ON(instance, task, job) dm_syncer_binlog_file{node="syncer"} > 1 - for: 10m - labels: - env: ENV_LABELS_ENV - level: critical - expr: dm_syncer_binlog_file{node="master"} - ON(instance, task, job) dm_syncer_binlog_file{node="syncer"} > 1 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' - value: '{{ $value }}' - summary: dm syncer binlog file not catch up master server exceed 10 min - - - alert: DM_binlog_file_gap_between_relay_syncer - expr: dm_relay_binlog_file{node="relay"} - ON(instance, job) dm_syncer_binlog_file{node="syncer"} > 1 - for: 10m - labels: - env: ENV_LABELS_ENV - level: critical - expr: dm_relay_binlog_file{node="relay"} - ON(instance, job) dm_syncer_binlog_file{node="syncer"} > 1 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' - value: '{{ $value }}' - summary: dm syncer binlog file not catch up relay exceed 10 min - - - alert: DM_worker_offline - expr: dm_master_worker_state == 0 - for: 1h - labels: - env: ENV_LABELS_ENV - level: critical - expr: dm_master_worker_state == 0 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' - value: '{{ $value }}' - summary: dm worker offline exceed 1h - - - alert: DM_pending_DDL - expr: dm_master_ddl_state_number > 0 - for: 1h - labels: - env: ENV_LABELS_ENV - level: critical - expr: dm_master_ddl_state_number > 0 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' - value: '{{ $value }}' - summary: DDL pending exceed 1h - - - alert: DM_DDL_error - expr: increase(dm_master_shard_ddl_error[1m]) > 0 - labels: - env: ENV_LABELS_ENV - level: critical - expr: increase(dm_master_shard_ddl_error[1m]) > 0 - annotations: - description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}' - value: '{{ $value }}' - summary: DDL error happens \ No newline at end of file diff --git a/tests/_utils/check_metric b/tests/_utils/check_metric index 279ac2c1c7..3791c6cd4a 100755 --- a/tests/_utils/check_metric +++ b/tests/_utils/check_metric @@ -16,10 +16,8 @@ shift 3 counter=0 while [ $counter -lt $retry_count ]; do - metric=$(curl -s http://127.0.0.1:$port/metrics | grep $metric_name | awk '{print $2}') - cmp1=$(awk 'BEGIN{ print "'$lower'"<"'$metric'" }') - cmp2=$(awk 'BEGIN{ print "'$metric'"<"'$upper'" }') - if [ $cmp1 -eq 1 ] && [ $cmp2 -eq 1 ]; then + metric=$(curl -s http://127.0.0.1:$port/metrics | grep $metric_name | grep -v "#" | awk '{print $2}') + if [ $lower -lt $metric ] && [ $metric -lt $upper ]; then exit 0 fi ((counter+=1)) diff --git a/tests/dmctl_basic/run.sh b/tests/dmctl_basic/run.sh index 77a21dfdad..7e25083429 100755 --- a/tests/dmctl_basic/run.sh +++ b/tests/dmctl_basic/run.sh @@ -112,6 +112,8 @@ function run() { run_dm_master $WORK_DIR/master $MASTER_PORT $dm_master_conf check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT + # only one master, check it should be leader and etcd metrics works + check_metric $MASTER_PORT 'etcd_server_has_leader' 3 0 2 usage_and_arg_test