Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

server: add metric for connection idle time (#21265) #21301

Merged
merged 4 commits into from
Nov 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions infoschema/metric_table_def.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ var MetricTableMap = map[string]MetricTableDef{
Labels: []string{"instance"},
Comment: "TiDB current connection counts",
},
"tidb_connection_idle_duration": {
PromQL: `histogram_quantile($QUANTILE, sum(rate(tidb_server_conn_idle_duration_seconds_bucket{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (le,in_txn,instance))`,
Labels: []string{"instance", "in_txn"},
Quantile: 0.90,
Comment: "The quantile of TiDB connection idle durations(second)",
},
"tidb_connection_idle_total_count": {
PromQL: `sum(increase(tidb_server_conn_idle_duration_seconds_count{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (in_txn,instance)`,
Labels: []string{"instance", "in_txn"},
Comment: "The total count of TiDB connection idle",
},
"tidb_connection_idle_total_time": {
PromQL: `sum(increase(tidb_server_conn_idle_duration_seconds_sum{$LABEL_CONDITIONS}[$RANGE_DURATION])) by (in_txn,instance)`,
Labels: []string{"instance", "in_txn"},
Comment: "The total time of TiDB connection idle",
},
"node_process_open_fd_count": {
PromQL: "process_open_fds{$LABEL_CONDITIONS}",
Labels: []string{"instance", "job"},
Expand Down
132 changes: 130 additions & 2 deletions metrics/grafana/tidb.json
Original file line number Diff line number Diff line change
Expand Up @@ -601,9 +601,9 @@
"fill": 1,
"gridPos": {
"h": 6,
"w": 24,
"w": 12,
"x": 0,
"y": 19
"y": 25
},
"id": 112,
"legend": {
Expand Down Expand Up @@ -691,6 +691,134 @@
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"description": "TiDB connection idle durations",
"fill": 1,
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 25
},
"id": 218,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(tidb_server_conn_idle_duration_seconds_bucket{in_txn='1'}[1m])) by (le,in_txn,instance))",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "99-in-txn",
"refId": "A"
},
{
"expr": "histogram_quantile(0.99, sum(rate(tidb_server_conn_idle_duration_seconds_bucket{in_txn='0'}[1m])) by (le,in_txn,instance))",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "99-not-in-txn",
"refId": "B"
},
{
"expr": "histogram_quantile(0.90, sum(rate(tidb_server_conn_idle_duration_seconds_bucket{in_txn='1'}[1m])) by (le,in_txn,instance))",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "90-in-txn",
"refId": "C"
},
{
"expr": "histogram_quantile(0.90, sum(rate(tidb_server_conn_idle_duration_seconds_bucket{in_txn='0'}[1m])) by (le,in_txn,instance))",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "90-not-in-txn",
"refId": "D"
},
{
"expr": "histogram_quantile(0.80, sum(rate(tidb_server_conn_idle_duration_seconds_bucket{in_txn='1'}[1m])) by (le,in_txn,instance))",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "80-in-txn",
"refId": "E"
},
{
"expr": "histogram_quantile(0.80, sum(rate(tidb_server_conn_idle_duration_seconds_bucket{in_txn='0'}[1m])) by (le,in_txn,instance))",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "80-not-in-txn",
"refId": "F"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Connection Idle Duration",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 2,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
Expand Down
1 change: 1 addition & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,5 @@ func RegisterMetrics() {
prometheus.MustRegister(TiKVNoAvailableConnectionCounter)
prometheus.MustRegister(MaxProcs)
prometheus.MustRegister(GOGC)
prometheus.MustRegister(ConnIdleDurationHistogram)
}
9 changes: 9 additions & 0 deletions metrics/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,15 @@ var (
Name: "gogc",
Help: "The value of GOGC",
})

// ConnIdleDurationHistogram is a histogram vector of connection idle time,
// in seconds, exposed as tidb_server_conn_idle_duration_seconds and
// partitioned by the LblInTxn label.
ConnIdleDurationHistogram = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "tidb",
Subsystem: "server",
Name: "conn_idle_duration_seconds",
Help: "Bucketed histogram of connection idle time (s).",
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 29), // 29 doubling buckets: 0.5ms ~ 0.0005*2^28s (about 1.5 days)
}, []string{LblInTxn})
)

// ExecuteErrorToLabel converts an execute error to label.
Expand Down
1 change: 1 addition & 0 deletions metrics/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,5 @@ const (
LblBatchGet = "batch_get"
LblGet = "get"
LblLockKeys = "lock_keys"
LblInTxn = "in_txn"
)
14 changes: 13 additions & 1 deletion server/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ var (
queryDurationHistogramExecute = metrics.QueryDurationHistogram.WithLabelValues("Execute")
queryDurationHistogramSet = metrics.QueryDurationHistogram.WithLabelValues("Set")
queryDurationHistogramGeneral = metrics.QueryDurationHistogram.WithLabelValues(metrics.LblGeneral)

// Idle-duration observers pre-bound to a fixed label value of
// metrics.ConnIdleDurationHistogram: "0" for the not-in-transaction series
// and "1" for the in-transaction series.
connIdleDurationHistogramNotInTxn = metrics.ConnIdleDurationHistogram.WithLabelValues("0")
connIdleDurationHistogramInTxn = metrics.ConnIdleDurationHistogram.WithLabelValues("1")
)

// newClientConn creates a *clientConn object.
Expand All @@ -140,6 +143,7 @@ func newClientConn(s *Server) *clientConn {
collation: mysql.DefaultCollationID,
alloc: arena.NewAllocator(32 * 1024),
status: connStatusDispatching,
lastActive: time.Now(),
}
}

Expand All @@ -164,6 +168,7 @@ type clientConn struct {
status int32 // dispatching/reading/shutdown/waitshutdown
lastCode uint16 // last error code
collation uint8 // collation used by client, may be different from the collation used by database.
lastActive time.Time

// mu is used for cancelling the execution of current transaction.
mu struct {
Expand Down Expand Up @@ -903,6 +908,13 @@ func (cc *clientConn) dispatch(ctx context.Context, data []byte) error {
// reset killed for each request
atomic.StoreUint32(&cc.ctx.GetSessionVars().Killed, 0)
}()
// Record how long the connection was idle: the time elapsed between
// cc.lastActive and the start of this dispatch call. The sample goes to the
// in-transaction series when the session status has ServerStatusInTrans set,
// and to the not-in-transaction series otherwise.
t := time.Now()
if (cc.ctx.Status() & mysql.ServerStatusInTrans) > 0 {
connIdleDurationHistogramInTxn.Observe(t.Sub(cc.lastActive).Seconds())
} else {
connIdleDurationHistogramNotInTxn.Observe(t.Sub(cc.lastActive).Seconds())
}

span := opentracing.StartSpan("server.dispatch")

var cancelFunc context.CancelFunc
Expand All @@ -911,7 +923,6 @@ func (cc *clientConn) dispatch(ctx context.Context, data []byte) error {
cc.mu.cancelFunc = cancelFunc
cc.mu.Unlock()

t := time.Now()
cc.lastPacket = data
cmd := data[0]
data = data[1:]
Expand Down Expand Up @@ -948,6 +959,7 @@ func (cc *clientConn) dispatch(ctx context.Context, data []byte) error {

cc.server.releaseToken(token)
span.Finish()
cc.lastActive = time.Now()
}()

vars := cc.ctx.GetSessionVars()
Expand Down