From 2baf8c2e4f51f2c21fd48cf032003af5d2911998 Mon Sep 17 00:00:00 2001 From: cfzjywxk Date: Sun, 12 Feb 2023 11:08:01 +0800 Subject: [PATCH] metric: seperate the internal and external transaction metrics (#41206) ref pingcap/tidb#41203 --- executor/simple.go | 22 +++++++--- metrics/grafana/tidb.json | 28 +++++++------ metrics/session.go | 9 ++-- session/session.go | 76 +++++++++++++++++++++++++--------- session/tidb.go | 22 +++++++--- store/driver/txn/txn_driver.go | 2 + 6 files changed, 112 insertions(+), 47 deletions(-) diff --git a/executor/simple.go b/executor/simple.go index e291e42e04224..dad3a6e63781e 100644 --- a/executor/simple.go +++ b/executor/simple.go @@ -66,8 +66,10 @@ import ( const notSpecified = -1 var ( - transactionDurationPessimisticRollback = metrics.TransactionDuration.WithLabelValues(metrics.LblPessimistic, metrics.LblRollback) - transactionDurationOptimisticRollback = metrics.TransactionDuration.WithLabelValues(metrics.LblOptimistic, metrics.LblRollback) + transactionDurationPessimisticRollbackInternal = metrics.TransactionDuration.WithLabelValues(metrics.LblPessimistic, metrics.LblRollback, metrics.LblInternal) + transactionDurationPessimisticRollbackGeneral = metrics.TransactionDuration.WithLabelValues(metrics.LblPessimistic, metrics.LblRollback, metrics.LblGeneral) + transactionDurationOptimisticRollbackInternal = metrics.TransactionDuration.WithLabelValues(metrics.LblOptimistic, metrics.LblRollback, metrics.LblInternal) + transactionDurationOptimisticRollbackGeneral = metrics.TransactionDuration.WithLabelValues(metrics.LblOptimistic, metrics.LblRollback, metrics.LblGeneral) ) // SimpleExec represents simple statement executor. @@ -814,10 +816,18 @@ func (e *SimpleExec) executeRollback(s *ast.RollbackStmt) error { sessVars.SetInTxn(false) if txn.Valid() { duration := time.Since(sessVars.TxnCtx.CreateTime).Seconds() - if sessVars.TxnCtx.IsPessimistic { - transactionDurationPessimisticRollback.Observe(duration) - } else { - transactionDurationOptimisticRollback.Observe(duration) + isInternal := false + if internal := txn.GetOption(kv.RequestSourceInternal); internal != nil && internal.(bool) { + isInternal = true + } + if isInternal && sessVars.TxnCtx.IsPessimistic { + transactionDurationPessimisticRollbackInternal.Observe(duration) + } else if isInternal && !sessVars.TxnCtx.IsPessimistic { + transactionDurationOptimisticRollbackInternal.Observe(duration) + } else if !isInternal && sessVars.TxnCtx.IsPessimistic { + transactionDurationPessimisticRollbackGeneral.Observe(duration) + } else if !isInternal && !sessVars.TxnCtx.IsPessimistic { + transactionDurationOptimisticRollbackGeneral.Observe(duration) } sessVars.TxnCtx.ClearDelta() return txn.Rollback() diff --git a/metrics/grafana/tidb.json b/metrics/grafana/tidb.json index 612c62ee641bf..8c19fe9a56bf9 100644 --- a/metrics/grafana/tidb.json +++ b/metrics/grafana/tidb.json @@ -3845,12 +3845,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_session_transaction_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type, txn_mode)", + "expr": "sum(rate(tidb_session_transaction_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", scope=~\"general\"}[1m])) by (type, txn_mode)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}-{{txn_mode}}", "refId": "A", "step": 10 + }, + { + "expr": "sum(rate(tidb_session_transaction_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", scope=~\"internal\"}[1m])) by (type, txn_mode)", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "Internal-{{type}}-{{txn_mode}}", + "refId": "B", + "step": 10 } ], "thresholds": [], @@ -3947,24 +3956,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tidb_session_transaction_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, txn_mode))", + "expr": "histogram_quantile(0.99, sum(rate(tidb_session_transaction_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", scope=~\"general\"}[1m])) by (le, txn_mode))", "format": "time_series", "intervalFactor": 2, "legendFormat": "99-{{txn_mode}}", "refId": "A" }, { - "expr": "histogram_quantile(0.95, sum(rate(tidb_session_transaction_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, txn_mode))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95-{{txn_mode}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.80, sum(rate(tidb_session_transaction_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, txn_mode))", + "expr": "sum(rate(tidb_session_transaction_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", scope=~\"general\"}[1m])) by (txn_mode) / sum(rate(tidb_session_transaction_duration_seconds_count{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", scope=~\"general\"}[1m])) by (txn_mode)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "80-{{txn_mode}}", + "legendFormat": "avg-{{txn_mode}}", "refId": "C" } ], @@ -4045,7 +4047,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "sum(increase(tidb_session_transaction_statement_num_bucket{instance=~\"$instance\"}[1m])) by (le)", + "expr": "sum(increase(tidb_session_transaction_statement_num_bucket{instance=~\"$instance\", scope=~\"general\"}[1m])) by (le)", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", @@ -4114,7 +4116,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "sum(increase(tidb_session_retry_num_bucket{instance=~\"$instance\"}[1m])) by (le)", + "expr": "sum(increase(tidb_session_retry_num_bucket{instance=~\"$instance\", scope=~\"general\"}[1m])) by (le)", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", diff --git a/metrics/session.go b/metrics/session.go index 962ed67477ebb..208092c11b087 100644 --- a/metrics/session.go +++ b/metrics/session.go @@ -59,14 +59,14 @@ var ( Name: "schema_lease_error_total", Help: "Counter of schema lease error", }, []string{LblType}) - SessionRetry = prometheus.NewHistogram( + SessionRetry = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Namespace: "tidb", Subsystem: "session", Name: "retry_num", Help: "Bucketed histogram of session retry count.", Buckets: prometheus.LinearBuckets(0, 1, 21), // 0 ~ 20 - }) + }, []string{LblScope}) SessionRetryErrorCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "tidb", @@ -90,7 +90,7 @@ var ( Name: "transaction_statement_num", Help: "Bucketed histogram of statements count in each transaction.", Buckets: prometheus.ExponentialBuckets(1, 2, 16), // 1 ~ 32768 - }, []string{LblTxnMode, LblType}) + }, []string{LblTxnMode, LblType, LblScope}) TransactionDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ @@ -99,7 +99,7 @@ var ( Name: "transaction_duration_seconds", Help: "Bucketed histogram of a transaction execution duration, including retry.", Buckets: prometheus.ExponentialBuckets(0.001, 2, 28), // 1ms ~ 1.5days - }, []string{LblTxnMode, LblType}) + }, []string{LblTxnMode, LblType, LblScope}) StatementDeadlockDetectDuration = prometheus.NewHistogram( prometheus.HistogramOpts{ @@ -242,4 +242,5 @@ const ( LblAggressiveLockingTxnEffective = "txn-effective" LblAggressiveLockingStmtUsed = "stmt-used" LblAggressiveLockingStmtEffective = "stmt-effective" + LblScope = "scope" ) diff --git a/session/session.go b/session/session.go index 84c95bdb3ef9c..119e50622c33f 100644 --- a/session/session.go +++ b/session/session.go @@ -113,14 +113,24 @@ import ( ) var ( - statementPerTransactionPessimisticOK = metrics.StatementPerTransaction.WithLabelValues(metrics.LblPessimistic, metrics.LblOK) - statementPerTransactionPessimisticError = metrics.StatementPerTransaction.WithLabelValues(metrics.LblPessimistic, metrics.LblError) - statementPerTransactionOptimisticOK = metrics.StatementPerTransaction.WithLabelValues(metrics.LblOptimistic, metrics.LblOK) - statementPerTransactionOptimisticError = metrics.StatementPerTransaction.WithLabelValues(metrics.LblOptimistic, metrics.LblError) - transactionDurationPessimisticCommit = metrics.TransactionDuration.WithLabelValues(metrics.LblPessimistic, metrics.LblCommit) - transactionDurationPessimisticAbort = metrics.TransactionDuration.WithLabelValues(metrics.LblPessimistic, metrics.LblAbort) - transactionDurationOptimisticCommit = metrics.TransactionDuration.WithLabelValues(metrics.LblOptimistic, metrics.LblCommit) - transactionDurationOptimisticAbort = metrics.TransactionDuration.WithLabelValues(metrics.LblOptimistic, metrics.LblAbort) + statementPerTransactionPessimisticOKInternal = metrics.StatementPerTransaction.WithLabelValues(metrics.LblPessimistic, metrics.LblOK, metrics.LblInternal) + statementPerTransactionPessimisticOKGeneral = metrics.StatementPerTransaction.WithLabelValues(metrics.LblPessimistic, metrics.LblOK, metrics.LblGeneral) + statementPerTransactionPessimisticErrorInternal = metrics.StatementPerTransaction.WithLabelValues(metrics.LblPessimistic, metrics.LblError, metrics.LblInternal) + statementPerTransactionPessimisticErrorGeneral = metrics.StatementPerTransaction.WithLabelValues(metrics.LblPessimistic, metrics.LblError, metrics.LblGeneral) + statementPerTransactionOptimisticOKInternal = metrics.StatementPerTransaction.WithLabelValues(metrics.LblOptimistic, metrics.LblOK, metrics.LblInternal) + statementPerTransactionOptimisticOKGeneral = metrics.StatementPerTransaction.WithLabelValues(metrics.LblOptimistic, metrics.LblOK, metrics.LblGeneral) + statementPerTransactionOptimisticErrorInternal = metrics.StatementPerTransaction.WithLabelValues(metrics.LblOptimistic, metrics.LblError, metrics.LblInternal) + statementPerTransactionOptimisticErrorGeneral = metrics.StatementPerTransaction.WithLabelValues(metrics.LblOptimistic, metrics.LblError, metrics.LblGeneral) + transactionDurationPessimisticCommitInternal = metrics.TransactionDuration.WithLabelValues(metrics.LblPessimistic, metrics.LblCommit, metrics.LblInternal) + transactionDurationPessimisticCommitGeneral = metrics.TransactionDuration.WithLabelValues(metrics.LblPessimistic, metrics.LblCommit, metrics.LblGeneral) + transactionDurationPessimisticAbortInternal = metrics.TransactionDuration.WithLabelValues(metrics.LblPessimistic, metrics.LblAbort, metrics.LblInternal) + transactionDurationPessimisticAbortGeneral = metrics.TransactionDuration.WithLabelValues(metrics.LblPessimistic, metrics.LblAbort, metrics.LblGeneral) + transactionDurationOptimisticCommitInternal = metrics.TransactionDuration.WithLabelValues(metrics.LblOptimistic, metrics.LblCommit, metrics.LblInternal) + transactionDurationOptimisticCommitGeneral = metrics.TransactionDuration.WithLabelValues(metrics.LblOptimistic, metrics.LblCommit, metrics.LblGeneral) + transactionDurationOptimisticAbortInternal = metrics.TransactionDuration.WithLabelValues(metrics.LblOptimistic, metrics.LblAbort, metrics.LblInternal) + transactionDurationOptimisticAbortGeneral = metrics.TransactionDuration.WithLabelValues(metrics.LblOptimistic, metrics.LblAbort, metrics.LblGeneral) + transactionRetryInternal = metrics.SessionRetry.WithLabelValues(metrics.LblInternal) + transactionRetryGeneral = metrics.SessionRetry.WithLabelValues(metrics.LblGeneral) sessionExecuteCompileDurationInternal = metrics.SessionExecuteCompileDuration.WithLabelValues(metrics.LblInternal) sessionExecuteCompileDurationGeneral = metrics.SessionExecuteCompileDuration.WithLabelValues(metrics.LblGeneral) @@ -954,6 +964,10 @@ func (s *session) doCommitWithRetry(ctx context.Context) error { // If the transaction is invalid, maybe it has already been rolled back by the client. return nil } + isInternalTxn := false + if internal := s.txn.GetOption(kv.RequestSourceInternal); internal != nil && internal.(bool) { + isInternalTxn = true + } var err error txnSize := s.txn.Size() isPessimistic := s.txn.IsPessimistic() @@ -998,7 +1012,7 @@ func (s *session) doCommitWithRetry(ctx context.Context) error { } counter := s.sessionVars.TxnCtx.StatementCount duration := time.Since(s.GetSessionVars().TxnCtx.CreateTime).Seconds() - s.recordOnTransactionExecution(err, counter, duration) + s.recordOnTransactionExecution(err, counter, duration, isInternalTxn) if err != nil { if !errIsNoisy(err) { @@ -1204,7 +1218,11 @@ func (s *session) retry(ctx context.Context, maxCnt uint) (err error) { defer func() { s.sessionVars.RetryInfo.Retrying = false // retryCnt only increments on retryable error, so +1 here. - metrics.SessionRetry.Observe(float64(retryCnt + 1)) + if s.sessionVars.InRestrictedSQL { + transactionRetryInternal.Observe(float64(retryCnt + 1)) + } else { + transactionRetryGeneral.Observe(float64(retryCnt + 1)) + } s.sessionVars.SetInTxn(false) if err != nil { s.RollbackTxn(ctx) @@ -3834,22 +3852,42 @@ func logGeneralQuery(execStmt *executor.ExecStmt, s *session, isPrepared bool) { } } -func (s *session) recordOnTransactionExecution(err error, counter int, duration float64) { +func (s *session) recordOnTransactionExecution(err error, counter int, duration float64, isInternal bool) { if s.sessionVars.TxnCtx.IsPessimistic { if err != nil { - statementPerTransactionPessimisticError.Observe(float64(counter)) - transactionDurationPessimisticAbort.Observe(duration) + if isInternal { + transactionDurationPessimisticAbortInternal.Observe(duration) + statementPerTransactionPessimisticErrorInternal.Observe(float64(counter)) + } else { + transactionDurationPessimisticAbortGeneral.Observe(duration) + statementPerTransactionPessimisticErrorGeneral.Observe(float64(counter)) + } } else { - statementPerTransactionPessimisticOK.Observe(float64(counter)) - transactionDurationPessimisticCommit.Observe(duration) + if isInternal { + transactionDurationPessimisticCommitInternal.Observe(duration) + statementPerTransactionPessimisticOKInternal.Observe(float64(counter)) + } else { + transactionDurationPessimisticCommitGeneral.Observe(duration) + statementPerTransactionPessimisticOKGeneral.Observe(float64(counter)) + } } } else { if err != nil { - statementPerTransactionOptimisticError.Observe(float64(counter)) - transactionDurationOptimisticAbort.Observe(duration) + if isInternal { + transactionDurationOptimisticAbortInternal.Observe(duration) + statementPerTransactionOptimisticErrorInternal.Observe(float64(counter)) + } else { + transactionDurationOptimisticAbortGeneral.Observe(duration) + statementPerTransactionOptimisticErrorGeneral.Observe(float64(counter)) + } } else { - statementPerTransactionOptimisticOK.Observe(float64(counter)) - transactionDurationOptimisticCommit.Observe(duration) + if isInternal { + transactionDurationOptimisticCommitInternal.Observe(duration) + statementPerTransactionOptimisticOKInternal.Observe(float64(counter)) + } else { + transactionDurationOptimisticCommitGeneral.Observe(duration) + statementPerTransactionOptimisticOKGeneral.Observe(float64(counter)) + } } } } diff --git a/session/tidb.go b/session/tidb.go index 41b866da8ff85..c5b95b6999afb 100644 --- a/session/tidb.go +++ b/session/tidb.go @@ -220,12 +220,20 @@ func Parse(ctx sessionctx.Context, src string) ([]ast.StmtNode, error) { return stmts, nil } -func recordAbortTxnDuration(sessVars *variable.SessionVars) { +func recordAbortTxnDuration(sessVars *variable.SessionVars, isInternal bool) { duration := time.Since(sessVars.TxnCtx.CreateTime).Seconds() if sessVars.TxnCtx.IsPessimistic { - transactionDurationPessimisticAbort.Observe(duration) + if isInternal { + transactionDurationPessimisticAbortInternal.Observe(duration) + } else { + transactionDurationPessimisticAbortGeneral.Observe(duration) + } } else { - transactionDurationOptimisticAbort.Observe(duration) + if isInternal { + transactionDurationOptimisticAbortInternal.Observe(duration) + } else { + transactionDurationOptimisticAbortGeneral.Observe(duration) + } } } @@ -265,16 +273,20 @@ func finishStmt(ctx context.Context, se *session, meetsErr error, sql sqlexec.St } func autoCommitAfterStmt(ctx context.Context, se *session, meetsErr error, sql sqlexec.Statement) error { + isInternal := false + if internal := se.txn.GetOption(kv.RequestSourceInternal); internal != nil && internal.(bool) { + isInternal = true + } sessVars := se.sessionVars if meetsErr != nil { if !sessVars.InTxn() { logutil.BgLogger().Info("rollbackTxn called due to ddl/autocommit failure") se.RollbackTxn(ctx) - recordAbortTxnDuration(sessVars) + recordAbortTxnDuration(sessVars, isInternal) } else if se.txn.Valid() && se.txn.IsPessimistic() && executor.ErrDeadlock.Equal(meetsErr) { logutil.BgLogger().Info("rollbackTxn for deadlock", zap.Uint64("txn", se.txn.StartTS())) se.RollbackTxn(ctx) - recordAbortTxnDuration(sessVars) + recordAbortTxnDuration(sessVars, isInternal) } return meetsErr } diff --git a/store/driver/txn/txn_driver.go b/store/driver/txn/txn_driver.go index 6ac8700c3cbc4..5878413024bf2 100644 --- a/store/driver/txn/txn_driver.go +++ b/store/driver/txn/txn_driver.go @@ -283,6 +283,8 @@ func (txn *tikvTxn) GetOption(opt int) interface{} { return txn.KVTxn.GetScope() case kv.TableToColumnMaps: return txn.columnMapsCache + case kv.RequestSourceInternal: + return txn.RequestSourceInternal case kv.RequestSourceType: return txn.RequestSourceType default: