Skip to content

Commit

Permalink
sql, insights: record error message for failed stmts
Browse files Browse the repository at this point in the history
Part of: cockroachdb#87785.

Previously, the insights subsystem did not keep track of the error
messages for failed executions, only the error codes.

This commit updates the `[cluster|node]_execution_insights` and
`[cluster|node]_txn_execution_insights` virtual tables to include a
`last_error_redactable` column, which contains the most recent error message.

Release note (sql change): adds a `last_error_redactable` column to the
`[cluster|node]_execution_insights` and `[cluster|node]_txn_execution_insights`
tables, which keeps track of the error message for failed executions.

Co-authored-by: gtr <gerardo@cockroachlabs.com>
  • Loading branch information
2 people authored and xinhaoz committed Sep 20, 2023
1 parent cd9f01e commit d0ce6f4
Show file tree
Hide file tree
Showing 13 changed files with 252 additions and 174 deletions.
3 changes: 1 addition & 2 deletions pkg/ccl/logictestccl/testdata/logic_test/crdb_internal
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,7 @@ ALTER TENANT [5] GRANT CAPABILITY can_admin_split
query ITT colnames,retry,rowsort
SELECT * FROM crdb_internal.node_tenant_capabilities_cache WHERE capability_name = 'can_admin_split'
----
tenant_id capability_name capability_value
tenant_id capability_name capability_value
1 can_admin_split true
5 can_admin_split true

subtest end
16 changes: 8 additions & 8 deletions pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant
Original file line number Diff line number Diff line change
Expand Up @@ -327,25 +327,25 @@ SELECT * FROM crdb_internal.node_inflight_trace_spans WHERE span_id < 0
----
trace_id parent_span_id span_id goroutine_id finished start_time duration operation

query TTTBTTTTTIITITTTTTTTTTTTTIT colnames
query TTTBTTTTTIITITTTTTTTTTTTTITT colnames
SELECT * FROM crdb_internal.cluster_execution_insights WHERE query = ''
----
session_id txn_id txn_fingerprint_id stmt_id stmt_fingerprint_id problem causes query status start_time end_time full_scan user_name app_name database_name plan_gist rows_read rows_written priority retries last_retry_reason exec_node_ids contention index_recommendations implicit_txn cpu_sql_nanos error_code
session_id txn_id txn_fingerprint_id stmt_id stmt_fingerprint_id problem causes query status start_time end_time full_scan user_name app_name database_name plan_gist rows_read rows_written priority retries last_retry_reason exec_node_ids contention index_recommendations implicit_txn cpu_sql_nanos error_code last_error_redactable

query TTTBTTTTTIITITTTTTTTTTTTTIT colnames
query TTTBTTTTTIITITTTTTTTTTTTTITT colnames
SELECT * FROM crdb_internal.node_execution_insights WHERE query = ''
----
session_id txn_id txn_fingerprint_id stmt_id stmt_fingerprint_id problem causes query status start_time end_time full_scan user_name app_name database_name plan_gist rows_read rows_written priority retries last_retry_reason exec_node_ids contention index_recommendations implicit_txn cpu_sql_nanos error_code
session_id txn_id txn_fingerprint_id stmt_id stmt_fingerprint_id problem causes query status start_time end_time full_scan user_name app_name database_name plan_gist rows_read rows_written priority retries last_retry_reason exec_node_ids contention index_recommendations implicit_txn cpu_sql_nanos error_code last_error_redactable

query TTTBTTTTTIITITTTTTITT colnames
query TTTBTTTTTIITITTTTTITTT colnames
SELECT * FROM crdb_internal.cluster_txn_execution_insights WHERE query = ''
----
txn_id txn_fingerprint_id query implicit_txn session_id start_time end_time user_name app_name rows_read rows_written priority retries last_retry_reason contention problems causes stmt_execution_ids cpu_sql_nanos last_error_code status
txn_id txn_fingerprint_id query implicit_txn session_id start_time end_time user_name app_name rows_read rows_written priority retries last_retry_reason contention problems causes stmt_execution_ids cpu_sql_nanos last_error_code last_error_redactable status

query TTTBTTTTTIITITTTTTITT colnames
query TTTBTTTTTIITITTTTTITTT colnames
SELECT * FROM crdb_internal.node_txn_execution_insights WHERE query = ''
----
txn_id txn_fingerprint_id query implicit_txn session_id start_time end_time user_name app_name rows_read rows_written priority retries last_retry_reason contention problems causes stmt_execution_ids cpu_sql_nanos last_error_code status
txn_id txn_fingerprint_id query implicit_txn session_id start_time end_time user_name app_name rows_read rows_written priority retries last_retry_reason contention problems causes stmt_execution_ids cpu_sql_nanos last_error_code last_error_redactable status

query ITTI
SELECT range_id, start_pretty, end_pretty, lease_holder FROM crdb_internal.ranges
Expand Down
8 changes: 8 additions & 0 deletions pkg/cli/zip_table_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ var zipInternalTablesPerCluster = DebugZipTableRegistry{
"index_recommendations",
"retries",
"last_retry_reason",
"error_code",
"crdb_internal.redact(last_error_redactable) as last_error_redactable",
},
},
"crdb_internal.cluster_locks": {
Expand Down Expand Up @@ -283,6 +285,8 @@ var zipInternalTablesPerCluster = DebugZipTableRegistry{
"problems",
"causes",
"stmt_execution_ids",
"last_error_code",
"crdb_internal.redact(last_error_redactable) as last_error_redactable",
},
},
`"".crdb_internal.create_function_statements`: {
Expand Down Expand Up @@ -698,6 +702,8 @@ var zipInternalTablesPerNode = DebugZipTableRegistry{
"priority",
"retries",
"exec_node_ids",
"error_code",
"crdb_internal.redact(last_error_redactable) as last_error_redactable",
},
},
"crdb_internal.node_inflight_trace_spans": {
Expand Down Expand Up @@ -947,6 +953,8 @@ var zipInternalTablesPerNode = DebugZipTableRegistry{
"problems",
"causes",
"stmt_execution_ids",
"last_error_code",
"crdb_internal.redact(last_error_redactable) as last_error_redactable",
},
},
"crdb_internal.node_txn_stats": {
Expand Down
44 changes: 33 additions & 11 deletions pkg/sql/crdb_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -8018,6 +8018,7 @@ CREATE TABLE crdb_internal.%s (
stmt_execution_ids STRING[] NOT NULL,
cpu_sql_nanos INT8,
last_error_code STRING,
last_error_redactable STRING,
status STRING NOT NULL
)`

Expand All @@ -8043,11 +8044,11 @@ func populateTxnExecutionInsights(
addRow func(...tree.Datum) error,
request *serverpb.ListExecutionInsightsRequest,
) (err error) {
hasRoleOption, _, err := p.HasViewActivityOrViewActivityRedactedRole(ctx)
// Check if the user has sufficient privileges.
hasPrivs, shouldRedactError, err := p.HasViewActivityOrViewActivityRedactedRole(ctx)
if err != nil {
return err
}
if !hasRoleOption {
} else if !hasPrivs {
return noViewActivityOrViewActivityRedactedRoleError(p.User())
}

Expand All @@ -8063,7 +8064,6 @@ func populateTxnExecutionInsights(
continue
}

var errorCode string
var queryBuilder strings.Builder
for i := range insight.Statements {
// Build query string.
Expand All @@ -8078,10 +8078,18 @@ func populateTxnExecutionInsights(
queryBuilder.WriteString(" ; ")
}
queryBuilder.WriteString(insight.Statements[i].Query)
}

if insight.Statements[i].ErrorCode != "" {
errorCode = insight.Statements[i].ErrorCode
}
errorCode := tree.DNull
if insight.Transaction.LastErrorCode != "" {
errorCode = tree.NewDString(insight.Transaction.LastErrorCode)
}

var errorMsg tree.Datum
if shouldRedactError {
errorMsg = tree.NewDString(string(insight.Transaction.LastErrorMsg.Redact()))
} else {
errorMsg = tree.NewDString(string(insight.Transaction.LastErrorMsg))
}

problems := tree.NewDArray(types.String)
Expand Down Expand Up @@ -8150,7 +8158,8 @@ func populateTxnExecutionInsights(
causes,
stmtIDs,
tree.NewDInt(tree.DInt(insight.Transaction.CPUSQLNanos)),
tree.NewDString(errorCode),
errorCode,
errorMsg,
tree.NewDString(insight.Transaction.Status.String()),
))

Expand Down Expand Up @@ -8191,7 +8200,8 @@ CREATE TABLE crdb_internal.%s (
index_recommendations STRING[] NOT NULL,
implicit_txn BOOL NOT NULL,
cpu_sql_nanos INT8,
error_code STRING
error_code STRING,
last_error_redactable STRING
)`

var crdbInternalClusterExecutionInsightsTable = virtualSchemaTable{
Expand Down Expand Up @@ -8227,7 +8237,7 @@ func populateStmtInsights(
request *serverpb.ListExecutionInsightsRequest,
) (err error) {
// Check if the user has sufficient privileges.
hasPrivs, _, err := p.HasViewActivityOrViewActivityRedactedRole(ctx)
hasPrivs, shouldRedactError, err := p.HasViewActivityOrViewActivityRedactedRole(ctx)
if err != nil {
return err
} else if !hasPrivs {
Expand Down Expand Up @@ -8293,6 +8303,17 @@ func populateStmtInsights(
}
}

errorCode := tree.DNull
errorMsg := tree.DNull
if s.ErrorCode != "" {
errorCode = tree.NewDString(s.ErrorCode)
if shouldRedactError {
errorMsg = tree.NewDString(string(s.ErrorMsg.Redact()))
} else {
errorMsg = tree.NewDString(string(s.ErrorMsg))
}
}

err = errors.CombineErrors(err, addRow(
tree.NewDString(hex.EncodeToString(insight.Session.ID.GetBytes())),
tree.NewDUuid(tree.DUuid{UUID: insight.Transaction.ID}),
Expand Down Expand Up @@ -8320,7 +8341,8 @@ func populateStmtInsights(
indexRecommendations,
tree.MakeDBool(tree.DBool(insight.Transaction.ImplicitTxn)),
tree.NewDInt(tree.DInt(s.CPUSQLNanos)),
tree.NewDString(s.ErrorCode),
errorCode,
errorMsg,
))
}
}
Expand Down
18 changes: 14 additions & 4 deletions pkg/sql/logictest/testdata/logic_test/crdb_internal
Original file line number Diff line number Diff line change
Expand Up @@ -530,16 +530,26 @@ SELECT * FROM crdb_internal.ranges_no_leases WHERE range_id < 0
----
range_id start_key start_pretty end_key end_pretty replicas replica_localities voting_replicas non_voting_replicas learner_replicas split_enforced_until

query TTTBTTTTTIITITTTTTITT colnames
query TTTBTTTTTIITITTTTTTTTTTTTITT colnames
SELECT * FROM crdb_internal.cluster_execution_insights WHERE query = ''
----
session_id txn_id txn_fingerprint_id stmt_id stmt_fingerprint_id problem causes query status start_time end_time full_scan user_name app_name database_name plan_gist rows_read rows_written priority retries last_retry_reason exec_node_ids contention index_recommendations implicit_txn cpu_sql_nanos error_code last_error_redactable

query TTTBTTTTTIITITTTTTTTTTTTTITT colnames
SELECT * FROM crdb_internal.node_execution_insights WHERE query = ''
----
session_id txn_id txn_fingerprint_id stmt_id stmt_fingerprint_id problem causes query status start_time end_time full_scan user_name app_name database_name plan_gist rows_read rows_written priority retries last_retry_reason exec_node_ids contention index_recommendations implicit_txn cpu_sql_nanos error_code last_error_redactable

query TTTBTTTTTIITITTTTTITTT colnames
SELECT * FROM crdb_internal.cluster_txn_execution_insights WHERE query = ''
----
txn_id txn_fingerprint_id query implicit_txn session_id start_time end_time user_name app_name rows_read rows_written priority retries last_retry_reason contention problems causes stmt_execution_ids cpu_sql_nanos last_error_code status
txn_id txn_fingerprint_id query implicit_txn session_id start_time end_time user_name app_name rows_read rows_written priority retries last_retry_reason contention problems causes stmt_execution_ids cpu_sql_nanos last_error_code last_error_redactable status


query TTTBTTTTTIITITTTTTITT colnames
query TTTBTTTTTIITITTTTTITTT colnames
SELECT * FROM crdb_internal.node_txn_execution_insights WHERE query = ''
----
txn_id txn_fingerprint_id query implicit_txn session_id start_time end_time user_name app_name rows_read rows_written priority retries last_retry_reason contention problems causes stmt_execution_ids cpu_sql_nanos last_error_code status
txn_id txn_fingerprint_id query implicit_txn session_id start_time end_time user_name app_name rows_read rows_written priority retries last_retry_reason contention problems causes stmt_execution_ids cpu_sql_nanos last_error_code last_error_redactable status


statement ok
Expand Down
Loading

0 comments on commit d0ce6f4

Please sign in to comment.