Skip to content

Commit

Permalink
sql: use AOST -10s when building crdb_internal.table_row_statistics
Browse files Browse the repository at this point in the history
This commit updates the query used to build the virtual table
crdb_internal.table_row_statistics so that it is always run at AS OF
SYSTEM TIME '-10s'. This will reduce contention on the
system.table_statistics table and avoid delays when running SHOW TABLES,
which queries crdb_internal.table_row_statistics.

Informs #58189

Release note (performance improvement): Updated the query used to build
the virtual table crdb_internal.table_row_statistics so that it is always
run at AS OF SYSTEM TIME '-10s'. This should reduce contention on
the table and improve performance for transactions that rely on
crdb_internal.table_row_statistics, such as SHOW TABLES.
  • Loading branch information
rytaft committed Feb 24, 2021
1 parent eab1ae3 commit 9c24267
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 12 deletions.
7 changes: 6 additions & 1 deletion pkg/sql/catalog/descs/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,8 @@ func (tc *Collection) getDescriptorFromStore(
) (found bool, desc catalog.Descriptor, err error) {
// Bypass the namespace lookup from the store for system tables.
descID := bootstrap.LookupSystemTableDescriptorID(ctx, tc.settings, tc.codec(), parentID, name)
if descID == descpb.InvalidID {
isSystemDescriptor := descID != descpb.InvalidID
if !isSystemDescriptor {
var found bool
var err error
found, descID, err = catalogkv.LookupObjectID(ctx, txn, codec, parentID, parentSchemaID, name)
Expand All @@ -321,6 +322,10 @@ func (tc *Collection) getDescriptorFromStore(
desc, err = catalogkv.GetAnyDescriptorByID(ctx, txn, codec, descID, catalogkv.Mutable)
if err != nil {
return false, nil, err
} else if desc == nil && isSystemDescriptor {
// This can happen during startup because we're not actually looking up the
// system descriptor IDs in KV.
return false, nil, errors.Wrapf(catalog.ErrDescriptorNotFound, "descriptor %d not found", descID)
} else if desc == nil {
// Having done the namespace lookup, the descriptor must exist.
return false, nil, errors.AssertionFailedf("descriptor %d not found", descID)
Expand Down
29 changes: 24 additions & 5 deletions pkg/sql/crdb_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,30 +409,49 @@ CREATE TABLE crdb_internal.tables (
},
}

// statsAsOfTimeClusterMode controls the cluster setting for the duration which
// is used to define the AS OF time for querying the system.table_statistics
// table when building crdb_internal.table_row_statistics.
var statsAsOfTimeClusterMode = settings.RegisterDurationSetting(
"sql.crdb_internal.table_row_statistics.as_of_time",
"historical query time used to build the crdb_internal.table_row_statistics table",
-10*time.Second,
)

var crdbInternalTablesTableLastStats = virtualSchemaTable{
comment: "the latest stats for all tables accessible by current user in current database (KV scan)",
comment: "stats for all tables accessible by current user in current database as of 10s ago",
schema: `
CREATE TABLE crdb_internal.table_row_statistics (
table_id INT NOT NULL,
table_name STRING NOT NULL,
estimated_row_count INT
)`,
populate: func(ctx context.Context, p *planner, db *dbdesc.Immutable, addRow func(...tree.Datum) error) error {
// Collect the latests statistics for all tables.
query := `
// Collect the statistics for all tables AS OF 10 seconds ago to avoid
// contention on the stats table. We pass a nil transaction so that the AS
// OF clause can be independent of any outer query.
query := fmt.Sprintf(`
SELECT s."tableID", max(s."rowCount")
FROM system.table_statistics AS s
JOIN (
SELECT "tableID", max("createdAt") AS last_dt
FROM system.table_statistics
GROUP BY "tableID"
) AS l ON l."tableID" = s."tableID" AND l.last_dt = s."createdAt"
GROUP BY s."tableID"`
AS OF SYSTEM TIME '%s'
GROUP BY s."tableID"`, statsAsOfTimeClusterMode.String(&p.ExecCfg().Settings.SV))
statRows, err := p.ExtendedEvalContext().ExecCfg.InternalExecutor.QueryEx(
ctx, "crdb-internal-statistics-table", p.txn,
ctx, "crdb-internal-statistics-table", nil,
sessiondata.InternalExecutorOverride{User: security.RootUserName()},
query)
if err != nil {
// This query is likely to cause errors due to SHOW TABLES being run less
// than 10 seconds after cluster startup (10s is the default AS OF time
// for the query), causing the error "descriptor not found". We should
// tolerate this error and return nil.
if errors.Is(err, catalog.ErrDescriptorNotFound) {
return nil
}
return err
}

Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/logictest/logic.go
Original file line number Diff line number Diff line change
Expand Up @@ -1577,6 +1577,14 @@ func (t *logicTest) newCluster(serverArgs TestServerArgs) {
if _, err := conn.Exec("SET CLUSTER SETTING sql.defaults.interleaved_tables.enabled = true"); err != nil {
t.Fatal(err)
}

// Update the default AS OF time for querying the system.table_statistics
// table to create the crdb_internal.table_row_statistics table.
if _, err := conn.Exec(
"SET CLUSTER SETTING sql.crdb_internal.table_row_statistics.as_of_time = '-1µs'",
); err != nil {
t.Fatal(err)
}
}

if cfg.overrideDistSQLMode != "" {
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/event_log
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,7 @@ ORDER BY "timestamp", info
0 1 {"EventType": "set_cluster_setting", "SettingName": "kv.range_merge.queue_enabled", "Statement": "SET CLUSTER SETTING \"kv.range_merge.queue_enabled\" = false", "User": "root", "Value": "false"}
0 1 {"EventType": "set_cluster_setting", "PlaceholderValues": ["5"], "SettingName": "sql.stats.automatic_collection.min_stale_rows", "Statement": "SET CLUSTER SETTING \"sql.stats.automatic_collection.min_stale_rows\" = $1::INT8", "User": "root", "Value": "5"}
0 1 {"EventType": "set_cluster_setting", "SettingName": "sql.defaults.interleaved_tables.enabled", "Statement": "SET CLUSTER SETTING \"sql.defaults.interleaved_tables.enabled\" = true", "User": "root", "Value": "true"}
0 1 {"EventType": "set_cluster_setting", "SettingName": "sql.crdb_internal.table_row_statistics.as_of_time", "Statement": "SET CLUSTER SETTING \"sql.crdb_internal.table_row_statistics.as_of_time\" = e'-1\\u00B5s'", "User": "root", "Value": "-00:00:00.000001"}
0 1 {"EventType": "set_cluster_setting", "SettingName": "kv.allocator.load_based_lease_rebalancing.enabled", "Statement": "SET CLUSTER SETTING \"kv.allocator.load_based_lease_rebalancing.enabled\" = false", "User": "root", "Value": "false"}
0 1 {"EventType": "set_cluster_setting", "SettingName": "kv.allocator.load_based_lease_rebalancing.enabled", "Statement": "SET CLUSTER SETTING \"kv.allocator.load_based_lease_rebalancing.enabled\" = DEFAULT", "User": "root", "Value": "DEFAULT"}
0 1 {"EventType": "set_cluster_setting", "PlaceholderValues": ["'some string'"], "SettingName": "cluster.organization", "Statement": "SET CLUSTER SETTING \"cluster.organization\" = $1", "User": "root", "Value": "'some string'"}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/pg_catalog