From b2e0f43a96b21a77637de44b4df08294a0d60eb1 Mon Sep 17 00:00:00 2001 From: Mark Sirek Date: Sat, 24 Sep 2022 17:22:42 -0700 Subject: [PATCH] stats: fix missing autostats on cluster startup Fixes #87247 Previously, automatic statistics may fail to be collected on tables with no stats at cluster startup when the `sql.stats.automatic_collection.enabled` cluster setting is false but table storage parameter `sql_stats_automatic_collection_enabled` is true. Method `ensureAllTables` does not put entries into the settingOverrides map, and since storage parameter info is not normally looked up in the table header in auto stats processing, we fall back on the cluster setting to determine if auto stats should be collected. To address this, this patch modifies auto stats to flag whether the current batch of tables saved in the mutationCounts map comes from cluster startup processing, and if so, ensures that storage parameters controlling auto stats are always looked up in the table header during that processing. Release note (bug fix): This patch fixes missing automatic statistics collection at cluster startup when the `sql.stats.automatic_collection.enabled` cluster setting is false, but there are tables with storage parameter `sql_stats_automatic_collection_enabled` set to true. --- pkg/sql/stats/automatic_stats.go | 8 +++- pkg/sql/stats/automatic_stats_test.go | 59 +++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/pkg/sql/stats/automatic_stats.go b/pkg/sql/stats/automatic_stats.go index 24cf7a6204e0..73b48c6312a0 100644 --- a/pkg/sql/stats/automatic_stats.go +++ b/pkg/sql/stats/automatic_stats.go @@ -388,14 +388,20 @@ func (r *Refresher) Start( // once on startup. const initialTableCollectionDelay = time.Second initialTableCollection := time.After(initialTableCollectionDelay) + var ensuringAllTables bool for { select { case <-initialTableCollection: r.ensureAllTables(ctx, &r.st.SV, initialTableCollectionDelay) + if len(r.mutationCounts) > 0 { + ensuringAllTables = true + } case <-timer.C: mutationCounts := r.mutationCounts + refreshingAllTables := ensuringAllTables + ensuringAllTables = false var settingOverrides map[descpb.ID]catpb.AutoStatsSettings // For each mutation count, look up auto stats setting overrides using @@ -436,7 +442,7 @@ func (r *Refresher) Start( // processing the current table longer than the refresh // interval, look up the table descriptor to ensure we don't // have stale table settings. - if elapsed > DefaultRefreshInterval { + if elapsed > DefaultRefreshInterval || refreshingAllTables { desc = r.getTableDescriptor(ctx, tableID) if desc != nil { if !r.autoStatsEnabled(desc) { diff --git a/pkg/sql/stats/automatic_stats_test.go b/pkg/sql/stats/automatic_stats_test.go index 89066eb40fab..1ff9fd22c525 100644 --- a/pkg/sql/stats/automatic_stats_test.go +++ b/pkg/sql/stats/automatic_stats_test.go @@ -575,6 +575,65 @@ func TestAutoStatsReadOnlyTables(t *testing.T) { }) } +func TestAutoStatsOnStartupClusterSettingOff(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + + s, sqlDB, kvDB := serverutils.StartServer(t, base.TestServerArgs{}) + defer sqlDB.Close() + defer s.Stopper().Stop(ctx) + + st := cluster.MakeTestingClusterSettings() + AutomaticStatisticsClusterMode.Override(ctx, &st.SV, false) + evalCtx := eval.NewTestingEvalContext(st) + defer evalCtx.Stop(ctx) + + sqlRun := sqlutils.MakeSQLRunner(sqlDB) + sqlRun.Exec(t, + `CREATE DATABASE t; + CREATE TABLE t.a (k INT PRIMARY KEY); + ALTER TABLE t.a SET (sql_stats_automatic_collection_enabled = true); + CREATE TABLE t.b (k INT PRIMARY KEY); + ALTER TABLE t.b SET (sql_stats_automatic_collection_enabled = false); + CREATE TABLE t.c (k INT PRIMARY KEY);`) + + executor := s.InternalExecutor().(sqlutil.InternalExecutor) + cache := NewTableStatisticsCache( + 10, /* cacheSize */ + kvDB, + executor, + s.ClusterSettings(), + s.CollectionFactory().(*descs.CollectionFactory), + ) + require.NoError(t, cache.Start(ctx, keys.SystemSQLCodec, s.RangeFeedFactory().(*rangefeed.Factory))) + refresher := MakeRefresher(s.AmbientCtx(), st, executor, cache, time.Microsecond /* asOfTime */) + + // Refresher start should trigger stats collection on t.a. + if err := refresher.Start( + ctx, s.Stopper(), time.Millisecond, /* refreshInterval */ + ); err != nil { + t.Fatal(err) + } + + // There should be one stat for table t.a. + sqlRun.CheckQueryResultsRetry(t, + `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE t.a]`, + [][]string{ + {"__auto__", "{k}", "0"}, + }) + + // There should be no stats for table t.b. + sqlRun.CheckQueryResultsRetry(t, + `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE t.b]`, + [][]string{}) + + // There should be no stats for table t.c. + sqlRun.CheckQueryResultsRetry(t, + `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE t.c]`, + [][]string{}) +} + func TestNoRetryOnFailure(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t)