From 921d3a42fe8414e217afb357b16d1102b5ee39b6 Mon Sep 17 00:00:00 2001 From: Mark Sirek Date: Sat, 24 Sep 2022 17:22:42 -0700 Subject: [PATCH] stats: fix missing autostats on cluster startup Fixes #87247 Previously, automatic statistics may fail to be collected on tables with no stats at cluster startup when the `sql.stats.automatic_collection.enabled` cluster setting is false but table storage parameter `sql_stats_automatic_collection_enabled` is true. Method `ensureAllTables` does not put entries into the settingOverrides map, and since storage parameter info is not normally looked up in the table header in auto stats processing, we fall back on the cluster setting to determine if auto stats should be collected. To address this, this patch modifies auto stats to flag whether the current batch of tables saved in the mutationCounts map comes from cluster startup processing, and if so, ensures that storage parameters controlling auto stats are always looked up in the table header during that processing. Release note (bug fix): This patch fixes missing automatic statistics collection at cluster startup when the `sql.stats.automatic_collection.enabled` cluster setting is false, but there are tables with storage parameter `sql_stats_automatic_collection_enabled` set to true. --- pkg/sql/stats/automatic_stats.go | 8 +++- pkg/sql/stats/automatic_stats_test.go | 59 +++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/pkg/sql/stats/automatic_stats.go b/pkg/sql/stats/automatic_stats.go index 6b213909ccd9..3df4bc30cf51 100644 --- a/pkg/sql/stats/automatic_stats.go +++ b/pkg/sql/stats/automatic_stats.go @@ -388,14 +388,20 @@ func (r *Refresher) Start( // once on startup. const initialTableCollectionDelay = time.Second initialTableCollection := time.After(initialTableCollectionDelay) + var ensuringAllTables bool for { select { case <-initialTableCollection: r.ensureAllTables(ctx, &r.st.SV, initialTableCollectionDelay) + if len(r.mutationCounts) > 0 { + ensuringAllTables = true + } case <-timer.C: mutationCounts := r.mutationCounts + refreshingAllTables := ensuringAllTables + ensuringAllTables = false var settingOverrides map[descpb.ID]catpb.AutoStatsSettings // For each mutation count, look up auto stats setting overrides using @@ -436,7 +442,7 @@ func (r *Refresher) Start( // processing the current table longer than the refresh // interval, look up the table descriptor to ensure we don't // have stale table settings. - if elapsed > DefaultRefreshInterval { + if elapsed > DefaultRefreshInterval || refreshingAllTables { desc = r.getTableDescriptor(ctx, tableID) if desc != nil { if !r.autoStatsEnabled(desc) { diff --git a/pkg/sql/stats/automatic_stats_test.go b/pkg/sql/stats/automatic_stats_test.go index 5e68155acc46..1a2265559168 100644 --- a/pkg/sql/stats/automatic_stats_test.go +++ b/pkg/sql/stats/automatic_stats_test.go @@ -582,6 +582,65 @@ func TestAutoStatsReadOnlyTables(t *testing.T) { }) } +func TestAutoStatsOnStartupClusterSettingOff(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + + s, sqlDB, kvDB := serverutils.StartServer(t, base.TestServerArgs{}) + defer sqlDB.Close() + defer s.Stopper().Stop(ctx) + + st := cluster.MakeTestingClusterSettings() + AutomaticStatisticsClusterMode.Override(ctx, &st.SV, false) + evalCtx := eval.NewTestingEvalContext(st) + defer evalCtx.Stop(ctx) + + sqlRun := sqlutils.MakeSQLRunner(sqlDB) + sqlRun.Exec(t, + `CREATE DATABASE t; + CREATE TABLE t.a (k INT PRIMARY KEY); + ALTER TABLE t.a SET (sql_stats_automatic_collection_enabled = true); + CREATE TABLE t.b (k INT PRIMARY KEY); + ALTER TABLE t.b SET (sql_stats_automatic_collection_enabled = false); + CREATE TABLE t.c (k INT PRIMARY KEY);`) + + executor := s.InternalExecutor().(sqlutil.InternalExecutor) + cache := NewTableStatisticsCache( + 10, /* cacheSize */ + kvDB, + executor, + s.ClusterSettings(), + s.CollectionFactory().(*descs.CollectionFactory), + ) + require.NoError(t, cache.Start(ctx, keys.SystemSQLCodec, s.RangeFeedFactory().(*rangefeed.Factory))) + refresher := MakeRefresher(s.AmbientCtx(), st, executor, cache, time.Microsecond /* asOfTime */) + + // Refresher start should trigger stats collection on t.a. + if err := refresher.Start( + ctx, s.Stopper(), time.Millisecond, /* refreshInterval */ + ); err != nil { + t.Fatal(err) + } + + // There should be one stat for table t.a. + sqlRun.CheckQueryResultsRetry(t, + `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE t.a]`, + [][]string{ + {"__auto__", "{k}", "0"}, + }) + + // There should be no stats for table t.b. + sqlRun.CheckQueryResultsRetry(t, + `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE t.b]`, + [][]string{}) + + // There should be no stats for table t.c. + sqlRun.CheckQueryResultsRetry(t, + `SELECT statistics_name, column_names, row_count FROM [SHOW STATISTICS FOR TABLE t.c]`, + [][]string{}) +} + func TestNoRetryOnFailure(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t)