diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel
index 27c87046f4f8d..1fb5ca04f3a69 100644
--- a/pkg/statistics/BUILD.bazel
+++ b/pkg/statistics/BUILD.bazel
@@ -80,7 +80,7 @@ go_test(
     data = glob(["testdata/**"]),
     embed = [":statistics"],
     flaky = True,
-    shard_count = 37,
+    shard_count = 38,
     deps = [
         "//pkg/config",
         "//pkg/parser/ast",
diff --git a/pkg/statistics/column.go b/pkg/statistics/column.go
index fff29c7ab571a..3637ed867c942 100644
--- a/pkg/statistics/column.go
+++ b/pkg/statistics/column.go
@@ -263,3 +263,13 @@ func (c *Column) StatsAvailable() bool {
 	// StatsVer, so we check NDV > 0 || NullCount > 0 for the case.
 	return c.IsAnalyzed() || c.NDV > 0 || c.NullCount > 0
 }
+
+// EmptyColumn creates an empty column object. It may be used for pseudo estimation or to stop loading nonexistent stats.
+func EmptyColumn(tid int64, pkIsHandle bool, colInfo *model.ColumnInfo) *Column {
+	return &Column{
+		PhysicalID: tid,
+		Info:       colInfo,
+		Histogram:  *NewHistogram(colInfo.ID, 0, 0, 0, &colInfo.FieldType, 0, 0),
+		IsHandle:   pkIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
+	}
+}
diff --git a/pkg/statistics/handle/storage/read.go b/pkg/statistics/handle/storage/read.go
index 49a023905a028..1a9feec3ff732 100644
--- a/pkg/statistics/handle/storage/read.go
+++ b/pkg/statistics/handle/storage/read.go
@@ -22,6 +22,7 @@ import (
 	"github.com/pingcap/errors"
 	"github.com/pingcap/failpoint"
 	"github.com/pingcap/tidb/pkg/config"
+	"github.com/pingcap/tidb/pkg/infoschema"
 	"github.com/pingcap/tidb/pkg/kv"
 	"github.com/pingcap/tidb/pkg/parser/ast"
 	"github.com/pingcap/tidb/pkg/parser/model"
@@ -576,7 +577,7 @@ func LoadHistogram(sctx sessionctx.Context, tableID int64, isIndex int, histID i
 }
 
 // LoadNeededHistograms will load histograms for those needed columns/indices.
-func LoadNeededHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, loadFMSketch bool) (err error) {
+func LoadNeededHistograms(sctx sessionctx.Context, statsCache statstypes.StatsHandle, loadFMSketch bool) (err error) {
 	items := statistics.HistogramNeededItems.AllItems()
 	for _, item := range items {
 		if !item.IsIndex {
@@ -619,18 +620,44 @@ func CleanFakeItemsForShowHistInFlights(statsCache statstypes.StatsCache) int {
 	return reallyNeeded
 }
 
-func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) {
-	tbl, ok := statsCache.Get(col.TableID)
+func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.StatsHandle, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) {
+	statsTbl, ok := statsCache.Get(col.TableID)
 	if !ok {
 		return nil
 	}
+	is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
+	tbl, ok := statsCache.TableInfoByID(is, col.TableID)
+	if !ok {
+		return nil
+	}
+	tblInfo := tbl.Meta()
 	var colInfo *model.ColumnInfo
-	_, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(col.ID, true)
+	_, loadNeeded, analyzed := statsTbl.ColumnIsLoadNeeded(col.ID, true)
+	for _, ci := range tblInfo.Columns {
+		if col.ID == ci.ID {
+			colInfo = ci
+			break
+		}
+	}
+	if colInfo == nil {
+		statistics.HistogramNeededItems.Delete(col)
+		return nil
+	}
 	if !loadNeeded || !analyzed {
+		// If this column is not analyzed yet and we don't have it in memory.
+		// We create a fake one for the pseudo estimation.
+		// Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed.
+		if loadNeeded && !analyzed {
+			fakeCol := statistics.EmptyColumn(col.TableID, tblInfo.PKIsHandle, colInfo)
+			// Copy before mutating: the table returned by Get is the cached object, as in the full-load path below.
+			statsTbl = statsTbl.Copy()
+			statsTbl.Columns[col.ID] = fakeCol
+			statsCache.UpdateStatsCache([]*statistics.Table{statsTbl}, nil)
+		}
 		statistics.HistogramNeededItems.Delete(col)
 		return nil
 	}
-	colInfo = tbl.ColAndIdxExistenceMap.GetCol(col.ID)
+
 	hg, _, statsVer, _, err := HistMetaFromStorageWithHighPriority(sctx, &col, colInfo)
 	if hg == nil || err != nil {
 		statistics.HistogramNeededItems.Delete(col)
@@ -664,29 +691,29 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S
 		CMSketch:   cms,
 		TopN:       topN,
 		FMSketch:   fms,
-		IsHandle:   tbl.IsPkIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
+		IsHandle:   tblInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
 		StatsVer:   statsVer,
 	}
 	// Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions
 	// like `GetPartitionStats` called in `fmSketchFromStorage` would have modified the stats cache already.
-	tbl, ok = statsCache.Get(col.TableID)
+	statsTbl, ok = statsCache.Get(col.TableID)
 	if !ok {
 		return nil
 	}
-	tbl = tbl.Copy()
+	statsTbl = statsTbl.Copy()
 	if colHist.StatsAvailable() {
 		if fullLoad {
 			colHist.StatsLoadedStatus = statistics.NewStatsFullLoadStatus()
 		} else {
 			colHist.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
 		}
-		tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, colHist.LastUpdateVersion)
 		if statsVer != statistics.Version0 {
-			tbl.StatsVer = int(statsVer)
+			statsTbl.LastAnalyzeVersion = max(statsTbl.LastAnalyzeVersion, colHist.LastUpdateVersion)
+			statsTbl.StatsVer = int(statsVer)
 		}
 	}
-	tbl.Columns[col.ID] = colHist
-	statsCache.UpdateStatsCache([]*statistics.Table{tbl}, nil)
+	statsTbl.Columns[col.ID] = colHist
+	statsCache.UpdateStatsCache([]*statistics.Table{statsTbl}, nil)
 	statistics.HistogramNeededItems.Delete(col)
 	if col.IsSyncLoadFailed {
 		logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.",
@@ -741,9 +768,9 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache statstypes.St
 	tbl = tbl.Copy()
 	if idxHist.StatsVer != statistics.Version0 {
 		tbl.StatsVer = int(idxHist.StatsVer)
+		tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion)
 	}
 	tbl.Indices[idx.ID] = idxHist
-	tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion)
 	statsCache.UpdateStatsCache([]*statistics.Table{tbl}, nil)
 	if idx.IsSyncLoadFailed {
 		logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.",
diff --git a/pkg/statistics/handle/syncload/stats_syncload.go b/pkg/statistics/handle/syncload/stats_syncload.go
index 04360bc13ee97..f7525c6f08371 100644
--- a/pkg/statistics/handle/syncload/stats_syncload.go
+++ b/pkg/statistics/handle/syncload/stats_syncload.go
@@ -310,13 +310,9 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err
 		}
 		// If this column is not analyzed yet and we don't have it in memory.
 		// We create a fake one for the pseudo estimation.
+		// Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed.
 		if loadNeeded && !analyzed {
-			wrapper.col = &statistics.Column{
-				PhysicalID: item.TableID,
-				Info:       wrapper.colInfo,
-				Histogram:  *statistics.NewHistogram(item.ID, 0, 0, 0, &wrapper.colInfo.FieldType, 0, 0),
-				IsHandle:   tbl.IsPkIsHandle && mysql.HasPriKeyFlag(wrapper.colInfo.GetFlag()),
-			}
+			wrapper.col = statistics.EmptyColumn(item.TableID, tbl.IsPkIsHandle, wrapper.colInfo)
 			s.updateCachedItem(item, wrapper.col, wrapper.idx, task.Item.FullLoad)
 			return nil
 		}
diff --git a/pkg/statistics/integration_test.go b/pkg/statistics/integration_test.go
index 4fa12778dd69b..acc970d3e769f 100644
--- a/pkg/statistics/integration_test.go
+++ b/pkg/statistics/integration_test.go
@@ -574,3 +574,25 @@ func TestTableLastAnalyzeVersion(t *testing.T) {
 	require.True(t, found)
 	require.NotEqual(t, uint64(0), statsTbl.LastAnalyzeVersion)
 }
+
+func TestLastAnalyzeVersionNotChangedWithAsyncStatsLoad(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+
+	tk.MustExec("set @@tidb_stats_load_sync_wait = 0;")
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int, b int);")
+	require.NoError(t, dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh()))
+	require.NoError(t, dom.StatsHandle().Update(dom.InfoSchema()))
+	tk.MustExec("insert into t values (1, 1);")
+	err := dom.StatsHandle().DumpStatsDeltaToKV(true)
+	require.NoError(t, err)
+	tk.MustExec("alter table t add column c int default 1;")
+	require.NoError(t, dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh()))
+	tk.MustExec("select * from t where a = 1 or b = 1 or c = 1;")
+	require.NoError(t, dom.StatsHandle().LoadNeededHistograms())
+	result := tk.MustQuery("show stats_meta where table_name = 't'")
+	require.Len(t, result.Rows(), 1)
+	// The last analyze time.
+	require.Equal(t, "", result.Rows()[0][6])
+}
diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go
index 1f6248d9a74ac..5a0d32999146b 100644
--- a/pkg/statistics/table.go
+++ b/pkg/statistics/table.go
@@ -644,7 +644,7 @@ func (t *Table) GetStatsHealthy() (int64, bool) {
 }
 
 // ColumnIsLoadNeeded checks whether the column needs trigger the async/sync load.
-// The Column should be visible in the table and really has analyzed statistics in the stroage.
+// The Column should be visible in the table and really has analyzed statistics in the storage.
 // Also, if the stats has been loaded into the memory, we also don't need to load it.
 // We return the Column together with the checking result, to avoid accessing the map multiple times.
 // The first bool is whether we have it in memory. The second bool is whether this column has stats in the system table or not.