Skip to content

Commit

Permalink
statistics: always tracking predicate columns (#54152)
Browse files Browse the repository at this point in the history
ref #53567
  • Loading branch information
Rustin170506 authored Jun 24, 2024
1 parent 324ee4c commit fd2b5e9
Show file tree
Hide file tree
Showing 10 changed files with 19 additions and 194 deletions.
8 changes: 2 additions & 6 deletions pkg/executor/set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -672,15 +672,11 @@ func TestSetVar(t *testing.T) {
tk.MustQuery("select @@tidb_enable_historical_stats").Check(testkit.Rows("0"))

// test for tidb_enable_column_tracking
tk.MustQuery("select @@tidb_enable_column_tracking").Check(testkit.Rows("0"))
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustQuery("select @@tidb_enable_column_tracking").Check(testkit.Rows("1"))
tk.MustExec("set global tidb_enable_column_tracking = 0")
tk.MustQuery("select @@tidb_enable_column_tracking").Check(testkit.Rows("0"))
// When tidb_enable_column_tracking is set to off, we record the time of the setting operation.
tk.MustQuery("select count(1) from mysql.tidb where variable_name = 'tidb_disable_column_tracking_time' and variable_value is not null").Check(testkit.Rows("1"))
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1681 The 'tidb_enable_column_tracking' variable is deprecated and will be removed in future versions of TiDB. It is always set to 'ON' now."))
tk.MustQuery("select @@tidb_enable_column_tracking").Check(testkit.Rows("1"))
tk.MustQuery("select count(1) from mysql.tidb where variable_name = 'tidb_disable_column_tracking_time' and variable_value is not null").Check(testkit.Rows("0"))
require.Error(t, tk.ExecToErr("select @@session.tidb_enable_column_tracking"))
require.Error(t, tk.ExecToErr("set tidb_enable_column_tracking = 0"))
require.Error(t, tk.ExecToErr("set global tidb_enable_column_tracking = -1"))
Expand Down
11 changes: 4 additions & 7 deletions pkg/planner/core/collect_column_stats_usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,15 +349,14 @@ func (c *columnStatsUsageCollector) collectFromPlan(lp base.LogicalPlan) {
// First return value: predicate columns (always collected)
// Second return value: histogram-needed columns (nil if histNeeded is false)
// Third return value: ds.PhysicalTableID from all DataSource (always collected)
func CollectColumnStatsUsage(lp base.LogicalPlan, predicate, histNeeded bool) (
func CollectColumnStatsUsage(lp base.LogicalPlan, histNeeded bool) (
[]model.TableItemID,
[]model.StatsLoadItem,
*intset.FastIntSet,
) {
var mode uint64
if predicate {
mode |= collectPredicateColumns
}
// Always collect predicate columns.
mode |= collectPredicateColumns
if histNeeded {
mode |= collectHistNeededColumns
}
Expand Down Expand Up @@ -448,9 +447,7 @@ func CollectColumnStatsUsage(lp base.LogicalPlan, predicate, histNeeded bool) (
predicateCols []model.TableItemID
histNeededCols []model.StatsLoadItem
)
if predicate {
predicateCols = maps.Keys(collector.predicateCols)
}
predicateCols = maps.Keys(collector.predicateCols)
if histNeeded {
histNeededCols = itemSet2slice(collector.histNeededCols)
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/planner/core/collect_column_stats_usage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func getStatsLoadItem(t *testing.T, is infoschema.InfoSchema, item model.StatsLo

func checkColumnStatsUsageForPredicates(t *testing.T, is infoschema.InfoSchema, lp base.LogicalPlan, expected []string, comment string) {
var tblColIDs []model.TableItemID
tblColIDs, _, _ = CollectColumnStatsUsage(lp, true, false)
tblColIDs, _, _ = CollectColumnStatsUsage(lp, false)
cols := make([]string, 0, len(tblColIDs))
for _, tblColID := range tblColIDs {
col := getColumnName(t, is, tblColID, comment)
Expand All @@ -91,7 +91,7 @@ func checkColumnStatsUsageForPredicates(t *testing.T, is infoschema.InfoSchema,

func checkColumnStatsUsageForStatsLoad(t *testing.T, is infoschema.InfoSchema, lp base.LogicalPlan, expected []string, comment string) {
var loadItems []model.StatsLoadItem
_, loadItems, _ = CollectColumnStatsUsage(lp, false, true)
_, loadItems, _ = CollectColumnStatsUsage(lp, true)
cols := make([]string, 0, len(loadItems))
for _, item := range loadItems {
col := getStatsLoadItem(t, is, item, comment)
Expand Down
3 changes: 1 addition & 2 deletions pkg/planner/core/rule_collect_plan_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,9 @@ func (collectPredicateColumnsPoint) optimize(_ context.Context, plan base.Logica
if plan.SCtx().GetSessionVars().InRestrictedSQL {
return plan, planChanged, nil
}
predicateNeeded := variable.EnableColumnTracking.Load()
syncWait := plan.SCtx().GetSessionVars().StatsLoadSyncWait.Load()
histNeeded := syncWait > 0
predicateColumns, histNeededColumns, visitedPhysTblIDs := CollectColumnStatsUsage(plan, predicateNeeded, histNeeded)
predicateColumns, histNeededColumns, visitedPhysTblIDs := CollectColumnStatsUsage(plan, histNeeded)
if len(predicateColumns) > 0 {
plan.SCtx().UpdateColStatsUsage(predicateColumns)
}
Expand Down
29 changes: 4 additions & 25 deletions pkg/sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -995,13 +995,6 @@ var defaultSysVars = []*SysVar{
GetGlobal: func(_ context.Context, s *SessionVars) (string, error) {
return BoolToOnOff(PersistAnalyzeOptions.Load()), nil
},
Validation: func(vars *SessionVars, normalizedValue string, originalValue string, scope ScopeFlag) (string, error) {
persist := TiDBOptOn(normalizedValue)
if !persist && EnableColumnTracking.Load() {
return "", errors.Errorf("tidb_persist_analyze_options option cannot be set to OFF when tidb_enable_column_tracking is ON, as this will result in the loss of column tracking information")
}
return normalizedValue, nil
},
SetGlobal: func(_ context.Context, s *SessionVars, val string) error {
persist := TiDBOptOn(val)
PersistAnalyzeOptions.Store(persist)
Expand Down Expand Up @@ -1152,31 +1145,17 @@ var defaultSysVars = []*SysVar{
},
{
Scope: ScopeGlobal, Name: TiDBEnableColumnTracking,
Value: BoolToOnOff(DefTiDBEnableColumnTracking),
Value: BoolToOnOff(true),
Type: TypeBool,
GetGlobal: func(_ context.Context, s *SessionVars) (string, error) {
return BoolToOnOff(EnableColumnTracking.Load()), nil
return BoolToOnOff(true), nil
},
Validation: func(vars *SessionVars, normalizedValue string, originalValue string, scope ScopeFlag) (string, error) {
enabled := TiDBOptOn(normalizedValue)
persist := PersistAnalyzeOptions.Load()
if enabled && !persist {
return "", errors.Errorf("tidb_enable_column_tracking option cannot be set to ON when tidb_persist_analyze_options is set to OFF, as this will prevent the preservation of column tracking information")
}
// This variable is deprecated and will be removed in the future.
vars.StmtCtx.AppendWarning(ErrWarnDeprecatedSyntaxSimpleMsg.FastGen("The 'tidb_enable_column_tracking' variable is deprecated and will be removed in future versions of TiDB. It is always set to 'ON' now."))
return normalizedValue, nil
},
SetGlobal: func(_ context.Context, s *SessionVars, val string) error {
enabled := TiDBOptOn(val)
// If this is a user initiated statement,
// we log that column tracking is disabled.
if s.StmtCtx.StmtType == "Set" && !enabled {
// Set the location to UTC to avoid time zone interference.
disableTime := time.Now().UTC().Format(types.UTCTimeFormat)
if err := setTiDBTableValue(s, TiDBDisableColumnTrackingTime, disableTime, "Record the last time tidb_enable_column_tracking is set off"); err != nil {
return err
}
}
EnableColumnTracking.Store(enabled)
return nil
}},
{Scope: ScopeGlobal, Name: RequireSecureTransport, Value: BoolToOnOff(DefRequireSecureTransport), Type: TypeBool,
Expand Down
66 changes: 0 additions & 66 deletions pkg/sessionctx/variable/sysvar_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1631,69 +1631,3 @@ func TestTiDBLowResTSOUpdateInterval(t *testing.T) {
require.NoError(t, err)
require.Equal(t, "1000", val)
}

// TestSetEnableColumnTrackingAndPersistAnalyzeOptions exercises the mutual
// validation between the global variables tidb_enable_column_tracking and
// tidb_persist_analyze_options through the mock global accessor.
//
// It checks, in order, that:
//   - each variable can be read and toggled while the other one permits it;
//   - turning column tracking ON is rejected while persist-analyze-options is OFF;
//   - turning persist-analyze-options OFF is rejected while column tracking is ON.
//
// The steps mutate shared global state, so their order matters.
func TestSetEnableColumnTrackingAndPersistAnalyzeOptions(t *testing.T) {
vars := NewSessionVars(nil)
mock := NewMockGlobalAccessor4Tests()
mock.SessionVars = vars
vars.GlobalVarsAccessor = mock

// Test EnableColumnTracking
// The value read before any set is expected to be Off.
val, err := mock.GetGlobalSysVar(TiDBEnableColumnTracking)
require.NoError(t, err)
require.Equal(t, Off, val)
err = mock.SetGlobalSysVar(context.Background(), TiDBEnableColumnTracking, On)
require.NoError(t, err)
val, err = mock.GetGlobalSysVar(TiDBEnableColumnTracking)
require.NoError(t, err)
require.Equal(t, On, val)
// Reset back.
err = mock.SetGlobalSysVar(context.Background(), TiDBEnableColumnTracking, Off)
require.NoError(t, err)

// Test PersistAnalyzeOptions
// The value read before any set is expected to be On; it may be turned Off
// here because column tracking was reset to Off above.
val, err = mock.GetGlobalSysVar(TiDBPersistAnalyzeOptions)
require.NoError(t, err)
require.Equal(t, On, val)
err = mock.SetGlobalSysVar(context.Background(), TiDBPersistAnalyzeOptions, Off)
require.NoError(t, err)
val, err = mock.GetGlobalSysVar(TiDBPersistAnalyzeOptions)
require.NoError(t, err)
require.Equal(t, Off, val)
// Reset back
err = mock.SetGlobalSysVar(context.Background(), TiDBPersistAnalyzeOptions, On)
require.NoError(t, err)

// Set EnableColumnTracking to true when PersistAnalyzeOptions is false
// Set to false first.
err = mock.SetGlobalSysVar(context.Background(), TiDBEnableColumnTracking, Off)
require.NoError(t, err)
err = mock.SetGlobalSysVar(context.Background(), TiDBPersistAnalyzeOptions, Off)
require.NoError(t, err)
val, err = mock.GetGlobalSysVar(TiDBPersistAnalyzeOptions)
require.NoError(t, err)
require.Equal(t, Off, val)
// Enabling tracking must fail now, and the stored value must stay Off.
err = mock.SetGlobalSysVar(context.Background(), TiDBEnableColumnTracking, On)
require.Error(t, err, "enable column tracking requires to persist analyze options")
val, err = mock.GetGlobalSysVar(TiDBEnableColumnTracking)
require.NoError(t, err)
require.Equal(t, Off, val)

// Set PersistAnalyzeOptions to false when EnableColumnTracking is true
// Set to true first.
err = mock.SetGlobalSysVar(context.Background(), TiDBPersistAnalyzeOptions, On)
require.NoError(t, err)
val, err = mock.GetGlobalSysVar(TiDBPersistAnalyzeOptions)
require.NoError(t, err)
require.Equal(t, On, val)
err = mock.SetGlobalSysVar(context.Background(), TiDBEnableColumnTracking, On)
require.NoError(t, err)
val, err = mock.GetGlobalSysVar(TiDBEnableColumnTracking)
require.NoError(t, err)
require.Equal(t, On, val)
// Disabling persistence must fail now, and the stored value must stay On.
err = mock.SetGlobalSysVar(context.Background(), TiDBPersistAnalyzeOptions, Off)
require.Error(t, err, "persist analyze options requires to enable column tracking")
val, err = mock.GetGlobalSysVar(TiDBPersistAnalyzeOptions)
require.NoError(t, err)
require.Equal(t, On, val)
}
4 changes: 2 additions & 2 deletions pkg/sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -975,10 +975,12 @@ const (
// TiDBPersistAnalyzeOptions persists analyze options for later analyze and auto-analyze
TiDBPersistAnalyzeOptions = "tidb_persist_analyze_options"
// TiDBEnableColumnTracking enables collecting predicate columns.
//
// Deprecated: This variable is deprecated and is always ON now; please do not use it.
TiDBEnableColumnTracking = "tidb_enable_column_tracking"
// TiDBDisableColumnTrackingTime records the last time TiDBEnableColumnTracking is set off.
// It is used to invalidate the collected predicate columns after turning off TiDBEnableColumnTracking, which avoids physical deletion.
// It doesn't have cache in memory, and we directly get/set the variable value from/to mysql.tidb.
//
// Deprecated: This variable is deprecated; please do not use it.
TiDBDisableColumnTrackingTime = "tidb_disable_column_tracking_time"
// TiDBStatsLoadPseudoTimeout indicates whether to fallback to pseudo stats after load timeout.
TiDBStatsLoadPseudoTimeout = "tidb_stats_load_pseudo_timeout"
Expand Down Expand Up @@ -1342,7 +1344,6 @@ const (
DefEnableLegacyInstanceScope = true
DefTiDBTableCacheLease = 3 // 3s
DefTiDBPersistAnalyzeOptions = true
DefTiDBEnableColumnTracking = false
DefTiDBStatsLoadSyncWait = 100
DefTiDBStatsLoadPseudoTimeout = true
DefSysdateIsNow = false
Expand Down Expand Up @@ -1527,7 +1528,6 @@ var (
VarTiDBSuperReadOnly = atomic.NewBool(DefTiDBSuperReadOnly)
PersistAnalyzeOptions = atomic.NewBool(DefTiDBPersistAnalyzeOptions)
TableCacheLease = atomic.NewInt64(DefTiDBTableCacheLease)
EnableColumnTracking = atomic.NewBool(DefTiDBEnableColumnTracking)
StatsLoadSyncWait = atomic.NewInt64(DefTiDBStatsLoadSyncWait)
StatsLoadPseudoTimeout = atomic.NewBool(DefTiDBStatsLoadPseudoTimeout)
MemQuotaBindingCache = atomic.NewInt64(DefTiDBMemQuotaBindingCache)
Expand Down
24 changes: 1 addition & 23 deletions pkg/statistics/handle/updatetest/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1026,48 +1026,26 @@ func TestCollectPredicateColumnsFromExecute(t *testing.T) {
}
}

func TestEnableAndDisableColumnTracking(t *testing.T) {
func TestColumnTracking(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
tk := testkit.NewTestKit(t, store)
h := dom.StatsHandle()
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a int, b int, c int)")

originalVal := tk.MustQuery("select @@tidb_enable_column_tracking").Rows()[0][0].(string)
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()

tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where b > 1")
require.NoError(t, h.DumpColStatsUsageToKV())
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows()
require.Len(t, rows, 1)
require.Equal(t, "b", rows[0][3])

tk.MustExec("set global tidb_enable_column_tracking = 0")
// After tidb_enable_column_tracking is set to 0, the predicate columns collected before are invalidated.
tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Check(testkit.Rows())

// Sleep for 1.5s to let `last_used_at` be larger than `tidb_disable_column_tracking_time`.
time.Sleep(1500 * time.Millisecond)
tk.MustExec("select * from t where a > 1")
require.NoError(t, h.DumpColStatsUsageToKV())
// We don't collect predicate columns when tidb_enable_column_tracking = 0
tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Check(testkit.Rows())

tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where b < 1 and c > 1")
require.NoError(t, h.DumpColStatsUsageToKV())
rows = tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Sort().Rows()
require.Len(t, rows, 2)
require.Equal(t, "b", rows[0][3])
require.Equal(t, "c", rows[1][3])

// Test invalidating predicate columns again in order to check that tidb_disable_column_tracking_time can be updated.
tk.MustExec("set global tidb_enable_column_tracking = 0")
tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Check(testkit.Rows())
}

func TestStatsLockUnlockForAutoAnalyze(t *testing.T) {
Expand Down
61 changes: 3 additions & 58 deletions pkg/statistics/handle/usage/predicate_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/statistics"
statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types"
"github.com/pingcap/tidb/pkg/statistics/handle/usage/indexusage"
Expand Down Expand Up @@ -82,10 +81,6 @@ func (u *statsUsageImpl) CollectColumnsInExtendedStats(tableID int64) (columnIDs

// LoadColumnStatsUsage loads column stats usage information from disk.
func LoadColumnStatsUsage(sctx sessionctx.Context, loc *time.Location) (map[model.TableItemID]statstypes.ColStatsTimeInfo, error) {
disableTime, err := getDisableColumnTrackingTime(sctx)
if err != nil {
return nil, errors.Trace(err)
}
// Since we use another session from session pool to read mysql.column_stats_usage, which may have different @@time_zone, so we do time zone conversion here.
rows, _, err := utilstats.ExecRows(sctx, "SELECT table_id, column_id, CONVERT_TZ(last_used_at, @@TIME_ZONE, '+00:00'), CONVERT_TZ(last_analyzed_at, @@TIME_ZONE, '+00:00') FROM mysql.column_stats_usage")
if err != nil {
Expand All @@ -103,12 +98,8 @@ func LoadColumnStatsUsage(sctx sessionctx.Context, loc *time.Location) (map[mode
if err != nil {
return nil, errors.Trace(err)
}
// If `last_used_at` is before the time when `set global tidb_enable_column_tracking = 0` was executed, we should ignore it because
// `set global tidb_enable_column_tracking = 0` invalidates all the predicate columns collected before.
if disableTime == nil || gt.After(*disableTime) {
t := types.NewTime(types.FromGoTime(gt.In(loc)), mysql.TypeTimestamp, types.DefaultFsp)
statsUsage.LastUsedAt = &t
}
t := types.NewTime(types.FromGoTime(gt.In(loc)), mysql.TypeTimestamp, types.DefaultFsp)
statsUsage.LastUsedAt = &t
}
if !row.IsNull(3) {
gt, err := row.GetTime(3).GoTime(time.UTC)
Expand All @@ -130,11 +121,6 @@ func GetPredicateColumns(sctx sessionctx.Context, tableID int64) ([]int64, error
if err != nil {
return nil, errors.Trace(err)
}
// This is the last time `set global tidb_enable_column_tracking = 0` was executed.
disableTime, err := getDisableColumnTrackingTime(sctx)
if err != nil {
return nil, errors.Trace(err)
}
rows, _, err := utilstats.ExecRows(
sctx,
"SELECT column_id, CONVERT_TZ(last_used_at, @@TIME_ZONE, '+00:00') FROM mysql.column_stats_usage WHERE table_id = %? AND last_used_at IS NOT NULL",
Expand All @@ -151,17 +137,7 @@ func GetPredicateColumns(sctx sessionctx.Context, tableID int64) ([]int64, error
continue
}
colID := row.GetInt64(0)
gt, err := row.GetTime(1).GoTime(time.UTC)
if err != nil {
return nil, errors.Trace(err)
}
// If `last_used_at` is before the time when `set global tidb_enable_column_tracking = 0` was executed, we don't regard the column as a predicate column because
// `set global tidb_enable_column_tracking = 0` invalidates all the predicate columns collected before.
// TODO: Why do we need to do this? If column tracking is already disabled, we should not collect any column usage.
// If this refers to re-enabling column tracking, shouldn't we retain the column usage data from before it was disabled?
if disableTime == nil || gt.After(*disableTime) {
columnIDs = append(columnIDs, colID)
}
columnIDs = append(columnIDs, colID)
}
return columnIDs, nil
}
Expand Down Expand Up @@ -193,37 +169,6 @@ func cleanupDroppedColumnStatsUsage(sctx sessionctx.Context, tableID int64) erro
return err
}

// getDisableColumnTrackingTime reads the value of tidb_disable_column_tracking_time from mysql.tidb if it exists.
// UTC time format is used to store the time.
//
// It returns (nil, nil) when no row exists for the variable. Otherwise it
// returns the stored value parsed with types.UTCTimeFormat. An error is
// returned if the query fails, or if the stored value cannot be converted to
// a string or parsed as a time.
func getDisableColumnTrackingTime(sctx sessionctx.Context) (*time.Time, error) {
// Look the variable up by name in the system table (mysql.SystemDB.mysql.TiDBTable).
rows, fields, err := utilstats.ExecRows(
sctx,
"SELECT variable_value FROM %n.%n WHERE variable_name = %?",
mysql.SystemDB,
mysql.TiDBTable,
variable.TiDBDisableColumnTrackingTime,
)
if err != nil {
return nil, err
}
// No row found: report "no disable time" rather than an error.
if len(rows) == 0 {
return nil, nil
}

// Only the first returned row is consulted.
d := rows[0].GetDatum(0, &fields[0].Column.FieldType)
// The string represents the UTC time when tidb_enable_column_tracking is set to 0.
value, err := d.ToString()
if err != nil {
return nil, err
}
t, err := time.Parse(types.UTCTimeFormat, value)
if err != nil {
return nil, err
}

return &t, nil
}

// CollectColumnsInExtendedStats returns IDs of the columns involved in extended stats.
func CollectColumnsInExtendedStats(sctx sessionctx.Context, tableID int64) ([]int64, error) {
const sql = "SELECT name, type, column_ids FROM mysql.stats_extended WHERE table_id = %? and status in (%?, %?)"
Expand Down
3 changes: 0 additions & 3 deletions pkg/statistics/handle/usage/session_stats_collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,6 @@ func (s *statsUsageImpl) dumpTableStatCountToKV(is infoschema.InfoSchema, physic

// DumpColStatsUsageToKV sweeps the whole list, updates the column stats usage map and dumps it to KV.
func (s *statsUsageImpl) DumpColStatsUsageToKV() error {
if !variable.EnableColumnTracking.Load() {
return nil
}
s.SweepSessionStatsList()
colMap := s.SessionStatsUsage().GetUsageAndReset()
defer func() {
Expand Down

0 comments on commit fd2b5e9

Please sign in to comment.