Skip to content

Commit

Permalink
stats: auto analyze on certain period of a day
Browse files Browse the repository at this point in the history
  • Loading branch information
Haibin Xie committed Aug 31, 2018
1 parent 40a4a2d commit 52d79a8
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 15 deletions.
2 changes: 2 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,8 @@ var defaultSysVars = []*SysVar{
{ScopeSession, TiDBOptAggPushDown, boolToIntStr(DefOptAggPushDown)},
{ScopeGlobal | ScopeSession, TiDBBuildStatsConcurrency, strconv.Itoa(DefBuildStatsConcurrency)},
{ScopeGlobal, TiDBAutoAnalyzeRatio, strconv.FormatFloat(DefAutoAnalyzeRatio, 'f', -1, 64)},
{ScopeGlobal, TiDBAutoAnalyzeStartTime, DefAutoAnalyzeStartTime},
{ScopeGlobal, TiDBAutoAnalyzeEndTime, DefAutoAnalyzeEndTime},
{ScopeSession, TiDBChecksumTableConcurrency, strconv.Itoa(DefChecksumTableConcurrency)},
{ScopeGlobal | ScopeSession, TiDBDistSQLScanConcurrency, strconv.Itoa(DefDistSQLScanConcurrency)},
{ScopeGlobal | ScopeSession, TiDBOptInSubqUnFolding, boolToIntStr(DefOptInSubqUnfolding)},
Expand Down
6 changes: 6 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ const (
// Auto analyze will run if (table modify count)/(table row count) is greater than this value.
TiDBAutoAnalyzeRatio = "tidb_auto_analyze_ratio"

// Auto analyze will run if current time is within start time and end time.
TiDBAutoAnalyzeStartTime = "tidb_auto_analyze_start_time"
TiDBAutoAnalyzeEndTime = "tidb_auto_analyze_end_time"

// tidb_checksum_table_concurrency is used to speed up the ADMIN CHECKSUM TABLE
// statement, when a table has multiple indices, those indices can be
// scanned concurrently, with the cost of higher system performance impact.
Expand Down Expand Up @@ -189,6 +193,8 @@ const (
DefDistSQLScanConcurrency = 15
DefBuildStatsConcurrency = 4
DefAutoAnalyzeRatio = 0.5
DefAutoAnalyzeStartTime = "00:00 UTC"
DefAutoAnalyzeEndTime = "23:59 UTC"
DefChecksumTableConcurrency = 4
DefSkipUTF8Check = false
DefOptAggPushDown = false
Expand Down
75 changes: 60 additions & 15 deletions statistics/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -601,12 +601,28 @@ func TableAnalyzed(tbl *Table) bool {
return false
}

// needAnalyzeTable checks if we need to analyze the table:
// withinTimePeriod reports whether the time of day of `now` falls inside the
// daily window that opens at `start` and closes at `end` (both inclusive).
//
// All three arguments are converted to UTC and reduced to hour and minute;
// their date and seconds are ignored. When `end` is earlier in the day than
// `start`, the window is taken to wrap around midnight (e.g. 22:00 to 06:00).
func withinTimePeriod(start, end, now time.Time) bool {
	// minuteOfDay maps a time to the number of minutes since 00:00 UTC.
	minuteOfDay := func(t time.Time) int {
		u := t.UTC()
		return u.Hour()*60 + u.Minute()
	}
	open, shut, cur := minuteOfDay(start), minuteOfDay(end), minuteOfDay(now)

	if open <= shut {
		// Window within a single day, like 00:00 to 06:00.
		return open <= cur && cur <= shut
	}
	// Window crossing midnight, like 22:00 to 06:00.
	return cur <= shut || cur >= open
}

// NeedAnalyzeTable checks if we need to analyze the table:
// 1. If the table has never been analyzed, we need to analyze it when it has
// not been modified for a time.
// 2. If the table had been analyzed before, we need to analyze it when
// "tbl.ModifyCount/tbl.Count > autoAnalyzeRatio".
func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64) bool {
// 3. The current time is between `start` and `end`.
func NeedAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64, start, end, now time.Time) bool {
analyzed := TableAnalyzed(tbl)
if !analyzed {
t := time.Unix(0, oracle.ExtractPhysical(tbl.Version)*int64(time.Millisecond))
Expand All @@ -616,34 +632,63 @@ func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64)
if autoAnalyzeRatio == 0 {
return false
}
return float64(tbl.ModifyCount)/float64(tbl.Count) > autoAnalyzeRatio
// No need to analyze it.
if float64(tbl.ModifyCount)/float64(tbl.Count) <= autoAnalyzeRatio {
return false
}
// Tests if current time is within the time period.
return withinTimePeriod(start, end, now)
}

const minAutoAnalyzeRatio = 0.3
const (
// minAutoAnalyzeRatio is the floor that parseAutoAnalyzeRatio applies to any
// positive configured ratio; ratios that are zero or negative are not raised.
minAutoAnalyzeRatio = 0.3
// TimeFormat is the format of auto analyze start time and end time.
// It is a Go reference-time layout: a 24-hour "hour:minute" clock followed by
// a time zone abbreviation, e.g. "00:00 UTC".
TimeFormat = "15:04 MST"
)

func (h *Handle) getAutoAnalyzeRatio() float64 {
sql := fmt.Sprintf("select variable_value from mysql.global_variables where variable_name = '%s'", variable.TiDBAutoAnalyzeRatio)
func (h *Handle) getAutoAnalyzeParameters() map[string]string {
sql := fmt.Sprintf("select variable_name, variable_value from mysql.global_variables where variable_name in ('%s', '%s', '%s')",
variable.TiDBAutoAnalyzeRatio, variable.TiDBAutoAnalyzeStartTime, variable.TiDBAutoAnalyzeEndTime)
rows, _, err := h.restrictedExec.ExecRestrictedSQL(nil, sql)
if err != nil {
return variable.DefAutoAnalyzeRatio
return map[string]string{}
}
autoAnalyzeRatio := variable.DefAutoAnalyzeRatio
if len(rows) > 0 {
autoAnalyzeRatio, err = strconv.ParseFloat(rows[0].GetString(0), 64)
if err != nil {
return variable.DefAutoAnalyzeRatio
}
parameters := make(map[string]string)
for _, row := range rows {
parameters[row.GetString(0)] = row.GetString(1)
}
return parameters
}

// parseAutoAnalyzeRatio converts the textual auto analyze ratio into a float.
//
// If `ratio` cannot be parsed as a 64-bit float, variable.DefAutoAnalyzeRatio
// is returned. A positive parsed value is raised to at least
// minAutoAnalyzeRatio; any other parsed value is returned unchanged.
func parseAutoAnalyzeRatio(ratio string) float64 {
	parsed, err := strconv.ParseFloat(ratio, 64)
	switch {
	case err != nil:
		return variable.DefAutoAnalyzeRatio
	case parsed > 0:
		// Clamp small positive ratios up to the configured minimum.
		return math.Max(parsed, minAutoAnalyzeRatio)
	default:
		return parsed
	}
}

// parseAnalyzePeriod parses the auto analyze window boundaries, which are
// expected to be written in TimeFormat, and returns them as (start, end).
//
// Each boundary is parsed independently. A `start` that fails to parse falls
// back to 00:00 UTC, and an `end` that fails to parse falls back to 23:59 UTC.
func parseAnalyzePeriod(start, end string) (time.Time, time.Time) {
	// parseOr parses value, or yields the given UTC clock time on failure.
	parseOr := func(value string, hour, minute int) time.Time {
		parsed, err := time.ParseInLocation(TimeFormat, value, time.UTC)
		if err != nil {
			return time.Date(0, 0, 0, hour, minute, 0, 0, time.UTC)
		}
		return parsed
	}
	return parseOr(start, 0, 0), parseOr(end, 23, 59)
}

// HandleAutoAnalyze analyzes the newly created table or index.
func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
dbs := is.AllSchemaNames()
autoAnalyzeRatio := h.getAutoAnalyzeRatio()
parameters := h.getAutoAnalyzeParameters()
autoAnalyzeRatio := parseAutoAnalyzeRatio(parameters[variable.TiDBAutoAnalyzeRatio])
start, end := parseAnalyzePeriod(parameters[variable.TiDBAutoAnalyzeStartTime], parameters[variable.TiDBAutoAnalyzeEndTime])
for _, db := range dbs {
tbls := is.SchemaTables(model.NewCIStr(db))
for _, tbl := range tbls {
Expand All @@ -653,7 +698,7 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
continue
}
tblName := "`" + db + "`.`" + tblInfo.Name.O + "`"
if needAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio) {
if NeedAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio, start, end, time.Now()) {
sql := fmt.Sprintf("analyze table %s", tblName)
log.Infof("[stats] auto analyze table %s now", tblName)
return errors.Trace(h.execAutoAnalyze(sql))
Expand Down
87 changes: 87 additions & 0 deletions statistics/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/store/tikv/oracle"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/ranger"
Expand Down Expand Up @@ -918,3 +919,89 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) {
c.Assert(s.hook.results, Equals, t.result)
}
}

// TestNeedAnalyzeTable runs statistics.NeedAnalyzeTable over a table of cases
// covering never-analyzed tables, disabled auto analyze, a small modify count,
// and analyze windows that do and do not contain the current time.
func (s *testStatsUpdateSuite) TestNeedAnalyzeTable(c *C) {
	analyzedColumns := map[int64]*statistics.Column{
		1: &statistics.Column{Count: 1},
	}
	// mustParse turns a TimeFormat clock string into a time, failing the test
	// if the string is malformed.
	mustParse := func(clock string) time.Time {
		parsed, err := time.ParseInLocation(statistics.TimeFormat, clock, time.UTC)
		c.Assert(err, IsNil)
		return parsed
	}
	cases := []struct {
		tbl    *statistics.Table
		ratio  float64
		limit  time.Duration
		start  string
		end    string
		now    string
		result bool
	}{
		// table never analyzed and has reached the limit
		{
			tbl:    &statistics.Table{Version: oracle.EncodeTSO(oracle.GetPhysical(time.Now()))},
			limit:  0,
			ratio:  0,
			start:  "00:00 CST",
			end:    "00:01 CST",
			now:    "00:00 CST",
			result: true,
		},
		// table never analyzed but has not reached the limit
		{
			tbl:    &statistics.Table{Version: oracle.EncodeTSO(oracle.GetPhysical(time.Now()))},
			limit:  time.Hour,
			ratio:  0,
			start:  "00:00 CST",
			end:    "00:01 CST",
			now:    "00:00 CST",
			result: false,
		},
		// table already analyzed but auto analyze is disabled
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: analyzedColumns, ModifyCount: 1, Count: 1}},
			limit:  0,
			ratio:  0,
			start:  "00:00 CST",
			end:    "00:01 CST",
			now:    "00:00 CST",
			result: false,
		},
		// table already analyzed but modify count is small
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: analyzedColumns, ModifyCount: 0, Count: 1}},
			limit:  0,
			ratio:  0.3,
			start:  "00:00 CST",
			end:    "00:01 CST",
			now:    "00:00 CST",
			result: false,
		},
		// table already analyzed but not within time period
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: analyzedColumns, ModifyCount: 1, Count: 1}},
			limit:  0,
			ratio:  0.3,
			start:  "00:00 CST",
			end:    "00:01 CST",
			now:    "00:02 CST",
			result: false,
		},
		// table already analyzed but not within a time period that wraps midnight
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: analyzedColumns, ModifyCount: 1, Count: 1}},
			limit:  0,
			ratio:  0.3,
			start:  "22:00 CST",
			end:    "06:00 CST",
			now:    "10:02 CST",
			result: false,
		},
		// table already analyzed and within time period
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: analyzedColumns, ModifyCount: 1, Count: 1}},
			limit:  0,
			ratio:  0.3,
			start:  "00:00 CST",
			end:    "00:01 CST",
			now:    "00:00 CST",
			result: true,
		},
		// table already analyzed and within a time period that wraps midnight
		{
			tbl:    &statistics.Table{HistColl: statistics.HistColl{Columns: analyzedColumns, ModifyCount: 1, Count: 1}},
			limit:  0,
			ratio:  0.3,
			start:  "22:00 CST",
			end:    "06:00 CST",
			now:    "23:00 CST",
			result: true,
		},
	}
	for _, tc := range cases {
		start := mustParse(tc.start)
		end := mustParse(tc.end)
		now := mustParse(tc.now)
		got := statistics.NeedAnalyzeTable(tc.tbl, tc.limit, tc.ratio, start, end, now)
		c.Assert(got, Equals, tc.result)
	}
}

0 comments on commit 52d79a8

Please sign in to comment.