Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: auto analyze on certain period of a day #7570

Merged
merged 7 commits into from
Sep 5, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,8 @@ var defaultSysVars = []*SysVar{
{ScopeSession, TiDBOptAggPushDown, boolToIntStr(DefOptAggPushDown)},
{ScopeGlobal | ScopeSession, TiDBBuildStatsConcurrency, strconv.Itoa(DefBuildStatsConcurrency)},
{ScopeGlobal, TiDBAutoAnalyzeRatio, strconv.FormatFloat(DefAutoAnalyzeRatio, 'f', -1, 64)},
{ScopeGlobal, TiDBAutoAnalyzeStartTime, DefAutoAnalyzeStartTime},
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
{ScopeGlobal, TiDBAutoAnalyzeEndTime, DefAutoAnalyzeEndTime},
{ScopeSession, TiDBChecksumTableConcurrency, strconv.Itoa(DefChecksumTableConcurrency)},
{ScopeGlobal | ScopeSession, TiDBDistSQLScanConcurrency, strconv.Itoa(DefDistSQLScanConcurrency)},
{ScopeGlobal | ScopeSession, TiDBOptInSubqUnFolding, boolToIntStr(DefOptInSubqUnfolding)},
Expand Down
6 changes: 6 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ const (
// Auto analyze will run if (table modify count)/(table row count) is greater than this value.
TiDBAutoAnalyzeRatio = "tidb_auto_analyze_ratio"

// Auto analyze will run if the current time is between the start time and the end time.
TiDBAutoAnalyzeStartTime = "tidb_auto_analyze_start_time"
TiDBAutoAnalyzeEndTime = "tidb_auto_analyze_end_time"

// tidb_checksum_table_concurrency is used to speed up the ADMIN CHECKSUM TABLE
// statement, when a table has multiple indices, those indices can be
// scanned concurrently, with the cost of higher system performance impact.
Expand Down Expand Up @@ -189,6 +193,8 @@ const (
DefDistSQLScanConcurrency = 15
DefBuildStatsConcurrency = 4
DefAutoAnalyzeRatio = 0.5
DefAutoAnalyzeStartTime = "00:00 UTC"
DefAutoAnalyzeEndTime = "23:59 UTC"
DefChecksumTableConcurrency = 4
DefSkipUTF8Check = false
DefOptAggPushDown = false
Expand Down
75 changes: 60 additions & 15 deletions statistics/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -601,12 +601,28 @@ func TableAnalyzed(tbl *Table) bool {
return false
}

// needAnalyzeTable checks if we need to analyze the table:
// withinTimePeriod reports whether the time of day of `now` lies inside the
// daily window that opens at `start` and closes at `end`, both inclusive.
//
// Only the UTC hour and minute of each argument are compared; the date and
// any seconds or finer components are ignored. When `end` falls earlier in the
// day than `start`, the window is treated as wrapping around midnight.
func withinTimePeriod(start, end, now time.Time) bool {
	// minuteOfDay maps a time to the number of minutes elapsed since 00:00 UTC.
	minuteOfDay := func(t time.Time) int {
		u := t.UTC()
		return u.Hour()*60 + u.Minute()
	}
	open, shut, cur := minuteOfDay(start), minuteOfDay(end), minuteOfDay(now)
	afterOpen, beforeShut := cur >= open, cur <= shut
	if open <= shut {
		// Window within a single day, e.g. 00:00 to 06:00.
		return afterOpen && beforeShut
	}
	// Window spanning midnight, e.g. 22:00 to 06:00.
	return afterOpen || beforeShut
}

// NeedAnalyzeTable checks if we need to analyze the table:
// 1. If the table has never been analyzed, we need to analyze it when it has
// not been modified for a while.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/ not been modified for a time./ not been modified for a while

// 2. If the table had been analyzed before, we need to analyze it when
// "tbl.ModifyCount/tbl.Count > autoAnalyzeRatio".
func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64) bool {
// 3. The current time is between `start` and `end`.
func NeedAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64, start, end, now time.Time) bool {
analyzed := TableAnalyzed(tbl)
if !analyzed {
t := time.Unix(0, oracle.ExtractPhysical(tbl.Version)*int64(time.Millisecond))
Expand All @@ -616,34 +632,63 @@ func needAnalyzeTable(tbl *Table, limit time.Duration, autoAnalyzeRatio float64)
if autoAnalyzeRatio == 0 {
return false
}
return float64(tbl.ModifyCount)/float64(tbl.Count) > autoAnalyzeRatio
// No need to analyze it.
if float64(tbl.ModifyCount)/float64(tbl.Count) <= autoAnalyzeRatio {
return false
}
// Tests if current time is within the time period.
return withinTimePeriod(start, end, now)
}

const minAutoAnalyzeRatio = 0.3
const (
// minAutoAnalyzeRatio is the lower bound that parseAutoAnalyzeRatio applies
// to any positive auto analyze ratio.
minAutoAnalyzeRatio = 0.3
// TimeFormat is the format of auto analyze start time and end time: a
// 24-hour "hour:minute" followed by a time zone abbreviation.
TimeFormat = "15:04 MST"
)

func (h *Handle) getAutoAnalyzeRatio() float64 {
sql := fmt.Sprintf("select variable_value from mysql.global_variables where variable_name = '%s'", variable.TiDBAutoAnalyzeRatio)
func (h *Handle) getAutoAnalyzeParameters() map[string]string {
sql := fmt.Sprintf("select variable_name, variable_value from mysql.global_variables where variable_name in ('%s', '%s', '%s')",
variable.TiDBAutoAnalyzeRatio, variable.TiDBAutoAnalyzeStartTime, variable.TiDBAutoAnalyzeEndTime)
rows, _, err := h.restrictedExec.ExecRestrictedSQL(nil, sql)
if err != nil {
return variable.DefAutoAnalyzeRatio
return map[string]string{}
}
autoAnalyzeRatio := variable.DefAutoAnalyzeRatio
if len(rows) > 0 {
autoAnalyzeRatio, err = strconv.ParseFloat(rows[0].GetString(0), 64)
if err != nil {
return variable.DefAutoAnalyzeRatio
}
parameters := make(map[string]string)
for _, row := range rows {
parameters[row.GetString(0)] = row.GetString(1)
}
return parameters
}

// parseAutoAnalyzeRatio converts the textual value of the auto analyze ratio
// into a float64.
//
// A value that cannot be parsed yields variable.DefAutoAnalyzeRatio. A parsed
// value greater than zero is raised to at least minAutoAnalyzeRatio; any other
// parsed value is returned unchanged.
func parseAutoAnalyzeRatio(ratio string) float64 {
	parsed, err := strconv.ParseFloat(ratio, 64)
	switch {
	case err != nil:
		return variable.DefAutoAnalyzeRatio
	case parsed > 0:
		return math.Max(parsed, minAutoAnalyzeRatio)
	default:
		return parsed
	}
}

func parseAnalyzePeriod(start, end string) (time.Time, time.Time) {
s, err := time.ParseInLocation(TimeFormat, start, time.UTC)
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
s = time.Date(0, 0, 0, 0, 0, 0, 0, time.UTC)
}
e, err := time.ParseInLocation(TimeFormat, end, time.UTC)
if err != nil {
e = time.Date(0, 0, 0, 23, 59, 0, 0, time.UTC)
}
return s, e
}

// HandleAutoAnalyze analyzes the newly created table or index.
func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
dbs := is.AllSchemaNames()
autoAnalyzeRatio := h.getAutoAnalyzeRatio()
parameters := h.getAutoAnalyzeParameters()
autoAnalyzeRatio := parseAutoAnalyzeRatio(parameters[variable.TiDBAutoAnalyzeRatio])
start, end := parseAnalyzePeriod(parameters[variable.TiDBAutoAnalyzeStartTime], parameters[variable.TiDBAutoAnalyzeEndTime])
for _, db := range dbs {
tbls := is.SchemaTables(model.NewCIStr(db))
for _, tbl := range tbls {
Expand All @@ -653,7 +698,7 @@ func (h *Handle) HandleAutoAnalyze(is infoschema.InfoSchema) error {
continue
}
tblName := "`" + db + "`.`" + tblInfo.Name.O + "`"
if needAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio) {
if NeedAnalyzeTable(statsTbl, 20*h.Lease, autoAnalyzeRatio, start, end, time.Now()) {
sql := fmt.Sprintf("analyze table %s", tblName)
log.Infof("[stats] auto analyze table %s now", tblName)
return errors.Trace(h.execAutoAnalyze(sql))
Expand Down
87 changes: 87 additions & 0 deletions statistics/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/store/tikv/oracle"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/ranger"
Expand Down Expand Up @@ -918,3 +919,89 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) {
c.Assert(s.hook.results, Equals, t.result)
}
}

func (s *testStatsUpdateSuite) TestNeedAnalyzeTable(c *C) {
columns := map[int64]*statistics.Column{}
columns[1] = &statistics.Column{Count: 1}
tests := []struct {
tbl *statistics.Table
ratio float64
limit time.Duration
timeStr []string
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be easier to read if we split the timeStr slice into start, end and now.

result bool
}{
// table never analyzed and has reached the limit
{
tbl: &statistics.Table{Version: oracle.EncodeTSO(oracle.GetPhysical(time.Now()))},
limit: 0,
ratio: 0,
timeStr: []string{"00:00 CST", "00:01 CST", "00:00 CST"},
result: true,
},
// table never analyzed but has not reached the limit
{
tbl: &statistics.Table{Version: oracle.EncodeTSO(oracle.GetPhysical(time.Now()))},
limit: time.Hour,
ratio: 0,
timeStr: []string{"00:00 CST", "00:01 CST", "00:00 CST"},
result: false,
},
// table already analyzed but auto analyze is disabled
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
limit: 0,
ratio: 0,
timeStr: []string{"00:00 CST", "00:01 CST", "00:00 CST"},
result: false,
},
// table already analyzed but modify count is small
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 0, Count: 1}},
limit: 0,
ratio: 0.3,
timeStr: []string{"00:00 CST", "00:01 CST", "00:00 CST"},
result: false,
},
// table already analyzed but not within time period
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
limit: 0,
ratio: 0.3,
timeStr: []string{"00:00 CST", "00:01 CST", "00:02 CST"},
result: false,
},
// table already analyzed but not within a time period that spans midnight
Copy link
Contributor

@zhexuany zhexuany Sep 4, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/table already analyzed and but not within time period/table was already analyzed and but not within time period

You also need to fix the rest of the comments in the same way.

{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
limit: 0,
ratio: 0.3,
timeStr: []string{"22:00 CST", "06:00 CST", "10:02 CST"},
result: false,
},
// table already analyzed and within time period
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
limit: 0,
ratio: 0.3,
timeStr: []string{"00:00 CST", "00:01 CST", "00:00 CST"},
result: true,
},
// table already analyzed and within time period
{
tbl: &statistics.Table{HistColl: statistics.HistColl{Columns: columns, ModifyCount: 1, Count: 1}},
limit: 0,
ratio: 0.3,
timeStr: []string{"22:00 CST", "06:00 CST", "23:00 CST"},
result: true,
},
}
for _, test := range tests {
ts := make([]time.Time, 0, 3)
for _, s := range test.timeStr {
t, err := time.ParseInLocation(statistics.TimeFormat, s, time.UTC)
c.Assert(err, IsNil)
ts = append(ts, t)
}
c.Assert(statistics.NeedAnalyzeTable(test.tbl, test.limit, test.ratio, ts[0], ts[1], ts[2]), Equals, test.result)
}
}