From 94d728bf483b34f18a95e0f33d8dec739b16d82a Mon Sep 17 00:00:00 2001 From: Lynne <55439700+LynneLiu-LYJ@users.noreply.github.com> Date: Mon, 11 Nov 2019 15:36:13 +0800 Subject: [PATCH 01/19] expression: implement vectorized evaluation for 'builtinCastJSONAsRealSig' (#13316) --- expression/builtin_cast_vec.go | 27 +++++++++++++++++++++++++-- expression/builtin_cast_vec_test.go | 1 + 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/expression/builtin_cast_vec.go b/expression/builtin_cast_vec.go index 8f924dd76385f..ee657ebc66c4c 100644 --- a/expression/builtin_cast_vec.go +++ b/expression/builtin_cast_vec.go @@ -412,11 +412,34 @@ func (b *builtinCastRealAsJSONSig) vecEvalJSON(input *chunk.Chunk, result *chunk } func (b *builtinCastJSONAsRealSig) vectorized() bool { - return false + return true } func (b *builtinCastJSONAsRealSig) vecEvalReal(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + n := input.NumRows() + buf, err := b.bufAllocator.get(types.ETJson, n) + if err != nil { + return err + } + defer b.bufAllocator.put(buf) + if err := b.args[0].VecEvalJSON(b.ctx, input, buf); err != nil { + return err + } + + result.ResizeFloat64(n, false) + result.MergeNulls(buf) + f64s := result.Float64s() + sc := b.ctx.GetSessionVars().StmtCtx + for i := 0; i < n; i++ { + if result.IsNull(i) { + continue + } + f64s[i], err = types.ConvertJSONToFloat(sc, buf.GetJSON(i)) + if err != nil { + return err + } + } + return nil } func (b *builtinCastJSONAsTimeSig) vectorized() bool { diff --git a/expression/builtin_cast_vec_test.go b/expression/builtin_cast_vec_test.go index 9ef2ebbbee4ad..79f515b5ab77b 100644 --- a/expression/builtin_cast_vec_test.go +++ b/expression/builtin_cast_vec_test.go @@ -35,6 +35,7 @@ var vecBuiltinCastCases = map[string][]vecExprBenchCase{ {retEvalType: types.ETReal, childrenTypes: []types.EvalType{types.ETInt}}, {retEvalType: types.ETDuration, childrenTypes: []types.EvalType{types.ETInt}, geners: []dataGenerator{new(randDurInt)}}, {retEvalType: types.ETReal, childrenTypes: []types.EvalType{types.ETReal}}, + {retEvalType: types.ETReal, childrenTypes: []types.EvalType{types.ETJson}}, {retEvalType: types.ETReal, childrenTypes: []types.EvalType{types.ETDecimal}}, {retEvalType: types.ETReal, childrenTypes: []types.EvalType{types.ETDatetime}}, {retEvalType: types.ETDuration, childrenTypes: []types.EvalType{types.ETDatetime}, From 30da446193aa9afebe36ab72a57771692d6d43b8 Mon Sep 17 00:00:00 2001 From: Zhang Zhiyi <909645105@qq.com> Date: Mon, 11 Nov 2019 15:42:13 +0800 Subject: [PATCH 02/19] expression: implement vectorized evaluation for `builtinHexIntArgSig` (#13330) --- expression/builtin_string_vec.go | 22 ++++++++++++++++++++-- expression/builtin_string_vec_test.go | 1 + 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/expression/builtin_string_vec.go b/expression/builtin_string_vec.go index 6cbb4cf7d7824..e6eed321a320e 100644 --- a/expression/builtin_string_vec.go +++ b/expression/builtin_string_vec.go @@ -1808,11 +1808,29 @@ func (b *builtinSubstringBinary3ArgsSig) vecEvalString(input *chunk.Chunk, resul } func (b *builtinHexIntArgSig) vectorized() bool { - return false + return true } func (b *builtinHexIntArgSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + n := input.NumRows() + buf, err := b.bufAllocator.get(types.ETInt, n) + if err != nil { + return err + } + defer b.bufAllocator.put(buf) + if err := b.args[0].VecEvalInt(b.ctx, input, buf); err != nil { + return err + } + result.ReserveString(n) + i64s := buf.Int64s() + for i := 0; i < n; i++ { + if buf.IsNull(i) { + result.AppendNull() + continue + } + result.AppendString(strings.ToUpper(fmt.Sprintf("%x", uint64(i64s[i])))) + } + return nil } func (b *builtinFieldIntSig) vectorized() bool { diff --git a/expression/builtin_string_vec_test.go b/expression/builtin_string_vec_test.go index b18004fc46910..3c18a6414a5f7 100644 --- a/expression/builtin_string_vec_test.go +++ b/expression/builtin_string_vec_test.go @@ -102,6 +102,7 @@ var vecBuiltinStringCases = map[string][]vecExprBenchCase{ }, ast.Hex: { {retEvalType: types.ETString, childrenTypes: []types.EvalType{types.ETString}, geners: []dataGenerator{&randHexStrGener{10, 100}}}, + {retEvalType: types.ETString, childrenTypes: []types.EvalType{types.ETInt}}, }, ast.Unhex: { {retEvalType: types.ETString, childrenTypes: []types.EvalType{types.ETString}, geners: []dataGenerator{&randHexStrGener{10, 100}}}, From 2b5f81add11497b5b5595b3ba16ac88cddf33a65 Mon Sep 17 00:00:00 2001 From: Zhuhe Fang <fzhedu@gmail.com> Date: Mon, 11 Nov 2019 15:50:43 +0800 Subject: [PATCH 03/19] executor, util: rename original List to ListInMemory and add a new interface List (#13353) --- executor/executor.go | 10 ++-- executor/hash_table.go | 11 +++-- executor/index_lookup_join.go | 16 +++++-- executor/index_lookup_merge_join.go | 9 ++-- executor/join.go | 6 +-- executor/pkg_test.go | 2 +- executor/sort.go | 18 +++++--- util/chunk/disk.go | 9 +++- util/chunk/iterator.go | 10 ++-- util/chunk/iterator_test.go | 6 +-- util/chunk/list.go | 72 +++++++++++++++++------------ util/chunk/list_test.go | 16 +++---- 12 files changed, 110 insertions(+), 75 deletions(-) diff --git a/executor/executor.go b/executor/executor.go index 482119db8e0a9..a10ecf2e636ee 100644 --- a/executor/executor.go +++ b/executor/executor.go @@ -132,10 +132,10 @@ func newFirstChunk(e Executor) *chunk.Chunk { return chunk.New(base.retFieldTypes, base.initCap, base.maxChunkSize) } -// newList creates a new List to buffer current executor's result. -func newList(e Executor) *chunk.List { +// newList creates a new ListInMemory to buffer current executor's result. +func newList(e Executor) *chunk.ListInMemory { base := e.base() - return chunk.NewList(base.retFieldTypes, base.initCap, base.maxChunkSize) + return chunk.NewListInMemory(base.retFieldTypes, base.initCap, base.maxChunkSize) } // retTypes returns all output column types. @@ -1115,7 +1115,7 @@ type TableScanExec struct { iter kv.Iterator columns []*model.ColumnInfo isVirtualTable bool - virtualTableChunkList *chunk.List + virtualTableChunkList *chunk.ListInMemory virtualTableChunkIdx int } @@ -1146,7 +1146,7 @@ func (e *TableScanExec) Next(ctx context.Context, req *chunk.Chunk) error { func (e *TableScanExec) nextChunk4InfoSchema(ctx context.Context, chk *chunk.Chunk) error { chk.GrowAndReset(e.maxChunkSize) if e.virtualTableChunkList == nil { - e.virtualTableChunkList = chunk.NewList(retTypes(e), e.initCap, e.maxChunkSize) + e.virtualTableChunkList = chunk.NewListInMemory(retTypes(e), e.initCap, e.maxChunkSize) columns := make([]*table.Column, e.schema.Len()) for i, colInfo := range e.columns { columns[i] = table.ToColumn(colInfo) diff --git a/executor/hash_table.go b/executor/hash_table.go index 256be4bfcbf32..875fc3d38f1bf 100644 --- a/executor/hash_table.go +++ b/executor/hash_table.go @@ -91,7 +91,7 @@ type hashRowContainer struct { memTracker *memory.Tracker // records stores the chunks in memory. - records *chunk.List + records *chunk.ListInMemory // recordsInDisk stores the chunks in disk. recordsInDisk *chunk.ListInDisk @@ -117,7 +117,7 @@ func newHashRowContainer(sCtx sessionctx.Context, estCount int, hCtx *hashContex if estCount < maxChunkSize*estCountMinFactor { estCount = 0 } - initList := chunk.NewList(hCtx.allTypes, maxChunkSize, maxChunkSize) + initList := chunk.NewListInMemory(hCtx.allTypes, maxChunkSize, maxChunkSize) c := &hashRowContainer{ sc: sCtx.GetSessionVars().StmtCtx, hCtx: hCtx, @@ -202,7 +202,10 @@ func (c *hashRowContainer) PutChunk(chk *chunk.Chunk) error { } } else { chkIdx = uint32(c.records.NumChunks()) - c.records.Add(chk) + err := c.records.Add(chk) + if err != nil { + return err + } if atomic.LoadUint32(&c.exceeded) != 0 { err := c.spillToDisk() if err != nil { @@ -269,7 +272,7 @@ func (c *hashRowContainer) ActionSpill() memory.ActionOnExceed { return &spillDiskAction{c: c} } -// spillDiskAction implements memory.ActionOnExceed for chunk.List. If +// spillDiskAction implements memory.ActionOnExceed for chunk.ListInMemory. If // the memory quota of a query is exceeded, spillDiskAction.Action is // triggered. type spillDiskAction struct { diff --git a/executor/index_lookup_join.go b/executor/index_lookup_join.go index 57f669e375e26..dfd349e8834fa 100644 --- a/executor/index_lookup_join.go +++ b/executor/index_lookup_join.go @@ -94,10 +94,10 @@ type innerCtx struct { } type lookUpJoinTask struct { - outerResult *chunk.List + outerResult *chunk.ListInMemory outerMatch [][]bool - innerResult *chunk.List + innerResult *chunk.ListInMemory encodedLookUpKeys []*chunk.Chunk lookupMap *mvmap.MVMap matchedInners []chunk.Row @@ -392,7 +392,10 @@ func (ow *outerWorker) buildTask(ctx context.Context) (*lookUpJoinTask, error) { break } - task.outerResult.Add(chk) + err = task.outerResult.Add(chk) + if err != nil { + return nil, err + } } if task.outerResult.Len() == 0 { return nil, nil @@ -601,7 +604,7 @@ func (iw *innerWorker) fetchInnerResults(ctx context.Context, task *lookUpJoinTa return err } defer terror.Call(innerExec.Close) - innerResult := chunk.NewList(retTypes(innerExec), iw.ctx.GetSessionVars().MaxChunkSize, iw.ctx.GetSessionVars().MaxChunkSize) + innerResult := chunk.NewListInMemory(retTypes(innerExec), iw.ctx.GetSessionVars().MaxChunkSize, iw.ctx.GetSessionVars().MaxChunkSize) innerResult.GetMemTracker().SetLabel(buildSideResultLabel) innerResult.GetMemTracker().AttachTo(task.memTracker) for { @@ -617,7 +620,10 @@ func (iw *innerWorker) fetchInnerResults(ctx context.Context, task *lookUpJoinTa if iw.executorChk.NumRows() == 0 { break } - innerResult.Add(iw.executorChk) + err = innerResult.Add(iw.executorChk) + if err != nil { + return err + } iw.executorChk = newFirstChunk(innerExec) } task.innerResult = innerResult diff --git a/executor/index_lookup_merge_join.go b/executor/index_lookup_merge_join.go index e4dbe7369d3b2..dbf541aa6898b 100644 --- a/executor/index_lookup_merge_join.go +++ b/executor/index_lookup_merge_join.go @@ -86,7 +86,7 @@ type innerMergeCtx struct { } type lookUpMergeJoinTask struct { - outerResult *chunk.List + outerResult *chunk.ListInMemory outerOrderIdx []chunk.RowPtr innerResult *chunk.Chunk @@ -323,7 +323,7 @@ func (omw *outerMergeWorker) pushToChan(ctx context.Context, task *lookUpMergeJo func (omw *outerMergeWorker) buildTask(ctx context.Context) (*lookUpMergeJoinTask, error) { task := &lookUpMergeJoinTask{ results: make(chan *indexMergeJoinResult, numResChkHold), - outerResult: chunk.NewList(omw.rowTypes, omw.executor.base().initCap, omw.executor.base().maxChunkSize), + outerResult: chunk.NewListInMemory(omw.rowTypes, omw.executor.base().initCap, omw.executor.base().maxChunkSize), } task.memTracker = memory.NewTracker(stringutil.MemoizeStr(func() string { return fmt.Sprintf("lookup join task %p", task) }), -1) task.memTracker.AttachTo(omw.parentMemTracker) @@ -346,7 +346,10 @@ func (omw *outerMergeWorker) buildTask(ctx context.Context) (*lookUpMergeJoinTas break } - task.outerResult.Add(execChk) + err = task.outerResult.Add(execChk) + if err != nil { + return task, err + } requiredRows -= execChk.NumRows() task.memTracker.Consume(execChk.MemoryUsage()) } diff --git a/executor/join.go b/executor/join.go index 61d7386cdce44..33010f5f6da3d 100644 --- a/executor/join.go +++ b/executor/join.go @@ -555,7 +555,7 @@ type NestedLoopApplyExec struct { outerChunk *chunk.Chunk outerChunkCursor int outerSelected []bool - innerList *chunk.List + innerList *chunk.ListInMemory innerChunk *chunk.Chunk innerSelected []bool innerIter chunk.Iterator @@ -586,7 +586,7 @@ func (e *NestedLoopApplyExec) Open(ctx context.Context) error { e.innerRows = e.innerRows[:0] e.outerChunk = newFirstChunk(e.outerExec) e.innerChunk = newFirstChunk(e.innerExec) - e.innerList = chunk.NewList(retTypes(e.innerExec), e.initCap, e.maxChunkSize) + e.innerList = chunk.NewListInMemory(retTypes(e.innerExec), e.initCap, e.maxChunkSize) e.memTracker = memory.NewTracker(e.id, e.ctx.GetSessionVars().MemQuotaNestedLoopApply) e.memTracker.AttachTo(e.ctx.GetSessionVars().StmtCtx.MemTracker) @@ -628,7 +628,7 @@ func (e *NestedLoopApplyExec) fetchSelectedOuterRow(ctx context.Context, chk *ch } } -// fetchAllInners reads all data from the inner table and stores them in a List. +// fetchAllInners reads all data from the inner table and stores them in a ListInMemory. func (e *NestedLoopApplyExec) fetchAllInners(ctx context.Context) error { err := e.innerExec.Open(ctx) defer terror.Call(e.innerExec.Close) diff --git a/executor/pkg_test.go b/executor/pkg_test.go index bdd7a91772bed..22a361293a89f 100644 --- a/executor/pkg_test.go +++ b/executor/pkg_test.go @@ -61,7 +61,7 @@ func (s *pkgTestSuite) TestNestedLoopApply(c *C) { innerFilter: []expression.Expression{innerFilter}, joiner: joiner, } - join.innerList = chunk.NewList(retTypes(innerExec), innerExec.initCap, innerExec.maxChunkSize) + join.innerList = chunk.NewListInMemory(retTypes(innerExec), innerExec.initCap, innerExec.maxChunkSize) join.innerChunk = newFirstChunk(innerExec) join.outerChunk = newFirstChunk(outerExec) joinChk := newFirstChunk(join) diff --git a/executor/sort.go b/executor/sort.go index aa895015b1eec..a9b378b41452b 100644 --- a/executor/sort.go +++ b/executor/sort.go @@ -45,7 +45,7 @@ type SortExec struct { // keyCmpFuncs is used to compare each ByItem. keyCmpFuncs []chunk.CompareFunc // rowChunks is the chunks to store row values. - rowChunks *chunk.List + rowChunks *chunk.ListInMemory // rowPointer store the chunk index and row index for each row. rowPtrs []chunk.RowPtr @@ -95,7 +95,7 @@ func (e *SortExec) Next(ctx context.Context, req *chunk.Chunk) error { func (e *SortExec) fetchRowChunks(ctx context.Context) error { fields := retTypes(e) - e.rowChunks = chunk.NewList(fields, e.initCap, e.maxChunkSize) + e.rowChunks = chunk.NewListInMemory(fields, e.initCap, e.maxChunkSize) e.rowChunks.GetMemTracker().AttachTo(e.memTracker) e.rowChunks.GetMemTracker().SetLabel(rowChunksLabel) for { @@ -108,7 +108,10 @@ func (e *SortExec) fetchRowChunks(ctx context.Context) error { if rowCount == 0 { break } - e.rowChunks.Add(chk) + err = e.rowChunks.Add(chk) + if err != nil { + return err + } } return nil } @@ -256,7 +259,7 @@ func (e *TopNExec) Next(ctx context.Context, req *chunk.Chunk) error { func (e *TopNExec) loadChunksUntilTotalLimit(ctx context.Context) error { e.chkHeap = &topNChunkHeap{e} - e.rowChunks = chunk.NewList(retTypes(e), e.initCap, e.maxChunkSize) + e.rowChunks = chunk.NewListInMemory(retTypes(e), e.initCap, e.maxChunkSize) e.rowChunks.GetMemTracker().AttachTo(e.memTracker) e.rowChunks.GetMemTracker().SetLabel(rowChunksLabel) for uint64(e.rowChunks.Len()) < e.totalLimit { @@ -270,7 +273,10 @@ func (e *TopNExec) loadChunksUntilTotalLimit(ctx context.Context) error { if srcChk.NumRows() == 0 { break } - e.rowChunks.Add(srcChk) + err = e.rowChunks.Add(srcChk) + if err != nil { + return err + } } e.initPointers() e.initCompareFuncs() @@ -330,7 +336,7 @@ func (e *TopNExec) processChildChk(childRowChk *chunk.Chunk) error { // but we want descending top N, then we will keep all data in memory. // But if data is distributed randomly, this function will be called log(n) times. func (e *TopNExec) doCompaction() error { - newRowChunks := chunk.NewList(retTypes(e), e.initCap, e.maxChunkSize) + newRowChunks := chunk.NewListInMemory(retTypes(e), e.initCap, e.maxChunkSize) newRowPtrs := make([]chunk.RowPtr, 0, e.rowChunks.Len()) for _, rowPtr := range e.rowPtrs { newRowPtr := newRowChunks.AppendRow(e.rowChunks.GetRow(rowPtr)) diff --git a/util/chunk/disk.go b/util/chunk/disk.go index a374a2307c954..bc20be59d89d8 100644 --- a/util/chunk/disk.go +++ b/util/chunk/disk.go @@ -83,7 +83,7 @@ func NewListInDisk(fieldTypes []*types.FieldType) *ListInDisk { return l } -// GetDiskTracker returns the memory tracker of this List. +// GetDiskTracker returns the memory tracker of this ListInMemory. func (l *ListInDisk) GetDiskTracker() *disk.Tracker { return l.diskTracker } @@ -92,7 +92,7 @@ func (l *ListInDisk) GetDiskTracker() *disk.Tracker { // is not empty and not used any more and has the same field types. func (l *ListInDisk) Add(chk *Chunk) (err error) { if chk.NumRows() == 0 { - return errors.New("chunk appended to List should have at least 1 row") + return errors.New("chunk appended to ListInMemory should have at least 1 row") } if l.disk == nil { l.disk, err = ioutil.TempFile(tmpDir, l.diskTracker.Label().String()) @@ -138,6 +138,11 @@ func (l *ListInDisk) NumChunks() int { return len(l.offsets) } +// NumRowsOfChunk returns the number of rows of a chunk in the ListInDisk. +func (l *ListInDisk) NumRowsOfChunk(chkID int) int { + return len(l.offsets[chkID]) +} + // Close releases the disk resource. func (l *ListInDisk) Close() error { if l.disk != nil { diff --git a/util/chunk/iterator.go b/util/chunk/iterator.go index c6a3bc58ff303..071a3087922ff 100644 --- a/util/chunk/iterator.go +++ b/util/chunk/iterator.go @@ -159,13 +159,13 @@ func (it *Iterator4Chunk) GetChunk() *Chunk { return it.chk } -// NewIterator4List returns a Iterator for List. -func NewIterator4List(li *List) Iterator { +// NewIterator4List returns a Iterator for ListInMemory. +func NewIterator4List(li *ListInMemory) Iterator { return &iterator4List{li: li} } type iterator4List struct { - li *List + li *ListInMemory chkCursor int rowCursor int } @@ -232,12 +232,12 @@ func (it *iterator4List) Len() int { } // NewIterator4RowPtr returns a Iterator for RowPtrs. -func NewIterator4RowPtr(li *List, ptrs []RowPtr) Iterator { +func NewIterator4RowPtr(li *ListInMemory, ptrs []RowPtr) Iterator { return &iterator4RowPtr{li: li, ptrs: ptrs} } type iterator4RowPtr struct { - li *List + li *ListInMemory ptrs []RowPtr cursor int } diff --git a/util/chunk/iterator_test.go b/util/chunk/iterator_test.go index 2c937447b1909..7125f24d194fd 100644 --- a/util/chunk/iterator_test.go +++ b/util/chunk/iterator_test.go @@ -49,8 +49,8 @@ func (s *testChunkSuite) TestIterator(c *check.C) { expected = append(expected, int64(i)) } var rows []Row - li := NewList(fields, 1, 2) - li2 := NewList(fields, 8, 16) + li := NewListInMemory(fields, 1, 2) + li2 := NewListInMemory(fields, 8, 16) var ptrs []RowPtr var ptrs2 []RowPtr for i := 0; i < n; i++ { @@ -120,7 +120,7 @@ func (s *testChunkSuite) TestIterator(c *check.C) { c.Assert(it.Begin(), check.Equals, it.End()) it = NewIterator4Chunk(new(Chunk)) c.Assert(it.Begin(), check.Equals, it.End()) - it = NewIterator4List(new(List)) + it = NewIterator4List(new(ListInMemory)) c.Assert(it.Begin(), check.Equals, it.End()) it = NewIterator4RowPtr(li, nil) c.Assert(it.Begin(), check.Equals, it.End()) diff --git a/util/chunk/list.go b/util/chunk/list.go index 2f71febb1179d..4826b229d82ae 100644 --- a/util/chunk/list.go +++ b/util/chunk/list.go @@ -22,8 +22,16 @@ import ( "github.com/pingcap/tidb/util/stringutil" ) -// List holds a slice of chunks, use to append rows with max chunk size properly handled. -type List struct { +// List is interface for ListInMemory and ListInDisk +type List interface { + NumRowsOfChunk(int) + NumChunks() + GetRow(RowPtr) + Add(chk *Chunk) (err error) +} + +// ListInMemory holds a slice of chunks, use to append rows with max chunk size properly handled. +type ListInMemory struct { fieldTypes []*types.FieldType initChunkSize int maxChunkSize int @@ -42,11 +50,11 @@ type RowPtr struct { RowIdx uint32 } -var chunkListLabel fmt.Stringer = stringutil.StringerStr("chunk.List") +var chunkListLabel fmt.Stringer = stringutil.StringerStr("chunk.ListInMemory") -// NewList creates a new List with field types, init chunk size and max chunk size. -func NewList(fieldTypes []*types.FieldType, initChunkSize, maxChunkSize int) *List { - l := &List{ +// NewListInMemory creates a new ListInMemory with field types, init chunk size and max chunk size. +func NewListInMemory(fieldTypes []*types.FieldType, initChunkSize, maxChunkSize int) *ListInMemory { + l := &ListInMemory{ fieldTypes: fieldTypes, initChunkSize: initChunkSize, maxChunkSize: maxChunkSize, @@ -56,28 +64,28 @@ func NewList(fieldTypes []*types.FieldType, initChunkSize, maxChunkSize int) *Li return l } -// GetMemTracker returns the memory tracker of this List. -func (l *List) GetMemTracker() *memory.Tracker { +// GetMemTracker returns the memory tracker of this ListInMemory. +func (l *ListInMemory) GetMemTracker() *memory.Tracker { return l.memTracker } -// Len returns the length of the List. -func (l *List) Len() int { +// Len returns the length of the ListInMemory. +func (l *ListInMemory) Len() int { return l.length } -// NumChunks returns the number of chunks in the List. -func (l *List) NumChunks() int { +// NumChunks returns the number of chunks in the ListInMemory. +func (l *ListInMemory) NumChunks() int { return len(l.chunks) } // GetChunk gets the Chunk by ChkIdx. -func (l *List) GetChunk(chkIdx int) *Chunk { +func (l *ListInMemory) GetChunk(chkIdx int) *Chunk { return l.chunks[chkIdx] } -// AppendRow appends a row to the List, the row is copied to the List. -func (l *List) AppendRow(row Row) RowPtr { +// AppendRow appends a row to the ListInMemory, the row is copied to the ListInMemory. +func (l *ListInMemory) AppendRow(row Row) RowPtr { chkIdx := len(l.chunks) - 1 if chkIdx == -1 || l.chunks[chkIdx].NumRows() >= l.chunks[chkIdx].Capacity() || chkIdx == l.consumedIdx { newChk := l.allocChunk() @@ -95,13 +103,12 @@ func (l *List) AppendRow(row Row) RowPtr { return RowPtr{ChkIdx: uint32(chkIdx), RowIdx: uint32(rowIdx)} } -// Add adds a chunk to the List, the chunk may be modified later by the list. +// Add adds a chunk to the ListInMemory, the chunk may be modified later by the list. // Caller must make sure the input chk is not empty and not used any more and has the same field types. -func (l *List) Add(chk *Chunk) { +func (l *ListInMemory) Add(chk *Chunk) (err error) { // FixMe: we should avoid add a Chunk that chk.NumRows() > list.maxChunkSize. if chk.NumRows() == 0 { - // TODO: return error here. - panic("chunk appended to List should have at least 1 row") + return errors.New("chunk appended to ListInMemory should have at least 1 row") } if chkIdx := len(l.chunks) - 1; l.consumedIdx != chkIdx { l.memTracker.Consume(l.chunks[chkIdx].MemoryUsage()) @@ -111,10 +118,10 @@ func (l *List) Add(chk *Chunk) { l.consumedIdx++ l.chunks = append(l.chunks, chk) l.length += chk.NumRows() - return + return nil } -func (l *List) allocChunk() (chk *Chunk) { +func (l *ListInMemory) allocChunk() (chk *Chunk) { if len(l.freelist) > 0 { lastIdx := len(l.freelist) - 1 chk = l.freelist[lastIdx] @@ -130,13 +137,18 @@ func (l *List) allocChunk() (chk *Chunk) { } // GetRow gets a Row from the list by RowPtr. -func (l *List) GetRow(ptr RowPtr) Row { +func (l *ListInMemory) GetRow(ptr RowPtr) Row { chk := l.chunks[ptr.ChkIdx] return chk.GetRow(int(ptr.RowIdx)) } -// Reset resets the List. -func (l *List) Reset() { +// NumRowsOfChunk returns the number of rows of a chunk. +func (l *ListInMemory) NumRowsOfChunk(chkID int) int { + return l.GetChunk(chkID).NumRows() +} + +// Reset resets the ListInMemory. +func (l *ListInMemory) Reset() { if lastIdx := len(l.chunks) - 1; lastIdx != l.consumedIdx { l.memTracker.Consume(l.chunks[lastIdx].MemoryUsage()) } @@ -148,11 +160,11 @@ func (l *List) Reset() { // preAlloc4Row pre-allocates the storage memory for a Row. // NOTE: only used in test -// 1. The List must be empty or holds no useful data. -// 2. The schema of the Row must be the same with the List. +// 1. The ListInMemory must be empty or holds no useful data. +// 2. The schema of the Row must be the same with the ListInMemory. // 3. This API is paired with the `Insert()` function, which inserts all the -// rows data into the List after the pre-allocation. -func (l *List) preAlloc4Row(row Row) (ptr RowPtr) { +// rows data into the ListInMemory after the pre-allocation. +func (l *ListInMemory) preAlloc4Row(row Row) (ptr RowPtr) { chkIdx := len(l.chunks) - 1 if chkIdx == -1 || l.chunks[chkIdx].NumRows() >= l.chunks[chkIdx].Capacity() { newChk := l.allocChunk() @@ -172,7 +184,7 @@ func (l *List) preAlloc4Row(row Row) (ptr RowPtr) { // Insert inserts `row` on the position specified by `ptr`. // Note: Insert will cover the origin data, it should be called after // PreAlloc. -func (l *List) Insert(ptr RowPtr, row Row) { +func (l *ListInMemory) Insert(ptr RowPtr, row Row) { l.chunks[ptr.ChkIdx].insert(int(ptr.RowIdx), row) } @@ -181,7 +193,7 @@ func (l *List) Insert(ptr RowPtr, row Row) { type ListWalkFunc = func(row Row) error // Walk iterate the list and call walkFunc for each row. -func (l *List) Walk(walkFunc ListWalkFunc) error { +func (l *ListInMemory) Walk(walkFunc ListWalkFunc) error { for i := 0; i < len(l.chunks); i++ { chk := l.chunks[i] for j := 0; j < chk.NumRows(); j++ { diff --git a/util/chunk/list_test.go b/util/chunk/list_test.go index b7d7780ffa2f2..c71bd712a1875 100644 --- a/util/chunk/list_test.go +++ b/util/chunk/list_test.go @@ -32,7 +32,7 @@ func (s *testChunkSuite) TestList(c *check.C) { fields := []*types.FieldType{ types.NewFieldType(mysql.TypeLonglong), } - l := NewList(fields, 2, 2) + l := NewListInMemory(fields, 2, 2) srcChunk := NewChunkWithCapacity(fields, 32) srcChunk.AppendInt64(0, 1) srcRow := srcChunk.GetRow(0) @@ -104,7 +104,7 @@ func (s *testChunkSuite) TestListMemoryUsage(c *check.C) { srcChk.AppendTime(3, timeObj) srcChk.AppendDuration(4, durationObj) - list := NewList(fieldTypes, maxChunkSize, maxChunkSize*2) + list := NewListInMemory(fieldTypes, maxChunkSize, maxChunkSize*2) c.Assert(list.GetMemTracker().BytesConsumed(), check.Equals, int64(0)) list.AppendRow(srcChk.GetRow(0)) @@ -133,8 +133,8 @@ func (s *testChunkSuite) TestListPrePreAlloc4RowAndInsert(c *check.C) { srcChk.AppendString(3, strings.Repeat(strconv.FormatInt(i, 10), int(i))) } - srcList := NewList(fieldTypes, 3, 3) - destList := NewList(fieldTypes, 5, 5) + srcList := NewListInMemory(fieldTypes, 3, 3) + destList := NewListInMemory(fieldTypes, 5, 5) destRowPtr := make([]RowPtr, srcChk.NumRows()) for i := 0; i < srcChk.NumRows(); i++ { srcList.AppendRow(srcChk.GetRow(i)) @@ -176,7 +176,7 @@ func BenchmarkListMemoryUsage(b *testing.B) { row := chk.GetRow(0) initCap := 50 - list := NewList(fieldTypes, 2, 8) + list := NewListInMemory(fieldTypes, 2, 8) for i := 0; i < initCap; i++ { list.AppendRow(row) } @@ -194,7 +194,7 @@ func BenchmarkPreAllocList(b *testing.B) { row := chk.GetRow(0) b.ResetTimer() - list := NewList(fieldTypes, 1024, 1024) + list := NewListInMemory(fieldTypes, 1024, 1024) for i := 0; i < b.N; i++ { list.Reset() // 32768 indicates the number of int64 rows to fill 256KB L2 cache. @@ -225,7 +225,7 @@ func BenchmarkListAdd(b *testing.B) { numChk, numRow := 1, 2 chks, fields := initChunks(numChk, numRow) chk := chks[0] - l := NewList(fields, numRow, numRow) + l := NewListInMemory(fields, numRow, numRow) b.ResetTimer() for i := 0; i < b.N; i++ { @@ -236,7 +236,7 @@ func BenchmarkListAdd(b *testing.B) { func BenchmarkListGetRow(b *testing.B) { numChk, numRow := 10000, 2 chks, fields := initChunks(numChk, numRow) - l := NewList(fields, numRow, numRow) + l := NewListInMemory(fields, numRow, numRow) for _, chk := range chks { l.Add(chk) } From 6f50566b0970740c25780cdf29da5c46694d5a73 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang <zhangyuanjia@pingcap.com> Date: Mon, 11 Nov 2019 02:01:44 -0600 Subject: [PATCH 04/19] expression: generate big real numbers when testing (#13338) --- expression/bench_test.go | 4 ++-- expression/builtin_math_vec_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/expression/bench_test.go b/expression/bench_test.go index 0ddb6237a04fc..f324da15fd215 100644 --- a/expression/bench_test.go +++ b/expression/bench_test.go @@ -221,9 +221,9 @@ func (g *defaultGener) gen() interface{} { return rand.Int63() case types.ETReal: if rand.Float64() < 0.5 { - return -rand.Float64() + return -rand.Float64() * 1000000 } - return rand.Float64() + return rand.Float64() * 1000000 case types.ETDecimal: d := new(types.MyDecimal) var f float64 diff --git a/expression/builtin_math_vec_test.go b/expression/builtin_math_vec_test.go index 36f77d43f3fff..8449972e87757 100644 --- a/expression/builtin_math_vec_test.go +++ b/expression/builtin_math_vec_test.go @@ -55,7 +55,7 @@ var vecBuiltinMathCases = map[string][]vecExprBenchCase{ {retEvalType: types.ETReal, childrenTypes: []types.EvalType{types.ETReal}}, }, ast.Exp: { - {retEvalType: types.ETReal, childrenTypes: []types.EvalType{types.ETReal}}, + {retEvalType: types.ETReal, childrenTypes: []types.EvalType{types.ETReal}, geners: []dataGenerator{&rangeRealGener{-1, 1, 0.2}}}, }, ast.Degrees: { {retEvalType: types.ETReal, childrenTypes: []types.EvalType{types.ETReal}}, From 69ff2d5e887d49657968ca0ebb0201f49c675dcc Mon Sep 17 00:00:00 2001 From: Mingcong Han <hanmingcong123@hotmail.com> Date: Mon, 11 Nov 2019 16:15:43 +0800 Subject: [PATCH 05/19] planner/cascades: add ImplementationRule for TopN (#13323) --- planner/cascades/enforcer_rules.go | 8 +-- planner/cascades/implementation_rules.go | 62 +++++++++++++++++++ planner/cascades/optimize.go | 2 +- .../testdata/integration_suite_in.json | 4 +- .../testdata/integration_suite_out.json | 26 ++++++++ planner/implementation/simple_plans.go | 18 ++++++ 6 files changed, 114 insertions(+), 6 deletions(-) diff --git a/planner/cascades/enforcer_rules.go b/planner/cascades/enforcer_rules.go index f242db62849be..b965ca6b474fd 100644 --- a/planner/cascades/enforcer_rules.go +++ b/planner/cascades/enforcer_rules.go @@ -30,7 +30,7 @@ type Enforcer interface { // required physical property. OnEnforce(reqProp *property.PhysicalProperty, child memo.Implementation) (impl memo.Implementation) // GetEnforceCost calculates cost of enforcing required physical property. - GetEnforceCost(g *memo.Group, inputCount float64) float64 + GetEnforceCost(g *memo.Group) float64 } // GetEnforcerRules gets all candidate enforcer rules based @@ -54,7 +54,7 @@ var orderEnforcer = &OrderEnforcer{} // NewProperty removes order property from required physical property. func (e *OrderEnforcer) NewProperty(prop *property.PhysicalProperty) (newProp *property.PhysicalProperty) { // Order property cannot be empty now. - newProp = &property.PhysicalProperty{ExpectedCnt: prop.ExpectedCnt} + newProp = &property.PhysicalProperty{ExpectedCnt: math.MaxFloat64} return } @@ -76,10 +76,10 @@ func (e *OrderEnforcer) OnEnforce(reqProp *property.PhysicalProperty, child memo } // GetEnforceCost calculates cost of sort operator. -func (e *OrderEnforcer) GetEnforceCost(g *memo.Group, inputCount float64) float64 { +func (e *OrderEnforcer) GetEnforceCost(g *memo.Group) float64 { // We need a SessionCtx to calculate the cost of a sort. sctx := g.Equivalents.Front().Value.(*memo.GroupExpr).ExprNode.SCtx() sort := plannercore.PhysicalSort{}.Init(sctx, nil, 0, nil) - cost := sort.GetCost(inputCount) + cost := sort.GetCost(g.Prop.Stats.RowCount) return cost } diff --git a/planner/cascades/implementation_rules.go b/planner/cascades/implementation_rules.go index 381abd2b443fd..6bbdbcfa89995 100644 --- a/planner/cascades/implementation_rules.go +++ b/planner/cascades/implementation_rules.go @@ -16,6 +16,7 @@ package cascades import ( "math" + "github.com/pingcap/tidb/expression" plannercore "github.com/pingcap/tidb/planner/core" impl "github.com/pingcap/tidb/planner/implementation" "github.com/pingcap/tidb/planner/memo" @@ -59,6 +60,10 @@ var defaultImplementationMap = map[memo.Operand][]ImplementationRule{ memo.OperandLimit: { &ImplLimit{}, }, + memo.OperandTopN: { + &ImplTopN{}, + &ImplTopNAsLimit{}, + }, } // ImplTableDual implements LogicalTableDual as PhysicalTableDual. @@ -272,3 +277,60 @@ func (r *ImplLimit) OnImplement(expr *memo.GroupExpr, reqProp *property.Physical }.Init(logicalLimit.SCtx(), expr.Group.Prop.Stats, logicalLimit.SelectBlockOffset(), newProp) return impl.NewLimitImpl(physicalLimit), nil } + +// ImplTopN is the implementation rule which implements LogicalTopN +// to PhysicalTopN. +type ImplTopN struct { +} + +// Match implements ImplementationRule Match interface. +func (r *ImplTopN) Match(expr *memo.GroupExpr, prop *property.PhysicalProperty) (matched bool) { + topN := expr.ExprNode.(*plannercore.LogicalTopN) + return plannercore.MatchItems(prop, topN.ByItems) +} + +// OnImplement implements ImplementationRule OnImplement interface. +func (r *ImplTopN) OnImplement(expr *memo.GroupExpr, reqProp *property.PhysicalProperty) (memo.Implementation, error) { + lt := expr.ExprNode.(*plannercore.LogicalTopN) + resultProp := &property.PhysicalProperty{ExpectedCnt: math.MaxFloat64} + topN := plannercore.PhysicalTopN{ + ByItems: lt.ByItems, + Count: lt.Count, + Offset: lt.Offset, + }.Init(lt.SCtx(), expr.Group.Prop.Stats, lt.SelectBlockOffset(), resultProp) + switch expr.Group.EngineType { + case memo.EngineTiDB: + return impl.NewTiDBTopNImpl(topN), nil + default: + // TODO: return TiKVTopNImpl after we have implemented push topN down gather. + return nil, plannercore.ErrInternal.GenWithStack("Unsupported EngineType '%s' for TopN.", expr.Group.EngineType.String()) + } +} + +// ImplTopNAsLimit is the implementation rule which implements LogicalTopN +// as PhysicalLimit with required order property. +type ImplTopNAsLimit struct { +} + +// Match implements ImplementationRule Match interface. +func (r *ImplTopNAsLimit) Match(expr *memo.GroupExpr, prop *property.PhysicalProperty) (matched bool) { + topN := expr.ExprNode.(*plannercore.LogicalTopN) + _, canUseLimit := plannercore.GetPropByOrderByItems(topN.ByItems) + return canUseLimit && plannercore.MatchItems(prop, topN.ByItems) +} + +// OnImplement implements ImplementationRule OnImplement interface. +func (r *ImplTopNAsLimit) OnImplement(expr *memo.GroupExpr, reqProp *property.PhysicalProperty) (memo.Implementation, error) { + lt := expr.ExprNode.(*plannercore.LogicalTopN) + newProp := &property.PhysicalProperty{ExpectedCnt: float64(lt.Count + lt.Offset)} + newProp.Items = make([]property.Item, len(lt.ByItems)) + for i, item := range lt.ByItems { + newProp.Items[i].Col = item.Expr.(*expression.Column) + newProp.Items[i].Desc = item.Desc + } + physicalLimit := plannercore.PhysicalLimit{ + Offset: lt.Offset, + Count: lt.Count, + }.Init(lt.SCtx(), expr.Group.Prop.Stats, lt.SelectBlockOffset(), newProp) + return impl.NewLimitImpl(physicalLimit), nil +} diff --git a/planner/cascades/optimize.go b/planner/cascades/optimize.go index 1a7cd566f6cbd..55d2044bb7fe2 100644 --- a/planner/cascades/optimize.go +++ b/planner/cascades/optimize.go @@ -332,7 +332,7 @@ func (opt *Optimizer) implGroup(g *memo.Group, reqPhysProp *property.PhysicalPro // Handle enforcer rules for required physical property. for _, rule := range GetEnforcerRules(g, reqPhysProp) { newReqPhysProp := rule.NewProperty(reqPhysProp) - enforceCost := rule.GetEnforceCost(g, outCount) + enforceCost := rule.GetEnforceCost(g) childImpl, err := opt.implGroup(g, newReqPhysProp, costLimit-enforceCost) if err != nil { return nil, err diff --git a/planner/cascades/testdata/integration_suite_in.json b/planner/cascades/testdata/integration_suite_in.json index 91779070e4083..deb46c52d0cf9 100644 --- a/planner/cascades/testdata/integration_suite_in.json +++ b/planner/cascades/testdata/integration_suite_in.json @@ -50,7 +50,9 @@ "name": "TestSimplePlans", "cases": [ "select a from t limit 2", - "select a from t limit 1 offset 2" + "select a from t limit 1 offset 2", + "select b from t order by b limit 3", + "select a from t order by a limit 1 offset 2" ] } ] diff --git a/planner/cascades/testdata/integration_suite_out.json b/planner/cascades/testdata/integration_suite_out.json index 46e3ca6b1f8c5..27628cbb1aea4 100644 --- a/planner/cascades/testdata/integration_suite_out.json +++ b/planner/cascades/testdata/integration_suite_out.json @@ -303,6 +303,32 @@ "Result": [ "3" ] + }, + { + "SQL": "select b from t order by b limit 3", + "Plan": [ + "TopN_8 3.00 root Column#3:asc, offset:0, count:3", + "└─Projection_10 10000.00 root Column#2", + " └─TableReader_11 10000.00 root data:TableScan_12", + " └─TableScan_12 10000.00 cop[tikv] table:t, range:[-inf,+inf], keep order:false, stats:pseudo" + ], + "Result": [ + "11", + "22", + "33" + ] + }, + { + "SQL": "select a from t order by a limit 1 offset 2", + "Plan": [ + "Limit_9 1.00 root offset:2, count:1", + "└─Projection_13 3.00 root Column#1", + " └─TableReader_14 3.00 root data:TableScan_15", + " └─TableScan_15 3.00 cop[tikv] table:t, range:[-inf,+inf], keep order:true, stats:pseudo" + ], + "Result": [ + "3" + ] } ] } diff --git a/planner/implementation/simple_plans.go b/planner/implementation/simple_plans.go index 44d55239830bb..c405fafd4fc8c 100644 --- a/planner/implementation/simple_plans.go +++ b/planner/implementation/simple_plans.go @@ -108,3 +108,21 @@ type LimitImpl struct { func NewLimitImpl(limit *plannercore.PhysicalLimit) *LimitImpl { return &LimitImpl{baseImpl{plan: limit}} } + +// TiDBTopNImpl is the implementation of PhysicalTopN in TiDB layer. +type TiDBTopNImpl struct { + baseImpl +} + +// CalcCost implements Implementation CalcCost interface. +func (impl *TiDBTopNImpl) CalcCost(outCount float64, children ...memo.Implementation) float64 { + topN := impl.plan.(*plannercore.PhysicalTopN) + childCount := children[0].GetPlan().Stats().RowCount + impl.cost = topN.GetCost(childCount, true) + children[0].GetCost() + return impl.cost +} + +// NewTiDBTopNImpl creates a new TiDBTopNImpl. +func NewTiDBTopNImpl(topN *plannercore.PhysicalTopN) *TiDBTopNImpl { + return &TiDBTopNImpl{baseImpl{plan: topN}} +} From 1938cd1be44725786665d96eb1e108ff6ab33b43 Mon Sep 17 00:00:00 2001 From: Eugene Kalinin <ekalinin@users.noreply.github.com> Date: Mon, 11 Nov 2019 12:09:00 +0300 Subject: [PATCH 06/19] expression: implement vectorized evaluation for builtinCastTimeAsTimeSig (#13209) --- expression/builtin_cast_vec.go | 35 +++++++++++++++++++++++++++-- expression/builtin_cast_vec_test.go | 2 ++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/expression/builtin_cast_vec.go b/expression/builtin_cast_vec.go index ee657ebc66c4c..1f82da00ee106 100644 --- a/expression/builtin_cast_vec.go +++ b/expression/builtin_cast_vec.go @@ -1151,11 +1151,42 @@ func (b *builtinCastTimeAsIntSig) vecEvalInt(input *chunk.Chunk, result *chunk.C } func (b *builtinCastTimeAsTimeSig) vectorized() bool { - return false + return true } func (b *builtinCastTimeAsTimeSig) vecEvalTime(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + n := input.NumRows() + if err := b.args[0].VecEvalTime(b.ctx, input, result); err != nil { + return err + } + + times := result.Times() + stmt := b.ctx.GetSessionVars().StmtCtx + fsp := int8(b.tp.Decimal) + for i := 0; i < n; i++ { + if result.IsNull(i) { + continue + } + res, err := times[i].Convert(stmt, b.tp.Tp) + if err != nil { + if err = handleInvalidTimeError(b.ctx, err); err != nil { + return err + } + result.SetNull(i, true) + continue + } + tm, err := res.RoundFrac(stmt, fsp) + if err != nil { + return err + } + times[i] = tm + if b.tp.Tp == mysql.TypeDate { + // Truncate hh:mm:ss part if the type is Date. + times[i].Time = types.FromDate(tm.Time.Year(), tm.Time.Month(), tm.Time.Day(), 0, 0, 0, 0) + times[i].Type = b.tp.Tp + } + } + return nil } func (b *builtinCastTimeAsStringSig) vectorized() bool { diff --git a/expression/builtin_cast_vec_test.go b/expression/builtin_cast_vec_test.go index 79f515b5ab77b..c27542e3d76ec 100644 --- a/expression/builtin_cast_vec_test.go +++ b/expression/builtin_cast_vec_test.go @@ -73,6 +73,8 @@ var vecBuiltinCastCases = map[string][]vecExprBenchCase{ &dataStrGener{}, }}, {retEvalType: types.ETDatetime, childrenTypes: []types.EvalType{types.ETDuration}}, + {retEvalType: types.ETDatetime, childrenTypes: []types.EvalType{types.ETDatetime}}, + {retEvalType: types.ETDatetime, childrenTypes: []types.EvalType{types.ETTimestamp}}, }, } From 79ff3ebe2d999d264bba582ac32dde0c931cc7d2 Mon Sep 17 00:00:00 2001 From: Tennix <tennix@users.noreply.github.com> Date: Mon, 11 Nov 2019 17:26:30 +0800 Subject: [PATCH 07/19] tidb-server: introduce `go.uber.org/automaxprocs` (#13340) --- go.mod | 1 + go.sum | 2 ++ tidb-server/main.go | 1 + 3 files changed, 4 insertions(+) diff --git a/go.mod b/go.mod index 5639b5dd3009d..6cfa8d89f33ef 100644 --- a/go.mod +++ b/go.mod @@ -58,6 +58,7 @@ require ( github.com/unrolled/render v0.0.0-20180914162206-b9786414de4d // indirect go.etcd.io/etcd v0.5.0-alpha.5.0.20191023171146-3cf2f69b5738 go.uber.org/atomic v1.5.0 + go.uber.org/automaxprocs v1.2.0 go.uber.org/zap v1.12.0 golang.org/x/crypto v0.0.0-20191029031824-8986dd9e96cf // indirect golang.org/x/net v0.0.0-20190909003024-a7b16738d86b diff --git a/go.sum b/go.sum index d91f8b94bb88f..1cd066437080a 100644 --- a/go.sum +++ b/go.sum @@ -291,6 +291,8 @@ go.etcd.io/etcd v0.5.0-alpha.5.0.20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXw go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0 h1:OI5t8sDa1Or+q8AeE+yKeB/SDYioSHAgcVljj9JIETY= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= +go.uber.org/automaxprocs v1.2.0 h1:+RUihKM+nmYUoB9w0D0Ov5TJ2PpFO2FgenTxMJiZBZA= +go.uber.org/automaxprocs v1.2.0/go.mod h1:YfO3fm683kQpzETxlTGZhGIVmXAhaw3gxeBADbpZtnU= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.4.0 h1:f3WCSC2KzAcBXGATIxAB1E2XuCpNU255wNKZ505qi3E= diff --git a/tidb-server/main.go b/tidb-server/main.go index 6c0b555e4ebbd..961054ddf9f0f 100644 --- a/tidb-server/main.go +++ b/tidb-server/main.go @@ -59,6 +59,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/push" "github.com/struCoder/pidusage" + _ "go.uber.org/automaxprocs" "go.uber.org/zap" ) From f4ee90d06f5e89a634f4ea0e24f7c8eb27aabb47 Mon Sep 17 00:00:00 2001 From: Haibin Xie <lambdax.tyler@gmail.com> Date: Mon, 11 Nov 2019 17:46:29 +0800 Subject: [PATCH 08/19] planner: correct block offset for table as names (#12996) --- planner/core/hints.go | 8 +++++--- planner/core/logical_plan_builder.go | 23 ++++++++++++++++------- planner/core/testdata/plan_suite_in.json | 4 +++- planner/core/testdata/plan_suite_out.json | 9 +++++++++ 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/planner/core/hints.go b/planner/core/hints.go index c94da58f0663f..a19a89404a652 100644 --- a/planner/core/hints.go +++ b/planner/core/hints.go @@ -247,13 +247,15 @@ func extractTableAsName(p PhysicalPlan) (*model.CIStr, *model.CIStr) { func getJoinHints(sctx sessionctx.Context, joinType string, parentOffset int, nodeType nodeType, children ...PhysicalPlan) (res []*ast.TableOptimizerHint) { for _, child := range children { - if child.SelectBlockOffset() == -1 { + blockOffset := child.SelectBlockOffset() + if blockOffset == -1 { continue } var dbName, tableName *model.CIStr if child.SelectBlockOffset() != parentOffset { hintTable := sctx.GetSessionVars().PlannerSelectBlockAsName[child.SelectBlockOffset()] - dbName, tableName = &hintTable.DBName, &hintTable.TableName + // For sub-queries like `(select * from t) t1`, t1 should belong to its surrounding select block. + dbName, tableName, blockOffset = &hintTable.DBName, &hintTable.TableName, parentOffset } else { dbName, tableName = extractTableAsName(child) } @@ -261,7 +263,7 @@ func getJoinHints(sctx sessionctx.Context, joinType string, parentOffset int, no continue } res = append(res, &ast.TableOptimizerHint{ - QBName: generateQBName(nodeType, child.SelectBlockOffset()), + QBName: generateQBName(nodeType, blockOffset), HintName: model.NewCIStr(joinType), Tables: []ast.HintTable{{DBName: *dbName, TableName: *tableName}}, }) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 883d8a5921f00..d7facea93a46f 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -173,6 +173,7 @@ func (b *PlanBuilder) buildResultSetNode(ctx context.Context, node ast.ResultSet case *ast.Join: return b.buildJoin(ctx, x) case *ast.TableSource: + var isTableName bool switch v := x.Source.(type) { case *ast.SelectStmt: p, err = b.buildSelect(ctx, v) @@ -180,6 +181,7 @@ func (b *PlanBuilder) buildResultSetNode(ctx context.Context, node ast.ResultSet p, err = b.buildUnion(ctx, v) case *ast.TableName: p, err = b.buildDataSource(ctx, v, &x.AsName) + isTableName = true default: err = ErrUnsupportedType.GenWithStackByArgs(v) } @@ -196,7 +198,8 @@ func (b *PlanBuilder) buildResultSetNode(ctx context.Context, node ast.ResultSet name.TblName = x.AsName } } - if b.ctx.GetSessionVars().PlannerSelectBlockAsName != nil { + // `TableName` is not a select block, so we do not need to handle it. + if !isTableName && b.ctx.GetSessionVars().PlannerSelectBlockAsName != nil { b.ctx.GetSessionVars().PlannerSelectBlockAsName[p.SelectBlockOffset()] = ast.HintTable{DBName: p.OutputNames()[0].DBName, TableName: p.OutputNames()[0].TblName} } // Duplicate column name in one table is not allowed. @@ -345,7 +348,7 @@ func (p *LogicalJoin) extractOnCondition(conditions []expression.Expression, der // extractTableAlias returns table alias of the LogicalPlan's columns. // It will return nil when there are multiple table alias, because the alias is only used to check if // the logicalPlan match some optimizer hints, and hints are not expected to take effect in this case. -func extractTableAlias(p Plan) *hintTableInfo { +func extractTableAlias(p Plan, parentOffset int) *hintTableInfo { if len(p.OutputNames()) > 0 && p.OutputNames()[0].TblName.L != "" { firstName := p.OutputNames()[0] for _, name := range p.OutputNames() { @@ -353,7 +356,13 @@ func extractTableAlias(p Plan) *hintTableInfo { return nil } } - return &hintTableInfo{dbName: firstName.DBName, tblName: firstName.TblName, selectOffset: p.SelectBlockOffset()} + blockOffset := p.SelectBlockOffset() + blockAsNames := p.SCtx().GetSessionVars().PlannerSelectBlockAsName + // For sub-queries like `(select * from t) t1`, t1 should belong to its surrounding select block. + if blockOffset != parentOffset && blockAsNames != nil && blockAsNames[blockOffset].TableName.L != "" { + blockOffset = parentOffset + } + return &hintTableInfo{dbName: firstName.DBName, tblName: firstName.TblName, selectOffset: blockOffset} } return nil } @@ -363,8 +372,8 @@ func (p *LogicalJoin) setPreferredJoinType(hintInfo *tableHintInfo) { return } - lhsAlias := extractTableAlias(p.children[0]) - rhsAlias := extractTableAlias(p.children[1]) + lhsAlias := extractTableAlias(p.children[0], p.blockOffset) + rhsAlias := extractTableAlias(p.children[1], p.blockOffset) if hintInfo.ifPreferMergeJoin(lhsAlias, rhsAlias) { p.preferJoinType |= preferMergeJoin } @@ -2782,8 +2791,8 @@ func (b *PlanBuilder) buildSemiJoin(outerPlan, innerPlan LogicalPlan, onConditio } // Apply forces to choose hash join currently, so don't worry the hints will take effect if the semi join is in one apply. if b.TableHints() != nil { - outerAlias := extractTableAlias(outerPlan) - innerAlias := extractTableAlias(innerPlan) + outerAlias := extractTableAlias(outerPlan, joinPlan.blockOffset) + innerAlias := extractTableAlias(innerPlan, joinPlan.blockOffset) if b.TableHints().ifPreferMergeJoin(outerAlias, innerAlias) { joinPlan.preferJoinType |= preferMergeJoin } diff --git a/planner/core/testdata/plan_suite_in.json b/planner/core/testdata/plan_suite_in.json index 066c780e3e076..86a4740083ff8 100644 --- a/planner/core/testdata/plan_suite_in.json +++ b/planner/core/testdata/plan_suite_in.json @@ -15,6 +15,7 @@ "select /*+ SM_JOIN(t1) */ t1.a, t1.b from t t1, (select t2.a from t t2, t t3 where t2.a = t3.c) s where t1.a=s.a", "select /*+ INL_JOIN(t1) */ t1.a, t1.b from t t1, (select t2.a from t t2, t t3 where t2.a = t3.c) s where t1.a=s.a", "select /*+ HASH_JOIN(t1) */ t1.a, t1.b from t t1, (select t2.a from t t2, t t3 where t2.a = t3.c) s where t1.a=s.a", + "select /*+ HASH_JOIN(@sel_2 t1@sel_2, t2@sel_2), SM_JOIN(@sel_1 t1@sel_1, t2@sel_1) */ * from (select t1.a, t1.b from t t1, t t2 where t1.a = t2.a) t1, t t2 where t1.b = t2.b", // aggregation hints "select /*+ STREAM_AGG() */ s, count(s) from (select /*+ HASH_AGG() */ sum(t1.a) as s from t t1, t t2 where t1.a = t2.b group by t1.a) p group by s", "select /*+ HASH_AGG() */ s, count(s) from (select /*+ STREAM_AGG() */ sum(t1.a) as s from t t1, t t2 where t1.a = t2.b group by t1.a) p group by s", @@ -468,7 +469,8 @@ "select /*+ HASH_AGG(@sel_1), STREAM_AGG(@sel_2) */ count(*) from t t1 where t1.a < (select count(*) from t t2 where t1.a > t2.a)", "select /*+ STREAM_AGG(@sel_1), HASH_AGG(@qb) */ count(*) from t t1 where t1.a < (select /*+ QB_NAME(qb) */ count(*) from t t2 where t1.a > t2.a)", "select /*+ HASH_AGG(@sel_2) */ a, (select count(*) from t t1 where t1.b > t.a) from t where b > (select b from t t2 where t2.b = t.a limit 1)", - "select /*+ HASH_JOIN(@sel_1 t1), HASH_JOIN(@sel_2 t1) */ t1.b, t2.a, t2.aa from t t1, (select t1.a as a, t2.a as aa from t t1, t t2) t2 where t1.a = t2.aa;" + "select /*+ HASH_JOIN(@sel_1 t1), HASH_JOIN(@sel_2 t1) */ t1.b, t2.a, t2.aa from t t1, (select t1.a as a, t2.a as aa from t t1, t t2) t2 where t1.a = t2.aa;", + "select /*+ HASH_JOIN(@sel_2 t1@sel_2, t2@sel_2), SM_JOIN(@sel_1 t1@sel_1, t2@sel_1) */ * from (select t1.a, t1.b from t t1, t t2 where t1.a = t2.a) t1, t t2 where t1.b = t2.b" ] }, { diff --git a/planner/core/testdata/plan_suite_out.json b/planner/core/testdata/plan_suite_out.json index 36335fe6a7f3a..c878c188db5f9 100644 --- a/planner/core/testdata/plan_suite_out.json +++ b/planner/core/testdata/plan_suite_out.json @@ -50,6 +50,10 @@ "SQL": "select /*+ HASH_JOIN(t1) */ t1.a, t1.b from t t1, (select t2.a from t t2, t t3 where t2.a = t3.c) s where t1.a=s.a", "Best": "RightHashJoin{TableReader(Table(t))->LeftHashJoin{IndexReader(Index(t.f)[[NULL,+inf]])->IndexReader(Index(t.c_d_e)[[NULL,+inf]])}(Column#13,Column#27)}(Column#1,Column#13)->Projection" }, + { + "SQL": "select /*+ HASH_JOIN(@sel_2 t1@sel_2, t2@sel_2), SM_JOIN(@sel_1 t1@sel_1, t2@sel_1) */ * from (select t1.a, t1.b from t t1, t t2 where t1.a = t2.a) t1, t t2 where t1.b = t2.b", + "Best": "MergeInnerJoin{LeftHashJoin{TableReader(Table(t))->IndexReader(Index(t.f)[[NULL,+inf]])}(Column#1,Column#13)->Sort->TableReader(Table(t))->Sort}(Column#2,Column#28)->Projection" + }, { "SQL": "select /*+ STREAM_AGG() */ s, count(s) from (select /*+ HASH_AGG() */ sum(t1.a) as s from t t1, t t2 where t1.a = t2.b group by t1.a) p group by s", "Best": "LeftHashJoin{IndexReader(Index(t.f)[[NULL,+inf]])->TableReader(Table(t))}(Column#1,Column#14)->Projection->HashAgg->Sort->StreamAgg->Projection" @@ -1243,6 +1247,11 @@ "SQL": "select /*+ HASH_JOIN(@sel_1 t1), HASH_JOIN(@sel_2 t1) */ t1.b, t2.a, t2.aa from t t1, (select t1.a as a, t2.a as aa from t t1, t t2) t2 where t1.a = t2.aa;", "Plan": "RightHashJoin{TableReader(Table(t))->LeftHashJoin{IndexReader(Index(t.f)[[NULL,+inf]])->IndexReader(Index(t.f)[[NULL,+inf]])}}(Column#1,Column#25)->Projection", "Hints": "USE_INDEX(@`sel_1` `test`.`t1` ), USE_INDEX(@`sel_2` `test`.`t1` `f`), USE_INDEX(@`sel_2` `test`.`t2` `f`), HASH_JOIN(@`sel_2` `test`.`t1`), HASH_JOIN(@`sel_1` `test`.`t1`)" + }, + { + "SQL": "select /*+ HASH_JOIN(@sel_2 t1@sel_2, t2@sel_2), SM_JOIN(@sel_1 t1@sel_1, t2@sel_1) */ * from (select t1.a, t1.b from t t1, t t2 where t1.a = t2.a) t1, t t2 where t1.b = t2.b", + "Plan": "MergeInnerJoin{LeftHashJoin{TableReader(Table(t))->IndexReader(Index(t.f)[[NULL,+inf]])}(Column#1,Column#13)->Sort->TableReader(Table(t))->Sort}(Column#2,Column#28)->Projection", + "Hints": "USE_INDEX(@`sel_2` `test`.`t1` ), USE_INDEX(@`sel_2` `test`.`t2` `f`), HASH_JOIN(@`sel_2` `test`.`t1`), USE_INDEX(@`sel_1` `test`.`t2` ), SM_JOIN(@`sel_1` `test`.`t1`)" } ] }, From 5069939d7469a656a5b0fe3222943ec1b3caa7b9 Mon Sep 17 00:00:00 2001 From: Haibin Xie <lambdax.tyler@gmail.com> Date: Mon, 11 Nov 2019 17:52:30 +0800 Subject: [PATCH 09/19] stats: specially handle unqiue key when estimate (#13354) --- statistics/handle/update.go | 2 +- statistics/histogram.go | 12 +++++++++++- statistics/histogram_test.go | 4 ++-- statistics/selectivity_test.go | 32 ++++++++++++++++++++++++++++++++ statistics/table.go | 14 +++++++++----- 5 files changed, 55 insertions(+), 9 deletions(-) diff --git a/statistics/handle/update.go b/statistics/handle/update.go index 54319fecdb308..98083228d4cc6 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -934,7 +934,7 @@ func (h *Handle) RecalculateExpectCount(q *statistics.QueryFeedback) error { expected *= idx.GetIncreaseFactor(t.Count) } else { c := t.Columns[id] - expected, err = c.GetColumnRowCount(sc, ranges, t.ModifyCount) + expected, err = c.GetColumnRowCount(sc, ranges, t.ModifyCount, true) expected *= c.GetIncreaseFactor(t.Count) } q.Expected = int64(expected) diff --git a/statistics/histogram.go b/statistics/histogram.go index 1bff31a4f3945..a5d07928dc1aa 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -731,7 +731,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, mo } // GetColumnRowCount estimates the row count by a slice of Range. -func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64) (float64, error) { +func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*ranger.Range, modifyCount int64, pkIsHandle bool) (float64, error) { var rowCount float64 for _, rg := range ranges { cmp, err := rg.LowVal[0].CompareDatum(sc, &rg.HighVal[0]) @@ -741,6 +741,11 @@ func (c *Column) GetColumnRowCount(sc *stmtctx.StatementContext, ranges []*range if cmp == 0 { // the point case. if !rg.LowExclude && !rg.HighExclude { + // In this case, the row count is at most 1. + if pkIsHandle { + rowCount += 1 + continue + } var cnt float64 cnt, err = c.equalRowCount(sc, rg.LowVal[0], modifyCount) if err != nil { @@ -855,6 +860,11 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, indexRanges []*range continue } if fullLen { + // At most 1 in this case. + if idx.Info.Unique { + totalCount += 1 + continue + } count, err := idx.equalRowCount(sc, lb, modifyCount) if err != nil { return 0, err diff --git a/statistics/histogram_test.go b/statistics/histogram_test.go index 51b18480fc26d..cc22d2f0762f6 100644 --- a/statistics/histogram_test.go +++ b/statistics/histogram_test.go @@ -50,9 +50,9 @@ func (s *testStatisticsSuite) TestNewHistogramBySelectivity(c *C) { node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(25), HighVal: []types.Datum{types.MaxValueDatum()}}) intColResult := `column:1 ndv:16 totColSize:0 num: 30 lower_bound: 0 upper_bound: 2 repeats: 10 -num: 20 lower_bound: 6 upper_bound: 8 repeats: 0 +num: 11 lower_bound: 6 upper_bound: 8 repeats: 0 num: 30 lower_bound: 9 upper_bound: 11 repeats: 0 -num: 10 lower_bound: 12 upper_bound: 14 repeats: 0 +num: 1 lower_bound: 12 upper_bound: 14 repeats: 0 num: 30 lower_bound: 27 upper_bound: 29 repeats: 0` stringCol := &Column{} diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 48aa2aa067133..3fcfd3221d017 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -424,6 +424,38 @@ func (s *testStatsSuite) TestEstimationForUnknownValues(c *C) { c.Assert(count, Equals, 0.0) } +func (s *testStatsSuite) TestEstimationUniqueKeyEqualConds(c *C) { + defer cleanEnv(c, s.store, s.do) + testKit := testkit.NewTestKit(c, s.store) + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a int, b int, c int, unique key(b))") + testKit.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(7,7,7)") + testKit.MustExec("analyze table t with 4 cmsketch width, 1 cmsketch depth;") + table, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + statsTbl := s.do.StatsHandle().GetTableStats(table.Meta()) + + sc := &stmtctx.StatementContext{} + idxID := table.Meta().Indices[0].ID + count, err := statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(7, 7)) + c.Assert(err, IsNil) + c.Assert(count, Equals, 1.0) + + count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(6, 6)) + c.Assert(err, IsNil) + c.Assert(count, Equals, 1.0) + + colID := table.Meta().Columns[0].ID + count, err = statsTbl.GetRowCountByIntColumnRanges(sc, colID, getRange(7, 7)) + c.Assert(err, IsNil) + c.Assert(count, Equals, 1.0) + + count, err = statsTbl.GetRowCountByIntColumnRanges(sc, colID, getRange(6, 6)) + c.Assert(err, IsNil) + c.Assert(count, Equals, 1.0) +} + func (s *testStatsSuite) TestPrimaryKeySelectivity(c *C) { defer cleanEnv(c, s.store, s.do) testKit := testkit.NewTestKit(c, s.store) diff --git a/statistics/table.go b/statistics/table.go index da563044dc2ad..c3d79ad621fa9 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -240,7 +240,7 @@ func (coll *HistColl) GetRowCountByIntColumnRanges(sc *stmtctx.StatementContext, } return getPseudoRowCountByUnsignedIntRanges(intRanges, float64(coll.Count)), nil } - result, err := c.GetColumnRowCount(sc, intRanges, coll.ModifyCount) + result, err := c.GetColumnRowCount(sc, intRanges, coll.ModifyCount, true) result *= c.GetIncreaseFactor(coll.Count) return result, errors.Trace(err) } @@ -251,7 +251,7 @@ func (coll *HistColl) GetRowCountByColumnRanges(sc *stmtctx.StatementContext, co if !ok || c.IsInvalid(sc, coll.Pseudo) { return GetPseudoRowCountByColumnRanges(sc, float64(coll.Count), colRanges, 0) } - result, err := c.GetColumnRowCount(sc, colRanges, coll.ModifyCount) + result, err := c.GetColumnRowCount(sc, colRanges, coll.ModifyCount, false) result *= c.GetIncreaseFactor(coll.Count) return result, errors.Trace(err) } @@ -387,7 +387,11 @@ func isSingleColIdxNullRange(idx *Index, ran *ranger.Range) bool { // getEqualCondSelectivity gets the selectivity of the equal conditions. `coverAll` means if the conditions // have covered all the index columns. -func (coll *HistColl) getEqualCondSelectivity(idx *Index, bytes []byte, coverAll bool) float64 { +func (coll *HistColl) getEqualCondSelectivity(idx *Index, bytes []byte, coverAll bool, unique bool) float64 { + // In this case, the row count is at most 1. + if unique && coverAll { + return 1.0 / float64(idx.TotalCount()) + } val := types.NewBytesDatum(bytes) if idx.outOfRange(val) { // When the value is out of range, we could not found this value in the CM Sketch, @@ -434,7 +438,7 @@ func (coll *HistColl) getIndexRowCount(sc *stmtctx.StatementContext, idxID int64 if err != nil { return 0, errors.Trace(err) } - selectivity = coll.getEqualCondSelectivity(idx, bytes, coverAll) + selectivity = coll.getEqualCondSelectivity(idx, bytes, coverAll, idx.Info.Unique) } else { bytes, err := codec.EncodeKey(sc, nil, ran.LowVal[:rangePosition-1]...) if err != nil { @@ -447,7 +451,7 @@ func (coll *HistColl) getIndexRowCount(sc *stmtctx.StatementContext, idxID int64 if err != nil { return 0, err } - selectivity += coll.getEqualCondSelectivity(idx, bytes, coverAll) + selectivity += coll.getEqualCondSelectivity(idx, bytes, coverAll, idx.Info.Unique) } } // use histogram to estimate the range condition From c9c337ec8ba842ef9dc352dbed214adfb01674e4 Mon Sep 17 00:00:00 2001 From: hejiayua <hejiayuan0822@163.com> Date: Mon, 11 Nov 2019 17:59:18 +0800 Subject: [PATCH 10/19] vec QuoteSig (#13312) --- expression/builtin_string_vec.go | 23 +++++++++++++++++++++-- expression/builtin_string_vec_test.go | 4 +++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/expression/builtin_string_vec.go b/expression/builtin_string_vec.go index e6eed321a320e..98043fac83e04 100644 --- a/expression/builtin_string_vec.go +++ b/expression/builtin_string_vec.go @@ -503,11 +503,30 @@ func (b *builtinFieldStringSig) vecEvalInt(input *chunk.Chunk, result *chunk.Col } func (b *builtinQuoteSig) vectorized() bool { - return false + return true } func (b *builtinQuoteSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + n := input.NumRows() + buf, err := b.bufAllocator.get(types.ETString, n) + if err != nil { + return err + } + defer b.bufAllocator.put(buf) + if err := b.args[0].VecEvalString(b.ctx, input, buf); err != nil { + return err + } + + result.ReserveString(n) + for i := 0; i < n; i++ { + if buf.IsNull(i) { + result.AppendString("NULL") + continue + } + str := buf.GetString(i) + result.AppendString(Quote(str)) + } + return nil } func (b *builtinInsertBinarySig) vectorized() bool { diff --git a/expression/builtin_string_vec_test.go b/expression/builtin_string_vec_test.go index 3c18a6414a5f7..bd08d0ab9161c 100644 --- a/expression/builtin_string_vec_test.go +++ b/expression/builtin_string_vec_test.go @@ -154,10 +154,12 @@ var vecBuiltinStringCases = map[string][]vecExprBenchCase{ {retEvalType: types.ETString, childrenTypes: []types.EvalType{types.ETInt}}, {retEvalType: types.ETString, childrenTypes: []types.EvalType{types.ETString}, geners: []dataGenerator{&numStrGener{rangeInt64Gener{-10, 10}}}}, }, + ast.Quote: { + {retEvalType: types.ETString, childrenTypes: []types.EvalType{types.ETString}}, + }, ast.Ord: { {retEvalType: types.ETInt, childrenTypes: []types.EvalType{types.ETString}}, }, - ast.Quote: {}, ast.Bin: { {retEvalType: types.ETString, childrenTypes: []types.EvalType{types.ETInt}}, }, From 838481a67630b0a0cb54524089c50d570fa67b6f Mon Sep 17 00:00:00 2001 From: Eugene Kalinin <ekalinin@users.noreply.github.com> Date: Mon, 11 Nov 2019 13:18:03 +0300 Subject: [PATCH 11/19] expression: implement vectorized evaluation for builtinSysDateWithoutFspSig (#13348) --- expression/builtin_time_vec.go | 21 +++++++++++++++++++-- expression/builtin_time_vec_test.go | 3 +++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/expression/builtin_time_vec.go b/expression/builtin_time_vec.go index 5a839b22a324c..23b8bc7fa4008 100644 --- a/expression/builtin_time_vec.go +++ b/expression/builtin_time_vec.go @@ -136,11 +136,28 @@ func (b *builtinFromUnixTime2ArgSig) vecEvalString(input *chunk.Chunk, result *c } func (b *builtinSysDateWithoutFspSig) vectorized() bool { - return false + return true } func (b *builtinSysDateWithoutFspSig) vecEvalTime(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + n := input.NumRows() + loc := b.ctx.GetSessionVars().Location() + nowTs, err := getStmtTimestamp(b.ctx) + if err != nil { + return err + } + + now := nowTs.In(loc) + result.ResizeTime(n, false) + times := result.Times() + t, err := convertTimeToMysqlTime(now, 0, types.ModeHalfEven) + if err != nil { + return err + } + for i := 0; i < n; i++ { + times[i] = t + } + return nil } func (b *builtinExtractDatetimeSig) vectorized() bool { diff --git a/expression/builtin_time_vec_test.go b/expression/builtin_time_vec_test.go index 4e3f466d90f8c..5f25d77204452 100644 --- a/expression/builtin_time_vec_test.go +++ b/expression/builtin_time_vec_test.go @@ -209,6 +209,9 @@ var vecBuiltinTimeCases = map[string][]vecExprBenchCase{ geners: []dataGenerator{&formatGener{0.2}, &locationGener{0.2}}, }, }, + ast.Sysdate: { + {retEvalType: types.ETDatetime}, + }, } func (s *testEvaluatorSuite) TestVectorizedBuiltinTimeEvalOneVec(c *C) { From 60bfdf5ba4bd5a5c1f012c2d4869daee568480d3 Mon Sep 17 00:00:00 2001 From: Zhuhe Fang <fzhedu@gmail.com> Date: Mon, 11 Nov 2019 18:30:34 +0800 Subject: [PATCH 12/19] Revert "executor, util: rename original List to ListInMemory and add a new interface List (#13353)" (#13363) This reverts commit 2b5f81add11497b5b5595b3ba16ac88cddf33a65. --- executor/executor.go | 10 ++-- executor/hash_table.go | 11 ++--- executor/index_lookup_join.go | 16 ++----- executor/index_lookup_merge_join.go | 9 ++-- executor/join.go | 6 +-- executor/pkg_test.go | 2 +- executor/sort.go | 18 +++----- util/chunk/disk.go | 9 +--- util/chunk/iterator.go | 10 ++-- util/chunk/iterator_test.go | 6 +-- util/chunk/list.go | 72 ++++++++++++----------------- util/chunk/list_test.go | 16 +++---- 12 files changed, 75 insertions(+), 110 deletions(-) diff --git a/executor/executor.go b/executor/executor.go index a10ecf2e636ee..482119db8e0a9 100644 --- a/executor/executor.go +++ b/executor/executor.go @@ -132,10 +132,10 @@ func newFirstChunk(e Executor) *chunk.Chunk { return chunk.New(base.retFieldTypes, base.initCap, base.maxChunkSize) } -// newList creates a new ListInMemory to buffer current executor's result. -func newList(e Executor) *chunk.ListInMemory { +// newList creates a new List to buffer current executor's result. +func newList(e Executor) *chunk.List { base := e.base() - return chunk.NewListInMemory(base.retFieldTypes, base.initCap, base.maxChunkSize) + return chunk.NewList(base.retFieldTypes, base.initCap, base.maxChunkSize) } // retTypes returns all output column types. @@ -1115,7 +1115,7 @@ type TableScanExec struct { iter kv.Iterator columns []*model.ColumnInfo isVirtualTable bool - virtualTableChunkList *chunk.ListInMemory + virtualTableChunkList *chunk.List virtualTableChunkIdx int } @@ -1146,7 +1146,7 @@ func (e *TableScanExec) Next(ctx context.Context, req *chunk.Chunk) error { func (e *TableScanExec) nextChunk4InfoSchema(ctx context.Context, chk *chunk.Chunk) error { chk.GrowAndReset(e.maxChunkSize) if e.virtualTableChunkList == nil { - e.virtualTableChunkList = chunk.NewListInMemory(retTypes(e), e.initCap, e.maxChunkSize) + e.virtualTableChunkList = chunk.NewList(retTypes(e), e.initCap, e.maxChunkSize) columns := make([]*table.Column, e.schema.Len()) for i, colInfo := range e.columns { columns[i] = table.ToColumn(colInfo) diff --git a/executor/hash_table.go b/executor/hash_table.go index 875fc3d38f1bf..256be4bfcbf32 100644 --- a/executor/hash_table.go +++ b/executor/hash_table.go @@ -91,7 +91,7 @@ type hashRowContainer struct { memTracker *memory.Tracker // records stores the chunks in memory. - records *chunk.ListInMemory + records *chunk.List // recordsInDisk stores the chunks in disk. recordsInDisk *chunk.ListInDisk @@ -117,7 +117,7 @@ func newHashRowContainer(sCtx sessionctx.Context, estCount int, hCtx *hashContex if estCount < maxChunkSize*estCountMinFactor { estCount = 0 } - initList := chunk.NewListInMemory(hCtx.allTypes, maxChunkSize, maxChunkSize) + initList := chunk.NewList(hCtx.allTypes, maxChunkSize, maxChunkSize) c := &hashRowContainer{ sc: sCtx.GetSessionVars().StmtCtx, hCtx: hCtx, @@ -202,10 +202,7 @@ func (c *hashRowContainer) PutChunk(chk *chunk.Chunk) error { } } else { chkIdx = uint32(c.records.NumChunks()) - err := c.records.Add(chk) - if err != nil { - return err - } + c.records.Add(chk) if atomic.LoadUint32(&c.exceeded) != 0 { err := c.spillToDisk() if err != nil { @@ -272,7 +269,7 @@ func (c *hashRowContainer) ActionSpill() memory.ActionOnExceed { return &spillDiskAction{c: c} } -// spillDiskAction implements memory.ActionOnExceed for chunk.ListInMemory. If +// spillDiskAction implements memory.ActionOnExceed for chunk.List. If // the memory quota of a query is exceeded, spillDiskAction.Action is // triggered. type spillDiskAction struct { diff --git a/executor/index_lookup_join.go b/executor/index_lookup_join.go index dfd349e8834fa..57f669e375e26 100644 --- a/executor/index_lookup_join.go +++ b/executor/index_lookup_join.go @@ -94,10 +94,10 @@ type innerCtx struct { } type lookUpJoinTask struct { - outerResult *chunk.ListInMemory + outerResult *chunk.List outerMatch [][]bool - innerResult *chunk.ListInMemory + innerResult *chunk.List encodedLookUpKeys []*chunk.Chunk lookupMap *mvmap.MVMap matchedInners []chunk.Row @@ -392,10 +392,7 @@ func (ow *outerWorker) buildTask(ctx context.Context) (*lookUpJoinTask, error) { break } - err = task.outerResult.Add(chk) - if err != nil { - return nil, err - } + task.outerResult.Add(chk) } if task.outerResult.Len() == 0 { return nil, nil @@ -604,7 +601,7 @@ func (iw *innerWorker) fetchInnerResults(ctx context.Context, task *lookUpJoinTa return err } defer terror.Call(innerExec.Close) - innerResult := chunk.NewListInMemory(retTypes(innerExec), iw.ctx.GetSessionVars().MaxChunkSize, iw.ctx.GetSessionVars().MaxChunkSize) + innerResult := chunk.NewList(retTypes(innerExec), iw.ctx.GetSessionVars().MaxChunkSize, iw.ctx.GetSessionVars().MaxChunkSize) innerResult.GetMemTracker().SetLabel(buildSideResultLabel) innerResult.GetMemTracker().AttachTo(task.memTracker) for { @@ -620,10 +617,7 @@ func (iw *innerWorker) fetchInnerResults(ctx context.Context, task *lookUpJoinTa if iw.executorChk.NumRows() == 0 { break } - err = innerResult.Add(iw.executorChk) - if err != nil { - return err - } + innerResult.Add(iw.executorChk) iw.executorChk = newFirstChunk(innerExec) } task.innerResult = innerResult diff --git a/executor/index_lookup_merge_join.go b/executor/index_lookup_merge_join.go index dbf541aa6898b..e4dbe7369d3b2 100644 --- a/executor/index_lookup_merge_join.go +++ b/executor/index_lookup_merge_join.go @@ -86,7 +86,7 @@ type innerMergeCtx struct { } type lookUpMergeJoinTask struct { - outerResult *chunk.ListInMemory + outerResult *chunk.List outerOrderIdx []chunk.RowPtr innerResult *chunk.Chunk @@ -323,7 +323,7 @@ func (omw *outerMergeWorker) pushToChan(ctx context.Context, task *lookUpMergeJo func (omw *outerMergeWorker) buildTask(ctx context.Context) (*lookUpMergeJoinTask, error) { task := &lookUpMergeJoinTask{ results: make(chan *indexMergeJoinResult, numResChkHold), - outerResult: chunk.NewListInMemory(omw.rowTypes, omw.executor.base().initCap, omw.executor.base().maxChunkSize), + outerResult: chunk.NewList(omw.rowTypes, omw.executor.base().initCap, omw.executor.base().maxChunkSize), } task.memTracker = memory.NewTracker(stringutil.MemoizeStr(func() string { return fmt.Sprintf("lookup join task %p", task) }), -1) task.memTracker.AttachTo(omw.parentMemTracker) @@ -346,10 +346,7 @@ func (omw *outerMergeWorker) buildTask(ctx context.Context) (*lookUpMergeJoinTas break } - err = task.outerResult.Add(execChk) - if err != nil { - return task, err - } + task.outerResult.Add(execChk) requiredRows -= execChk.NumRows() task.memTracker.Consume(execChk.MemoryUsage()) } diff --git a/executor/join.go b/executor/join.go index 33010f5f6da3d..61d7386cdce44 100644 --- a/executor/join.go +++ b/executor/join.go @@ -555,7 +555,7 @@ type NestedLoopApplyExec struct { outerChunk *chunk.Chunk outerChunkCursor int outerSelected []bool - innerList *chunk.ListInMemory + innerList *chunk.List innerChunk *chunk.Chunk innerSelected []bool innerIter chunk.Iterator @@ -586,7 +586,7 @@ func (e *NestedLoopApplyExec) Open(ctx context.Context) error { e.innerRows = e.innerRows[:0] e.outerChunk = newFirstChunk(e.outerExec) e.innerChunk = newFirstChunk(e.innerExec) - e.innerList = chunk.NewListInMemory(retTypes(e.innerExec), e.initCap, e.maxChunkSize) + e.innerList = chunk.NewList(retTypes(e.innerExec), e.initCap, e.maxChunkSize) e.memTracker = memory.NewTracker(e.id, e.ctx.GetSessionVars().MemQuotaNestedLoopApply) e.memTracker.AttachTo(e.ctx.GetSessionVars().StmtCtx.MemTracker) @@ -628,7 +628,7 @@ func (e *NestedLoopApplyExec) fetchSelectedOuterRow(ctx context.Context, chk *ch } } -// fetchAllInners reads all data from the inner table and stores them in a ListInMemory. +// fetchAllInners reads all data from the inner table and stores them in a List. func (e *NestedLoopApplyExec) fetchAllInners(ctx context.Context) error { err := e.innerExec.Open(ctx) defer terror.Call(e.innerExec.Close) diff --git a/executor/pkg_test.go b/executor/pkg_test.go index 22a361293a89f..bdd7a91772bed 100644 --- a/executor/pkg_test.go +++ b/executor/pkg_test.go @@ -61,7 +61,7 @@ func (s *pkgTestSuite) TestNestedLoopApply(c *C) { innerFilter: []expression.Expression{innerFilter}, joiner: joiner, } - join.innerList = chunk.NewListInMemory(retTypes(innerExec), innerExec.initCap, innerExec.maxChunkSize) + join.innerList = chunk.NewList(retTypes(innerExec), innerExec.initCap, innerExec.maxChunkSize) join.innerChunk = newFirstChunk(innerExec) join.outerChunk = newFirstChunk(outerExec) joinChk := newFirstChunk(join) diff --git a/executor/sort.go b/executor/sort.go index a9b378b41452b..aa895015b1eec 100644 --- a/executor/sort.go +++ b/executor/sort.go @@ -45,7 +45,7 @@ type SortExec struct { // keyCmpFuncs is used to compare each ByItem. keyCmpFuncs []chunk.CompareFunc // rowChunks is the chunks to store row values. - rowChunks *chunk.ListInMemory + rowChunks *chunk.List // rowPointer store the chunk index and row index for each row. rowPtrs []chunk.RowPtr @@ -95,7 +95,7 @@ func (e *SortExec) Next(ctx context.Context, req *chunk.Chunk) error { func (e *SortExec) fetchRowChunks(ctx context.Context) error { fields := retTypes(e) - e.rowChunks = chunk.NewListInMemory(fields, e.initCap, e.maxChunkSize) + e.rowChunks = chunk.NewList(fields, e.initCap, e.maxChunkSize) e.rowChunks.GetMemTracker().AttachTo(e.memTracker) e.rowChunks.GetMemTracker().SetLabel(rowChunksLabel) for { @@ -108,10 +108,7 @@ func (e *SortExec) fetchRowChunks(ctx context.Context) error { if rowCount == 0 { break } - err = e.rowChunks.Add(chk) - if err != nil { - return err - } + e.rowChunks.Add(chk) } return nil } @@ -259,7 +256,7 @@ func (e *TopNExec) Next(ctx context.Context, req *chunk.Chunk) error { func (e *TopNExec) loadChunksUntilTotalLimit(ctx context.Context) error { e.chkHeap = &topNChunkHeap{e} - e.rowChunks = chunk.NewListInMemory(retTypes(e), e.initCap, e.maxChunkSize) + e.rowChunks = chunk.NewList(retTypes(e), e.initCap, e.maxChunkSize) e.rowChunks.GetMemTracker().AttachTo(e.memTracker) e.rowChunks.GetMemTracker().SetLabel(rowChunksLabel) for uint64(e.rowChunks.Len()) < e.totalLimit { @@ -273,10 +270,7 @@ func (e *TopNExec) loadChunksUntilTotalLimit(ctx context.Context) error { if srcChk.NumRows() == 0 { break } - err = e.rowChunks.Add(srcChk) - if err != nil { - return err - } + e.rowChunks.Add(srcChk) } e.initPointers() e.initCompareFuncs() @@ -336,7 +330,7 @@ func (e *TopNExec) processChildChk(childRowChk *chunk.Chunk) error { // but we want descending top N, then we will keep all data in memory. // But if data is distributed randomly, this function will be called log(n) times. func (e *TopNExec) doCompaction() error { - newRowChunks := chunk.NewListInMemory(retTypes(e), e.initCap, e.maxChunkSize) + newRowChunks := chunk.NewList(retTypes(e), e.initCap, e.maxChunkSize) newRowPtrs := make([]chunk.RowPtr, 0, e.rowChunks.Len()) for _, rowPtr := range e.rowPtrs { newRowPtr := newRowChunks.AppendRow(e.rowChunks.GetRow(rowPtr)) diff --git a/util/chunk/disk.go b/util/chunk/disk.go index bc20be59d89d8..a374a2307c954 100644 --- a/util/chunk/disk.go +++ b/util/chunk/disk.go @@ -83,7 +83,7 @@ func NewListInDisk(fieldTypes []*types.FieldType) *ListInDisk { return l } -// GetDiskTracker returns the memory tracker of this ListInMemory. +// GetDiskTracker returns the memory tracker of this List. func (l *ListInDisk) GetDiskTracker() *disk.Tracker { return l.diskTracker } @@ -92,7 +92,7 @@ func (l *ListInDisk) GetDiskTracker() *disk.Tracker { // is not empty and not used any more and has the same field types. func (l *ListInDisk) Add(chk *Chunk) (err error) { if chk.NumRows() == 0 { - return errors.New("chunk appended to ListInMemory should have at least 1 row") + return errors.New("chunk appended to List should have at least 1 row") } if l.disk == nil { l.disk, err = ioutil.TempFile(tmpDir, l.diskTracker.Label().String()) @@ -138,11 +138,6 @@ func (l *ListInDisk) NumChunks() int { return len(l.offsets) } -// NumRowsOfChunk returns the number of rows of a chunk in the ListInDisk. -func (l *ListInDisk) NumRowsOfChunk(chkID int) int { - return len(l.offsets[chkID]) -} - // Close releases the disk resource. func (l *ListInDisk) Close() error { if l.disk != nil { diff --git a/util/chunk/iterator.go b/util/chunk/iterator.go index 071a3087922ff..c6a3bc58ff303 100644 --- a/util/chunk/iterator.go +++ b/util/chunk/iterator.go @@ -159,13 +159,13 @@ func (it *Iterator4Chunk) GetChunk() *Chunk { return it.chk } -// NewIterator4List returns a Iterator for ListInMemory. -func NewIterator4List(li *ListInMemory) Iterator { +// NewIterator4List returns a Iterator for List. +func NewIterator4List(li *List) Iterator { return &iterator4List{li: li} } type iterator4List struct { - li *ListInMemory + li *List chkCursor int rowCursor int } @@ -232,12 +232,12 @@ func (it *iterator4List) Len() int { } // NewIterator4RowPtr returns a Iterator for RowPtrs. -func NewIterator4RowPtr(li *ListInMemory, ptrs []RowPtr) Iterator { +func NewIterator4RowPtr(li *List, ptrs []RowPtr) Iterator { return &iterator4RowPtr{li: li, ptrs: ptrs} } type iterator4RowPtr struct { - li *ListInMemory + li *List ptrs []RowPtr cursor int } diff --git a/util/chunk/iterator_test.go b/util/chunk/iterator_test.go index 7125f24d194fd..2c937447b1909 100644 --- a/util/chunk/iterator_test.go +++ b/util/chunk/iterator_test.go @@ -49,8 +49,8 @@ func (s *testChunkSuite) TestIterator(c *check.C) { expected = append(expected, int64(i)) } var rows []Row - li := NewListInMemory(fields, 1, 2) - li2 := NewListInMemory(fields, 8, 16) + li := NewList(fields, 1, 2) + li2 := NewList(fields, 8, 16) var ptrs []RowPtr var ptrs2 []RowPtr for i := 0; i < n; i++ { @@ -120,7 +120,7 @@ func (s *testChunkSuite) TestIterator(c *check.C) { c.Assert(it.Begin(), check.Equals, it.End()) it = NewIterator4Chunk(new(Chunk)) c.Assert(it.Begin(), check.Equals, it.End()) - it = NewIterator4List(new(ListInMemory)) + it = NewIterator4List(new(List)) c.Assert(it.Begin(), check.Equals, it.End()) it = NewIterator4RowPtr(li, nil) c.Assert(it.Begin(), check.Equals, it.End()) diff --git a/util/chunk/list.go b/util/chunk/list.go index 4826b229d82ae..2f71febb1179d 100644 --- a/util/chunk/list.go +++ b/util/chunk/list.go @@ -22,16 +22,8 @@ import ( "github.com/pingcap/tidb/util/stringutil" ) -// List is interface for ListInMemory and ListInDisk -type List interface { - NumRowsOfChunk(int) - NumChunks() - GetRow(RowPtr) - Add(chk *Chunk) (err error) -} - -// ListInMemory holds a slice of chunks, use to append rows with max chunk size properly handled. -type ListInMemory struct { +// List holds a slice of chunks, use to append rows with max chunk size properly handled. +type List struct { fieldTypes []*types.FieldType initChunkSize int maxChunkSize int @@ -50,11 +42,11 @@ type RowPtr struct { RowIdx uint32 } -var chunkListLabel fmt.Stringer = stringutil.StringerStr("chunk.ListInMemory") +var chunkListLabel fmt.Stringer = stringutil.StringerStr("chunk.List") -// NewListInMemory creates a new ListInMemory with field types, init chunk size and max chunk size. -func NewListInMemory(fieldTypes []*types.FieldType, initChunkSize, maxChunkSize int) *ListInMemory { - l := &ListInMemory{ +// NewList creates a new List with field types, init chunk size and max chunk size. +func NewList(fieldTypes []*types.FieldType, initChunkSize, maxChunkSize int) *List { + l := &List{ fieldTypes: fieldTypes, initChunkSize: initChunkSize, maxChunkSize: maxChunkSize, @@ -64,28 +56,28 @@ func NewListInMemory(fieldTypes []*types.FieldType, initChunkSize, maxChunkSize return l } -// GetMemTracker returns the memory tracker of this ListInMemory. -func (l *ListInMemory) GetMemTracker() *memory.Tracker { +// GetMemTracker returns the memory tracker of this List. +func (l *List) GetMemTracker() *memory.Tracker { return l.memTracker } -// Len returns the length of the ListInMemory. -func (l *ListInMemory) Len() int { +// Len returns the length of the List. +func (l *List) Len() int { return l.length } -// NumChunks returns the number of chunks in the ListInMemory. -func (l *ListInMemory) NumChunks() int { +// NumChunks returns the number of chunks in the List. +func (l *List) NumChunks() int { return len(l.chunks) } // GetChunk gets the Chunk by ChkIdx. -func (l *ListInMemory) GetChunk(chkIdx int) *Chunk { +func (l *List) GetChunk(chkIdx int) *Chunk { return l.chunks[chkIdx] } -// AppendRow appends a row to the ListInMemory, the row is copied to the ListInMemory. -func (l *ListInMemory) AppendRow(row Row) RowPtr { +// AppendRow appends a row to the List, the row is copied to the List. +func (l *List) AppendRow(row Row) RowPtr { chkIdx := len(l.chunks) - 1 if chkIdx == -1 || l.chunks[chkIdx].NumRows() >= l.chunks[chkIdx].Capacity() || chkIdx == l.consumedIdx { newChk := l.allocChunk() @@ -103,12 +95,13 @@ func (l *ListInMemory) AppendRow(row Row) RowPtr { return RowPtr{ChkIdx: uint32(chkIdx), RowIdx: uint32(rowIdx)} } -// Add adds a chunk to the ListInMemory, the chunk may be modified later by the list. +// Add adds a chunk to the List, the chunk may be modified later by the list. // Caller must make sure the input chk is not empty and not used any more and has the same field types. -func (l *ListInMemory) Add(chk *Chunk) (err error) { +func (l *List) Add(chk *Chunk) { // FixMe: we should avoid add a Chunk that chk.NumRows() > list.maxChunkSize. if chk.NumRows() == 0 { - return errors.New("chunk appended to ListInMemory should have at least 1 row") + // TODO: return error here. + panic("chunk appended to List should have at least 1 row") } if chkIdx := len(l.chunks) - 1; l.consumedIdx != chkIdx { l.memTracker.Consume(l.chunks[chkIdx].MemoryUsage()) @@ -118,10 +111,10 @@ func (l *ListInMemory) Add(chk *Chunk) (err error) { l.consumedIdx++ l.chunks = append(l.chunks, chk) l.length += chk.NumRows() - return nil + return } -func (l *ListInMemory) allocChunk() (chk *Chunk) { +func (l *List) allocChunk() (chk *Chunk) { if len(l.freelist) > 0 { lastIdx := len(l.freelist) - 1 chk = l.freelist[lastIdx] @@ -137,18 +130,13 @@ func (l *ListInMemory) allocChunk() (chk *Chunk) { } // GetRow gets a Row from the list by RowPtr. -func (l *ListInMemory) GetRow(ptr RowPtr) Row { +func (l *List) GetRow(ptr RowPtr) Row { chk := l.chunks[ptr.ChkIdx] return chk.GetRow(int(ptr.RowIdx)) } -// NumRowsOfChunk returns the number of rows of a chunk. -func (l *ListInMemory) NumRowsOfChunk(chkID int) int { - return l.GetChunk(chkID).NumRows() -} - -// Reset resets the ListInMemory. -func (l *ListInMemory) Reset() { +// Reset resets the List. +func (l *List) Reset() { if lastIdx := len(l.chunks) - 1; lastIdx != l.consumedIdx { l.memTracker.Consume(l.chunks[lastIdx].MemoryUsage()) } @@ -160,11 +148,11 @@ func (l *ListInMemory) Reset() { // preAlloc4Row pre-allocates the storage memory for a Row. // NOTE: only used in test -// 1. The ListInMemory must be empty or holds no useful data. -// 2. The schema of the Row must be the same with the ListInMemory. +// 1. The List must be empty or holds no useful data. +// 2. The schema of the Row must be the same with the List. // 3. This API is paired with the `Insert()` function, which inserts all the -// rows data into the ListInMemory after the pre-allocation. -func (l *ListInMemory) preAlloc4Row(row Row) (ptr RowPtr) { +// rows data into the List after the pre-allocation. +func (l *List) preAlloc4Row(row Row) (ptr RowPtr) { chkIdx := len(l.chunks) - 1 if chkIdx == -1 || l.chunks[chkIdx].NumRows() >= l.chunks[chkIdx].Capacity() { newChk := l.allocChunk() @@ -184,7 +172,7 @@ func (l *ListInMemory) preAlloc4Row(row Row) (ptr RowPtr) { // Insert inserts `row` on the position specified by `ptr`. // Note: Insert will cover the origin data, it should be called after // PreAlloc. -func (l *ListInMemory) Insert(ptr RowPtr, row Row) { +func (l *List) Insert(ptr RowPtr, row Row) { l.chunks[ptr.ChkIdx].insert(int(ptr.RowIdx), row) } @@ -193,7 +181,7 @@ func (l *ListInMemory) Insert(ptr RowPtr, row Row) { type ListWalkFunc = func(row Row) error // Walk iterate the list and call walkFunc for each row. -func (l *ListInMemory) Walk(walkFunc ListWalkFunc) error { +func (l *List) Walk(walkFunc ListWalkFunc) error { for i := 0; i < len(l.chunks); i++ { chk := l.chunks[i] for j := 0; j < chk.NumRows(); j++ { diff --git a/util/chunk/list_test.go b/util/chunk/list_test.go index c71bd712a1875..b7d7780ffa2f2 100644 --- a/util/chunk/list_test.go +++ b/util/chunk/list_test.go @@ -32,7 +32,7 @@ func (s *testChunkSuite) TestList(c *check.C) { fields := []*types.FieldType{ types.NewFieldType(mysql.TypeLonglong), } - l := NewListInMemory(fields, 2, 2) + l := NewList(fields, 2, 2) srcChunk := NewChunkWithCapacity(fields, 32) srcChunk.AppendInt64(0, 1) srcRow := srcChunk.GetRow(0) @@ -104,7 +104,7 @@ func (s *testChunkSuite) TestListMemoryUsage(c *check.C) { srcChk.AppendTime(3, timeObj) srcChk.AppendDuration(4, durationObj) - list := NewListInMemory(fieldTypes, maxChunkSize, maxChunkSize*2) + list := NewList(fieldTypes, maxChunkSize, maxChunkSize*2) c.Assert(list.GetMemTracker().BytesConsumed(), check.Equals, int64(0)) list.AppendRow(srcChk.GetRow(0)) @@ -133,8 +133,8 @@ func (s *testChunkSuite) TestListPrePreAlloc4RowAndInsert(c *check.C) { srcChk.AppendString(3, strings.Repeat(strconv.FormatInt(i, 10), int(i))) } - srcList := NewListInMemory(fieldTypes, 3, 3) - destList := NewListInMemory(fieldTypes, 5, 5) + srcList := NewList(fieldTypes, 3, 3) + destList := NewList(fieldTypes, 5, 5) destRowPtr := make([]RowPtr, srcChk.NumRows()) for i := 0; i < srcChk.NumRows(); i++ { srcList.AppendRow(srcChk.GetRow(i)) @@ -176,7 +176,7 @@ func BenchmarkListMemoryUsage(b *testing.B) { row := chk.GetRow(0) initCap := 50 - list := NewListInMemory(fieldTypes, 2, 8) + list := NewList(fieldTypes, 2, 8) for i := 0; i < initCap; i++ { list.AppendRow(row) } @@ -194,7 +194,7 @@ func BenchmarkPreAllocList(b *testing.B) { row := chk.GetRow(0) b.ResetTimer() - list := NewListInMemory(fieldTypes, 1024, 1024) + list := NewList(fieldTypes, 1024, 1024) for i := 0; i < b.N; i++ { list.Reset() // 32768 indicates the number of int64 rows to fill 256KB L2 cache. @@ -225,7 +225,7 @@ func BenchmarkListAdd(b *testing.B) { numChk, numRow := 1, 2 chks, fields := initChunks(numChk, numRow) chk := chks[0] - l := NewListInMemory(fields, numRow, numRow) + l := NewList(fields, numRow, numRow) b.ResetTimer() for i := 0; i < b.N; i++ { @@ -236,7 +236,7 @@ func BenchmarkListAdd(b *testing.B) { func BenchmarkListGetRow(b *testing.B) { numChk, numRow := 10000, 2 chks, fields := initChunks(numChk, numRow) - l := NewListInMemory(fields, numRow, numRow) + l := NewList(fields, numRow, numRow) for _, chk := range chks { l.Add(chk) } From 88e96ebca7112fcb998900a23f72b1d7e7bc04d3 Mon Sep 17 00:00:00 2001 From: Chen Lixiang <lixiang3608@outlook.com> Date: Mon, 11 Nov 2019 19:15:33 +0800 Subject: [PATCH 13/19] expression: implement vectorized evaluation for `builtinYearWeekWithModeSig` (#13328) --- expression/builtin_time_vec.go | 49 +++++++++++++++++++++++++++-- expression/builtin_time_vec_test.go | 1 + 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/expression/builtin_time_vec.go b/expression/builtin_time_vec.go index 23b8bc7fa4008..4a53c1b7ef7fe 100644 --- a/expression/builtin_time_vec.go +++ b/expression/builtin_time_vec.go @@ -1682,11 +1682,56 @@ func (b *builtinSubDateDurationIntSig) vecEvalDuration(input *chunk.Chunk, resul } func (b *builtinYearWeekWithModeSig) vectorized() bool { - return false + return true } +// vecEvalInt evals YEARWEEK(date,mode). +// See https://dev.mysql.com/doc/refman/5.7/en/date-and-time-functions.html#function_yearweek func (b *builtinYearWeekWithModeSig) vecEvalInt(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + n := input.NumRows() + buf1, err := b.bufAllocator.get(types.ETDatetime, n) + if err != nil { + return err + } + if err := b.args[0].VecEvalTime(b.ctx, input, buf1); err != nil { + return err + } + buf2, err := b.bufAllocator.get(types.ETInt, n) + if err != nil { + return err + } + if err := b.args[1].VecEvalInt(b.ctx, input, buf2); err != nil { + return err + } + + result.ResizeInt64(n, false) + result.MergeNulls(buf1) + i64s := result.Int64s() + ds := buf1.Times() + ms := buf2.Int64s() + for i := 0; i < n; i++ { + if result.IsNull(i) { + continue + } + date := ds[i] + if date.IsZero() { + if err := handleInvalidTimeError(b.ctx, types.ErrIncorrectDatetimeValue.GenWithStackByArgs(date.String())); err != nil { + return err + } + result.SetNull(i, true) + continue + } + mode := int(ms[i]) + if buf2.IsNull(i) { + mode = 0 + } + year, week := date.Time.YearWeek(mode) + i64s[i] = int64(week + year*100) + if i64s[i] < 0 { + i64s[i] = int64(math.MaxUint32) + } + } + return nil } func (b *builtinTimestampDiffSig) vectorized() bool { diff --git a/expression/builtin_time_vec_test.go b/expression/builtin_time_vec_test.go index 5f25d77204452..8f981d15a9233 100644 --- a/expression/builtin_time_vec_test.go +++ b/expression/builtin_time_vec_test.go @@ -183,6 +183,7 @@ var vecBuiltinTimeCases = map[string][]vecExprBenchCase{ }, ast.YearWeek: { {retEvalType: types.ETInt, childrenTypes: []types.EvalType{types.ETDatetime}}, + {retEvalType: types.ETInt, childrenTypes: []types.EvalType{types.ETDatetime, types.ETInt}}, }, ast.WeekOfYear: { {retEvalType: types.ETInt, childrenTypes: []types.EvalType{types.ETDatetime}}, From 99720f1b3890757d315956d88b0eaffd4587934b Mon Sep 17 00:00:00 2001 From: Zhongyang Wu <zhongyang.wu@outlook.com> Date: Mon, 11 Nov 2019 06:19:34 -0500 Subject: [PATCH 14/19] expression: implement vectorized evaluation for `builtinInetAtonSig` (#13237) --- expression/bench_test.go | 24 +++++++ expression/builtin_miscellaneous_vec.go | 69 +++++++++++++++++++- expression/builtin_miscellaneous_vec_test.go | 17 ++++- 3 files changed, 107 insertions(+), 3 deletions(-) diff --git a/expression/bench_test.go b/expression/bench_test.go index f324da15fd215..251cb6e25cec3 100644 --- a/expression/bench_test.go +++ b/expression/bench_test.go @@ -258,6 +258,18 @@ func (g *defaultGener) gen() interface{} { return nil } +// selectStringGener select one string randomly from the candidates array +type selectStringGener struct { + candidates []string +} + +func (g *selectStringGener) gen() interface{} { + if len(g.candidates) == 0 { + return nil + } + return g.candidates[rand.Intn(len(g.candidates))] +} + type constJSONGener struct { jsonStr string } @@ -392,6 +404,18 @@ func (g *ipv6StrGener) gen() interface{} { return ip.String() } +// ipv4StrGener is used to generate ipv4 strings. For example 111.111.111.111 +type ipv4StrGener struct { +} + +func (g *ipv4StrGener) gen() interface{} { + var ip net.IP = make([]byte, net.IPv4len) + for i := range ip { + ip[i] = uint8(rand.Intn(256)) + } + return ip.String() +} + // ipv6ByteGener is used to generate ipv6 address in 16 bytes string. type ipv6ByteGener struct { } diff --git a/expression/builtin_miscellaneous_vec.go b/expression/builtin_miscellaneous_vec.go index f1495903d8145..22892f7a7a176 100644 --- a/expression/builtin_miscellaneous_vec.go +++ b/expression/builtin_miscellaneous_vec.go @@ -411,11 +411,76 @@ func (b *builtinTimeAnyValueSig) vecEvalTime(input *chunk.Chunk, result *chunk.C } func (b *builtinInetAtonSig) vectorized() bool { - return false + return true } func (b *builtinInetAtonSig) vecEvalInt(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + n := input.NumRows() + buf, err := b.bufAllocator.get(types.ETString, n) + if err != nil { + return err + } + defer b.bufAllocator.put(buf) + if err := b.args[0].VecEvalString(b.ctx, input, buf); err != nil { + return err + } + var ( + byteResult, res uint64 + dotCount int + ) + result.ResizeInt64(n, false) + i64s := result.Int64s() + result.MergeNulls(buf) + for i := 0; i < n; i++ { + if result.IsNull(i) { + continue + } + ipAddr := buf.GetString(i) + if len(ipAddr) == 0 || ipAddr[len(ipAddr)-1] == '.' { + // ip address should not end with '.'. + result.SetNull(i, true) + continue + } + //reset + byteResult = 0 + res = 0 + dotCount = 0 + for _, c := range ipAddr { + if c >= '0' && c <= '9' { + digit := uint64(c - '0') + byteResult = byteResult*10 + digit + if byteResult > 255 { + result.SetNull(i, true) + break + } + } else if c == '.' { + dotCount++ + if dotCount > 3 { + result.SetNull(i, true) + break + } + res = (res << 8) + byteResult + byteResult = 0 + } else { + result.SetNull(i, true) + break // illegal char (not number or .) + } + } + // 127 -> 0.0.0.127 + // 127.255 -> 127.0.0.255 + // 127.256 -> NULL + // 127.2.1 -> 127.2.0.1 + if !result.IsNull(i) { + if dotCount == 1 { + res <<= 16 + } + if dotCount == 2 { + res <<= 8 + } + i64s[i] = int64((res << 8) + byteResult) + } + } + return nil } func (b *builtinInet6NtoaSig) vectorized() bool { diff --git a/expression/builtin_miscellaneous_vec_test.go b/expression/builtin_miscellaneous_vec_test.go index 075e5b94e22ea..3420e27285d49 100644 --- a/expression/builtin_miscellaneous_vec_test.go +++ b/expression/builtin_miscellaneous_vec_test.go @@ -31,7 +31,22 @@ var vecBuiltinMiscellaneousCases = map[string][]vecExprBenchCase{ ast.Sleep: {}, ast.UUID: {}, ast.Inet6Ntoa: {}, - ast.InetAton: {}, + ast.InetAton: { + {retEvalType: types.ETInt, childrenTypes: []types.EvalType{types.ETString}, geners: []dataGenerator{&ipv4StrGener{}}}, + {retEvalType: types.ETInt, childrenTypes: []types.EvalType{types.ETString}}, + {retEvalType: types.ETInt, childrenTypes: []types.EvalType{types.ETString}, geners: []dataGenerator{ + &selectStringGener{ + candidates: []string{ + "11.11.11.11.", // last char is . + "266.266.266.266", // int in string exceed 255 + "127", + ".122", + ".123.123", + "127.255", + "127.2.1", + }, + }}}, + }, ast.IsIPv4Mapped: { {retEvalType: types.ETInt, childrenTypes: []types.EvalType{types.ETString}, geners: []dataGenerator{&ipv4MappedByteGener{}}}, {retEvalType: types.ETInt, childrenTypes: []types.EvalType{types.ETString}, geners: []dataGenerator{&ipv6ByteGener{}}}, From 413643e25af10fc9edd588aad1408a893de7bb06 Mon Sep 17 00:00:00 2001 From: "Zhuomin(Charming) Liu" <lzmhhh123@gmail.com> Date: Mon, 11 Nov 2019 19:37:29 +0800 Subject: [PATCH 15/19] planner: support point get by `_tidb_rowid` (#13360) --- cmd/explaintest/r/explain_easy.result | 4 +--- executor/point_get_test.go | 11 +++++++++++ planner/core/point_get_plan.go | 4 ++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index ef180b0b434a2..9c2273719aba0 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -602,9 +602,7 @@ drop table if exists t; create table t(a int); explain select * from t where _tidb_rowid = 0; id count task operator info -Projection_4 8000.00 root Column#1 -└─TableReader_6 10000.00 root data:TableScan_5 - └─TableScan_5 10000.00 cop[tikv] table:t, range:[0,0], keep order:false, stats:pseudo +Point_Get_1 1.00 root table:t, handle:0 explain select * from t where _tidb_rowid > 0; id count task operator info Projection_4 8000.00 root Column#1 diff --git a/executor/point_get_test.go b/executor/point_get_test.go index d888c7417513f..aa6a1c23a94bd 100644 --- a/executor/point_get_test.go +++ b/executor/point_get_test.go @@ -564,3 +564,14 @@ func (s *testPointGetSuite) TestForUpdateRetry(c *C) { _, err := tk.Exec("commit") c.Assert(session.ErrForUpdateCantRetry.Equal(err), IsTrue) } + +func (s *testPointGetSuite) TestPointGetByRowID(c *C) { + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t (a varchar(20), b int)") + tk.MustExec("insert into t values(\"aaa\", 12)") + tk.MustQuery("explain select * from t where t._tidb_rowid = 1").Check(testkit.Rows( + "Point_Get_1 1.00 root table:t, handle:1")) + tk.MustQuery("select * from t where t._tidb_rowid = 1").Check(testkit.Rows("aaa 12")) +} diff --git a/planner/core/point_get_plan.go b/planner/core/point_get_plan.go index fdb2984e1c1a2..31d68a7cb11d9 100644 --- a/planner/core/point_get_plan.go +++ b/planner/core/point_get_plan.go @@ -783,6 +783,10 @@ func getNameValuePairs(nvPairs []nameValuePair, tblName model.CIStr, expr ast.Ex func findPKHandle(tblInfo *model.TableInfo, pairs []nameValuePair) (handlePair nameValuePair, fieldType *types.FieldType) { if !tblInfo.PKIsHandle { + rowIDIdx := findInPairs("_tidb_rowid", pairs) + if rowIDIdx != -1 { + return pairs[rowIDIdx], types.NewFieldType(mysql.TypeLonglong) + } return handlePair, nil } for _, col := range tblInfo.Columns { From 139a5399d22c359e48d348365ed0b5794d55ce9e Mon Sep 17 00:00:00 2001 From: Eugene Kalinin <ekalinin@users.noreply.github.com> Date: Mon, 11 Nov 2019 14:51:58 +0300 Subject: [PATCH 16/19] expression: implement vectorized evaluation for builtinCastJSONAsTimeSig (#13257) --- expression/bench_test.go | 15 ++++++++++++++- expression/builtin_cast_vec.go | 2 +- expression/builtin_cast_vec_test.go | 4 ++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/expression/bench_test.go b/expression/bench_test.go index 251cb6e25cec3..42aaca13fdb4b 100644 --- a/expression/bench_test.go +++ b/expression/bench_test.go @@ -199,6 +199,12 @@ func BenchmarkScalarFunctionClone(b *testing.B) { b.ReportAllocs() } +func getRandomTime() types.MysqlTime { + return types.FromDate(rand.Intn(2200), rand.Intn(10)+1, rand.Intn(20)+1, + rand.Intn(12), rand.Intn(60), rand.Intn(60), rand.Intn(1000000)) + +} + // dataGenerator is used to generate data for test. type dataGenerator interface { gen() interface{} @@ -237,7 +243,7 @@ func (g *defaultGener) gen() interface{} { } return d case types.ETDatetime, types.ETTimestamp: - gt := types.FromDate(rand.Intn(2200), rand.Intn(10)+1, rand.Intn(20)+1, rand.Intn(12), rand.Intn(60), rand.Intn(60), rand.Intn(1000000)) + gt := getRandomTime() t := types.Time{Time: gt, Type: convertETType(g.eType)} return t case types.ETDuration: @@ -292,6 +298,13 @@ func (g *jsonStringGener) gen() interface{} { return j.String() } +type jsonTimeGener struct{} + +func (g *jsonTimeGener) gen() interface{} { + tm := types.Time{Time: getRandomTime(), Type: mysql.TypeDatetime, Fsp: types.DefaultFsp} + return json.CreateBinary(tm.String()) +} + type rangeDurationGener struct { nullRation float64 } diff --git a/expression/builtin_cast_vec.go b/expression/builtin_cast_vec.go index 1f82da00ee106..0358bfe094cba 100644 --- a/expression/builtin_cast_vec.go +++ b/expression/builtin_cast_vec.go @@ -463,7 +463,7 @@ func (b *builtinCastJSONAsTimeSig) vecEvalTime(input *chunk.Chunk, result *chunk stmtCtx := b.ctx.GetSessionVars().StmtCtx fsp := int8(b.tp.Decimal) for i := 0; i < n; i++ { - if buf.IsNull(i) { + if result.IsNull(i) { continue } s, err := buf.GetJSON(i).Unquote() diff --git a/expression/builtin_cast_vec_test.go b/expression/builtin_cast_vec_test.go index c27542e3d76ec..d40622640efea 100644 --- a/expression/builtin_cast_vec_test.go +++ b/expression/builtin_cast_vec_test.go @@ -75,6 +75,10 @@ var vecBuiltinCastCases = map[string][]vecExprBenchCase{ {retEvalType: types.ETDatetime, childrenTypes: []types.EvalType{types.ETDuration}}, {retEvalType: types.ETDatetime, childrenTypes: []types.EvalType{types.ETDatetime}}, {retEvalType: types.ETDatetime, childrenTypes: []types.EvalType{types.ETTimestamp}}, + {retEvalType: types.ETDatetime, childrenTypes: []types.EvalType{types.ETJson}, + geners: []dataGenerator{ + &jsonTimeGener{}, + }}, }, } From ceb31efe14f310c80fcddf9adc23473c685a88d2 Mon Sep 17 00:00:00 2001 From: shihongzhi <shi65881583@gmail.com> Date: Mon, 11 Nov 2019 20:51:15 +0800 Subject: [PATCH 17/19] expression: implement vectorized evaluation for `builtinCurrentRoleSig` (#13352) --- expression/builtin_info_vec.go | 33 +++++++++++++++++++++++++++-- expression/builtin_info_vec_test.go | 4 +++- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/expression/builtin_info_vec.go b/expression/builtin_info_vec.go index 63f0a91e3c09c..cb44a965fac2c 100644 --- a/expression/builtin_info_vec.go +++ b/expression/builtin_info_vec.go @@ -14,6 +14,9 @@ package expression import ( + "sort" + "strings" + "github.com/pingcap/errors" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/types" @@ -103,11 +106,37 @@ func (b *builtinCurrentUserSig) vecEvalString(input *chunk.Chunk, result *chunk. } func (b *builtinCurrentRoleSig) vectorized() bool { - return false + return true } +// evalString evals a builtinCurrentUserSig. +// See https://dev.mysql.com/doc/refman/5.7/en/information-functions.html#function_current-user func (b *builtinCurrentRoleSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + n := input.NumRows() + + data := b.ctx.GetSessionVars() + if data == nil || data.ActiveRoles == nil { + return errors.Errorf("Missing session variable when eval builtin") + } + + result.ReserveString(n) + if len(data.ActiveRoles) == 0 { + for i := 0; i < n; i++ { + result.AppendString("") + } + return nil + } + + sortedRes := make([]string, 0, 10) + for _, r := range data.ActiveRoles { + sortedRes = append(sortedRes, r.String()) + } + sort.Strings(sortedRes) + res := strings.Join(sortedRes, ",") + for i := 0; i < n; i++ { + result.AppendString(res) + } + return nil } func (b *builtinUserSig) vectorized() bool { diff --git a/expression/builtin_info_vec_test.go b/expression/builtin_info_vec_test.go index bdba8bdbfd5d9..0034a8b37e348 100644 --- a/expression/builtin_info_vec_test.go +++ b/expression/builtin_info_vec_test.go @@ -71,7 +71,9 @@ var vecBuiltinInfoCases = map[string][]vecExprBenchCase{ ast.RowCount: { {retEvalType: types.ETInt, childrenTypes: []types.EvalType{}}, }, - ast.CurrentRole: {}, + ast.CurrentRole: { + {retEvalType: types.ETString, childrenTypes: []types.EvalType{}}, + }, ast.TiDBIsDDLOwner: { {retEvalType: types.ETInt, childrenTypes: []types.EvalType{}}, }, From 9f7d6d23c093f0dc3abe4f6dcd5d7c514e3dd554 Mon Sep 17 00:00:00 2001 From: Eugene Kalinin <ekalinin@users.noreply.github.com> Date: Mon, 11 Nov 2019 15:57:29 +0300 Subject: [PATCH 18/19] expression: implement vectorized evaluation for builtinFromUnixTime2ArgSig (#13319) --- expression/builtin_string_vec_test.go | 8 +++++ expression/builtin_time_vec.go | 45 +++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/expression/builtin_string_vec_test.go b/expression/builtin_string_vec_test.go index bd08d0ab9161c..4c5c965af6000 100644 --- a/expression/builtin_string_vec_test.go +++ b/expression/builtin_string_vec_test.go @@ -228,6 +228,14 @@ var vecBuiltinStringCases = map[string][]vecExprBenchCase{ ast.Elt: { {retEvalType: types.ETString, childrenTypes: []types.EvalType{types.ETInt, types.ETString, types.ETString, types.ETString}, geners: []dataGenerator{&rangeInt64Gener{-1, 5}}}, }, + ast.FromUnixTime: { + {retEvalType: types.ETString, childrenTypes: []types.EvalType{types.ETDecimal, types.ETString}, + geners: []dataGenerator{ + gener{defaultGener{eType: types.ETDecimal, nullRation: 0.9}}, + &constStrGener{"%y-%m-%d"}, + }, + }, + }, } func (s *testEvaluatorSuite) TestVectorizedBuiltinStringEvalOneVec(c *C) { diff --git a/expression/builtin_time_vec.go b/expression/builtin_time_vec.go index 4a53c1b7ef7fe..bb59fe89651c5 100644 --- a/expression/builtin_time_vec.go +++ b/expression/builtin_time_vec.go @@ -128,11 +128,52 @@ func (b *builtinDateSig) vectorized() bool { } func (b *builtinFromUnixTime2ArgSig) vectorized() bool { - return false + return true } func (b *builtinFromUnixTime2ArgSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + n := input.NumRows() + buf1, err := b.bufAllocator.get(types.ETDecimal, n) + if err != nil { + return err + } + defer b.bufAllocator.put(buf1) + if err = b.args[0].VecEvalDecimal(b.ctx, input, buf1); err != nil { + return err + } + + buf2, err := b.bufAllocator.get(types.ETString, n) + if err != nil { + return err + } + defer b.bufAllocator.put(buf2) + if err = b.args[1].VecEvalString(b.ctx, input, buf2); err != nil { + return err + } + + result.ReserveString(n) + ds := buf1.Decimals() + fsp := int8(b.tp.Decimal) + for i := 0; i < n; i++ { + if buf1.IsNull(i) || buf2.IsNull(i) { + result.AppendNull() + continue + } + t, isNull, err := evalFromUnixTime(b.ctx, fsp, &ds[i]) + if err != nil { + return err + } + if isNull { + result.AppendNull() + continue + } + res, err := t.DateFormat(buf2.GetString(i)) + if err != nil { + return err + } + result.AppendString(res) + } + return nil } func (b *builtinSysDateWithoutFspSig) vectorized() bool { From e9f19970d51164cff7f6da5976796e2e95c174e8 Mon Sep 17 00:00:00 2001 From: Eugene Kalinin <ekalinin@users.noreply.github.com> Date: Mon, 11 Nov 2019 16:03:29 +0300 Subject: [PATCH 19/19] expression: implement vectorized evaluation for builtinMakeDateSig (#13305) --- expression/builtin_time_vec.go | 63 ++++++++++++++++++++++++++++- expression/builtin_time_vec_test.go | 6 ++- 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/expression/builtin_time_vec.go b/expression/builtin_time_vec.go index bb59fe89651c5..6421ab7f8b4d0 100644 --- a/expression/builtin_time_vec.go +++ b/expression/builtin_time_vec.go @@ -1063,11 +1063,70 @@ func (b *builtinAddDateIntDecimalSig) vecEvalTime(input *chunk.Chunk, result *ch } func (b *builtinMakeDateSig) vectorized() bool { - return false + return true } func (b *builtinMakeDateSig) vecEvalTime(input *chunk.Chunk, result *chunk.Column) error { - return errors.Errorf("not implemented") + n := input.NumRows() + buf1, err := b.bufAllocator.get(types.ETInt, n) + if err != nil { + return err + } + defer b.bufAllocator.put(buf1) + if err := b.args[0].VecEvalInt(b.ctx, input, buf1); err != nil { + return err + } + + buf2, err := b.bufAllocator.get(types.ETInt, n) + if err != nil { + return err + } + defer b.bufAllocator.put(buf2) + if err := b.args[1].VecEvalInt(b.ctx, input, buf2); err != nil { + return err + } + + result.ResizeTime(n, false) + result.MergeNulls(buf1, buf2) + + times := result.Times() + years := buf1.Int64s() + days := buf2.Int64s() + + for i := 0; i < n; i++ { + if result.IsNull(i) { + continue + } + if days[i] <= 0 || years[i] < 0 || years[i] > 9999 { + result.SetNull(i, true) + continue + } + if years[i] < 70 { + years[i] += 2000 + } else if years[i] < 100 { + years[i] += 1900 + } + startTime := types.Time{ + Time: types.FromDate(int(years[i]), 1, 1, 0, 0, 0, 0), + Type: mysql.TypeDate, + Fsp: 0, + } + retTimestamp := types.TimestampDiff("DAY", types.ZeroDate, startTime) + if retTimestamp == 0 { + if err = handleInvalidTimeError(b.ctx, types.ErrIncorrectDatetimeValue.GenWithStackByArgs(startTime.String())); err != nil { + return err + } + result.SetNull(i, true) + continue + } + ret := types.TimeFromDays(retTimestamp + days[i] - 1) + if ret.IsZero() || ret.Time.Year() > 9999 { + result.SetNull(i, true) + continue + } + times[i] = ret + } + return nil } func (b *builtinWeekOfYearSig) vectorized() bool { diff --git a/expression/builtin_time_vec_test.go b/expression/builtin_time_vec_test.go index 8f981d15a9233..478f973eb7106 100644 --- a/expression/builtin_time_vec_test.go +++ b/expression/builtin_time_vec_test.go @@ -87,7 +87,11 @@ var vecBuiltinTimeCases = map[string][]vecExprBenchCase{ ast.CurrentDate: { {retEvalType: types.ETDatetime}, }, - ast.MakeDate: {}, + ast.MakeDate: { + {retEvalType: types.ETDatetime, childrenTypes: []types.EvalType{types.ETInt, types.ETInt}, + geners: []dataGenerator{&rangeInt64Gener{0, 2200}, &rangeInt64Gener{0, 365}}, + }, + }, ast.MakeTime: {}, ast.PeriodAdd: { {retEvalType: types.ETInt, childrenTypes: []types.EvalType{types.ETInt, types.ETInt}, geners: []dataGenerator{new(periodGener), new(periodGener)}},