Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

executor/join : use shallow copy for join. #7433

Merged
merged 49 commits into from
Aug 29, 2018
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
a576cc0
iterator copy init
crazycs520 Aug 19, 2018
79b74e7
fix iterator bug
crazycs520 Aug 19, 2018
d9fe98b
fix nullmap index out of range and add test
crazycs520 Aug 19, 2018
8fa2ad8
refine code
crazycs520 Aug 19, 2018
33ae5a0
refine code
crazycs520 Aug 19, 2018
255c64b
remove iterator copy and use back to pre rows
crazycs520 Aug 19, 2018
0973373
checkout joiner.go file
crazycs520 Aug 19, 2018
055a41a
add check to bench
crazycs520 Aug 20, 2018
c5eeff9
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 20, 2018
e7c6c64
iterator only once
crazycs520 Aug 21, 2018
ce8eb4f
add appendMultiSameNullBitmap
crazycs520 Aug 21, 2018
499b6f9
field by field only one line 2X
crazycs520 Aug 22, 2018
2a295a1
refine column copy
crazycs520 Aug 22, 2018
9d82447
refine column copy
crazycs520 Aug 22, 2018
ef85948
add shadow copy to join and move code
crazycs520 Aug 22, 2018
45b4631
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 22, 2018
5bf279f
rename function
crazycs520 Aug 22, 2018
9690506
add comment
crazycs520 Aug 22, 2018
8db639f
add shadow copy to inner join
crazycs520 Aug 22, 2018
7a55ff5
refine code
crazycs520 Aug 22, 2018
05c1273
add shadow copy to all join
crazycs520 Aug 22, 2018
66a133c
remove redundancy code
crazycs520 Aug 22, 2018
dadb047
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 22, 2018
b4192e4
remove column copy and redundancy code
crazycs520 Aug 23, 2018
4096997
address comment
crazycs520 Aug 23, 2018
b802941
add mutchunk
crazycs520 Aug 23, 2018
c5cfdf1
address comment
crazycs520 Aug 23, 2018
947f9d4
use mutRow instead of mut chunk.
crazycs520 Aug 23, 2018
24ab90e
address comment
crazycs520 Aug 23, 2018
2b8d896
refine code
crazycs520 Aug 23, 2018
3f82d2b
address comment
crazycs520 Aug 23, 2018
604e49d
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 23, 2018
e5f4cbe
address comment
crazycs520 Aug 23, 2018
abbc2c9
address comment
crazycs520 Aug 24, 2018
e1dd31d
address comment and add test to mutRow_test
crazycs520 Aug 24, 2018
593b31c
remove chunk_copy_test.go
crazycs520 Aug 24, 2018
3a6fbb7
refine code
crazycs520 Aug 24, 2018
600fdc3
refine test
crazycs520 Aug 24, 2018
f4fbd70
refine test and code
crazycs520 Aug 24, 2018
23eaf1e
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 24, 2018
e9ef7dd
optimize append num
crazycs520 Aug 24, 2018
21b5417
remove shadown copy on inner, leftOut, rightOut join, vectorized filt…
crazycs520 Aug 27, 2018
0aadbf6
address comment
crazycs520 Aug 27, 2018
0de2063
address comment
crazycs520 Aug 27, 2018
c681658
address comment
crazycs520 Aug 28, 2018
f8ccdf2
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 28, 2018
c7b2301
Merge branch 'master' of https://github.com/pingcap/tidb into column-…
crazycs520 Aug 29, 2018
1e8a9f0
update test after merge
crazycs520 Aug 29, 2018
b939b3b
Merge branch 'master' into column-copy
XuHuaiyu Aug 29, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions util/chunk/chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,77 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) {
}
}

func (c *Chunk) AppendPartialRows(colIdx int, rowIt Iterator, maxLen int) int {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the exported method, you must comment. It is not optional.

oldRowLen := c.columns[colIdx+0].length
columns := rowIt.Current().c.columns
for i, rowCol := range columns {
chkCol := c.columns[colIdx+i]
rower := rowIt
if i != 0 {
rower.PreRows(c.columns[colIdx+0].length - oldRowLen)
}

if rowCol.isFixed() {
elemLen := len(rowCol.elemBuf)
for row, j := rower.Current(), 0; j < maxLen && row != rower.End(); row, j = rower.Next(), j+1 {
chkCol.appendNullBitmap(!rowCol.isNull(row.idx))
offset := row.idx * elemLen
chkCol.data = append(chkCol.data, rowCol.data[offset:offset+elemLen]...)
chkCol.length++
}
} else {
for row, j := rower.Current(), 0; j < maxLen && row != rower.End(); row, j = rower.Next(), j+1 {
chkCol.appendNullBitmap(!rowCol.isNull(row.idx))
start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1]
chkCol.data = append(chkCol.data, rowCol.data[start:end]...)
chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data)))
chkCol.length++
}
}
}
return c.columns[colIdx+0].length - oldRowLen
}

func (c *Chunk) AppendPartialSameRows(colIdx int, row Row, rowsLen int) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same reason.

for i, rowCol := range row.c.columns {
chkCol := c.columns[colIdx+i]
for j := 0; j < rowsLen; j++ {
chkCol.appendNullBitmap(!rowCol.isNull(row.idx))
chkCol.length++
}
if rowCol.isFixed() {
elemLen := len(rowCol.elemBuf)
start := row.idx * elemLen
end := start + elemLen
for j := 0; j < rowsLen; j++ {
chkCol.data = append(chkCol.data, rowCol.data[start:start+end]...)
}
} else {
start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1]
for j := 0; j < rowsLen; j++ {
chkCol.data = append(chkCol.data, rowCol.data[start:end]...)
chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data)))
}
}

}
}

// AppendRightMultiRows appends up to maxLen joined rows to c. Each appended
// row consists of a row read from lhser, written to the columns starting at
// index 0, followed by a copy of rhs, written to the columns starting at the
// column count of lhser's current row.
//
// It returns the number of rows appended, which is 0 when lhser is already at
// its end row. numVirtualRows is advanced by the number of rows actually
// appended, not by the requested maximum.
func (c *Chunk) AppendRightMultiRows(lhser Iterator, rhs Row, maxLen int) int {
	cur := lhser.Current()
	if cur == lhser.End() {
		// No left rows remain; there is no row to take the column count from.
		return 0
	}
	lhsLen := cur.Len()
	rowsLen := c.AppendPartialRows(0, lhser, maxLen)
	c.AppendPartialSameRows(lhsLen, rhs, rowsLen)
	c.numVirtualRows += rowsLen
	return rowsLen
}

// AppendMultiRows appends up to maxLen joined rows to c. Each appended row
// consists of a copy of lhs, written to the columns starting at index 0,
// followed by a row read from rhser, written to the columns starting at
// index lhs.Len().
//
// It returns the number of rows appended. numVirtualRows is advanced by the
// number of rows actually appended, not by the requested maximum.
func (c *Chunk) AppendMultiRows(lhs Row, rhser Iterator, maxLen int) int {
	// Copy the varying right side first; its row count decides how many
	// copies of the fixed left row are needed.
	rowsLen := c.AppendPartialRows(lhs.Len(), rhser, maxLen)
	c.AppendPartialSameRows(0, lhs, rowsLen)
	c.numVirtualRows += rowsLen
	return rowsLen
}

// Append appends rows in [begin, end) in another Chunk to a Chunk.
func (c *Chunk) Append(other *Chunk, begin, end int) {
for colID, src := range other.columns {
Expand Down
126 changes: 126 additions & 0 deletions util/chunk/chunk_copy_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package chunk
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Move the tests to mutrow_test.go.
  2. The tests in this file are not standard; please make them more elegant.


import (
"testing"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move this line to line 5.

)

// numRows is the number of rows placed in the source chunk built by the
// helpers in this file, and the row count the destination is checked against.
var numRows = 1024

// newChunkWithInitCap builds an empty Chunk with one column per entry of
// elemLen, in order. A positive entry adds a fixed-length column with that
// element length; any other entry adds a variable-length column. cap is
// passed through to each column constructor.
func newChunkWithInitCap(cap int, elemLen ...int) *Chunk {
	chk := new(Chunk)
	for _, width := range elemLen {
		if width <= 0 {
			chk.addVarLenColumn(cap)
			continue
		}
		chk.addFixedLenColumn(width, cap)
	}
	return chk
}

// getChunk returns a four-column chunk (two 8-byte fixed-length columns
// followed by two variable-length columns) holding numRows rows. Row i holds
// int64(i), 1, "abcd" and the bytes "01234567890zxcvbnmqwer".
func getChunk() *Chunk {
	const (
		text    = "abcd"
		payload = "01234567890zxcvbnmqwer"
	)
	src := newChunkWithInitCap(numRows, 8, 8, 0, 0)
	for rowID := 0; rowID < numRows; rowID++ {
		src.AppendInt64(0, int64(rowID))
		src.AppendInt64(1, 1)
		src.AppendString(2, text)
		src.AppendBytes(3, []byte(payload))
	}
	return src
}

// prepareChks builds the fixtures shared by the copy tests and benchmarks:
// it1 iterates over a freshly built source chunk and has had Begin called on
// it, row is row 0 of that same chunk, and dst is an empty eight-column chunk
// whose layout is the source layout repeated twice.
func prepareChks() (it1 Iterator, row Row, dst *Chunk) {
	src := getChunk()
	row = src.GetRow(0)

	it1 = NewIterator4Chunk(src)
	it1.Begin()

	dst = newChunkWithInitCap(numRows,
		8, 8, 0, 0, // left side of the joined row
		8, 8, 0, 0, // right side of the joined row
	)
	return it1, row, dst
}

// checkDstChk verifies that dst holds numRows joined rows as produced by the
// copy tests: columns 0-3 of row j must be int64(j), 1, "abcd" and
// "01234567890zxcvbnmqwer", and columns 4-7 must be 0, 1, "abcd" and
// "01234567890zxcvbnmqwer".
//
// A wrong column count or row count aborts the test immediately, so the
// per-row checks never read past the data that was actually written. Value
// mismatches are reported with the row, column and offending value.
func checkDstChk(t *testing.T, dst *Chunk) {
	t.Helper()

	const (
		numCols = 8
		text    = "abcd"
		payload = "01234567890zxcvbnmqwer"
	)

	if got := len(dst.columns); got < numCols {
		t.Fatalf("got %d columns, want at least %d", got, numCols)
	}
	for i := 0; i < numCols; i++ {
		if got := dst.columns[i].length; got != numRows {
			t.Fatalf("column %d: got %d rows, want %d", i, got, numRows)
		}
	}

	for j := 0; j < numRows; j++ {
		row := dst.GetRow(j)
		if got := row.GetInt64(0); got != int64(j) {
			t.Errorf("row %d col 0: got %d, want %d", j, got, j)
		}
		if got := row.GetInt64(1); got != 1 {
			t.Errorf("row %d col 1: got %d, want 1", j, got)
		}
		if got := row.GetString(2); got != text {
			t.Errorf("row %d col 2: got %q, want %q", j, got, text)
		}
		if got := string(row.GetBytes(3)); got != payload {
			t.Errorf("row %d col 3: got %q, want %q", j, got, payload)
		}

		if got := row.GetInt64(4); got != 0 {
			t.Errorf("row %d col 4: got %d, want 0", j, got)
		}
		if got := row.GetInt64(5); got != 1 {
			t.Errorf("row %d col 5: got %d, want 1", j, got)
		}
		if got := row.GetString(6); got != text {
			t.Errorf("row %d col 6: got %q, want %q", j, got, text)
		}
		if got := string(row.GetBytes(7)); got != payload {
			t.Errorf("row %d col 7: got %q, want %q", j, got, payload)
		}
	}
}

// TestCopyFieldByField builds the joined chunk one row at a time, appending
// each source row followed by the fixed right-hand row, then checks the
// result with checkDstChk.
func TestCopyFieldByField(t *testing.T) {
	outerIt, inner, joined := prepareChks()
	joined.Reset()

	outer := outerIt.Begin()
	for outer != outerIt.End() {
		joined.AppendRow(outer)
		joined.AppendPartialRow(outer.Len(), inner)
		outer = outerIt.Next()
	}

	checkDstChk(t, joined)
}

// TestCopyColumnByColumn builds the joined chunk in column-wise batches via
// AppendRightMultiRows and checks the result with checkDstChk.
func TestCopyColumnByColumn(t *testing.T) {
	// batchSize is the maximum number of rows requested per call.
	const batchSize = 128

	it1, row, dst := prepareChks()

	dst.Reset()
	for it1.Begin(); it1.Current() != it1.End(); {
		// The loop ends only because each call advances it1. Fail instead of
		// spinning forever if a call makes no progress.
		if n := dst.AppendRightMultiRows(it1, row, batchSize); n <= 0 {
			t.Fatalf("AppendRightMultiRows appended %d rows before the iterator reached its end", n)
		}
	}
	checkDstChk(t, dst)
}

// BenchmarkCopyFieldByField measures building the joined chunk one row at a
// time with AppendRow followed by AppendPartialRow. Fixture construction is
// excluded from the timing.
func BenchmarkCopyFieldByField(b *testing.B) {
	b.ReportAllocs()
	outerIt, inner, joined := prepareChks()

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		joined.Reset()
		outer := outerIt.Begin()
		for outer != outerIt.End() {
			joined.AppendRow(outer)
			joined.AppendPartialRow(outer.Len(), inner)
			outer = outerIt.Next()
		}
	}
}

// BenchmarkCopyColumnByColumn measures building the joined chunk in
// column-wise batches of at most 128 rows with AppendRightMultiRows. Fixture
// construction is excluded from the timing.
func BenchmarkCopyColumnByColumn(b *testing.B) {
	b.ReportAllocs()
	outerIt, inner, joined := prepareChks()

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		joined.Reset()
		outerIt.Begin()
		for outerIt.Current() != outerIt.End() {
			joined.AppendRightMultiRows(outerIt, inner, 128)
		}
	}
}
25 changes: 25 additions & 0 deletions util/chunk/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ type Iterator interface {
// Next returns the next Row.
Next() Row

PreRows(i int)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the use case for this method?


// End returns the invalid end Row.
End() Row

Expand Down Expand Up @@ -75,6 +77,11 @@ func (it *iterator4Slice) Next() Row {
return row
}

// PreRows implements the Iterator interface.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment basically says nothing. You have to be more explicit.

// PreRows implements the Iterator interface. For iterator4Slice the method
// body is empty: the argument i is ignored and the iterator position is left
// unchanged.
// NOTE(review): Chunk.AppendPartialRows calls PreRows to rewind the iterator
// between columns, so this iterator would not be rewound there — confirm
// whether that combination is meant to be supported.
func (it *iterator4Slice) PreRows(i int) {

}

// Current implements the Iterator interface.
func (it *iterator4Slice) Current() Row {
if it.cursor == 0 || it.cursor > it.Len() {
Expand Down Expand Up @@ -129,6 +136,14 @@ func (it *Iterator4Chunk) Next() Row {
return row
}

// PreRows implements the Iterator interface. It moves the iterator back by i
// rows by subtracting i from the cursor.
//
// If a positive i would take the cursor to or before position 0, the iterator
// is reset with Begin instead, and the cursor is left wherever Begin puts it.
// NOTE(review): this assumes Begin positions the iterator on the first row —
// confirm against Begin's definition.
// A zero i leaves the cursor unchanged.
func (it *Iterator4Chunk) PreRows(i int) {
	if i > 0 && it.cursor <= i {
		// Clamp instead of subtracting, so the cursor never ends up at a
		// non-positive position.
		it.Begin()
		return
	}
	it.cursor -= i
}

// Current implements the Iterator interface.
func (it *Iterator4Chunk) Current() Row {
if it.cursor == 0 || it.cursor > it.Len() {
Expand Down Expand Up @@ -196,6 +211,11 @@ func (it *iterator4List) Next() Row {
return row
}

// PreRows implements the Iterator interface. For iterator4List the method
// body is empty: the argument i is ignored and the iterator position is left
// unchanged.
// NOTE(review): Chunk.AppendPartialRows calls PreRows to rewind the iterator
// between columns, so this iterator would not be rewound there — confirm
// whether that combination is meant to be supported.
func (it *iterator4List) PreRows(i int) {

}

// Current implements the Iterator interface.
func (it *iterator4List) Current() Row {
if (it.chkCursor == 0 && it.rowCursor == 0) || it.chkCursor > it.li.NumChunks() {
Expand Down Expand Up @@ -255,6 +275,11 @@ func (it *iterator4RowPtr) Next() Row {
return row
}

// PreRows implements the Iterator interface. For iterator4RowPtr the method
// body is empty: the argument i is ignored and the iterator position is left
// unchanged.
// NOTE(review): Chunk.AppendPartialRows calls PreRows to rewind the iterator
// between columns, so this iterator would not be rewound there — confirm
// whether that combination is meant to be supported.
func (it *iterator4RowPtr) PreRows(i int) {

}

// Current implements the Iterator interface.
func (it *iterator4RowPtr) Current() Row {
if it.cursor == 0 || it.cursor > it.Len() {
Expand Down