Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

coldata: fix Bytes invariant in some cases #59028

Merged
merged 2 commits into from
Jan 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion pkg/col/coldata/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,17 @@ func (m *MemBatch) SetSelection(b bool) {
func (m *MemBatch) SetLength(length int) {
m.length = length
if length > 0 {
// In order to maintain the invariant of Bytes vectors we need to update
// offsets up to the element with the largest index that can be accessed
// by the batch.
maxIdx := length - 1
if m.useSel {
// Note that here we rely on the fact that selection vectors are
// increasing sequences.
maxIdx = m.sel[length-1]
}
for i, ok := m.bytesVecIdxs.Next(0); ok; i, ok = m.bytesVecIdxs.Next(i + 1) {
m.b[i].Bytes().UpdateOffsetsToBeNonDecreasing(length)
m.b[i].Bytes().UpdateOffsetsToBeNonDecreasing(maxIdx + 1)
}
}
}
Expand Down
34 changes: 34 additions & 0 deletions pkg/col/coldata/batch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,37 @@ func TestBatchReset(t *testing.T) {
b = coldata.NewMemBatch(typsBytes, coldata.StandardColumnFactory)
resetAndCheck(b, typsBytes, 1, true)
}

// TestBatchWithBytesAndNulls verifies that the invariant of Bytes vectors is
// maintained when NULL values are present in the vector and there is a
// selection vector on the batch.
func TestBatchWithBytesAndNulls(t *testing.T) {
defer leaktest.AfterTest(t)()

b := coldata.NewMemBatch([]*types.T{types.Bytes}, coldata.StandardColumnFactory)
// We will insert some garbage data into the Bytes vector in positions that
// are not mentioned in the selection vector. All the values that are
// selected are actually NULL values, so we don't set anything on the Bytes
// vector there.
if coldata.BatchSize() < 6 {
return
}
sel := []int{1, 3, 5}
vec := b.ColVec(0).Bytes()
vec.Set(0, []byte("zero"))
vec.Set(2, []byte("two"))
b.SetSelection(true)
copy(b.Selection(), sel)

// This is where the invariant of non-decreasing offsets in the Bytes vector
// should be updated.
b.SetLength(len(sel))

// In many cases in the vectorized execution engine, for performance
// reasons, we will attempt to get something from the Bytes vector at
// positions on which we have NULLs, so all of the Gets below should be
// safe.
for _, idx := range sel {
assert.True(t, len(vec.Get(idx)) == 0)
}
}
10 changes: 3 additions & 7 deletions pkg/col/coldata/vec.eg.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pkg/col/coldata/vec.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ type Vec interface {
// An optional Sel slice can also be provided to apply a filter on the source
// Vec.
// Refer to the SliceArgs comment for specifics and TestAppend for examples.
//
// NOTE: Append does *not* support the case of appending 0 values (i.e.
// the behavior of Append when args.SrcStartIdx == args.SrcEndIdx is
// undefined).
Append(SliceArgs)

// Copy uses CopySliceArgs to copy elements of a source Vec into this Vec. It is
Expand Down
14 changes: 6 additions & 8 deletions pkg/col/coldata/vec_tmpl.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,13 @@ func (m *memColumn) Append(args SliceArgs) {
// whether the value is NULL. It is possible that Bytes' invariant of
// non-decreasing offsets on the source is currently not maintained, so
// we explicitly enforce it.
maxIdx := 0
for _, selIdx := range sel {
if selIdx > maxIdx {
maxIdx = selIdx
}
}
fromCol.UpdateOffsetsToBeNonDecreasing(maxIdx + 1)
//
// Note that here we rely on the fact that selection vectors are
// increasing sequences.
fromCol.UpdateOffsetsToBeNonDecreasing(sel[len(sel)-1] + 1)
// {{else}}
toCol = execgen.SLICE(toCol, 0, args.DestIdx)
// {{/* Here WINDOW means slicing which allows us to use APPENDVAL below. */}}
toCol = execgen.WINDOW(toCol, 0, args.DestIdx)
// {{end}}
for _, selIdx := range sel {
val := fromCol.Get(selIdx)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ var dataManipulationReplacementInfos = []dataManipulationReplacementInfo{
numArgs: 3,
replaceWith: "Set",
},
{
templatePlaceholder: "execgen.SLICE",
numArgs: 3,
replaceWith: "Slice",
},
{
templatePlaceholder: "execgen.COPYSLICE",
numArgs: 5,
Expand Down
9 changes: 4 additions & 5 deletions pkg/sql/colexec/execgen/cmd/execgen/overloads_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,12 +481,12 @@ func (b *argWidthOverloadBase) Set(target, i, new string) string {
return set(b.CanonicalTypeFamily, target, i, new)
}

// Slice is a function that should only be used in templates.
func (b *argWidthOverloadBase) Slice(target, start, end string) string {
// slice is a function that should only be used in templates.
func (b *argWidthOverloadBase) slice(target, start, end string) string {
switch b.CanonicalTypeFamily {
case types.BytesFamily:
// Bytes vector doesn't support slicing.
colexecerror.InternalError(errors.AssertionFailedf("Slice method is attempted to be generated on Bytes vector"))
colexecerror.InternalError(errors.AssertionFailedf("slice method is attempted to be generated on Bytes vector"))
case typeconv.DatumVecCanonicalTypeFamily:
return fmt.Sprintf(`%s.Slice(%s, %s)`, target, start, end)
}
Expand Down Expand Up @@ -608,7 +608,7 @@ func (b *argWidthOverloadBase) Window(target, start, end string) string {
case types.BytesFamily:
return fmt.Sprintf(`%s.Window(%s, %s)`, target, start, end)
}
return b.Slice(target, start, end)
return b.slice(target, start, end)
}

// setVariableSize is a function that should only be used in templates. It
Expand Down Expand Up @@ -651,7 +651,6 @@ var (
_ = awob.GoTypeSliceName
_ = awob.CopyVal
_ = awob.Set
_ = awob.Slice
_ = awob.Sliceable
_ = awob.CopySlice
_ = awob.AppendSlice
Expand Down
7 changes: 0 additions & 7 deletions pkg/sql/colexec/execgen/placeholders.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ const nonTemplatePanic = "do not call from non-template code"
var (
_ = COPYVAL
_ = SET
_ = SLICE
_ = COPYSLICE
_ = APPENDSLICE
_ = APPENDVAL
Expand All @@ -44,12 +43,6 @@ func SET(target, i, new interface{}) {
colexecerror.InternalError(errors.AssertionFailedf(nonTemplatePanic))
}

// SLICE is a template function.
func SLICE(target, start, end interface{}) interface{} {
colexecerror.InternalError(errors.AssertionFailedf(nonTemplatePanic))
return nil
}

// COPYSLICE is a template function.
func COPYSLICE(target, src, destIdx, srcStartIdx, srcEndIdx interface{}) {
colexecerror.InternalError(errors.AssertionFailedf(nonTemplatePanic))
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/colexec/hashtable.go
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ func (ht *hashTable) removeDuplicates(
// appendAllDistinct appends all tuples from batch to the hash table. It
// assumes that all tuples are distinct and that ht.probeScratch.hashBuffer
// contains the hash codes for all of them.
// NOTE: batch must be of non-zero length.
func (ht *hashTable) appendAllDistinct(ctx context.Context, batch coldata.Batch) {
numBuffered := uint64(ht.vals.Length())
ht.allocator.PerformOperation(ht.vals.ColVecs(), func() {
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/colexec/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ func (b *appendOnlyBufferedBatch) ReplaceCol(coldata.Vec, int) {
// [startIdx, endIdx) from batch (paying attention to the selection vector)
// into b.
// NOTE: this does *not* perform memory accounting.
// NOTE: batch must be of non-zero length.
func (b *appendOnlyBufferedBatch) append(batch coldata.Batch, startIdx, endIdx int) {
for _, colIdx := range b.colsToStore {
b.colVecs[colIdx].Append(
Expand Down