diff --git a/pkg/col/coldata/batch.go b/pkg/col/coldata/batch.go index 0a8d44408c8a..0ad0fc9ff2c6 100644 --- a/pkg/col/coldata/batch.go +++ b/pkg/col/coldata/batch.go @@ -245,8 +245,17 @@ func (m *MemBatch) SetSelection(b bool) { func (m *MemBatch) SetLength(length int) { m.length = length if length > 0 { + // In order to maintain the invariant of Bytes vectors we need to update + // offsets up to the element with the largest index that can be accessed + // by the batch. + maxIdx := length - 1 + if m.useSel { + // Note that here we rely on the fact that selection vectors are + // increasing sequences. + maxIdx = m.sel[length-1] + } for i, ok := m.bytesVecIdxs.Next(0); ok; i, ok = m.bytesVecIdxs.Next(i + 1) { - m.b[i].Bytes().UpdateOffsetsToBeNonDecreasing(length) + m.b[i].Bytes().UpdateOffsetsToBeNonDecreasing(maxIdx + 1) } } } diff --git a/pkg/col/coldata/batch_test.go b/pkg/col/coldata/batch_test.go index 131b8fdaab04..c5dddcbe3e7a 100644 --- a/pkg/col/coldata/batch_test.go +++ b/pkg/col/coldata/batch_test.go @@ -108,3 +108,37 @@ func TestBatchReset(t *testing.T) { b = coldata.NewMemBatch(typsBytes, coldata.StandardColumnFactory) resetAndCheck(b, typsBytes, 1, true) } + +// TestBatchWithBytesAndNulls verifies that the invariant of Bytes vectors is +// maintained when NULL values are present in the vector and there is a +// selection vector on the batch. +func TestBatchWithBytesAndNulls(t *testing.T) { + defer leaktest.AfterTest(t)() + + b := coldata.NewMemBatch([]*types.T{types.Bytes}, coldata.StandardColumnFactory) + // We will insert some garbage data into the Bytes vector in positions that + // are not mentioned in the selection vector. All the values that are + // selected are actually NULL values, so we don't set anything on the Bytes + // vector there. + if coldata.BatchSize() < 6 { + return + } + sel := []int{1, 3, 5} + vec := b.ColVec(0).Bytes() + vec.Set(0, []byte("zero")) + vec.Set(2, []byte("two")) + b.SetSelection(true) + copy(b.Selection(), sel) + + // This is where the invariant of non-decreasing offsets in the Bytes vector + // should be updated. + b.SetLength(len(sel)) + + // In many cases in the vectorized execution engine, for performance + // reasons, we will attempt to get something from the Bytes vector at + // positions on which we have NULLs, so all of the Gets below should be + // safe. + for _, idx := range sel { + assert.True(t, len(vec.Get(idx)) == 0) + } +} diff --git a/pkg/col/coldata/vec.eg.go b/pkg/col/coldata/vec.eg.go index 748f616e4996..3e054e4fde26 100644 --- a/pkg/col/coldata/vec.eg.go +++ b/pkg/col/coldata/vec.eg.go @@ -73,13 +73,9 @@ func (m *memColumn) Append(args SliceArgs) { // whether the value is NULL. It is possible that Bytes' invariant of // non-decreasing offsets on the source is currently not maintained, so // we explicitly enforce it. - maxIdx := 0 - for _, selIdx := range sel { - if selIdx > maxIdx { - maxIdx = selIdx - } - } - fromCol.UpdateOffsetsToBeNonDecreasing(maxIdx + 1) + // Note that here we rely on the fact that selection vectors are + // increasing sequences. + fromCol.UpdateOffsetsToBeNonDecreasing(sel[len(sel)-1] + 1) for _, selIdx := range sel { val := fromCol.Get(selIdx) toCol.AppendVal(val) diff --git a/pkg/col/coldata/vec.go b/pkg/col/coldata/vec.go index 46695efd1f2a..db6752946542 100644 --- a/pkg/col/coldata/vec.go +++ b/pkg/col/coldata/vec.go @@ -96,6 +96,10 @@ type Vec interface { // An optional Sel slice can also be provided to apply a filter on the source // Vec. // Refer to the SliceArgs comment for specifics and TestAppend for examples. + // + // NOTE: Append does *not* support the case of appending 0 values (i.e. + // the behavior of Append when args.SrcStartIdx == args.SrcEndIdx is + // undefined). Append(SliceArgs) // Copy uses CopySliceArgs to copy elements of a source Vec into this Vec. It is diff --git a/pkg/col/coldata/vec_tmpl.go b/pkg/col/coldata/vec_tmpl.go index 1005760b6f4c..8cd6685943b6 100644 --- a/pkg/col/coldata/vec_tmpl.go +++ b/pkg/col/coldata/vec_tmpl.go @@ -75,15 +75,13 @@ func (m *memColumn) Append(args SliceArgs) { // whether the value is NULL. It is possible that Bytes' invariant of // non-decreasing offsets on the source is currently not maintained, so // we explicitly enforce it. - maxIdx := 0 - for _, selIdx := range sel { - if selIdx > maxIdx { - maxIdx = selIdx - } - } - fromCol.UpdateOffsetsToBeNonDecreasing(maxIdx + 1) + // + // Note that here we rely on the fact that selection vectors are + // increasing sequences. + fromCol.UpdateOffsetsToBeNonDecreasing(sel[len(sel)-1] + 1) // {{else}} - toCol = execgen.SLICE(toCol, 0, args.DestIdx) + // {{/* Here WINDOW means slicing which allows us to use APPENDVAL below. */}} + toCol = execgen.WINDOW(toCol, 0, args.DestIdx) // {{end}} for _, selIdx := range sel { val := fromCol.Get(selIdx) diff --git a/pkg/sql/colexec/execgen/cmd/execgen/data_manipulation_gen.go b/pkg/sql/colexec/execgen/cmd/execgen/data_manipulation_gen.go index 85972971cd18..9de777d4481f 100644 --- a/pkg/sql/colexec/execgen/cmd/execgen/data_manipulation_gen.go +++ b/pkg/sql/colexec/execgen/cmd/execgen/data_manipulation_gen.go @@ -34,11 +34,6 @@ var dataManipulationReplacementInfos = []dataManipulationReplacementInfo{ numArgs: 3, replaceWith: "Set", }, - { - templatePlaceholder: "execgen.SLICE", - numArgs: 3, - replaceWith: "Slice", - }, { templatePlaceholder: "execgen.COPYSLICE", numArgs: 5, diff --git a/pkg/sql/colexec/execgen/cmd/execgen/overloads_base.go b/pkg/sql/colexec/execgen/cmd/execgen/overloads_base.go index a0e48abda7d8..13660df36012 100644 --- a/pkg/sql/colexec/execgen/cmd/execgen/overloads_base.go +++ b/pkg/sql/colexec/execgen/cmd/execgen/overloads_base.go @@ -481,12 +481,12 @@ func (b *argWidthOverloadBase) Set(target, i, new string) string { return set(b.CanonicalTypeFamily, target, i, new) } -// Slice is a function that should only be used in templates. -func (b *argWidthOverloadBase) Slice(target, start, end string) string { +// slice is a function that should only be used in templates. +func (b *argWidthOverloadBase) slice(target, start, end string) string { switch b.CanonicalTypeFamily { case types.BytesFamily: // Bytes vector doesn't support slicing. - colexecerror.InternalError(errors.AssertionFailedf("Slice method is attempted to be generated on Bytes vector")) + colexecerror.InternalError(errors.AssertionFailedf("slice method is attempted to be generated on Bytes vector")) case typeconv.DatumVecCanonicalTypeFamily: return fmt.Sprintf(`%s.Slice(%s, %s)`, target, start, end) } @@ -608,7 +608,7 @@ func (b *argWidthOverloadBase) Window(target, start, end string) string { case types.BytesFamily: return fmt.Sprintf(`%s.Window(%s, %s)`, target, start, end) } - return b.Slice(target, start, end) + return b.slice(target, start, end) } // setVariableSize is a function that should only be used in templates. It @@ -651,7 +651,6 @@ var ( _ = awob.GoTypeSliceName _ = awob.CopyVal _ = awob.Set - _ = awob.Slice _ = awob.Sliceable _ = awob.CopySlice _ = awob.AppendSlice diff --git a/pkg/sql/colexec/execgen/placeholders.go b/pkg/sql/colexec/execgen/placeholders.go index af8602947d45..b475e30f6970 100644 --- a/pkg/sql/colexec/execgen/placeholders.go +++ b/pkg/sql/colexec/execgen/placeholders.go @@ -21,7 +21,6 @@ const nonTemplatePanic = "do not call from non-template code" var ( _ = COPYVAL _ = SET - _ = SLICE _ = COPYSLICE _ = APPENDSLICE _ = APPENDVAL @@ -44,12 +43,6 @@ func SET(target, i, new interface{}) { colexecerror.InternalError(errors.AssertionFailedf(nonTemplatePanic)) } -// SLICE is a template function. -func SLICE(target, start, end interface{}) interface{} { - colexecerror.InternalError(errors.AssertionFailedf(nonTemplatePanic)) - return nil -} - // COPYSLICE is a template function. func COPYSLICE(target, src, destIdx, srcStartIdx, srcEndIdx interface{}) { colexecerror.InternalError(errors.AssertionFailedf(nonTemplatePanic)) diff --git a/pkg/sql/colexec/hashtable.go b/pkg/sql/colexec/hashtable.go index 3a8b410712e8..3da12e667a95 100644 --- a/pkg/sql/colexec/hashtable.go +++ b/pkg/sql/colexec/hashtable.go @@ -477,6 +477,7 @@ func (ht *hashTable) removeDuplicates( // appendAllDistinct appends all tuples from batch to the hash table. It // assumes that all tuples are distinct and that ht.probeScratch.hashBuffer // contains the hash codes for all of them. +// NOTE: batch must be of non-zero length. func (ht *hashTable) appendAllDistinct(ctx context.Context, batch coldata.Batch) { numBuffered := uint64(ht.vals.Length()) ht.allocator.PerformOperation(ht.vals.ColVecs(), func() { diff --git a/pkg/sql/colexec/utils.go b/pkg/sql/colexec/utils.go index ed5379b31d56..2118a3d2f4cf 100644 --- a/pkg/sql/colexec/utils.go +++ b/pkg/sql/colexec/utils.go @@ -206,6 +206,7 @@ func (b *appendOnlyBufferedBatch) ReplaceCol(coldata.Vec, int) { // [startIdx, endIdx) from batch (paying attention to the selection vector) // into b. // NOTE: this does *not* perform memory accounting. +// NOTE: batch must be of non-zero length. func (b *appendOnlyBufferedBatch) append(batch coldata.Batch, startIdx, endIdx int) { for _, colIdx := range b.colsToStore { b.colVecs[colIdx].Append(