Skip to content

Commit

Permalink
Merge #38669
Browse files Browse the repository at this point in the history
38669: exec: Update NULL handling in Vec Copy r=rohany a=rohany

The copy method in Vec previously unset ALL NULL values in the
destination vector, and then proceeded to set the appropriate
NULL values from the source vector, which is incorrect. To address
this problem, this commit adds a UnsetNullRange operation to
the Nulls object. The operation unsets all null values within an
input range, but is unable to smartly/efficiently identify when
the vector no longer contains any null values. The best way
to address this problem can be left to future PR's and discussion.

Depends on #38654. Until that is landed, look at the latest commit to review.


Co-authored-by: Rohan Yadav <rohany@alumni.cmu.edu>
  • Loading branch information
craig[bot] and rohany committed Jul 8, 2019
2 parents 3798cc8 + dd2c287 commit ff57e89
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 33 deletions.
58 changes: 47 additions & 11 deletions pkg/sql/exec/coldata/nulls.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,20 +97,56 @@ func (n *Nulls) SetNullRange(start uint64, end uint64) {
}

// Case where mask spans at least two bytes.
if sIdx < eIdx {
mask := onesMask >> (8 - (start % 8))
n.nulls[sIdx] &= mask
mask := onesMask >> (8 - (start % 8))
n.nulls[sIdx] &= mask

if end%8 == 0 {
n.nulls[eIdx] = 0
} else {
mask = onesMask << (end % 8)
n.nulls[eIdx] &= mask
}
if end%8 == 0 {
n.nulls[eIdx] = 0
} else {
mask = onesMask << (end % 8)
n.nulls[eIdx] &= mask
}

for i := sIdx + 1; i < eIdx; i++ {
n.nulls[i] = 0
for i := sIdx + 1; i < eIdx; i++ {
n.nulls[i] = 0
}
}

// UnsetNullRange unsets all the nulls in the range [start, end).
// As of now, UnsetNullRange does not correctly update hasNulls,
// as it is unclear how to efficiently/smartly perform this check.
// After using UnsetNullRange, n might not contain any null values,
// but hasNulls will still be true.
func (n *Nulls) UnsetNullRange(start, end uint64) {
if start >= end {
return
}

sIdx := start / 8
eIdx := (end - 1) / 8

// Case where mask only spans one byte.
if sIdx == eIdx {
mask := onesMask << (start % 8)
if end%8 != 0 {
mask = mask & (onesMask >> (8 - (end % 8)))
}
n.nulls[sIdx] |= mask
return
}

// Case where mask spans at least two bytes.
mask := onesMask << (start % 8)
n.nulls[sIdx] |= mask
if end%8 == 0 {
n.nulls[eIdx] = onesMask
} else {
mask = onesMask >> (8 - (end % 8))
n.nulls[eIdx] |= mask
}

for i := sIdx + 1; i < eIdx; i++ {
n.nulls[i] = onesMask
}
}

Expand Down
15 changes: 15 additions & 0 deletions pkg/sql/exec/coldata/nulls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,21 @@ func TestSetNullRange(t *testing.T) {
}
}

func TestUnsetNullRange(t *testing.T) {
for _, start := range pos {
for _, end := range pos {
n := NewNulls(BatchSize)
n.SetNulls()
n.UnsetNullRange(start, end)
for i := uint64(0); i < BatchSize; i++ {
notExpected := i >= start && i < end
require.NotEqual(t, notExpected, n.NullAt64(i),
"NullAt(%d) saw %t, expected %t, after SetNullRange(%d, %d)", i, n.NullAt64(i), !notExpected, start, end)
}
}
}
}

func TestNullsTruncate(t *testing.T) {
for _, size := range pos {
n := NewNulls(BatchSize)
Expand Down
54 changes: 54 additions & 0 deletions pkg/sql/exec/coldata/vec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,60 @@ func TestCopy(t *testing.T) {
}
}

func TestCopyNulls(t *testing.T) {
const typ = types.Int64

// Set up the destination vector.
dst := NewMemColumn(typ, BatchSize)
dstInts := dst.Int64()
for i := range dstInts {
dstInts[i] = int64(1)
}
// Set some nulls in the destination vector.
for i := 0; i < 5; i++ {
dst.Nulls().SetNull(uint16(i))
}

// Set up the source vector.
src := NewMemColumn(typ, BatchSize)
srcInts := src.Int64()
for i := range srcInts {
srcInts[i] = 2
}
// Set some nulls in the source.
for i := 3; i < 8; i++ {
src.Nulls().SetNull(uint16(i))
}

copyArgs := CopyArgs{
ColType: typ,
Src: src,
DestIdx: 3,
SrcStartIdx: 3,
SrcEndIdx: 10,
}

dst.Copy(copyArgs)

// Verify that original nulls aren't deleted, and that
// the nulls in the source have been copied over.
for i := 0; i < 8; i++ {
require.True(t, dst.Nulls().NullAt(uint16(i)), "expected null at %d, found not null", i)
}

// Verify that the data from src has been copied over.
for i := 8; i < 10; i++ {
require.True(t, dstInts[i] == 2, "data from src was not copied over")
require.True(t, !dst.Nulls().NullAt(uint16(i)), "no extra nulls were added")
}

// Verify that the remaining elements in dst have not been touched.
for i := 10; i < BatchSize; i++ {
require.True(t, dstInts[i] == 1, "data in dst outside copy range has been changed")
require.True(t, !dst.Nulls().NullAt(uint16(i)), "no extra nulls were added")
}
}

func BenchmarkAppend(b *testing.B) {
const typ = types.Int64

Expand Down
30 changes: 8 additions & 22 deletions pkg/sql/exec/coldata/vec_tmpl.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,11 @@ func (m *memColumn) Append(args AppendArgs) {
}

func (m *memColumn) Copy(args CopyArgs) {
if args.DestIdx != 0 && m.HasNulls() {
panic("copying to non-zero dest index with nulls is not implemented yet (would overwrite nulls)")
}
if args.Nils != nil && args.Sel64 == nil {
panic("Nils set without Sel64")
}
// TODO(asubiotto): This is extremely wrong (we might be overwriting nulls
// past the end of where we are copying to that should be left alone).
// Previous code did this though so we won't be introducing new problems. We
// really have to fix and test this.
m.Nulls().UnsetNulls()

m.Nulls().UnsetNullRange(args.DestIdx, args.DestIdx+(args.SrcEndIdx-args.SrcStartIdx))

switch args.ColType {
// {{range .}}
Expand Down Expand Up @@ -152,21 +146,13 @@ func (m *memColumn) Copy(args CopyArgs) {
}
// No Sel or Sel64.
copy(toCol[args.DestIdx:], fromCol[args.SrcStartIdx:args.SrcEndIdx])
// We do not check for existence of nulls in m due to forcibly unsetting
// the bitmap at the start.
if args.Src.HasNulls() {
m.nulls.hasNulls = true
if args.DestIdx == 0 && args.SrcStartIdx == 0 {
// We can copy this bitmap indiscriminately.
copy(m.nulls.nulls, args.Src.Nulls().NullBitmap())
} else {
// TODO(asubiotto): This should use Extend but Extend only takes uint16
// arguments.
srcNulls := args.Src.Nulls()
for curDestIdx, curSrcIdx := args.DestIdx, args.SrcStartIdx; curSrcIdx < args.SrcEndIdx; curDestIdx, curSrcIdx = curDestIdx+1, curSrcIdx+1 {
if srcNulls.NullAt64(curSrcIdx) {
m.nulls.SetNull64(curDestIdx)
}
// TODO(asubiotto): This should use Extend but Extend only takes uint16
// arguments.
srcNulls := args.Src.Nulls()
for curDestIdx, curSrcIdx := args.DestIdx, args.SrcStartIdx; curSrcIdx < args.SrcEndIdx; curDestIdx, curSrcIdx = curDestIdx+1, curSrcIdx+1 {
if srcNulls.NullAt64(curSrcIdx) {
m.nulls.SetNull64(curDestIdx)
}
}
}
Expand Down

0 comments on commit ff57e89

Please sign in to comment.