-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
batch.go
181 lines (160 loc) · 5.43 KB
/
batch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package coldata
import (
"fmt"
"math"
"github.com/cockroachdb/cockroach/pkg/col/coltypes"
)
// Batch is the type that columnar operators receive and produce. It
// represents a set of column vectors (partial data columns) as well as
// metadata about a batch, like the selection vector (which rows in the column
// batch are selected).
type Batch interface {
// Length returns the number of values in the columns in the batch.
Length() uint16
// SetLength sets the number of values in the columns in the batch.
SetLength(uint16)
// Width returns the number of columns in the batch.
Width() int
// ColVec returns the ith Vec in this batch.
ColVec(i int) Vec
// ColVecs returns all of the underlying Vecs in this batch.
ColVecs() []Vec
// Selection returns the selection vector on this batch, or nil if the batch
// is not using one. A non-nil selection vector is a densely-packed list of
// the indices in each column that have not been filtered out by a previous
// step.
Selection() []uint16
// SetSelection sets whether this batch is using its selection vector or not.
// It only toggles the flag; the contents of the selection vector are the
// caller's responsibility.
SetSelection(bool)
// AppendCol appends a Vec with the specified type to this batch.
AppendCol(coltypes.T)
// Reset modifies the caller in-place to have the given length and columns
// with the given coltypes. If it's possible, Reset will reuse the existing
// columns and allocations, invalidating existing references to the Batch or
// its Vecs. However, Reset does _not_ zero out the column data.
//
// NOTE(review): length is an int here while Length/SetLength use uint16;
// the in-memory implementation converts with uint16(length), so callers are
// presumably expected to stay within the uint16 range - confirm.
Reset(types []coltypes.T, length int)
// ResetInternalBatch resets a batch and its underlying Vecs for reuse. It's
// important for callers to call ResetInternalBatch if they own internal
// batches that they reuse, as not doing this could result in correctness
// or memory blowup issues.
ResetInternalBatch()
}
// Compile-time check that *MemBatch satisfies the Batch interface.
var _ Batch = (*MemBatch)(nil)

// BatchSize is the maximum number of tuples that fit in a column batch.
// TODO(jordan): tune
const BatchSize = 1024
// NewMemBatch allocates a new in-memory Batch with one column per entry in
// types, each sized to hold BatchSize tuples.
// TODO(jordan): pool these allocations.
func NewMemBatch(types []coltypes.T) Batch {
return NewMemBatchWithSize(types, BatchSize)
}
// NewMemBatchWithSize allocates a new in-memory Batch in which every column,
// as well as the selection vector, is created with the given size. Use it for
// operators that have a precisely-sized output batch.
//
// It panics if size is larger than math.MaxUint16. The returned batch has a
// length of zero and its selection vector is not in use.
func NewMemBatchWithSize(types []coltypes.T, size int) Batch {
	if size > math.MaxUint16 {
		panic(fmt.Sprintf(`batches cannot have length larger than %d; requested %d`, math.MaxUint16, size))
	}
	// One column per requested type, all sharing the same size.
	cols := make([]Vec, len(types))
	for idx := range types {
		cols[idx] = NewMemColumn(types[idx], size)
	}
	return &MemBatch{
		b:   cols,
		sel: make([]uint16, size),
	}
}
// MemBatch is an in-memory implementation of Batch.
type MemBatch struct {
// n is the length of the batch (or of sel, when it is in use) in tuples.
// It is written by SetLength and read by Length.
n uint16
// b is the slice of columns in this batch.
b []Vec
// useSel records whether sel is in use; Selection returns nil while it is
// false.
useSel bool
// sel is the selection vector from upstream, meaningful only if useSel is
// true. A selection vector is a list of selected column indexes in this
// MemBatch's columns.
sel []uint16
}
// Length implements the Batch interface. It returns the length most recently
// stored with SetLength.
func (m *MemBatch) Length() uint16 {
return m.n
}
// Width implements the Batch interface. It returns the number of columns
// currently held by the batch.
func (m *MemBatch) Width() int {
return len(m.b)
}
// ColVec implements the Batch interface. It returns the column at index i and
// panics if i is outside [0, Width()).
func (m *MemBatch) ColVec(i int) Vec {
return m.b[i]
}
// ColVecs implements the Batch interface. The returned slice is the batch's
// own column slice, not a copy.
func (m *MemBatch) ColVecs() []Vec {
return m.b
}
// Selection implements the Batch interface. The selection vector is handed
// out only while the batch is flagged as using it; otherwise nil is returned.
func (m *MemBatch) Selection() []uint16 {
	if m.useSel {
		return m.sel
	}
	return nil
}
// SetSelection implements the Batch interface. It only flips the flag that
// controls whether Selection returns the selection vector; the vector's
// contents are left untouched.
func (m *MemBatch) SetSelection(b bool) {
m.useSel = b
}
// SetLength implements the Batch interface. It records n as the batch length
// without resizing or otherwise touching the columns.
func (m *MemBatch) SetLength(n uint16) {
m.n = n
}
// AppendCol implements the Batch interface. It allocates a new column of type
// t and appends it after the existing columns.
//
// NOTE(review): the new column is always sized BatchSize, even if the batch
// was created by NewMemBatchWithSize with a different size - confirm that
// callers never rely on appended columns matching a larger batch capacity.
func (m *MemBatch) AppendCol(t coltypes.T) {
m.b = append(m.b, NewMemColumn(t, BatchSize))
}
// Reset implements the Batch interface. It modifies m in place so that it has
// the given length and columns of the given types.
//
// The existing allocations are reused when all of the following hold:
//   - the batch has capacity for length tuples,
//   - the batch has at least len(types) columns, and
//   - the first len(types) columns already have the requested types.
//
// On reuse the selection vector is disabled, columns beyond len(types) are
// dropped, and the nulls (plus the contents of Bytes columns) of the
// remaining columns are reset; other column data is not zeroed. When reuse is
// not possible, m is overwritten with a freshly allocated batch.
//
// m must not be nil.
func (m *MemBatch) Reset(types []coltypes.T, length int) {
	// NewMemBatchWithSize allocates every column with the same size as the
	// selection vector, so the selection vector's capacity is used as a
	// shortcut for the capacity of the batch. It must be cap and not len: a
	// previous Reset may have shrunk m.sel (like a go slice, the batch's
	// `Length` can be shorter than its capacity), and the columns are still
	// large enough to grow back to the original allocation. We could be more
	// defensive and type switch every column to verify its capacity, but that
	// doesn't seem necessary yet.
	reusable := cap(m.sel) >= length && m.Width() >= len(types)
	for i := 0; reusable && i < len(types); i++ {
		reusable = m.ColVec(i).Type() == types[i]
	}
	if !reusable {
		*m = *NewMemBatchWithSize(types, length).(*MemBatch)
		m.SetLength(uint16(length))
		return
	}
	// Yay! We can reuse m. NB It's not specified in the Reset contract, but
	// probably a good idea to keep all modifications below this line.
	m.SetLength(uint16(length))
	m.SetSelection(false)
	// Reslicing may extend m.sel again, up to its capacity.
	m.sel = m.sel[:length]
	m.b = m.b[:len(types)]
	for _, col := range m.ColVecs() {
		col.Nulls().UnsetNulls()
		if col.Type() == coltypes.Bytes {
			col.Bytes().Reset()
		}
	}
}
// ResetInternalBatch implements the Batch interface. It stops using the
// selection vector and, for every column, unsets its nulls; Bytes columns
// additionally have their contents reset. The batch length is left as is.
func (m *MemBatch) ResetInternalBatch() {
	m.useSel = false
	for idx := range m.b {
		vec := m.b[idx]
		vec.Nulls().UnsetNulls()
		if vec.Type() != coltypes.Bytes {
			continue
		}
		vec.Bytes().Reset()
	}
}