diff --git a/pkg/col/coldata/vec.go b/pkg/col/coldata/vec.go index 7405631d51a5..824eae30cb10 100644 --- a/pkg/col/coldata/vec.go +++ b/pkg/col/coldata/vec.go @@ -12,6 +12,7 @@ package coldata import ( "fmt" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coltypes" @@ -76,6 +77,8 @@ type Vec interface { // TODO(jordan): should this be [][]byte? // Decimal returns an apd.Decimal slice. Decimal() []apd.Decimal + // Timestamp returns a time.Time slice. + Timestamp() []time.Time // Col returns the raw, typeless backing storage for this Vec. Col() interface{} @@ -151,6 +154,8 @@ func NewMemColumn(t coltypes.T, n int) Vec { return &memColumn{t: t, col: make([]float64, n), nulls: nulls} case coltypes.Decimal: return &memColumn{t: t, col: make([]apd.Decimal, n), nulls: nulls} + case coltypes.Timestamp: + return &memColumn{t: t, col: make([]time.Time, n), nulls: nulls} default: panic(fmt.Sprintf("unhandled type %s", t)) } @@ -192,6 +197,10 @@ func (m *memColumn) Decimal() []apd.Decimal { return m.col.([]apd.Decimal) } +func (m *memColumn) Timestamp() []time.Time { + return m.col.([]time.Time) +} + func (m *memColumn) Col() interface{} { return m.col } diff --git a/pkg/col/coldata/vec_tmpl.go b/pkg/col/coldata/vec_tmpl.go index 141dcd7d535c..d81ce6482b99 100644 --- a/pkg/col/coldata/vec_tmpl.go +++ b/pkg/col/coldata/vec_tmpl.go @@ -21,6 +21,7 @@ package coldata import ( "fmt" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coltypes" @@ -45,6 +46,9 @@ type _GOTYPESLICE interface{} // Dummy import to pull in "apd" package. var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + // */}} func (m *memColumn) Append(args SliceArgs) { diff --git a/pkg/col/colserde/arrowbatchconverter.go b/pkg/col/colserde/arrowbatchconverter.go index 7dd021f53f07..1aaf1a68b094 100644 --- a/pkg/col/colserde/arrowbatchconverter.go +++ b/pkg/col/colserde/arrowbatchconverter.go @@ -15,6 +15,7 @@ import ( "reflect" "unsafe" + "github.com/apache/arrow/go/arrow" "github.com/apache/arrow/go/arrow/array" "github.com/apache/arrow/go/arrow/memory" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -33,6 +34,9 @@ type ArrowBatchConverter struct { builders struct { // boolBuilder builds arrow bool columns as a bitmap from a bool slice. boolBuilder *array.BooleanBuilder + // binaryBuilder builds arrow []byte columns as one []byte slice with + // accompanying offsets from a [][]byte slice. + binaryBuilder *array.BinaryBuilder } scratch struct { @@ -56,11 +60,13 @@ func NewArrowBatchConverter(typs []coltypes.T) (*ArrowBatchConverter, error) { } c := &ArrowBatchConverter{typs: typs} c.builders.boolBuilder = array.NewBooleanBuilder(memory.DefaultAllocator) + c.builders.binaryBuilder = array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) c.scratch.arrowData = make([]*array.Data, len(typs)) c.scratch.buffers = make([][]*memory.Buffer, len(typs)) for i := range c.scratch.buffers { // Most types need only two buffers: one for the nulls, and one for the - // values, but some type (i.e. Bytes) need an extra buffer for the offsets. + // values, but some types (i.e. Bytes) need an extra buffer for the + // offsets. 
c.scratch.buffers[i] = make([]*memory.Buffer, 0, 3) } return c, nil @@ -82,6 +88,7 @@ var supportedTypes = func() map[coltypes.T]struct{} { coltypes.Int32, coltypes.Int64, coltypes.Float64, + coltypes.Timestamp, } { typs[t] = struct{}{} } @@ -108,11 +115,28 @@ func (c *ArrowBatchConverter) BatchToArrow(batch coldata.Batch) ([]*array.Data, arrowBitmap = n.NullBitmap() } - if typ == coltypes.Bool { - // Bools are handled differently from other coltypes. Refer to the - // comment on ArrowBatchConverter.builders for more information. - c.builders.boolBuilder.AppendValues(vec.Bool()[:n], nil /* valid */) - data := c.builders.boolBuilder.NewBooleanArray().Data() + if typ == coltypes.Bool || typ == coltypes.Timestamp { + // Bools and Timestamps are handled differently from other coltypes. + // Refer to the comment on ArrowBatchConverter.builders for more + // information. + var data *array.Data + switch typ { + case coltypes.Bool: + c.builders.boolBuilder.AppendValues(vec.Bool()[:n], nil /* valid */) + data = c.builders.boolBuilder.NewBooleanArray().Data() + case coltypes.Timestamp: + timestamps := vec.Timestamp()[:n] + for _, ts := range timestamps { + marshaled, err := ts.MarshalBinary() + if err != nil { + return nil, err + } + c.builders.binaryBuilder.Append(marshaled) + } + data = c.builders.binaryBuilder.NewBinaryArray().Data() + default: + panic(fmt.Sprintf("unexpected type %s", typ)) + } if arrowBitmap != nil { // Overwrite empty null bitmap with the true bitmap. data.Buffers()[0] = memory.NewBufferBytes(arrowBitmap) @@ -222,7 +246,7 @@ func (c *ArrowBatchConverter) ArrowToBatch(data []*array.Data, b coldata.Batch) d := data[i] var arr array.Interface - if typ == coltypes.Bool || typ == coltypes.Bytes { + if typ == coltypes.Bool || typ == coltypes.Bytes || typ == coltypes.Timestamp { switch typ { case coltypes.Bool: boolArr := array.NewBooleanData(d) @@ -242,6 +266,25 @@ func (c *ArrowBatchConverter) ArrowToBatch(data []*array.Data, b coldata.Batch) } coldata.BytesFromArrowSerializationFormat(vec.Bytes(), bytes, bytesArr.ValueOffsets()) arr = bytesArr + case coltypes.Timestamp: + // TODO(yuzefovich): this serialization is quite inefficient - improve + // it. + bytesArr := array.NewBinaryData(d) + bytes := bytesArr.ValueBytes() + if bytes == nil { + // All bytes values are empty, so the representation is solely with the + // offsets slice, so create an empty slice so that the conversion + // corresponds. + bytes = make([]byte, 0) + } + offsets := bytesArr.ValueOffsets() + vecArr := vec.Timestamp() + for i := 0; i < len(offsets)-1; i++ { + if err := vecArr[i].UnmarshalBinary(bytes[offsets[i]:offsets[i+1]]); err != nil { + return err + } + } + arr = bytesArr default: panic(fmt.Sprintf("unexpected type %s", typ)) } diff --git a/pkg/col/colserde/arrowbatchconverter_test.go b/pkg/col/colserde/arrowbatchconverter_test.go index 3fed44ca10d7..f216a0cd3859 100644 --- a/pkg/col/colserde/arrowbatchconverter_test.go +++ b/pkg/col/colserde/arrowbatchconverter_test.go @@ -30,8 +30,8 @@ func randomBatch() ([]coltypes.T, coldata.Batch) { availableTyps := make([]coltypes.T, 0, len(coltypes.AllTypes)) for _, typ := range coltypes.AllTypes { - // TODO(asubiotto): We do not support decimal conversion yet. - if typ == coltypes.Decimal { + // TODO(asubiotto,jordan): We do not support decimal, timestamp conversion yet. 
+ if typ == coltypes.Decimal || typ == coltypes.Timestamp { continue } availableTyps = append(availableTyps, typ) @@ -119,9 +119,11 @@ func assertEqualBatches(t *testing.T, expected, actual coldata.Batch) { func TestArrowBatchConverterRejectsUnsupportedTypes(t *testing.T) { defer leaktest.AfterTest(t)() - typs := []coltypes.T{coltypes.Decimal} - _, err := NewArrowBatchConverter(typs) - require.Error(t, err) + unsupportedTypes := []coltypes.T{coltypes.Decimal} + for _, typ := range unsupportedTypes { + _, err := NewArrowBatchConverter([]coltypes.T{typ}) + require.Error(t, err) + } } func TestArrowBatchConverterRandom(t *testing.T) { @@ -196,11 +198,21 @@ func BenchmarkArrowBatchConverter(b *testing.B) { rng, _ := randutil.NewPseudoRand() - typs := []coltypes.T{coltypes.Bool, coltypes.Bytes, coltypes.Int64} + typs := []coltypes.T{ + coltypes.Bool, + coltypes.Bytes, + coltypes.Int64, + coltypes.Timestamp, + } // numBytes corresponds 1:1 to typs and specifies how many bytes we are // converting on one iteration of the benchmark for the corresponding type in // typs. - numBytes := []int64{int64(coldata.BatchSize()), fixedLen * int64(coldata.BatchSize()), 8 * int64(coldata.BatchSize())} + numBytes := []int64{ + int64(coldata.BatchSize()), + fixedLen * int64(coldata.BatchSize()), + 8 * int64(coldata.BatchSize()), + 3 * 8 * int64(coldata.BatchSize()), + } // Run a benchmark on every type we care about. for typIdx, typ := range typs { batch := colexec.RandomBatch(rng, []coltypes.T{typ}, int(coldata.BatchSize()), 0 /* length */, 0 /* nullProbability */) diff --git a/pkg/col/colserde/record_batch.go b/pkg/col/colserde/record_batch.go index 1253a7929306..9eef869cb4f9 100644 --- a/pkg/col/colserde/record_batch.go +++ b/pkg/col/colserde/record_batch.go @@ -36,7 +36,7 @@ func numBuffersForType(t coltypes.T) int { // null bitmap and one for the values. numBuffers := 2 switch t { - case coltypes.Bytes: + case coltypes.Bytes, coltypes.Timestamp: // This type has an extra offsets buffer. numBuffers = 3 } diff --git a/pkg/col/colserde/record_batch_test.go b/pkg/col/colserde/record_batch_test.go index dfd61635e672..a70a1550e45b 100644 --- a/pkg/col/colserde/record_batch_test.go +++ b/pkg/col/colserde/record_batch_test.go @@ -17,6 +17,7 @@ import ( "math/rand" "strings" "testing" + "time" "github.com/apache/arrow/go/arrow" "github.com/apache/arrow/go/arrow/array" @@ -26,6 +27,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/randutil" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/stretchr/testify/require" ) @@ -132,6 +134,20 @@ func randomDataFromType(rng *rand.Rand, t coltypes.T, n int, nullProbability flo } builder.(*array.FixedSizeBinaryBuilder).AppendValues(data, valid) } + case coltypes.Timestamp: + var err error + now := timeutil.Now() + builder = array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) + data := make([][]byte, n) + for i := range data { + delta := rng.Int63() + ts := now.Add(time.Duration(delta)) + data[i], err = ts.MarshalBinary() + if err != nil { + panic(err) + } + } + builder.(*array.BinaryBuilder).AppendValues(data, valid) default: panic(fmt.Sprintf("unsupported type %s", t)) } @@ -180,7 +196,7 @@ func TestRecordBatchSerializerSerializeDeserializeRandom(t *testing.T) { buf = bytes.Buffer{} ) - // We do not support decimals yet. + // We do not support decimals. 
for _, t := range coltypes.AllTypes { if t == coltypes.Decimal { continue diff --git a/pkg/col/coltypes/t_string.go b/pkg/col/coltypes/t_string.go index ed520b42be55..641a18a99df7 100644 --- a/pkg/col/coltypes/t_string.go +++ b/pkg/col/coltypes/t_string.go @@ -15,12 +15,13 @@ func _() { _ = x[Int32-4] _ = x[Int64-5] _ = x[Float64-6] - _ = x[Unhandled-7] + _ = x[Timestamp-7] + _ = x[Unhandled-8] } -const _T_name = "BoolBytesDecimalInt16Int32Int64Float64Unhandled" +const _T_name = "BoolBytesDecimalInt16Int32Int64Float64TimestampUnhandled" -var _T_index = [...]uint8{0, 4, 9, 16, 21, 26, 31, 38, 47} +var _T_index = [...]uint8{0, 4, 9, 16, 21, 26, 31, 38, 47, 56} func (i T) String() string { if i < 0 || i >= T(len(_T_index)-1) { diff --git a/pkg/col/coltypes/types.go b/pkg/col/coltypes/types.go index ca0feaa4ac96..d7dab0713cfe 100644 --- a/pkg/col/coltypes/types.go +++ b/pkg/col/coltypes/types.go @@ -14,6 +14,7 @@ import ( "fmt" "strings" "text/template" + "time" "github.com/cockroachdb/apd" ) @@ -38,6 +39,8 @@ const ( Int64 // Float64 is a column of type float64 Float64 + // Timestamp is a column of type time.Time + Timestamp // Unhandled is a temporary value that represents an unhandled type. // TODO(jordan): this should be replaced by a panic once all types are @@ -74,6 +77,7 @@ func init() { CompatibleTypes[Int32] = append(CompatibleTypes[Int32], NumberTypes...) CompatibleTypes[Int64] = append(CompatibleTypes[Int64], NumberTypes...) CompatibleTypes[Float64] = append(CompatibleTypes[Float64], NumberTypes...) + CompatibleTypes[Timestamp] = append(CompatibleTypes[Timestamp], Timestamp) } // FromGoType returns the type for a Go value, if applicable. Shouldn't be used at @@ -96,6 +100,8 @@ func FromGoType(v interface{}) T { return Bytes case apd.Decimal: return Decimal + case time.Time: + return Timestamp default: panic(fmt.Sprintf("type %T not supported yet", t)) } @@ -118,6 +124,8 @@ func (t T) GoTypeName() string { return "int64" case Float64: return "float64" + case Timestamp: + return "time.Time" default: panic(fmt.Sprintf("unhandled type %d", t)) } diff --git a/pkg/sql/colencoding/key_encoding.go b/pkg/sql/colencoding/key_encoding.go index bfa6f8c49358..f859c932d839 100644 --- a/pkg/sql/colencoding/key_encoding.go +++ b/pkg/sql/colencoding/key_encoding.go @@ -11,6 +11,8 @@ package colencoding import ( + "time" + "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" "github.com/cockroachdb/cockroach/pkg/roachpb" @@ -225,6 +227,14 @@ func decodeTableKeyToCol( rkey, t, err = encoding.DecodeVarintDescending(key) } vec.Int64()[idx] = t + case types.TimestampFamily: + var t time.Time + if dir == sqlbase.IndexDescriptor_ASC { + rkey, t, err = encoding.DecodeTimeAscending(key) + } else { + rkey, t, err = encoding.DecodeTimeDescending(key) + } + vec.Timestamp()[idx] = t default: return rkey, errors.AssertionFailedf("unsupported type %+v", log.Safe(valType)) } diff --git a/pkg/sql/colencoding/value_encoding.go b/pkg/sql/colencoding/value_encoding.go index b3d743734e83..a425f93aed64 100644 --- a/pkg/sql/colencoding/value_encoding.go +++ b/pkg/sql/colencoding/value_encoding.go @@ -11,6 +11,8 @@ package colencoding import ( + "time" + "github.com/cockroachdb/cockroach/pkg/col/coldata" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/encoding" @@ -91,6 +93,10 @@ func decodeUntaggedDatumToCol(vec coldata.Vec, idx uint16, t *types.T, buf []byt if err == nil { vec.Bytes().Set(int(idx), data.GetBytes()) } + case types.TimestampFamily: + 
var t time.Time + buf, t, err = encoding.DecodeUntaggedTimeValue(buf) + vec.Timestamp()[idx] = t default: return buf, errors.AssertionFailedf( "couldn't decode type: %s", log.Safe(t)) diff --git a/pkg/sql/colexec/any_not_null_agg_tmpl.go b/pkg/sql/colexec/any_not_null_agg_tmpl.go index 33a1c9024f12..edad3b7d3932 100644 --- a/pkg/sql/colexec/any_not_null_agg_tmpl.go +++ b/pkg/sql/colexec/any_not_null_agg_tmpl.go @@ -20,6 +20,8 @@ package colexec import ( + "time" + "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" "github.com/cockroachdb/cockroach/pkg/col/coltypes" @@ -45,6 +47,9 @@ func newAnyNotNullAgg(t coltypes.T) (aggregateFunc, error) { // Dummy import to pull in "apd" package. var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + // _GOTYPESLICE is the template Go type slice variable for this operator. It // will be replaced by the Go slice representation for each type in coltypes.T, for // example []int64 for coltypes.Int64. diff --git a/pkg/sql/colexec/builtin_funcs.go b/pkg/sql/colexec/builtin_funcs.go index af233fa34066..23596d44e4c0 100644 --- a/pkg/sql/colexec/builtin_funcs.go +++ b/pkg/sql/colexec/builtin_funcs.go @@ -63,7 +63,7 @@ func (b *defaultBuiltinFuncOperator) Next(ctx context.Context) coldata.Batch { for j := range b.argumentCols { col := batch.ColVec(b.argumentCols[j]) - b.row[j] = PhysicalTypeColElemToDatum(col, rowIdx, b.da, b.columnTypes[b.argumentCols[j]]) + b.row[j] = PhysicalTypeColElemToDatum(col, rowIdx, b.da, &b.columnTypes[b.argumentCols[j]]) hasNulls = hasNulls || b.row[j] == tree.DNull } diff --git a/pkg/sql/colexec/cfetcher.go b/pkg/sql/colexec/cfetcher.go index 5e21f1a7c920..e09977a4097c 100644 --- a/pkg/sql/colexec/cfetcher.go +++ b/pkg/sql/colexec/cfetcher.go @@ -788,7 +788,7 @@ func (rf *cFetcher) pushState(state fetcherState) { // getDatumAt returns the converted datum object at the given (colIdx, rowIdx). // This function is meant for tracing and should not be used in hot paths. func (rf *cFetcher) getDatumAt(colIdx int, rowIdx uint16, typ types.T) tree.Datum { - return PhysicalTypeColElemToDatum(rf.machine.colvecs[colIdx], rowIdx, rf.table.da, typ) + return PhysicalTypeColElemToDatum(rf.machine.colvecs[colIdx], rowIdx, rf.table.da, &typ) } // processValue processes the state machine's current value component, setting diff --git a/pkg/sql/colexec/const_tmpl.go b/pkg/sql/colexec/const_tmpl.go index 9150086c008b..38c80241a931 100644 --- a/pkg/sql/colexec/const_tmpl.go +++ b/pkg/sql/colexec/const_tmpl.go @@ -21,6 +21,7 @@ package colexec import ( "context" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -36,6 +37,9 @@ import ( // Dummy import to pull in "apd" package. var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + // _TYPES_T is the template type variable for coltypes.T. It will be replaced by // coltypes.Foo for each type Foo in the coltypes.T type. const _TYPES_T = coltypes.Unhandled diff --git a/pkg/sql/colexec/distinct_tmpl.go b/pkg/sql/colexec/distinct_tmpl.go index 6b42d4006d60..9e9f36fb83ea 100644 --- a/pkg/sql/colexec/distinct_tmpl.go +++ b/pkg/sql/colexec/distinct_tmpl.go @@ -23,6 +23,7 @@ import ( "bytes" "context" "math" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -104,6 +105,9 @@ var _ bytes.Buffer // Dummy import to pull in "apd" package. var _ apd.Decimal +// Dummy import to pull in "time" package. 
+var _ time.Time + // Dummy import to pull in "tree" package. var _ tree.Datum diff --git a/pkg/sql/colexec/execgen/cmd/execgen/overloads.go b/pkg/sql/colexec/execgen/cmd/execgen/overloads.go index 2c5e9d877453..7a1fa1ffe95f 100644 --- a/pkg/sql/colexec/execgen/cmd/execgen/overloads.go +++ b/pkg/sql/colexec/execgen/cmd/execgen/overloads.go @@ -537,6 +537,9 @@ type floatIntCustomizer struct{} // side and a float right-hand side. type intFloatCustomizer struct{} +// timestampCustomizer is necessary since time.Time doesn't have infix operators. +type timestampCustomizer struct{} + func (boolCustomizer) getCmpOpCompareFunc() compareFunc { return func(target, l, r string) string { args := map[string]string{"Target": target, "Left": l, "Right": r} @@ -997,11 +1000,42 @@ func (c intFloatCustomizer) getCmpOpCompareFunc() compareFunc { return getFloatCmpOpCompareFunc(false /* checkLeftNan */, true /* checkRightNan */) } +func (c timestampCustomizer) getCmpOpCompareFunc() compareFunc { + return func(target, l, r string) string { + args := map[string]string{"Target": target, "Left": l, "Right": r} + buf := strings.Builder{} + // Inline the code from tree.compareTimestamps. + t := template.Must(template.New("").Parse(` + if {{.Left}}.Before({{.Right}}) { + {{.Target}} = -1 + } else if {{.Right}}.Before({{.Left}}) { + {{.Target}} = 1 + } else { + {{.Target}} = 0 + }`)) + + if err := t.Execute(&buf, args); err != nil { + execerror.VectorizedInternalPanic(err) + } + return buf.String() + } +} + +func (timestampCustomizer) getHashAssignFunc() assignFunc { + return func(op overload, target, v, _ string) string { + return fmt.Sprintf(` + s := %[2]s.UnixNano() + %[1]s = memhash64(noescape(unsafe.Pointer(&s)), %[1]s) + `, target, v) + } +} + func registerTypeCustomizers() { typeCustomizers = make(map[coltypePair]typeCustomizer) registerTypeCustomizer(coltypePair{coltypes.Bool, coltypes.Bool}, boolCustomizer{}) registerTypeCustomizer(coltypePair{coltypes.Bytes, coltypes.Bytes}, bytesCustomizer{}) registerTypeCustomizer(coltypePair{coltypes.Decimal, coltypes.Decimal}, decimalCustomizer{}) + registerTypeCustomizer(coltypePair{coltypes.Timestamp, coltypes.Timestamp}, timestampCustomizer{}) for _, leftFloatType := range coltypes.FloatTypes { for _, rightFloatType := range coltypes.FloatTypes { registerTypeCustomizer(coltypePair{leftFloatType, rightFloatType}, floatCustomizer{width: 64}) diff --git a/pkg/sql/colexec/execgen/cmd/execgen/overloads_test_utils_gen.go b/pkg/sql/colexec/execgen/cmd/execgen/overloads_test_utils_gen.go index f700b7435eea..b6038b74d0b3 100644 --- a/pkg/sql/colexec/execgen/cmd/execgen/overloads_test_utils_gen.go +++ b/pkg/sql/colexec/execgen/cmd/execgen/overloads_test_utils_gen.go @@ -21,6 +21,7 @@ package colexec import ( "bytes" "math" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/sql/colexec/execerror" diff --git a/pkg/sql/colexec/materializer.go b/pkg/sql/colexec/materializer.go index 7658811812b2..4efbc846f4c1 100644 --- a/pkg/sql/colexec/materializer.go +++ b/pkg/sql/colexec/materializer.go @@ -165,7 +165,7 @@ func (m *Materializer) next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadat typs := m.OutputTypes() for colIdx := 0; colIdx < len(typs); colIdx++ { col := m.batch.ColVec(colIdx) - m.row[colIdx].Datum = PhysicalTypeColElemToDatum(col, rowIdx, m.da, typs[colIdx]) + m.row[colIdx].Datum = PhysicalTypeColElemToDatum(col, rowIdx, m.da, &typs[colIdx]) } return m.ProcessRowHelper(m.row), nil } diff --git a/pkg/sql/colexec/mem_estimation.go 
b/pkg/sql/colexec/mem_estimation.go index 501f9ba88b94..b1d11ecbac78 100644 --- a/pkg/sql/colexec/mem_estimation.go +++ b/pkg/sql/colexec/mem_estimation.go @@ -12,6 +12,7 @@ package colexec import ( "fmt" + "time" "unsafe" "github.com/cockroachdb/cockroach/pkg/col/coltypes" @@ -24,6 +25,7 @@ const ( sizeOfInt32 = int(unsafe.Sizeof(int32(0))) sizeOfInt64 = int(unsafe.Sizeof(int64(0))) sizeOfFloat64 = int(unsafe.Sizeof(float64(0))) + sizeOfTime = int(unsafe.Sizeof(time.Time{})) ) // EstimateBatchSizeBytes returns an estimated amount of bytes needed to @@ -55,6 +57,15 @@ func EstimateBatchSizeBytes(vecTypes []coltypes.T, batchLength int) int { // Similar to byte arrays, we can't tell how much space is used // to hold the arbitrary precision decimal objects. acc += 50 + case coltypes.Timestamp: + // time.Time consists of two 64 bit integers and a pointer to + // time.Location. We will only account for these three fields without paying + // attention to the full time.Location struct. The reason is that it is + // likely that time.Location's are cached and are shared among all the + // timestamps, so if we were to include that in the estimation, we would + // significantly overestimate. + // TODO(yuzefovich): figure out whether the caching does take place. + acc += sizeOfTime default: execerror.VectorizedInternalPanic(fmt.Sprintf("unhandled type %s", t)) } diff --git a/pkg/sql/colexec/mergejoinbase_tmpl.go b/pkg/sql/colexec/mergejoinbase_tmpl.go index 0fdb10ac21df..1d00007846f4 100644 --- a/pkg/sql/colexec/mergejoinbase_tmpl.go +++ b/pkg/sql/colexec/mergejoinbase_tmpl.go @@ -23,6 +23,7 @@ import ( "bytes" "fmt" "math" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -44,6 +45,9 @@ var _ tree.Datum // Dummy import to pull in "apd" package. var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + // Dummy import to pull in "math" package. var _ = math.MaxInt64 diff --git a/pkg/sql/colexec/mergejoiner_tmpl.go b/pkg/sql/colexec/mergejoiner_tmpl.go index bc3754d08032..3570469ad094 100644 --- a/pkg/sql/colexec/mergejoiner_tmpl.go +++ b/pkg/sql/colexec/mergejoiner_tmpl.go @@ -24,6 +24,7 @@ import ( "context" "fmt" "math" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -46,6 +47,9 @@ var _ tree.Datum // Dummy import to pull in "apd" package. var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + // Dummy import to pull in "math" package. var _ = math.MaxInt64 diff --git a/pkg/sql/colexec/min_max_agg_tmpl.go b/pkg/sql/colexec/min_max_agg_tmpl.go index bb5ee3007d1c..47e7300dff03 100644 --- a/pkg/sql/colexec/min_max_agg_tmpl.go +++ b/pkg/sql/colexec/min_max_agg_tmpl.go @@ -22,6 +22,7 @@ package colexec import ( "bytes" "math" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -43,6 +44,9 @@ var _ bytes.Buffer // Dummy import to pull in "apd" package. var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + // Dummy import to pull in "tree" package. var _ tree.Datum diff --git a/pkg/sql/colexec/proj_const_ops_tmpl.go b/pkg/sql/colexec/proj_const_ops_tmpl.go index 46e6cd6798b1..d947367a154c 100644 --- a/pkg/sql/colexec/proj_const_ops_tmpl.go +++ b/pkg/sql/colexec/proj_const_ops_tmpl.go @@ -23,6 +23,7 @@ import ( "bytes" "context" "math" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -52,6 +53,9 @@ var _ tree.Datum // Dummy import to pull in "math" package.
var _ = math.MaxInt64 +// Dummy import to pull in "time" package. +var _ time.Time + // _ASSIGN is the template function for assigning the first input to the result // of computation an operation on the second and the third inputs. func _ASSIGN(_, _, _ interface{}) { diff --git a/pkg/sql/colexec/projection_ops_test.go b/pkg/sql/colexec/projection_ops_test.go index d2a306899d17..267e2228c652 100644 --- a/pkg/sql/colexec/projection_ops_test.go +++ b/pkg/sql/colexec/projection_ops_test.go @@ -19,9 +19,14 @@ import ( "github.com/cockroachdb/cockroach/pkg/col/coldata" "github.com/cockroachdb/cockroach/pkg/col/coltypes" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/sql/colexec/typeconv" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/randutil" + "github.com/stretchr/testify/assert" ) func TestProjPlusInt64Int64ConstOp(t *testing.T) { @@ -173,6 +178,84 @@ func TestGetProjectionConstMixedTypeOperator(t *testing.T) { } } +// TestRandomComparisons runs binary comparisons against all scalar types +// (supported by the vectorized engine) with random non-null data verifying +// that the result of Datum.Compare matches the result of the exec projection. +func TestRandomComparisons(t *testing.T) { + defer leaktest.AfterTest(t)() + const numTuples = 2048 + rng, _ := randutil.NewPseudoRand() + evalCtx := tree.NewTestingEvalContext(cluster.MakeTestingClusterSettings()) + ctx := evalCtx.Ctx() + defer evalCtx.Stop(ctx) + expected := make([]bool, numTuples) + var da sqlbase.DatumAlloc + lDatums := make([]tree.Datum, numTuples) + rDatums := make([]tree.Datum, numTuples) + for _, ct := range types.Scalar { + if ct.Family() == types.DateFamily { + // TODO(jordan): #40354 tracks failure to compare infinite dates. 
+ continue + } + typ := typeconv.FromColumnType(ct) + if typ == coltypes.Unhandled { + continue + } + typs := []coltypes.T{typ, typ, coltypes.Bool} + bytesFixedLength := 0 + if ct.Family() == types.UuidFamily { + bytesFixedLength = 16 + } + b := coldata.NewMemBatchWithSize(typs, numTuples) + lVec := b.ColVec(0) + rVec := b.ColVec(1) + ret := b.ColVec(2) + RandomVec(rng, typ, bytesFixedLength, lVec, numTuples, 0) + RandomVec(rng, typ, bytesFixedLength, rVec, numTuples, 0) + for i := range lDatums { + lDatums[i] = PhysicalTypeColElemToDatum(lVec, uint16(i), da, ct) + rDatums[i] = PhysicalTypeColElemToDatum(rVec, uint16(i), da, ct) + } + for _, cmpOp := range []tree.ComparisonOperator{tree.EQ, tree.NE, tree.LT, tree.LE, tree.GT, tree.GE} { + for i := range lDatums { + cmp := lDatums[i].Compare(evalCtx, rDatums[i]) + var b bool + switch cmpOp { + case tree.EQ: + b = cmp == 0 + case tree.NE: + b = cmp != 0 + case tree.LT: + b = cmp < 0 + case tree.LE: + b = cmp <= 0 + case tree.GT: + b = cmp > 0 + case tree.GE: + b = cmp >= 0 + } + expected[i] = b + } + input := newChunkingBatchSource(typs, []coldata.Vec{lVec, rVec, ret}, numTuples) + op, err := GetProjectionOperator(ct, ct, cmpOp, input, 0, 1, 2) + if err != nil { + t.Fatal(err) + } + op.Init() + var idx uint16 + for batch := op.Next(ctx); batch.Length() > 0; batch = op.Next(ctx) { + for i := uint16(0); i < batch.Length(); i++ { + absIdx := idx + i + assert.Equal(t, expected[absIdx], batch.ColVec(2).Bool()[i], + "expected %s %s %s (%s[%d]) to be %t found %t", lDatums[absIdx], cmpOp, rDatums[absIdx], ct, absIdx, + expected[absIdx], ret.Bool()[i]) + } + idx += batch.Length() + } + } + } +} + func TestGetProjectionOperator(t *testing.T) { defer leaktest.AfterTest(t)() ct := types.Int2 diff --git a/pkg/sql/colexec/random_testutils.go b/pkg/sql/colexec/random_testutils.go index 9e1432c4ead7..7783d66cc79b 100644 --- a/pkg/sql/colexec/random_testutils.go +++ b/pkg/sql/colexec/random_testutils.go @@ -14,10 +14,12 @@ import ( "context" "fmt" "math/rand" + "time" "github.com/cockroachdb/cockroach/pkg/col/coldata" "github.com/cockroachdb/cockroach/pkg/col/coltypes" "github.com/cockroachdb/cockroach/pkg/sql/colexec/execerror" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" ) // maxVarLen specifies a length limit for variable length types (e.g. byte slices). @@ -36,10 +38,39 @@ func randomTypes(rng *rand.Rand, n int) []coltypes.T { return typs } +var locations []*time.Location + +func init() { + // Load some random time zones. + for _, locationName := range []string{ + "Africa/Addis_Ababa", + "America/Anchorage", + "Antarctica/Davis", + "Asia/Ashkhabad", + "Australia/Sydney", + "Europe/Minsk", + "Pacific/Palau", + } { + loc, err := timeutil.LoadLocation(locationName) + if err == nil { + locations = append(locations, loc) + } + } +} + // RandomVec populates vec with n random values of typ, setting each value to // null with a probability of nullProbability. It is assumed that n is in bounds // of the given vec. -func RandomVec(rng *rand.Rand, typ coltypes.T, vec coldata.Vec, n int, nullProbability float64) { +// bytesFixedLength (when greater than zero) specifies the fixed length of the +// bytes slice to be generated. It is used only if typ == coltypes.Bytes. 
+func RandomVec( + rng *rand.Rand, + typ coltypes.T, + bytesFixedLength int, + vec coldata.Vec, + n int, + nullProbability float64, +) { switch typ { case coltypes.Bool: bools := vec.Bool() @@ -53,7 +84,11 @@ func RandomVec(rng *rand.Rand, typ coltypes.T, vec coldata.Vec, n int, nullProba case coltypes.Bytes: bytes := vec.Bytes() for i := 0; i < n; i++ { - randBytes := make([]byte, rng.Intn(maxVarLen)) + bytesLen := bytesFixedLength + if bytesLen <= 0 { + bytesLen = rng.Intn(maxVarLen) + } + randBytes := make([]byte, bytesLen) // Read always returns len(bytes[i]) and nil. _, _ = rand.Read(randBytes) bytes.Set(i, randBytes) @@ -84,6 +119,13 @@ func RandomVec(rng *rand.Rand, typ coltypes.T, vec coldata.Vec, n int, nullProba for i := 0; i < n; i++ { floats[i] = rng.Float64() } + case coltypes.Timestamp: + timestamps := vec.Timestamp() + for i := 0; i < n; i++ { + timestamps[i] = timeutil.Unix(rng.Int63n(1000000), rng.Int63n(1000000)) + loc := locations[rng.Intn(len(locations))] + timestamps[i] = timestamps[i].In(loc) + } default: execerror.VectorizedInternalPanic(fmt.Sprintf("unhandled type %s", typ)) } @@ -110,7 +152,7 @@ func RandomBatch( length = capacity } for i, typ := range typs { - RandomVec(rng, typ, batch.ColVec(i), length, nullProbability) + RandomVec(rng, typ, 0 /* bytesFixedLength */, batch.ColVec(i), length, nullProbability) } batch.SetLength(uint16(length)) return batch diff --git a/pkg/sql/colexec/rowstovec_tmpl.go b/pkg/sql/colexec/rowstovec_tmpl.go index e4f82e6dc97b..28f7383baffc 100644 --- a/pkg/sql/colexec/rowstovec_tmpl.go +++ b/pkg/sql/colexec/rowstovec_tmpl.go @@ -21,6 +21,7 @@ package colexec import ( "fmt" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -37,6 +38,9 @@ import ( // Dummy import to pull in "apd" package. var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + const ( _FAMILY = types.Family(0) _WIDTH = int32(0) diff --git a/pkg/sql/colexec/select_in_tmpl.go b/pkg/sql/colexec/select_in_tmpl.go index 8848260a38fb..4b5379ef2a2e 100644 --- a/pkg/sql/colexec/select_in_tmpl.go +++ b/pkg/sql/colexec/select_in_tmpl.go @@ -23,6 +23,7 @@ import ( "bytes" "context" "math" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -45,6 +46,9 @@ type _TYPE interface{} // Dummy import to pull in "apd" package. var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + // Dummy import to pull in "coltypes" package var _ coltypes.T diff --git a/pkg/sql/colexec/selection_ops_tmpl.go b/pkg/sql/colexec/selection_ops_tmpl.go index 2014a7cb6282..5cfae8335233 100644 --- a/pkg/sql/colexec/selection_ops_tmpl.go +++ b/pkg/sql/colexec/selection_ops_tmpl.go @@ -23,6 +23,7 @@ import ( "bytes" "context" "math" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -52,6 +53,9 @@ var _ tree.Datum // Dummy import to pull in "math" package. var _ = math.MaxInt64 +// Dummy import to pull in "time" package. +var _ time.Time + // Dummy import to pull in "coltypes" package. var _ = coltypes.Bool diff --git a/pkg/sql/colexec/sort_tmpl.go b/pkg/sql/colexec/sort_tmpl.go index 34998092fa26..b2ef9e1a4e2f 100644 --- a/pkg/sql/colexec/sort_tmpl.go +++ b/pkg/sql/colexec/sort_tmpl.go @@ -24,6 +24,7 @@ import ( "context" "fmt" "math" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -44,6 +45,9 @@ var _ bytes.Buffer // Dummy import to pull in "apd" package. 
var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + // Dummy import to pull in "tree" package. var _ tree.Datum diff --git a/pkg/sql/colexec/supported_sql_types.go b/pkg/sql/colexec/supported_sql_types.go index 3f6dcf9aea1f..6c84b85c212d 100644 --- a/pkg/sql/colexec/supported_sql_types.go +++ b/pkg/sql/colexec/supported_sql_types.go @@ -27,4 +27,5 @@ var allSupportedSQLTypes = []types.T{ *types.Float4, *types.String, *types.Uuid, + *types.Timestamp, } diff --git a/pkg/sql/colexec/typeconv/typeconv.go b/pkg/sql/colexec/typeconv/typeconv.go index e6c24aadc07c..64df0c392b9a 100644 --- a/pkg/sql/colexec/typeconv/typeconv.go +++ b/pkg/sql/colexec/typeconv/typeconv.go @@ -47,6 +47,8 @@ func FromColumnType(ct *types.T) coltypes.T { execerror.VectorizedInternalPanic(fmt.Sprintf("integer with unknown width %d", ct.Width())) case types.FloatFamily: return coltypes.Float64 + case types.TimestampFamily: + return coltypes.Timestamp } return coltypes.Unhandled } @@ -210,6 +212,14 @@ func GetDatumToPhysicalFn(ct *types.T) func(tree.Datum) (interface{}, error) { } return d.UUID.GetBytesMut(), nil } + case types.TimestampFamily: + return func(datum tree.Datum) (interface{}, error) { + d, ok := datum.(*tree.DTimestamp) + if !ok { + return nil, errors.Errorf("expected *tree.DTimestamp, found %s", reflect.TypeOf(datum)) + } + return d.Time, nil + } } // It would probably be more correct to return an error here, rather than a // function which always returns an error. But since the function tends to be diff --git a/pkg/sql/colexec/vec_comparators_tmpl.go b/pkg/sql/colexec/vec_comparators_tmpl.go index 5b02564eca73..713948a25fd5 100644 --- a/pkg/sql/colexec/vec_comparators_tmpl.go +++ b/pkg/sql/colexec/vec_comparators_tmpl.go @@ -23,6 +23,7 @@ import ( "bytes" "fmt" "math" + "time" "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -44,6 +45,9 @@ var _ bytes.Buffer // Dummy import to pull in "apd" package. var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + // Dummy import to pull in "tree" package. var _ tree.Datum diff --git a/pkg/sql/colexec/vec_elem_to_datum.go b/pkg/sql/colexec/vec_elem_to_datum.go index c84932e4c912..96e1f82a4787 100644 --- a/pkg/sql/colexec/vec_elem_to_datum.go +++ b/pkg/sql/colexec/vec_elem_to_datum.go @@ -28,7 +28,7 @@ import ( // semtype ct. Note that this function handles nulls as well, so there is no // need for a separate null check. func PhysicalTypeColElemToDatum( - col coldata.Vec, rowIdx uint16, da sqlbase.DatumAlloc, ct types.T, + col coldata.Vec, rowIdx uint16, da sqlbase.DatumAlloc, ct *types.T, ) tree.Datum { if col.MaybeHasNulls() { if col.Nulls().NullAt(rowIdx) { @@ -72,6 +72,8 @@ func PhysicalTypeColElemToDatum( execerror.VectorizedInternalPanic(err) } return da.NewDUuid(tree.DUuid{UUID: id}) + case types.TimestampFamily: + return da.NewDTimestamp(tree.DTimestamp{Time: col.Timestamp()[rowIdx]}) default: execerror.VectorizedInternalPanic(fmt.Sprintf("Unsupported column type %s", ct.String())) // This code is unreachable, but the compiler cannot infer that. diff --git a/pkg/sql/colexec/zerocolumns_tmpl.go b/pkg/sql/colexec/zerocolumns_tmpl.go index 71c1c9310c72..d2de183cd474 100644 --- a/pkg/sql/colexec/zerocolumns_tmpl.go +++ b/pkg/sql/colexec/zerocolumns_tmpl.go @@ -20,6 +20,8 @@ package colexec import ( + "time" + "github.com/cockroachdb/apd" "github.com/cockroachdb/cockroach/pkg/col/coldata" ) @@ -30,6 +32,9 @@ import ( // Dummy import to pull in "apd" package. 
var _ apd.Decimal +// Dummy import to pull in "time" package. +var _ time.Time + // */}} // {{range .}} diff --git a/pkg/sql/logictest/testdata/logic_test/vectorize_types b/pkg/sql/logictest/testdata/logic_test/vectorize_types index 69fa8455aaee..1d9a29fa9c43 100644 --- a/pkg/sql/logictest/testdata/logic_test/vectorize_types +++ b/pkg/sql/logictest/testdata/logic_test/vectorize_types @@ -1,17 +1,18 @@ # Check that all types supported by the vectorized engine can be read correctly. statement ok CREATE TABLE all_types ( - _bool BOOL, - _bytes BYTES, - _date DATE, - _decimal DECIMAL, - _int2 INT2, - _int4 INT4, - _int INT8, - _oid OID, - _float FLOAT8, - _string STRING, - _uuid UUID + _bool BOOL, + _bytes BYTES, + _date DATE, + _decimal DECIMAL, + _int2 INT2, + _int4 INT4, + _int INT8, + _oid OID, + _float FLOAT8, + _string STRING, + _uuid UUID, + _timestamp TIMESTAMP ) statement ok @@ -28,6 +29,7 @@ VALUES ( NULL, NULL, NULL, + NULL, NULL ), ( @@ -41,11 +43,12 @@ VALUES ( 123, 1.23, '123', - '63616665-6630-3064-6465-616462656562' + '63616665-6630-3064-6465-616462656562', + '1-1-18 1:00:00.001' ) -query BTTRIIIORTT +query BTTRIIIORTTT SELECT * FROM all_types ORDER BY 1 ---- -NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -false 123 2019-10-22 00:00:00 +0000 +0000 1.23 123 123 123 123 1.23 123 63616665-6630-3064-6465-616462656562 +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +false 123 2019-10-22 00:00:00 +0000 +0000 1.23 123 123 123 123 1.23 123 63616665-6630-3064-6465-616462656562 2001-01-18 01:00:00.001 +0000 +0000 diff --git a/pkg/sql/opt/exec/execbuilder/testdata/explain b/pkg/sql/opt/exec/execbuilder/testdata/explain index 87f1b1e5b380..58cb767d5a00 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/explain +++ b/pkg/sql/opt/exec/execbuilder/testdata/explain @@ -21,7 +21,7 @@ EXPLAIN (PLAN) SELECT 1 FROM system.jobs WHERE TRUE ---- tree field description · distributed false -· vectorized false +· vectorized true render · · └── scan · · · table jobs@jobs_status_created_idx diff --git a/pkg/sql/sqlbase/testutils.go b/pkg/sql/sqlbase/testutils.go index 63e559f920d5..afb08b23acf4 100644 --- a/pkg/sql/sqlbase/testutils.go +++ b/pkg/sql/sqlbase/testutils.go @@ -159,7 +159,7 @@ func RandDatumWithNullChance(rng *rand.Rand, typ *types.T, nullChance int) tree. case types.TimeFamily: return tree.MakeDTime(timeofday.Random(rng)) case types.TimestampFamily: - return &tree.DTimestamp{Time: timeutil.Unix(rng.Int63n(1000000), rng.Int63n(1000000))} + return tree.MakeDTimestamp(timeutil.Unix(rng.Int63n(1000000), rng.Int63n(1000000)), time.Microsecond) case types.IntervalFamily: sign := 1 - rng.Int63n(2)*2 return &tree.DInterval{Duration: duration.MakeDuration(
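For reference, the timestamp serialization added in arrowbatchconverter.go round-trips each time.Time through Arrow's variable-width binary format: BatchToArrow marshals every value with MarshalBinary into a BinaryBuilder, and ArrowToBatch slices the concatenated value buffer by its offsets and calls UnmarshalBinary. Below is a minimal standalone sketch of that round trip; it is illustrative only and not part of the patch (the sample values and variable names are made up), but the Arrow and stdlib calls are the same ones used in the change.

package main

import (
	"fmt"
	"time"

	"github.com/apache/arrow/go/arrow"
	"github.com/apache/arrow/go/arrow/array"
	"github.com/apache/arrow/go/arrow/memory"
)

func main() {
	in := []time.Time{time.Unix(0, 0).UTC(), time.Unix(1571702400, 1000).UTC()}

	// BatchToArrow side: every timestamp becomes one variable-length binary
	// value appended to a single BinaryBuilder.
	builder := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary)
	for _, ts := range in {
		marshaled, err := ts.MarshalBinary()
		if err != nil {
			panic(err)
		}
		builder.Append(marshaled)
	}
	arr := builder.NewBinaryArray()

	// ArrowToBatch side: slice the concatenated value buffer by its offsets
	// and unmarshal each slice back into a time.Time.
	values, offsets := arr.ValueBytes(), arr.ValueOffsets()
	out := make([]time.Time, len(in))
	for i := 0; i < len(offsets)-1; i++ {
		if err := out[i].UnmarshalBinary(values[offsets[i]:offsets[i+1]]); err != nil {
			panic(err)
		}
	}
	fmt.Println(out[0].Equal(in[0]), out[1].Equal(in[1])) // true true
}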