diff --git a/pkg/sql/flowinfra/stream_encoder.go b/pkg/sql/flowinfra/stream_encoder.go index 31c764793a06..0b3f2c79aace 100644 --- a/pkg/sql/flowinfra/stream_encoder.go +++ b/pkg/sql/flowinfra/stream_encoder.go @@ -105,9 +105,9 @@ func (se *StreamEncoder) AddRow(row sqlbase.EncDatumRow) error { if !ok { enc = PreferredEncoding } - sType := se.infos[i].Type.Family() + sType := &se.infos[i].Type if enc != sqlbase.DatumEncoding_VALUE && - (sqlbase.HasCompositeKeyEncoding(sType) || sqlbase.MustBeValueEncoded(sType)) { + (sqlbase.HasCompositeKeyEncoding(sType.Family()) || sqlbase.MustBeValueEncoded(sType)) { // Force VALUE encoding for composite types (key encodings may lose data). enc = sqlbase.DatumEncoding_VALUE } diff --git a/pkg/sql/logictest/testdata/logic_test/alter_table b/pkg/sql/logictest/testdata/logic_test/alter_table index ea58e856812f..0b6ad7ee777b 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_table +++ b/pkg/sql/logictest/testdata/logic_test/alter_table @@ -801,25 +801,6 @@ decomputed_column CREATE TABLE decomputed_column ( statement ok CREATE TABLE b26483() -statement error unimplemented: column c is of type int\[\] and thus is not indexable -ALTER TABLE b26483 ADD COLUMN c INT[] UNIQUE - -# As above, but performed in a transaction -statement ok -BEGIN - -statement ok -CREATE TABLE b26483_tx() - -statement ok -ALTER TABLE b26483_tx ADD COLUMN c INT[] - -statement error unimplemented: column c is of type int\[\] and thus is not indexable -CREATE INDEX on b26483_tx (c) - -statement ok -ROLLBACK - # Verify that auditing can be enabled by root, and cannot be disabled by non-root. 
statement ok diff --git a/pkg/sql/logictest/testdata/logic_test/array b/pkg/sql/logictest/testdata/logic_test/array index faa361a25188..f7e5b1ce32ea 100644 --- a/pkg/sql/logictest/testdata/logic_test/array +++ b/pkg/sql/logictest/testdata/logic_test/array @@ -429,17 +429,6 @@ SELECT ARRAY[ARRAY[1,2,3]] query error VECTOR column types are unsupported CREATE TABLE badtable (b INT2VECTOR) -# Using an array as a primary key should be disallowed. #17154 - -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE badtable (b INT[] PRIMARY KEY) - -# Indexing an array column should be disallowed. #17154 - -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE a (b INT[] UNIQUE) - - # Regression test for #18745 statement ok @@ -449,18 +438,6 @@ query T SELECT ARRAY[ROW()] FROM ident ---- -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE a ( - b INT[], - CONSTRAINT c UNIQUE (b) -) - -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE a ( - b INT[], - INDEX c (b) -) - statement ok CREATE TABLE a (b INT ARRAY) @@ -475,18 +452,6 @@ a CREATE TABLE a ( statement ok DROP TABLE a -statement ok -CREATE TABLE a (b INT[], c INT[]) - -statement error column b is of type int\[\] and thus is not indexable -CREATE INDEX idx ON a (b) - -statement error the following columns are not indexable due to their type: b \(type int\[\]\), c \(type int\[\]\) -CREATE INDEX idx ON a (b, c) - -statement ok -DROP TABLE a - # Int array columns. statement ok @@ -1343,3 +1308,129 @@ SELECT x, y FROM t WHERE x < y query TT SELECT x, y FROM t WHERE x > y ---- + +query TT +SELECT x, y FROM t ORDER BY (x, y) +---- +{1} {1,2} +{1,1,1,1} {2} + +subtest array_indexes + +# Create indexes on arrays. 
+statement ok +DROP TABLE IF EXISTS t; +CREATE TABLE t (x INT[] PRIMARY KEY) + +statement ok +INSERT INTO t VALUES + (ARRAY[1]), + (ARRAY[5]), + (ARRAY[4]), + (ARRAY[1,4,5]), + (ARRAY[1,4,6]), + (ARRAY[1,NULL,10]), + (ARRAY[NULL]), + (ARRAY[NULL, NULL, NULL]) + +# Test that the unique index rejects bad inserts. +statement error pq: duplicate key value \(x\)=\(ARRAY\[1,NULL,10\]\) violates unique constraint "primary" +INSERT INTO t VALUES (ARRAY[1, NULL, 10]) + +# Use the index for point lookups. +query T +SELECT x FROM t WHERE x = ARRAY[1,4,6] +---- +{1,4,6} + +# Use the index for bounded scans. +# Note that nulls sort first in CockroachDB, so this ordering is different +# than what postgres will output. In postgres, NULLs in arrays are treated +# as larger than other elements, while we treat them as less. +# TODO (rohany): We have always done this for array comparisons, so I think +# it would be a breaking change + opposite with our other null behavior to +# change it suddenly... +query T +SELECT x FROM t WHERE x < ARRAY[1, 4, 3] ORDER BY x +---- +{NULL} +{NULL,NULL,NULL} +{1} +{1,NULL,10} + +query T +SELECT x FROM t WHERE x > ARRAY [1, NULL] ORDER BY x DESC +---- +{5} +{4} +{1,4,6} +{1,4,5} +{1,NULL,10} + +query T +SELECT x FROM t WHERE x > ARRAY[1, 3] AND x < ARRAY[1, 4, 10] ORDER BY x +---- +{1,4,5} +{1,4,6} + +# Test some operations on a descending index. +statement ok +CREATE INDEX i ON t(x DESC) + +query T +SELECT x FROM t@i WHERE x <= ARRAY[1] ORDER BY x DESC +---- +{1} +{NULL,NULL,NULL} +{NULL} + +query T +SELECT x FROM t@i WHERE x > ARRAY[1] ORDER BY x +---- +{1,NULL,10} +{1,4,5} +{1,4,6} +{4} +{5} + +# Ensure that we can order by the arrays without any indexes. 
+statement ok +DROP TABLE t; +CREATE TABLE t (x INT[]); +INSERT INTO t VALUES + (ARRAY[1]), + (ARRAY[5]), + (ARRAY[4]), + (ARRAY[1,4,5]), + (ARRAY[1,4,6]), + (ARRAY[1,NULL,10]), + (ARRAY[NULL]), + (ARRAY[NULL, NULL, NULL]) + +query T +SELECT x FROM t ORDER BY x +---- +{NULL} +{NULL,NULL,NULL} +{1} +{1,NULL,10} +{1,4,5} +{1,4,6} +{4} +{5} + +query T +SELECT x FROM t ORDER BY x DESC +---- +{5} +{4} +{1,4,6} +{1,4,5} +{1,NULL,10} +{1} +{NULL,NULL,NULL} +{NULL} + +# Create an index on a bad type. +statement error pq: unimplemented: column x is of type geography\[\] and thus is not indexable +CREATE TABLE tbad (x GEOGRAPHY[] PRIMARY KEY) diff --git a/pkg/sql/logictest/testdata/logic_test/order_by b/pkg/sql/logictest/testdata/logic_test/order_by index 10dda90f2265..0a30a48cc47f 100644 --- a/pkg/sql/logictest/testdata/logic_test/order_by +++ b/pkg/sql/logictest/testdata/logic_test/order_by @@ -187,18 +187,6 @@ SELECT * FROM t ORDER BY foo query error no data source matches prefix: a SELECT a FROM t ORDER BY a.b -query error can't order by column type int\[\] -SELECT generate_series FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] - -query error can't order by column type int\[\] -SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] - -query error can't order by column type int\[\] -SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY 1 - -query error can't order by column type int\[\] -SELECT ARRAY[generate_series] AS a FROM generate_series(1, 100) ORDER BY a - query IT SELECT generate_series, ARRAY[generate_series] FROM generate_series(1, 1) ORDER BY 1 ---- diff --git a/pkg/sql/opt/optbuilder/orderby.go b/pkg/sql/opt/optbuilder/orderby.go index 2058110d0574..e2775cea4fc0 100644 --- a/pkg/sql/opt/optbuilder/orderby.go +++ b/pkg/sql/opt/optbuilder/orderby.go @@ -258,10 +258,8 @@ func (b *Builder) analyzeExtraArgument( func ensureColumnOrderable(e tree.TypedExpr) { typ := e.ResolvedType() - if typ.Family() 
== types.ArrayFamily { - panic(unimplementedWithIssueDetailf(35707, "", "can't order by column type %s", typ)) - } - if typ.Family() == types.JsonFamily { + if typ.Family() == types.JsonFamily || + (typ.Family() == types.ArrayFamily && typ.ArrayContents().Family() == types.JsonFamily) { panic(unimplementedWithIssueDetailf(35706, "", "can't order by column type jsonb")) } } diff --git a/pkg/sql/opt/optbuilder/testdata/orderby b/pkg/sql/opt/optbuilder/testdata/orderby index 235e0e24c958..5940a1dd51c2 100644 --- a/pkg/sql/opt/optbuilder/testdata/orderby +++ b/pkg/sql/opt/optbuilder/testdata/orderby @@ -391,22 +391,70 @@ error (42P01): no data source matches prefix: a build SELECT generate_series FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: generate_series:1 [hidden: column2:2] + ├── ordering: +2 + └── project + ├── columns: column2:2 generate_series:1 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=column2:2] build SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: array:2 + ├── ordering: +2 + └── project + ├── columns: array:2 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=array:2] build SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY 1 ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: array:2 + ├── ordering: +2 + └── project + ├── columns: array:2 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] 
[as=array:2] build SELECT ARRAY[generate_series] AS a FROM generate_series(1, 100) ORDER BY a ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: a:2 + ├── ordering: +2 + └── project + ├── columns: a:2 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=a:2] build SELECT generate_series, ARRAY[generate_series] FROM generate_series(1, 1) ORDER BY 1 @@ -988,4 +1036,12 @@ project build SELECT ARRAY[a] FROM abcd ORDER BY 1 ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: array:5!null + ├── ordering: +5 + └── project + ├── columns: array:5!null + ├── scan abcd + │ └── columns: a:1!null b:2 c:3 d:4 + └── projections + └── ARRAY[a:1] [as=array:5] diff --git a/pkg/sql/sqlbase/column_type_encoding.go b/pkg/sql/sqlbase/column_type_encoding.go index 65bde1c29a4c..63b632ba854f 100644 --- a/pkg/sql/sqlbase/column_type_encoding.go +++ b/pkg/sql/sqlbase/column_type_encoding.go @@ -143,6 +143,8 @@ func EncodeTableKey(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, e } } return b, nil + case *tree.DArray: + return encodeArrayKey(b, t, dir) case *tree.DCollatedString: if dir == encoding.Ascending { return encoding.EncodeBytesAscending(b, t.Key), nil @@ -187,6 +189,8 @@ func DecodeTableKey( var err error switch valType.Family() { + case types.ArrayFamily: + return decodeArrayKey(a, valType, key, dir) case types.BitFamily: var r bitarray.BitArray if dir == encoding.Ascending { @@ -918,6 +922,67 @@ func decodeTuple(a *DatumAlloc, tupTyp *types.T, b []byte) (tree.Datum, []byte, return a.NewDTuple(result), b, nil } +// encodeArrayKey generates an ordered key encoding of an array. 
+// The encoding format for an array [a, b] is as follows: +// [arrayMarker, elementMarker, enc(a), elementMarker, enc(b), terminator] +// We include these element markers to handle the following case. +// x, y = ARRAY[1], ARRAY[1, 2]. x is less than y, so we need the shorter +// array to sort first. The terminator is chosen to be less than the +// elementMarker so that this result is achieved. Note that we cannot +// include the encoding length or number of elements in the array at the +// front of the encoding, as this would affect the sorting of differently +// sized arrays. The effect of this is that peeking/skipping over the +// encoded array is not as efficient as it could be. +func encodeArrayKey(b []byte, array *tree.DArray, dir encoding.Direction) ([]byte, error) { + var err error + b = encoding.EncodeArrayKeyMarker(b, dir) + for _, elem := range array.Array { + b = encoding.EncodeArrayKeyElementMarker(b, dir) + b, err = EncodeTableKey(b, elem, dir) + if err != nil { + return nil, err + } + } + b = encoding.EncodeArrayKeyTerminator(b, dir) + return b, nil +} + +// decodeArrayKey decodes an array key generated by encodeArrayKey. 
+func decodeArrayKey( + a *DatumAlloc, t *types.T, buf []byte, dir encoding.Direction, +) (tree.Datum, []byte, error) { + var err error + buf, err = encoding.ValidateAndConsumeArrayKeyMarker(buf, dir) + if err != nil { + return nil, nil, err + } + + result := tree.NewDArray(t.ArrayContents()) + + for { + if len(buf) == 0 { + return nil, nil, errors.AssertionFailedf("invalid array encoding (unterminated)") + } + var more bool + buf, more, err = encoding.ConsumeNextArrayElementMarker(buf, dir) + if err != nil { + return nil, nil, err + } + if !more { + break + } + var d tree.Datum + d, buf, err = DecodeTableKey(a, t.ArrayContents(), buf, dir) + if err != nil { + return nil, nil, err + } + if err := result.Append(d); err != nil { + return nil, nil, err + } + } + return result, buf, nil +} + // encodeArray produces the value encoding for an array. func encodeArray(d *tree.DArray, scratch []byte) ([]byte, error) { if err := d.Validate(); err != nil { diff --git a/pkg/sql/sqlbase/column_type_encoding_test.go b/pkg/sql/sqlbase/column_type_encoding_test.go index ea11f8064ed7..63cba669795b 100644 --- a/pkg/sql/sqlbase/column_type_encoding_test.go +++ b/pkg/sql/sqlbase/column_type_encoding_test.go @@ -34,6 +34,13 @@ func genColumnType() gopter.Gen { } } +func genRandomArrayType() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + arrType := RandArrayType(genParams.Rng) + return gopter.NewGenResult(arrType, gopter.NoShrinker) + } +} + func genDatum() gopter.Gen { return func(genParams *gopter.GenParameters) *gopter.GenResult { return gopter.NewGenResult(RandDatum(genParams.Rng, RandColumnType(genParams.Rng), @@ -48,6 +55,14 @@ func genDatumWithType(columnType interface{}) gopter.Gen { } } +func genArrayDatumWithType(arrTyp interface{}) gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + // Mark the array contents to have a 1 in 10 chance of being null. 
+ datum := RandArray(genParams.Rng, arrTyp.(*types.T), 10) + return gopter.NewGenResult(datum, gopter.NoShrinker) + } +} + func genEncodingDirection() gopter.Gen { return func(genParams *gopter.GenParameters) *gopter.GenResult { return gopter.NewGenResult( @@ -59,9 +74,11 @@ func genEncodingDirection() gopter.Gen { func hasKeyEncoding(typ *types.T) bool { // Only some types are round-trip key encodable. switch typ.Family() { - case types.JsonFamily, types.ArrayFamily, types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily, + case types.JsonFamily, types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily, types.GeographyFamily, types.GeometryFamily: return false + case types.ArrayFamily: + return hasKeyEncoding(typ.ArrayContents()) } return true } @@ -102,61 +119,75 @@ func TestEncodeTableKey(t *testing.T) { parameters := gopter.DefaultTestParameters() parameters.MinSuccessfulTests = 10000 properties := gopter.NewProperties(parameters) + roundtripDatum := func(d tree.Datum, dir encoding.Direction) string { + b, err := EncodeTableKey(nil, d, dir) + if err != nil { + return "error: " + err.Error() + } + newD, leftoverBytes, err := DecodeTableKey(a, d.ResolvedType(), b, dir) + if len(leftoverBytes) > 0 { + return "Leftover bytes" + } + if err != nil { + return "error: " + err.Error() + } + if newD.Compare(ctx, d) != 0 { + return "unequal" + } + return "" + } properties.Property("roundtrip", prop.ForAll( - func(d tree.Datum, dir encoding.Direction) string { - b, err := EncodeTableKey(nil, d, dir) - if err != nil { - return "error: " + err.Error() - } - newD, leftoverBytes, err := DecodeTableKey(a, d.ResolvedType(), b, dir) - if len(leftoverBytes) > 0 { - return "Leftover bytes" - } - if err != nil { - return "error: " + err.Error() - } - if newD.Compare(ctx, d) != 0 { - return "unequal" - } - return "" - }, + roundtripDatum, genColumnType(). SuchThat(hasKeyEncoding). 
FlatMap(genDatumWithType, reflect.TypeOf((*tree.Datum)(nil)).Elem()), genEncodingDirection(), )) - properties.Property("order-preserving", prop.ForAll( - func(datums []tree.Datum, dir encoding.Direction) string { - d1 := datums[0] - d2 := datums[1] - b1, err := EncodeTableKey(nil, d1, dir) - if err != nil { - return "error: " + err.Error() - } - b2, err := EncodeTableKey(nil, d2, dir) - if err != nil { - return "error: " + err.Error() - } - expectedCmp := d1.Compare(ctx, d2) - cmp := bytes.Compare(b1, b2) + // Also run the property on arrays possibly containing NULL values. + // The random generator in the property above does not generate NULLs. + properties.Property("roundtrip-arrays", prop.ForAll( + roundtripDatum, + genRandomArrayType(). + SuchThat(hasKeyEncoding). + FlatMap(genArrayDatumWithType, reflect.TypeOf((*tree.Datum)(nil)).Elem()), + genEncodingDirection(), + )) - if expectedCmp == 0 { - if cmp != 0 { - return fmt.Sprintf("equal inputs produced inequal outputs: \n%v\n%v", b1, b2) - } - // If the inputs are equal and so are the outputs, no more checking to do. - return "" - } + generateAndCompareDatums := func(datums []tree.Datum, dir encoding.Direction) string { + d1 := datums[0] + d2 := datums[1] + b1, err := EncodeTableKey(nil, d1, dir) + if err != nil { + return "error: " + err.Error() + } + b2, err := EncodeTableKey(nil, d2, dir) + if err != nil { + return "error: " + err.Error() + } - cmpsMatch := expectedCmp == cmp - dirIsAscending := dir == encoding.Ascending + expectedCmp := d1.Compare(ctx, d2) + cmp := bytes.Compare(b1, b2) - if cmpsMatch != dirIsAscending { - return fmt.Sprintf("non-order preserving encoding: \n%v\n%v", b1, b2) + if expectedCmp == 0 { + if cmp != 0 { + return fmt.Sprintf("equal inputs produced inequal outputs: \n%v\n%v", b1, b2) } + // If the inputs are equal and so are the outputs, no more checking to do. 
return "" - }, + } + + cmpsMatch := expectedCmp == cmp + dirIsAscending := dir == encoding.Ascending + + if cmpsMatch != dirIsAscending { + return fmt.Sprintf("non-order preserving encoding: \n%v\n%v", b1, b2) + } + return "" + } + + properties.Property("order-preserving", prop.ForAll( + generateAndCompareDatums, // For each column type, generate two datums of that type. genColumnType(). SuchThat(hasKeyEncoding). @@ -176,6 +207,31 @@ func TestEncodeTableKey(t *testing.T) { }), genEncodingDirection(), )) + + // Also run the property on arrays possibly containing NULL values. + // The random generator in the property above does not generate NULLs. + properties.Property("order-preserving-arrays", prop.ForAll( + generateAndCompareDatums, + // For each column type, generate two datums of that type. + genRandomArrayType(). + SuchThat(hasKeyEncoding). + FlatMap( + func(t interface{}) gopter.Gen { + colTyp := t.(*types.T) + return gopter.CombineGens( + genArrayDatumWithType(colTyp), + genArrayDatumWithType(colTyp)) + }, reflect.TypeOf([]interface{}{})). + Map(func(datums []interface{}) []tree.Datum { + ret := make([]tree.Datum, len(datums)) + for i, d := range datums { + ret[i] = d.(tree.Datum) + } + return ret + }), + genEncodingDirection(), + )) + properties.TestingRun(t) } diff --git a/pkg/sql/sqlbase/structured.go b/pkg/sql/sqlbase/structured.go index e792bd65e7bf..0cb1a97aaae9 100644 --- a/pkg/sql/sqlbase/structured.go +++ b/pkg/sql/sqlbase/structured.go @@ -1299,12 +1299,14 @@ func DatumTypeHasCompositeKeyEncoding(typ *types.T) bool { // MustBeValueEncoded returns true if columns of the given kind can only be value // encoded. 
-func MustBeValueEncoded(semanticType types.Family) bool { - return semanticType == types.ArrayFamily || - semanticType == types.JsonFamily || - semanticType == types.TupleFamily || - semanticType == types.GeometryFamily || - semanticType == types.GeographyFamily +func MustBeValueEncoded(semanticType *types.T) bool { + switch semanticType.Family() { + case types.ArrayFamily: + return MustBeValueEncoded(semanticType.ArrayContents()) + case types.JsonFamily, types.TupleFamily, types.GeographyFamily, types.GeometryFamily: + return true + } + return false } // HasOldStoredColumns returns whether the index has stored columns in the old @@ -2273,7 +2275,7 @@ func fitColumnToFamily(desc *MutableTableDescriptor, col ColumnDescriptor) (int, // ColumnTypeIsIndexable returns whether the type t is valid as an indexed column. func ColumnTypeIsIndexable(t *types.T) bool { - return !MustBeValueEncoded(t.Family()) + return !MustBeValueEncoded(t) } // ColumnTypeIsInvertedIndexable returns whether the type t is valid to be indexed diff --git a/pkg/sql/sqlbase/testutils.go b/pkg/sql/sqlbase/testutils.go index 5dbdc6ec0dea..3e84d967ea69 100644 --- a/pkg/sql/sqlbase/testutils.go +++ b/pkg/sql/sqlbase/testutils.go @@ -265,17 +265,7 @@ func RandDatumWithNullChance(rng *rand.Rand, typ *types.T, nullChance int) tree. case types.UnknownFamily: return tree.DNull case types.ArrayFamily: - contents := typ.ArrayContents() - if contents.Family() == types.AnyFamily { - contents = RandArrayContentsType(rng) - } - arr := tree.NewDArray(contents) - for i := 0; i < rng.Intn(10); i++ { - if err := arr.Append(RandDatumWithNullChance(rng, contents, 0)); err != nil { - panic(err) - } - } - return arr + return RandArray(rng, typ, 0) case types.AnyFamily: return RandDatumWithNullChance(rng, RandType(rng), nullChance) default: @@ -283,6 +273,22 @@ func RandDatumWithNullChance(rng *rand.Rand, typ *types.T, nullChance int) tree. 
} } +// RandArray generates a random DArray where the contents have nullChance +// of being null. +func RandArray(rng *rand.Rand, typ *types.T, nullChance int) tree.Datum { + contents := typ.ArrayContents() + if contents.Family() == types.AnyFamily { + contents = RandArrayContentsType(rng) + } + arr := tree.NewDArray(contents) + for i := 0; i < rng.Intn(10); i++ { + if err := arr.Append(RandDatumWithNullChance(rng, contents, nullChance)); err != nil { + panic(err) + } + } + return arr +} + const simpleRange = 10 // RandDatumSimple generates a random Datum of the given type. The generated @@ -767,6 +773,16 @@ func RandColumnType(rng *rand.Rand) *types.T { } } +func RandArrayType(rng *rand.Rand) *types.T { + for { + typ := RandColumnType(rng) + resTyp := types.MakeArray(typ) + if err := ValidateColumnDefType(resTyp); err == nil { + return resTyp + } + } +} + // RandColumnTypes returns a slice of numCols random types. These types must be // legal table column types. func RandColumnTypes(rng *rand.Rand, numCols int) []types.T { @@ -780,7 +796,7 @@ func RandColumnTypes(rng *rand.Rand, numCols int) []types.T { // RandSortingType returns a column type which can be key-encoded. func RandSortingType(rng *rand.Rand) *types.T { typ := RandType(rng) - for MustBeValueEncoded(typ.Family()) { + for MustBeValueEncoded(typ) { typ = RandType(rng) } return typ @@ -1325,7 +1341,7 @@ func randIndexTableDefFromCols( indexElemList := make(tree.IndexElemList, 0, len(cols)) for i := range cols { - semType := cols[i].Type.Family() + semType := cols[i].Type if MustBeValueEncoded(semType) { continue } diff --git a/pkg/util/encoding/encoding.go b/pkg/util/encoding/encoding.go index 232947d87792..145b755437da 100644 --- a/pkg/util/encoding/encoding.go +++ b/pkg/util/encoding/encoding.go @@ -83,6 +83,14 @@ const ( timeTZMarker = bitArrayDescMarker + 1 geoMarker = timeTZMarker + 1 + // Markers for key encoding Datum arrays in sorted order. 
+ arrayKeyMarker = geoMarker + 1 + arrayKeyElementMarker = arrayKeyMarker + 1 + arrayKeyDescendingMarker = arrayKeyElementMarker + 1 + arrayKeyDescendingElementMarker = arrayKeyDescendingMarker + 1 + arrayKeyTerminator = 0x00 + arrayKeyDescendingTerminator = 0xFF + // IntMin is chosen such that the range of int tags does not overlap the // ascii character set that is frequently used in testing. IntMin = 0x80 // 128 @@ -1277,6 +1285,8 @@ const ( BitArrayDesc Type = 18 // BitArray encoded descendingly TimeTZ Type = 19 Geo Type = 20 + ArrayKeyAsc Type = 21 // Array key encoding + ArrayKeyDesc Type = 22 // Array key encoded descendingly ) // typMap maps an encoded type byte to a decoded Type. It's got 256 slots, one @@ -1309,6 +1319,10 @@ func slowPeekType(b []byte) Type { return Null case m == encodedNotNull, m == encodedNotNullDesc: return NotNull + case m == arrayKeyMarker: + return ArrayKeyAsc + case m == arrayKeyDescendingMarker: + return ArrayKeyDesc case m == bytesMarker: return Bytes case m == bytesDescMarker: @@ -1369,6 +1383,36 @@ func getMultiNonsortingVarintLen(b []byte, num int) (int, error) { return p, nil } +// getArrayLength returns the length of a key encoded array. The input +// must have had the array type marker stripped from the front. +func getArrayLength(buf []byte, dir Direction) (int, error) { + result := 0 + for { + if len(buf) == 0 { + return 0, errors.AssertionFailedf("invalid array encoding (unterminated)") + } + var more bool + var err error + buf, more, err = ConsumeNextArrayElementMarker(buf, dir) + if err != nil { + return 0, err + } + // Increment to include the consumed marker byte. + result++ + if !more { + break + } + next, err := PeekLength(buf) + if err != nil { + return 0, err + } + // Shift buf over by the encoded data amount. + buf = buf[next:] + result += next + } + return result, nil +} + // PeekLength returns the length of the encoded value at the start of b. 
Note: // if this function succeeds, it's not a guarantee that decoding the value will // succeed. PeekLength is meant to be used on key encoded data only. @@ -1398,6 +1442,13 @@ func PeekLength(b []byte) (int, error) { return 1 + n + m + 1, err } return 1 + n + m + 1, nil + case arrayKeyMarker, arrayKeyDescendingMarker: + dir := Ascending + if m == arrayKeyDescendingMarker { + dir = Descending + } + length, err := getArrayLength(b[1:], dir) + return 1 + length, err case bytesMarker: return getBytesLength(b, ascendingEscapes) case jsonInvertedIndex: @@ -1510,7 +1561,7 @@ func prettyPrintValueImpl(valDirs []Direction, b []byte, sep string) (string, bo // even if we don't have directions for the child index's columns. func prettyPrintFirstValue(dir Direction, b []byte) ([]byte, string, error) { var err error - switch PeekType(b) { + switch typ := PeekType(b); typ { case Null: b, _ = DecodeIfNull(b) return b, "NULL", nil @@ -1520,6 +1571,48 @@ func prettyPrintFirstValue(dir Direction, b []byte) ([]byte, string, error) { return b[1:], "False", nil case Array: return b[1:], "Arr", nil + case ArrayKeyAsc, ArrayKeyDesc: + // TODO (rohany): I'm not sure what is the best way to remove this duplication + // with the standard decoding logic for array keys. Because the decoding depends + // on the sqlbase {Encode,Decode}TableKey functions, we can't use them here. + encDir := Ascending + if typ == ArrayKeyDesc { + encDir = Descending + } + var build strings.Builder + buf, err := ValidateAndConsumeArrayKeyMarker(b, encDir) + if err != nil { + return nil, "", err + } + build.WriteString("ARRAY[") + first := true + // Use the array key decoding logic, but instead of calling out + // to DecodeTableKey, just make a recursive call. 
+ for { + if len(buf) == 0 { + return nil, "", errors.AssertionFailedf("invalid array (unterminated)") + } + var more bool + buf, more, err = ConsumeNextArrayElementMarker(buf, encDir) + if err != nil { + return nil, "", err + } + if !more { + break + } + var next string + buf, next, err = prettyPrintFirstValue(dir, buf) + if err != nil { + return nil, "", err + } + if !first { + build.WriteString(",") + } + build.WriteString(next) + first = false + } + build.WriteString("]") + return buf, build.String(), nil case NotNull: // The tag can be either encodedNotNull or encodedNotNullDesc. The // latter can be an interleaved sentinel. @@ -2603,3 +2696,84 @@ func getJSONInvertedIndexKeyLength(buf []byte) (int, error) { return len + valLen, nil } } + +// EncodeArrayKeyMarker adds the array key encoding marker to buf and +// returns the new buffer. +func EncodeArrayKeyMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, arrayKeyMarker) + case Descending: + return append(buf, arrayKeyDescendingMarker) + default: + panic("invalid direction") + } +} + +// EncodeArrayKeyTerminator adds the array key terminator to buf and +// returns the new buffer. +func EncodeArrayKeyTerminator(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, arrayKeyTerminator) + case Descending: + return append(buf, arrayKeyDescendingTerminator) + default: + panic("invalid direction") + } +} + +// EncodeArrayKeyElementMarker adds the array key element marker to +// buf and returns the new buffer. +func EncodeArrayKeyElementMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, arrayKeyElementMarker) + case Descending: + return append(buf, arrayKeyDescendingElementMarker) + default: + panic("invalid direction") + } +} + +// ValidateAndConsumeArrayKeyMarker checks that the marker at the front +// of buf is valid for an array of the given direction, and consumes it +// if so. 
It returns an error if the tag is invalid. +func ValidateAndConsumeArrayKeyMarker(buf []byte, dir Direction) ([]byte, error) { + typ := PeekType(buf) + expected := ArrayKeyAsc + if dir == Descending { + expected = ArrayKeyDesc + } + if typ != expected { + return nil, errors.Newf("invalid type found %s", typ) + } + return buf[1:], nil +} + +// ConsumeNextArrayElementMarker reads the first byte in buf and sees whether +// it is an element marker or a terminator. It consumes the first byte, and +// returns true if there are elements to decode (read an element marker), or +// false if the array is finished (read a terminator). +func ConsumeNextArrayElementMarker(buf []byte, dir Direction) ([]byte, bool, error) { + var elementMarker, terminator byte + switch dir { + case Ascending: + elementMarker, terminator = arrayKeyElementMarker, arrayKeyTerminator + case Descending: + elementMarker, terminator = arrayKeyDescendingElementMarker, arrayKeyDescendingTerminator + default: + panic("invalid direction") + } + if len(buf) == 0 { + return nil, false, errors.AssertionFailedf("invalid array encoding (unterminated)") + } + switch buf[0] { + case elementMarker: + return buf[1:], true, nil + case terminator: + return buf[1:], false, nil + default: + return nil, false, errors.AssertionFailedf("invalid array encoding (unknown marker)") + } +}