From 7811ade70322d2f6a1b5888686f76f36aecd6086 Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Sat, 25 Apr 2020 17:46:39 -0400 Subject: [PATCH] sql: enable indexing and ordering on arrays of orderable and indexable types Fixes #17154. Fixes #35707. This PR enables arrays to be ordered and indexed by introducing an ordered key encoding for arrays. Once this exists, the rest of the SQL infrastructure is ready to handle indexing and ordering on arrays. To encode an array of elements `ARRAY[a, b]`, we create the following encoding. Let `AM` = a marker byte for arrays, let `EM` be an "array element" marker, and let `AT` be a terminator byte. `enc(ARRAY[a, b]) = [AM, EM, enc(a), EM, enc(b), AT]` The key is that the terminator is less than the element marker. This allows for the "prefix matching" style comparison that arrays support. Release note (sql change): This PR adds support for indexing and ordering of arrays of indexable and orderable inner types. --- pkg/sql/flowinfra/stream_encoder.go | 4 +- .../logictest/testdata/logic_test/alter_table | 19 -- pkg/sql/logictest/testdata/logic_test/array | 161 ++++++++++++---- .../logictest/testdata/logic_test/order_by | 12 -- pkg/sql/opt/optbuilder/orderby.go | 6 +- pkg/sql/opt/optbuilder/testdata/orderby | 66 ++++++- pkg/sql/sqlbase/column_type_encoding.go | 65 +++++++ pkg/sql/sqlbase/column_type_encoding_test.go | 144 +++++++++----- pkg/sql/sqlbase/structured.go | 16 +- pkg/sql/sqlbase/testutils.go | 42 +++-- pkg/util/encoding/encoding.go | 176 +++++++++++++++++- 11 files changed, 569 insertions(+), 142 deletions(-) diff --git a/pkg/sql/flowinfra/stream_encoder.go b/pkg/sql/flowinfra/stream_encoder.go index 31c764793a06..0b3f2c79aace 100644 --- a/pkg/sql/flowinfra/stream_encoder.go +++ b/pkg/sql/flowinfra/stream_encoder.go @@ -105,9 +105,9 @@ func (se *StreamEncoder) AddRow(row sqlbase.EncDatumRow) error { if !ok { enc = PreferredEncoding } - sType := se.infos[i].Type.Family() + sType := &se.infos[i].Type if enc != 
sqlbase.DatumEncoding_VALUE && - (sqlbase.HasCompositeKeyEncoding(sType) || sqlbase.MustBeValueEncoded(sType)) { + (sqlbase.HasCompositeKeyEncoding(sType.Family()) || sqlbase.MustBeValueEncoded(sType)) { // Force VALUE encoding for composite types (key encodings may lose data). enc = sqlbase.DatumEncoding_VALUE } diff --git a/pkg/sql/logictest/testdata/logic_test/alter_table b/pkg/sql/logictest/testdata/logic_test/alter_table index ea58e856812f..0b6ad7ee777b 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_table +++ b/pkg/sql/logictest/testdata/logic_test/alter_table @@ -801,25 +801,6 @@ decomputed_column CREATE TABLE decomputed_column ( statement ok CREATE TABLE b26483() -statement error unimplemented: column c is of type int\[\] and thus is not indexable -ALTER TABLE b26483 ADD COLUMN c INT[] UNIQUE - -# As above, but performed in a transaction -statement ok -BEGIN - -statement ok -CREATE TABLE b26483_tx() - -statement ok -ALTER TABLE b26483_tx ADD COLUMN c INT[] - -statement error unimplemented: column c is of type int\[\] and thus is not indexable -CREATE INDEX on b26483_tx (c) - -statement ok -ROLLBACK - # Verify that auditing can be enabled by root, and cannot be disabled by non-root. statement ok diff --git a/pkg/sql/logictest/testdata/logic_test/array b/pkg/sql/logictest/testdata/logic_test/array index faa361a25188..f7e5b1ce32ea 100644 --- a/pkg/sql/logictest/testdata/logic_test/array +++ b/pkg/sql/logictest/testdata/logic_test/array @@ -429,17 +429,6 @@ SELECT ARRAY[ARRAY[1,2,3]] query error VECTOR column types are unsupported CREATE TABLE badtable (b INT2VECTOR) -# Using an array as a primary key should be disallowed. #17154 - -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE badtable (b INT[] PRIMARY KEY) - -# Indexing an array column should be disallowed. 
#17154 - -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE a (b INT[] UNIQUE) - - # Regression test for #18745 statement ok @@ -449,18 +438,6 @@ query T SELECT ARRAY[ROW()] FROM ident ---- -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE a ( - b INT[], - CONSTRAINT c UNIQUE (b) -) - -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE a ( - b INT[], - INDEX c (b) -) - statement ok CREATE TABLE a (b INT ARRAY) @@ -475,18 +452,6 @@ a CREATE TABLE a ( statement ok DROP TABLE a -statement ok -CREATE TABLE a (b INT[], c INT[]) - -statement error column b is of type int\[\] and thus is not indexable -CREATE INDEX idx ON a (b) - -statement error the following columns are not indexable due to their type: b \(type int\[\]\), c \(type int\[\]\) -CREATE INDEX idx ON a (b, c) - -statement ok -DROP TABLE a - # Int array columns. statement ok @@ -1343,3 +1308,129 @@ SELECT x, y FROM t WHERE x < y query TT SELECT x, y FROM t WHERE x > y ---- + +query TT +SELECT x, y FROM t ORDER BY (x, y) +---- +{1} {1,2} +{1,1,1,1} {2} + +subtest array_indexes + +# Create indexes on arrays. +statement ok +DROP TABLE IF EXISTS t; +CREATE TABLE t (x INT[] PRIMARY KEY) + +statement ok +INSERT INTO t VALUES + (ARRAY[1]), + (ARRAY[5]), + (ARRAY[4]), + (ARRAY[1,4,5]), + (ARRAY[1,4,6]), + (ARRAY[1,NULL,10]), + (ARRAY[NULL]), + (ARRAY[NULL, NULL, NULL]) + +# Test that the unique index rejects bad inserts. +statement error pq: duplicate key value \(x\)=\(ARRAY\[1,NULL,10\]\) violates unique constraint "primary" +INSERT INTO t VALUES (ARRAY[1, NULL, 10]) + +# Use the index for point lookups. +query T +SELECT x FROM t WHERE x = ARRAY[1,4,6] +---- +{1,4,6} + +# Use the index for bounded scans. +# Note that nulls sort first in CockroachDB, so this ordering is different +# than what postgres will output. In postgres, NULLs in arrays are treated +# as larger than other elements, while we treat them as less. 
+# TODO (rohany): We have always done this for array comparisons, so I think +# it would be a breaking change + opposite with our other null behavior to +# change it suddenly... +query T +SELECT x FROM t WHERE x < ARRAY[1, 4, 3] ORDER BY x +---- +{NULL} +{NULL,NULL,NULL} +{1} +{1,NULL,10} + +query T +SELECT x FROM t WHERE x > ARRAY [1, NULL] ORDER BY x DESC +---- +{5} +{4} +{1,4,6} +{1,4,5} +{1,NULL,10} + +query T +SELECT x FROM t WHERE x > ARRAY[1, 3] AND x < ARRAY[1, 4, 10] ORDER BY x +---- +{1,4,5} +{1,4,6} + +# Test some operations on a descending index. +statement ok +CREATE INDEX i ON t(x DESC) + +query T +SELECT x FROM t@i WHERE x <= ARRAY[1] ORDER BY x DESC +---- +{1} +{NULL,NULL,NULL} +{NULL} + +query T +SELECT x FROM t@i WHERE x > ARRAY[1] ORDER BY x +---- +{1,NULL,10} +{1,4,5} +{1,4,6} +{4} +{5} + +# Ensure that we can order by the arrays without any indexes. +statement ok +DROP TABLE t; +CREATE TABLE t (x INT[]); +INSERT INTO t VALUES + (ARRAY[1]), + (ARRAY[5]), + (ARRAY[4]), + (ARRAY[1,4,5]), + (ARRAY[1,4,6]), + (ARRAY[1,NULL,10]), + (ARRAY[NULL]), + (ARRAY[NULL, NULL, NULL]) + +query T +SELECT x FROM t ORDER BY x +---- +{NULL} +{NULL,NULL,NULL} +{1} +{1,NULL,10} +{1,4,5} +{1,4,6} +{4} +{5} + +query T +SELECT x FROM t ORDER BY x DESC +---- +{5} +{4} +{1,4,6} +{1,4,5} +{1,NULL,10} +{1} +{NULL,NULL,NULL} +{NULL} + +# Create an index on a bad type.
+statement error pq: unimplemented: column x is of type geography\[\] and thus is not indexable +CREATE TABLE tbad (x GEOGRAPHY[] PRIMARY KEY) diff --git a/pkg/sql/logictest/testdata/logic_test/order_by b/pkg/sql/logictest/testdata/logic_test/order_by index 10dda90f2265..0a30a48cc47f 100644 --- a/pkg/sql/logictest/testdata/logic_test/order_by +++ b/pkg/sql/logictest/testdata/logic_test/order_by @@ -187,18 +187,6 @@ SELECT * FROM t ORDER BY foo query error no data source matches prefix: a SELECT a FROM t ORDER BY a.b -query error can't order by column type int\[\] -SELECT generate_series FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] - -query error can't order by column type int\[\] -SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] - -query error can't order by column type int\[\] -SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY 1 - -query error can't order by column type int\[\] -SELECT ARRAY[generate_series] AS a FROM generate_series(1, 100) ORDER BY a - query IT SELECT generate_series, ARRAY[generate_series] FROM generate_series(1, 1) ORDER BY 1 ---- diff --git a/pkg/sql/opt/optbuilder/orderby.go b/pkg/sql/opt/optbuilder/orderby.go index 2058110d0574..e2775cea4fc0 100644 --- a/pkg/sql/opt/optbuilder/orderby.go +++ b/pkg/sql/opt/optbuilder/orderby.go @@ -258,10 +258,8 @@ func (b *Builder) analyzeExtraArgument( func ensureColumnOrderable(e tree.TypedExpr) { typ := e.ResolvedType() - if typ.Family() == types.ArrayFamily { - panic(unimplementedWithIssueDetailf(35707, "", "can't order by column type %s", typ)) - } - if typ.Family() == types.JsonFamily { + if typ.Family() == types.JsonFamily || + (typ.Family() == types.ArrayFamily && typ.ArrayContents().Family() == types.JsonFamily) { panic(unimplementedWithIssueDetailf(35706, "", "can't order by column type jsonb")) } } diff --git a/pkg/sql/opt/optbuilder/testdata/orderby b/pkg/sql/opt/optbuilder/testdata/orderby index 
235e0e24c958..5940a1dd51c2 100644 --- a/pkg/sql/opt/optbuilder/testdata/orderby +++ b/pkg/sql/opt/optbuilder/testdata/orderby @@ -391,22 +391,70 @@ error (42P01): no data source matches prefix: a build SELECT generate_series FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: generate_series:1 [hidden: column2:2] + ├── ordering: +2 + └── project + ├── columns: column2:2 generate_series:1 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=column2:2] build SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: array:2 + ├── ordering: +2 + └── project + ├── columns: array:2 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=array:2] build SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY 1 ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: array:2 + ├── ordering: +2 + └── project + ├── columns: array:2 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=array:2] build SELECT ARRAY[generate_series] AS a FROM generate_series(1, 100) ORDER BY a ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: a:2 + ├── ordering: +2 + └── project + ├── columns: a:2 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=a:2] build SELECT generate_series, ARRAY[generate_series] 
FROM generate_series(1, 1) ORDER BY 1 @@ -988,4 +1036,12 @@ project build SELECT ARRAY[a] FROM abcd ORDER BY 1 ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: array:5!null + ├── ordering: +5 + └── project + ├── columns: array:5!null + ├── scan abcd + │ └── columns: a:1!null b:2 c:3 d:4 + └── projections + └── ARRAY[a:1] [as=array:5] diff --git a/pkg/sql/sqlbase/column_type_encoding.go b/pkg/sql/sqlbase/column_type_encoding.go index 65bde1c29a4c..63b632ba854f 100644 --- a/pkg/sql/sqlbase/column_type_encoding.go +++ b/pkg/sql/sqlbase/column_type_encoding.go @@ -143,6 +143,8 @@ func EncodeTableKey(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, e } } return b, nil + case *tree.DArray: + return encodeArrayKey(b, t, dir) case *tree.DCollatedString: if dir == encoding.Ascending { return encoding.EncodeBytesAscending(b, t.Key), nil @@ -187,6 +189,8 @@ func DecodeTableKey( var err error switch valType.Family() { + case types.ArrayFamily: + return decodeArrayKey(a, valType, key, dir) case types.BitFamily: var r bitarray.BitArray if dir == encoding.Ascending { @@ -918,6 +922,67 @@ func decodeTuple(a *DatumAlloc, tupTyp *types.T, b []byte) (tree.Datum, []byte, return a.NewDTuple(result), b, nil } +// encodeArrayKey generates an ordered key encoding of an array. +// The encoding format for an array [a, b] is as follows: +// [arrayMarker, elementMarker, enc(a), elementMarker, enc(b), terminator] +// We include these element markers to handle the following case. +// x, y = ARRAY[1], ARRAY[1, 2]. x is less than y, so we need the shorter +// array to sort first. The terminator is chosen to be less than the +// elementMarker so that this result is achieved. Note that we cannot +// include the encoding length or number of elements in the array at the +// front of the encoding, as this would affect the sorting of differently +// sized arrays.
The effect of this is that peeking/skipping over the +// encoded array is not as efficient as it could be. +func encodeArrayKey(b []byte, array *tree.DArray, dir encoding.Direction) ([]byte, error) { + var err error + b = encoding.EncodeArrayKeyMarker(b, dir) + for _, elem := range array.Array { + b = encoding.EncodeArrayKeyElementMarker(b, dir) + b, err = EncodeTableKey(b, elem, dir) + if err != nil { + return nil, err + } + } + b = encoding.EncodeArrayKeyTerminator(b, dir) + return b, nil +} + +// decodeArrayKey decodes an array key generated by encodeArrayKey. +func decodeArrayKey( + a *DatumAlloc, t *types.T, buf []byte, dir encoding.Direction, +) (tree.Datum, []byte, error) { + var err error + buf, err = encoding.ValidateAndConsumeArrayKeyMarker(buf, dir) + if err != nil { + return nil, nil, err + } + + result := tree.NewDArray(t.ArrayContents()) + + for { + if len(buf) == 0 { + return nil, nil, errors.AssertionFailedf("invalid array encoding (unterminated)") + } + var more bool + buf, more, err = encoding.ConsumeNextArrayElementMarker(buf, dir) + if err != nil { + return nil, nil, err + } + if !more { + break + } + var d tree.Datum + d, buf, err = DecodeTableKey(a, t.ArrayContents(), buf, dir) + if err != nil { + return nil, nil, err + } + if err := result.Append(d); err != nil { + return nil, nil, err + } + } + return result, buf, nil +} + // encodeArray produces the value encoding for an array. 
func encodeArray(d *tree.DArray, scratch []byte) ([]byte, error) { if err := d.Validate(); err != nil { diff --git a/pkg/sql/sqlbase/column_type_encoding_test.go b/pkg/sql/sqlbase/column_type_encoding_test.go index ea11f8064ed7..63cba669795b 100644 --- a/pkg/sql/sqlbase/column_type_encoding_test.go +++ b/pkg/sql/sqlbase/column_type_encoding_test.go @@ -34,6 +34,13 @@ func genColumnType() gopter.Gen { } } +func genRandomArrayType() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + arrType := RandArrayType(genParams.Rng) + return gopter.NewGenResult(arrType, gopter.NoShrinker) + } +} + func genDatum() gopter.Gen { return func(genParams *gopter.GenParameters) *gopter.GenResult { return gopter.NewGenResult(RandDatum(genParams.Rng, RandColumnType(genParams.Rng), @@ -48,6 +55,14 @@ func genDatumWithType(columnType interface{}) gopter.Gen { } } +func genArrayDatumWithType(arrTyp interface{}) gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + // Mark the array contents to have a 1 in 10 chance of being null. + datum := RandArray(genParams.Rng, arrTyp.(*types.T), 10) + return gopter.NewGenResult(datum, gopter.NoShrinker) + } +} + func genEncodingDirection() gopter.Gen { return func(genParams *gopter.GenParameters) *gopter.GenResult { return gopter.NewGenResult( @@ -59,9 +74,11 @@ func genEncodingDirection() gopter.Gen { func hasKeyEncoding(typ *types.T) bool { // Only some types are round-trip key encodable. 
switch typ.Family() { - case types.JsonFamily, types.ArrayFamily, types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily, + case types.JsonFamily, types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily, types.GeographyFamily, types.GeometryFamily: return false + case types.ArrayFamily: + return hasKeyEncoding(typ.ArrayContents()) } return true } @@ -102,61 +119,75 @@ func TestEncodeTableKey(t *testing.T) { parameters := gopter.DefaultTestParameters() parameters.MinSuccessfulTests = 10000 properties := gopter.NewProperties(parameters) + roundtripDatum := func(d tree.Datum, dir encoding.Direction) string { + b, err := EncodeTableKey(nil, d, dir) + if err != nil { + return "error: " + err.Error() + } + newD, leftoverBytes, err := DecodeTableKey(a, d.ResolvedType(), b, dir) + if len(leftoverBytes) > 0 { + return "Leftover bytes" + } + if err != nil { + return "error: " + err.Error() + } + if newD.Compare(ctx, d) != 0 { + return "unequal" + } + return "" + } properties.Property("roundtrip", prop.ForAll( - func(d tree.Datum, dir encoding.Direction) string { - b, err := EncodeTableKey(nil, d, dir) - if err != nil { - return "error: " + err.Error() - } - newD, leftoverBytes, err := DecodeTableKey(a, d.ResolvedType(), b, dir) - if len(leftoverBytes) > 0 { - return "Leftover bytes" - } - if err != nil { - return "error: " + err.Error() - } - if newD.Compare(ctx, d) != 0 { - return "unequal" - } - return "" - }, + roundtripDatum, genColumnType(). SuchThat(hasKeyEncoding). 
FlatMap(genDatumWithType, reflect.TypeOf((*tree.Datum)(nil)).Elem()), genEncodingDirection(), )) - properties.Property("order-preserving", prop.ForAll( - func(datums []tree.Datum, dir encoding.Direction) string { - d1 := datums[0] - d2 := datums[1] - b1, err := EncodeTableKey(nil, d1, dir) - if err != nil { - return "error: " + err.Error() - } - b2, err := EncodeTableKey(nil, d2, dir) - if err != nil { - return "error: " + err.Error() - } - expectedCmp := d1.Compare(ctx, d2) - cmp := bytes.Compare(b1, b2) + // Also run the property on arrays possibly containing NULL values. + // The random generator in the property above does not generate NULLs. + properties.Property("roundtrip-arrays", prop.ForAll( + roundtripDatum, + genRandomArrayType(). + SuchThat(hasKeyEncoding). + FlatMap(genArrayDatumWithType, reflect.TypeOf((*tree.Datum)(nil)).Elem()), + genEncodingDirection(), + )) - if expectedCmp == 0 { - if cmp != 0 { - return fmt.Sprintf("equal inputs produced inequal outputs: \n%v\n%v", b1, b2) - } - // If the inputs are equal and so are the outputs, no more checking to do. - return "" - } + generateAndCompareDatums := func(datums []tree.Datum, dir encoding.Direction) string { + d1 := datums[0] + d2 := datums[1] + b1, err := EncodeTableKey(nil, d1, dir) + if err != nil { + return "error: " + err.Error() + } + b2, err := EncodeTableKey(nil, d2, dir) + if err != nil { + return "error: " + err.Error() + } - cmpsMatch := expectedCmp == cmp - dirIsAscending := dir == encoding.Ascending + expectedCmp := d1.Compare(ctx, d2) + cmp := bytes.Compare(b1, b2) - if cmpsMatch != dirIsAscending { - return fmt.Sprintf("non-order preserving encoding: \n%v\n%v", b1, b2) + if expectedCmp == 0 { + if cmp != 0 { + return fmt.Sprintf("equal inputs produced inequal outputs: \n%v\n%v", b1, b2) } + // If the inputs are equal and so are the outputs, no more checking to do. 
return "" - }, + } + + cmpsMatch := expectedCmp == cmp + dirIsAscending := dir == encoding.Ascending + + if cmpsMatch != dirIsAscending { + return fmt.Sprintf("non-order preserving encoding: \n%v\n%v", b1, b2) + } + return "" + } + + properties.Property("order-preserving", prop.ForAll( + generateAndCompareDatums, // For each column type, generate two datums of that type. genColumnType(). SuchThat(hasKeyEncoding). @@ -176,6 +207,31 @@ func TestEncodeTableKey(t *testing.T) { }), genEncodingDirection(), )) + + // Also run the property on arrays possibly containing NULL values. + // The random generator in the property above does not generate NULLs. + properties.Property("order-preserving-arrays", prop.ForAll( + generateAndCompareDatums, + // For each column type, generate two datums of that type. + genRandomArrayType(). + SuchThat(hasKeyEncoding). + FlatMap( + func(t interface{}) gopter.Gen { + colTyp := t.(*types.T) + return gopter.CombineGens( + genArrayDatumWithType(colTyp), + genArrayDatumWithType(colTyp)) + }, reflect.TypeOf([]interface{}{})). + Map(func(datums []interface{}) []tree.Datum { + ret := make([]tree.Datum, len(datums)) + for i, d := range datums { + ret[i] = d.(tree.Datum) + } + return ret + }), + genEncodingDirection(), + )) + properties.TestingRun(t) } diff --git a/pkg/sql/sqlbase/structured.go b/pkg/sql/sqlbase/structured.go index e792bd65e7bf..0cb1a97aaae9 100644 --- a/pkg/sql/sqlbase/structured.go +++ b/pkg/sql/sqlbase/structured.go @@ -1299,12 +1299,14 @@ func DatumTypeHasCompositeKeyEncoding(typ *types.T) bool { // MustBeValueEncoded returns true if columns of the given kind can only be value // encoded. 
-func MustBeValueEncoded(semanticType types.Family) bool { - return semanticType == types.ArrayFamily || - semanticType == types.JsonFamily || - semanticType == types.TupleFamily || - semanticType == types.GeometryFamily || - semanticType == types.GeographyFamily +func MustBeValueEncoded(semanticType *types.T) bool { + switch semanticType.Family() { + case types.ArrayFamily: + return MustBeValueEncoded(semanticType.ArrayContents()) + case types.JsonFamily, types.TupleFamily, types.GeographyFamily, types.GeometryFamily: + return true + } + return false } // HasOldStoredColumns returns whether the index has stored columns in the old @@ -2273,7 +2275,7 @@ func fitColumnToFamily(desc *MutableTableDescriptor, col ColumnDescriptor) (int, // ColumnTypeIsIndexable returns whether the type t is valid as an indexed column. func ColumnTypeIsIndexable(t *types.T) bool { - return !MustBeValueEncoded(t.Family()) + return !MustBeValueEncoded(t) } // ColumnTypeIsInvertedIndexable returns whether the type t is valid to be indexed diff --git a/pkg/sql/sqlbase/testutils.go b/pkg/sql/sqlbase/testutils.go index 5dbdc6ec0dea..3e84d967ea69 100644 --- a/pkg/sql/sqlbase/testutils.go +++ b/pkg/sql/sqlbase/testutils.go @@ -265,17 +265,7 @@ func RandDatumWithNullChance(rng *rand.Rand, typ *types.T, nullChance int) tree. case types.UnknownFamily: return tree.DNull case types.ArrayFamily: - contents := typ.ArrayContents() - if contents.Family() == types.AnyFamily { - contents = RandArrayContentsType(rng) - } - arr := tree.NewDArray(contents) - for i := 0; i < rng.Intn(10); i++ { - if err := arr.Append(RandDatumWithNullChance(rng, contents, 0)); err != nil { - panic(err) - } - } - return arr + return RandArray(rng, typ, 0) case types.AnyFamily: return RandDatumWithNullChance(rng, RandType(rng), nullChance) default: @@ -283,6 +273,22 @@ func RandDatumWithNullChance(rng *rand.Rand, typ *types.T, nullChance int) tree. 
} } +// RandArray generates a random DArray where the contents have nullChance +// of being null. +func RandArray(rng *rand.Rand, typ *types.T, nullChance int) tree.Datum { + contents := typ.ArrayContents() + if contents.Family() == types.AnyFamily { + contents = RandArrayContentsType(rng) + } + arr := tree.NewDArray(contents) + for i := 0; i < rng.Intn(10); i++ { + if err := arr.Append(RandDatumWithNullChance(rng, contents, nullChance)); err != nil { + panic(err) + } + } + return arr +} + const simpleRange = 10 // RandDatumSimple generates a random Datum of the given type. The generated @@ -767,6 +773,16 @@ func RandColumnType(rng *rand.Rand) *types.T { } } +func RandArrayType(rng *rand.Rand) *types.T { + for { + typ := RandColumnType(rng) + resTyp := types.MakeArray(typ) + if err := ValidateColumnDefType(resTyp); err == nil { + return resTyp + } + } +} + // RandColumnTypes returns a slice of numCols random types. These types must be // legal table column types. func RandColumnTypes(rng *rand.Rand, numCols int) []types.T { @@ -780,7 +796,7 @@ func RandColumnTypes(rng *rand.Rand, numCols int) []types.T { // RandSortingType returns a column type which can be key-encoded. func RandSortingType(rng *rand.Rand) *types.T { typ := RandType(rng) - for MustBeValueEncoded(typ.Family()) { + for MustBeValueEncoded(typ) { typ = RandType(rng) } return typ @@ -1325,7 +1341,7 @@ func randIndexTableDefFromCols( indexElemList := make(tree.IndexElemList, 0, len(cols)) for i := range cols { - semType := cols[i].Type.Family() + semType := cols[i].Type if MustBeValueEncoded(semType) { continue } diff --git a/pkg/util/encoding/encoding.go b/pkg/util/encoding/encoding.go index 232947d87792..145b755437da 100644 --- a/pkg/util/encoding/encoding.go +++ b/pkg/util/encoding/encoding.go @@ -83,6 +83,14 @@ const ( timeTZMarker = bitArrayDescMarker + 1 geoMarker = timeTZMarker + 1 + // Markers for key encoding Datum arrays in sorted order. 
+ arrayKeyMarker = geoMarker + 1 + arrayKeyElementMarker = arrayKeyMarker + 1 + arrayKeyDescendingMarker = arrayKeyElementMarker + 1 + arrayKeyDescendingElementMarker = arrayKeyDescendingMarker + 1 + arrayKeyTerminator = 0x00 + arrayKeyDescendingTerminator = 0xFF + // IntMin is chosen such that the range of int tags does not overlap the // ascii character set that is frequently used in testing. IntMin = 0x80 // 128 @@ -1277,6 +1285,8 @@ const ( BitArrayDesc Type = 18 // BitArray encoded descendingly TimeTZ Type = 19 Geo Type = 20 + ArrayKeyAsc Type = 21 // Array key encoding + ArrayKeyDesc Type = 22 // Array key encoded descendingly ) // typMap maps an encoded type byte to a decoded Type. It's got 256 slots, one @@ -1309,6 +1319,10 @@ func slowPeekType(b []byte) Type { return Null case m == encodedNotNull, m == encodedNotNullDesc: return NotNull + case m == arrayKeyMarker: + return ArrayKeyAsc + case m == arrayKeyDescendingMarker: + return ArrayKeyDesc case m == bytesMarker: return Bytes case m == bytesDescMarker: @@ -1369,6 +1383,36 @@ func getMultiNonsortingVarintLen(b []byte, num int) (int, error) { return p, nil } +// getArrayLength returns the length of a key encoded array. The input +// must have had the array type marker stripped from the front. +func getArrayLength(buf []byte, dir Direction) (int, error) { + result := 0 + for { + if len(buf) == 0 { + return 0, errors.AssertionFailedf("invalid array encoding (unterminated)") + } + var more bool + var err error + buf, more, err = ConsumeNextArrayElementMarker(buf, dir) + if err != nil { + return 0, err + } + // Increment to include the consumed marker byte. + result++ + if !more { + break + } + next, err := PeekLength(buf) + if err != nil { + return 0, err + } + // Shift buf over by the encoded data amount. + buf = buf[next:] + result += next + } + return result, nil +} + // PeekLength returns the length of the encoded value at the start of b. 
Note: // if this function succeeds, it's not a guarantee that decoding the value will // succeed. PeekLength is meant to be used on key encoded data only. @@ -1398,6 +1442,13 @@ func PeekLength(b []byte) (int, error) { return 1 + n + m + 1, err } return 1 + n + m + 1, nil + case arrayKeyMarker, arrayKeyDescendingMarker: + dir := Ascending + if m == arrayKeyDescendingMarker { + dir = Descending + } + length, err := getArrayLength(b[1:], dir) + return 1 + length, err case bytesMarker: return getBytesLength(b, ascendingEscapes) case jsonInvertedIndex: @@ -1510,7 +1561,7 @@ func prettyPrintValueImpl(valDirs []Direction, b []byte, sep string) (string, bo // even if we don't have directions for the child index's columns. func prettyPrintFirstValue(dir Direction, b []byte) ([]byte, string, error) { var err error - switch PeekType(b) { + switch typ := PeekType(b); typ { case Null: b, _ = DecodeIfNull(b) return b, "NULL", nil @@ -1520,6 +1571,48 @@ func prettyPrintFirstValue(dir Direction, b []byte) ([]byte, string, error) { return b[1:], "False", nil case Array: return b[1:], "Arr", nil + case ArrayKeyAsc, ArrayKeyDesc: + // TODO (rohany): I'm not sure what is the best way to remove this duplication + // with the standard decoding logic for array keys. Because the decoding depends + // on the sqlbase {Encode,Decode}TableKey functions, we can't use them here. + encDir := Ascending + if typ == ArrayKeyDesc { + encDir = Descending + } + var build strings.Builder + buf, err := ValidateAndConsumeArrayKeyMarker(b, encDir) + if err != nil { + return nil, "", err + } + build.WriteString("ARRAY[") + first := true + // Use the array key decoding logic, but instead of calling out + // to DecodeTableKey, just make a recursive call. 
+ for { + if len(buf) == 0 { + return nil, "", errors.AssertionFailedf("invalid array (unterminated)") + } + var more bool + buf, more, err = ConsumeNextArrayElementMarker(buf, encDir) + if err != nil { + return nil, "", err + } + if !more { + break + } + var next string + buf, next, err = prettyPrintFirstValue(dir, buf) + if err != nil { + return nil, "", err + } + if !first { + build.WriteString(",") + } + build.WriteString(next) + first = false + } + build.WriteString("]") + return buf, build.String(), nil case NotNull: // The tag can be either encodedNotNull or encodedNotNullDesc. The // latter can be an interleaved sentinel. @@ -2603,3 +2696,84 @@ func getJSONInvertedIndexKeyLength(buf []byte) (int, error) { return len + valLen, nil } } + +// EncodeArrayKeyMarker adds the array key encoding marker to buf and +// returns the new buffer. +func EncodeArrayKeyMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, arrayKeyMarker) + case Descending: + return append(buf, arrayKeyDescendingMarker) + default: + panic("invalid direction") + } +} + +// EncodeArrayKeyTerminator adds the array key terminator to buf and +// returns the new buffer. +func EncodeArrayKeyTerminator(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, arrayKeyTerminator) + case Descending: + return append(buf, arrayKeyDescendingTerminator) + default: + panic("invalid direction") + } +} + +// EncodeArrayKeyElementMarker adds the array key element marker to +// buf and returns the new buffer. +func EncodeArrayKeyElementMarker(buf []byte, dir Direction) []byte { + switch dir { + case Ascending: + return append(buf, arrayKeyElementMarker) + case Descending: + return append(buf, arrayKeyDescendingElementMarker) + default: + panic("invalid direction") + } +} + +// ValidateAndConsumeArrayKeyMarker checks that the marker at the front +// of buf is valid for an array of the given direction, and consumes it +// if so. 
It returns an error if the tag is invalid. +func ValidateAndConsumeArrayKeyMarker(buf []byte, dir Direction) ([]byte, error) { + typ := PeekType(buf) + expected := ArrayKeyAsc + if dir == Descending { + expected = ArrayKeyDesc + } + if typ != expected { + return nil, errors.Newf("invalid type found %s", typ) + } + return buf[1:], nil +} + +// ConsumeNextArrayElementMarker reads the first byte in buf and sees whether +// it is an element marker or a terminator. It consumes the first byte, and +// returns whether the array is finished (read a terminator), or there are +// elements to decode (read an element marker). +func ConsumeNextArrayElementMarker(buf []byte, dir Direction) ([]byte, bool, error) { + var elementMarker, terminator byte + switch dir { + case Ascending: + elementMarker, terminator = arrayKeyElementMarker, arrayKeyTerminator + case Descending: + elementMarker, terminator = arrayKeyDescendingElementMarker, arrayKeyDescendingTerminator + default: + panic("invalid direction") + } + if len(buf) == 0 { + return nil, false, errors.AssertionFailedf("invalid array encoding (unterminated)") + } + switch buf[0] { + case elementMarker: + return buf[1:], true, nil + case terminator: + return buf[1:], false, nil + default: + return nil, false, errors.AssertionFailedf("invalid array encoding (unknown marker)") + } +}