diff --git a/pkg/sql/flowinfra/stream_encoder.go b/pkg/sql/flowinfra/stream_encoder.go index 31c764793a06..b2a113f13780 100644 --- a/pkg/sql/flowinfra/stream_encoder.go +++ b/pkg/sql/flowinfra/stream_encoder.go @@ -105,7 +105,7 @@ func (se *StreamEncoder) AddRow(row sqlbase.EncDatumRow) error { if !ok { enc = PreferredEncoding } - sType := se.infos[i].Type.Family() + sType := &se.infos[i].Type if enc != sqlbase.DatumEncoding_VALUE && (sqlbase.HasCompositeKeyEncoding(sType) || sqlbase.MustBeValueEncoded(sType)) { // Force VALUE encoding for composite types (key encodings may lose data). diff --git a/pkg/sql/logictest/testdata/logic_test/alter_table b/pkg/sql/logictest/testdata/logic_test/alter_table index ea58e856812f..0b6ad7ee777b 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_table +++ b/pkg/sql/logictest/testdata/logic_test/alter_table @@ -801,25 +801,6 @@ decomputed_column CREATE TABLE decomputed_column ( statement ok CREATE TABLE b26483() -statement error unimplemented: column c is of type int\[\] and thus is not indexable -ALTER TABLE b26483 ADD COLUMN c INT[] UNIQUE - -# As above, but performed in a transaction -statement ok -BEGIN - -statement ok -CREATE TABLE b26483_tx() - -statement ok -ALTER TABLE b26483_tx ADD COLUMN c INT[] - -statement error unimplemented: column c is of type int\[\] and thus is not indexable -CREATE INDEX on b26483_tx (c) - -statement ok -ROLLBACK - # Verify that auditing can be enabled by root, and cannot be disabled by non-root. statement ok diff --git a/pkg/sql/logictest/testdata/logic_test/array b/pkg/sql/logictest/testdata/logic_test/array index faa361a25188..3c7b3b3e60b7 100644 --- a/pkg/sql/logictest/testdata/logic_test/array +++ b/pkg/sql/logictest/testdata/logic_test/array @@ -429,17 +429,6 @@ SELECT ARRAY[ARRAY[1,2,3]] query error VECTOR column types are unsupported CREATE TABLE badtable (b INT2VECTOR) -# Using an array as a primary key should be disallowed. #17154 - -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE badtable (b INT[] PRIMARY KEY) - -# Indexing an array column should be disallowed. #17154 - -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE a (b INT[] UNIQUE) - - # Regression test for #18745 statement ok @@ -449,18 +438,6 @@ query T SELECT ARRAY[ROW()] FROM ident ---- -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE a ( - b INT[], - CONSTRAINT c UNIQUE (b) -) - -statement error column b is of type int\[\] and thus is not indexable -CREATE TABLE a ( - b INT[], - INDEX c (b) -) - statement ok CREATE TABLE a (b INT ARRAY) @@ -475,18 +452,6 @@ a CREATE TABLE a ( statement ok DROP TABLE a -statement ok -CREATE TABLE a (b INT[], c INT[]) - -statement error column b is of type int\[\] and thus is not indexable -CREATE INDEX idx ON a (b) - -statement error the following columns are not indexable due to their type: b \(type int\[\]\), c \(type int\[\]\) -CREATE INDEX idx ON a (b, c) - -statement ok -DROP TABLE a - # Int array columns. statement ok @@ -1343,3 +1308,241 @@ SELECT x, y FROM t WHERE x < y query TT SELECT x, y FROM t WHERE x > y ---- + +query TT +SELECT x, y FROM t ORDER BY (x, y) +---- +{1} {1,2} +{1,1,1,1} {2} + +subtest array_indexes + +# Create indexes on arrays. 
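+# Arrays now have an order-preserving key encoding, so the #17154 cases
+# removed above no longer apply: arrays can appear in primary keys, unique
+# constraints, and secondary indexes.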
+statement ok
+DROP TABLE IF EXISTS t;
+CREATE TABLE t (x INT[] PRIMARY KEY)
+
+statement ok
+INSERT INTO t VALUES
+  (ARRAY[1]),
+  (ARRAY[5]),
+  (ARRAY[4]),
+  (ARRAY[1,4,5]),
+  (ARRAY[1,4,6]),
+  (ARRAY[1,NULL,10]),
+  (ARRAY[NULL]),
+  (ARRAY[NULL, NULL, NULL])
+
+# Test that the unique index rejects bad inserts.
+statement error pq: duplicate key value \(x\)=\(ARRAY\[1,NULL,10\]\) violates unique constraint "primary"
+INSERT INTO t VALUES (ARRAY[1, NULL, 10])
+
+query T
+SELECT x FROM t ORDER BY x
+----
+{NULL}
+{NULL,NULL,NULL}
+{1}
+{1,NULL,10}
+{1,4,5}
+{1,4,6}
+{4}
+{5}
+
+# Use the index for point lookups.
+query T
+SELECT x FROM t WHERE x = ARRAY[1,4,6]
+----
+{1,4,6}
+
+# Use the index for bounded scans.
+# Note that NULLs sort first in CockroachDB, so this ordering is different
+# from what Postgres outputs. In Postgres, NULLs in arrays are treated as
+# larger than other elements, while we treat them as smaller.
+# TODO (rohany): We have always done this for array comparisons, so I think
+# changing it now would be both a breaking change and inconsistent with our
+# NULL ordering behavior elsewhere.
+query T
+SELECT x FROM t WHERE x < ARRAY[1, 4, 3] ORDER BY x
+----
+{NULL}
+{NULL,NULL,NULL}
+{1}
+{1,NULL,10}
+
+query T
+SELECT x FROM t WHERE x > ARRAY [1, NULL] ORDER BY x DESC
+----
+{5}
+{4}
+{1,4,6}
+{1,4,5}
+{1,NULL,10}
+
+query T
+SELECT x FROM t WHERE x > ARRAY[1, 3] AND x < ARRAY[1, 4, 10] ORDER BY x
+----
+{1,4,5}
+{1,4,6}
+
+query T
+SELECT x FROM t WHERE x > ARRAY[NULL, NULL]:::INT[] ORDER BY x
+----
+{NULL,NULL,NULL}
+{1}
+{1,NULL,10}
+{1,4,5}
+{1,4,6}
+{4}
+{5}
+
+# Test some operations on a descending index.
+statement ok
+CREATE INDEX i ON t(x DESC)
+
+query T
+SELECT x FROM t@i WHERE x <= ARRAY[1] ORDER BY x DESC
+----
+{1}
+{NULL,NULL,NULL}
+{NULL}
+
+query T
+SELECT x FROM t@i WHERE x > ARRAY[1] ORDER BY x
+----
+{1,NULL,10}
+{1,4,5}
+{1,4,6}
+{4}
+{5}
+
+# Ensure that we can order by arrays without any indexes.
+statement ok
+DROP TABLE t;
+CREATE TABLE t (x INT[]);
+INSERT INTO t VALUES
+  (ARRAY[1]),
+  (ARRAY[5]),
+  (ARRAY[4]),
+  (ARRAY[1,4,5]),
+  (ARRAY[1,4,6]),
+  (ARRAY[1,NULL,10]),
+  (ARRAY[NULL]),
+  (ARRAY[NULL, NULL, NULL])
+
+query T
+SELECT x FROM t ORDER BY x
+----
+{NULL}
+{NULL,NULL,NULL}
+{1}
+{1,NULL,10}
+{1,4,5}
+{1,4,6}
+{4}
+{5}
+
+query T
+SELECT x FROM t ORDER BY x DESC
+----
+{5}
+{4}
+{1,4,6}
+{1,4,5}
+{1,NULL,10}
+{1}
+{NULL,NULL,NULL}
+{NULL}
+
+statement ok
+CREATE INDEX i ON t (x);
+INSERT INTO t VALUES (NULL), (NULL)
+
+# Test that NULLs are differentiated from {NULL}.
+query T
+SELECT x FROM t@i WHERE x IS NOT NULL ORDER BY x
+----
+{NULL}
+{NULL,NULL,NULL}
+{1}
+{1,NULL,10}
+{1,4,5}
+{1,4,6}
+{4}
+{5}
+
+# Create an index on a bad type.
+statement error pq: unimplemented: column x is of type geography\[\] and thus is not indexable
+CREATE TABLE tbad (x GEOGRAPHY[] PRIMARY KEY)
+
+# Test arrays of composite types.
+statement ok
+CREATE TABLE tarray(x DECIMAL[] PRIMARY KEY);
+INSERT INTO tarray VALUES (ARRAY[1.00]), (ARRAY[1.501])
+
+# Ensure these are round-tripped correctly.
+query T
+SELECT x FROM tarray ORDER BY x
+----
+{1.00}
+{1.501}
+
+# Test indexes on multiple columns with arrays.
+statement ok +DROP TABLE t; +CREATE TABLE t (x INT, y INT[], z INT, INDEX i (x, y, z)); +INSERT INTO t VALUES + (1, ARRAY[1, 2, 3], 3), + (NULL, ARRAY[1, NULL, 3], NULL), + (2, ARRAY[NULL, NULL, NULL], NULL), + (NULL, ARRAY[NULL, NULL], 3), + (2, ARRAY[4, 5], 7) + +query ITI +SELECT x, y, z FROM t WHERE x IS NOT NULL AND y > ARRAY[1] ORDER BY z +---- + 1 {1,2,3} 3 + 2 {4,5} 7 + +query ITI +SELECT x, y, z FROM t WHERE x = 2 AND y < ARRAY[10] ORDER BY y +---- +2 {NULL,NULL,NULL} NULL +2 {4,5} 7 + +# Test that interleaving an array index doesn't lead to problems. +statement ok +DROP TABLE IF EXISTS parent, child CASCADE; +CREATE TABLE parent (x INT, y INT[], PRIMARY KEY (x, y DESC)); +CREATE TABLE child (x INT, y INT[], z INT[], PRIMARY KEY (x, y DESC, z)) INTERLEAVE IN PARENT parent (x, y); +INSERT INTO parent VALUES + (1, ARRAY[1, 2, 3]), + (1, ARRAY[1, NULL]), + (2, ARRAY[NULL]), + (3, ARRAY[NULL, 1, NULL]); +INSERT INTO child VALUES + (1, ARRAY[1, 2, 3], ARRAY[4]), + (1, ARRAY[1, 2, 3, 4], ARRAY[5]), + (1, ARRAY[1, NULL], ARRAY[5]), + (1, ARRAY[1, NULL, NULL], ARRAY[10]), + (2, ARRAY[NULL], ARRAY[1]), + (3, ARRAY[NULL, 1, NULL], ARRAY[3]); + +# Ensure scans on the parent and child aren't affected. +query IT +SELECT x, y FROM parent ORDER BY x, y DESC +---- +1 {1,2,3} +1 {1,NULL} +2 {NULL} +3 {NULL,1,NULL} + +query ITT +SELECT x, y, z FROM child ORDER BY x, y DESC, z +---- +1 {1,2,3,4} {5} +1 {1,2,3} {4} +1 {1,NULL,NULL} {10} +1 {1,NULL} {5} +2 {NULL} {1} +3 {NULL,1,NULL} {3} diff --git a/pkg/sql/logictest/testdata/logic_test/order_by b/pkg/sql/logictest/testdata/logic_test/order_by index 10dda90f2265..0a30a48cc47f 100644 --- a/pkg/sql/logictest/testdata/logic_test/order_by +++ b/pkg/sql/logictest/testdata/logic_test/order_by @@ -187,18 +187,6 @@ SELECT * FROM t ORDER BY foo query error no data source matches prefix: a SELECT a FROM t ORDER BY a.b -query error can't order by column type int\[\] -SELECT generate_series FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] - -query error can't order by column type int\[\] -SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] - -query error can't order by column type int\[\] -SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY 1 - -query error can't order by column type int\[\] -SELECT ARRAY[generate_series] AS a FROM generate_series(1, 100) ORDER BY a - query IT SELECT generate_series, ARRAY[generate_series] FROM generate_series(1, 1) ORDER BY 1 ---- diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index 93bbfc575d06..04088a049537 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -578,3 +578,17 @@ lookup-join · · └── scan · · () · · table e@e_b_idx · · · fixedvals 1 column · · + +# Ensure that an inverted index with a composite primary key still encodes +# the primary key data in the composite value. 
+statement ok +DROP TABLE IF EXISTS t; +CREATE TABLE t (x DECIMAL PRIMARY KEY, y int[], FAMILY (x, y)); +CREATE INVERTED INDEX ON t(y) + +query T kvtrace +INSERT INTO t VALUES (1.00, ARRAY[1,2]) +---- +CPut /Table/56/1/1/0 -> /TUPLE/1:1:Decimal/1.00/ +InitPut /Table/56/2/1/1/0 -> /BYTES/0x1503348964 +InitPut /Table/56/2/2/1/0 -> /BYTES/0x1503348964 diff --git a/pkg/sql/opt/optbuilder/orderby.go b/pkg/sql/opt/optbuilder/orderby.go index 2058110d0574..e2775cea4fc0 100644 --- a/pkg/sql/opt/optbuilder/orderby.go +++ b/pkg/sql/opt/optbuilder/orderby.go @@ -258,10 +258,8 @@ func (b *Builder) analyzeExtraArgument( func ensureColumnOrderable(e tree.TypedExpr) { typ := e.ResolvedType() - if typ.Family() == types.ArrayFamily { - panic(unimplementedWithIssueDetailf(35707, "", "can't order by column type %s", typ)) - } - if typ.Family() == types.JsonFamily { + if typ.Family() == types.JsonFamily || + (typ.Family() == types.ArrayFamily && typ.ArrayContents().Family() == types.JsonFamily) { panic(unimplementedWithIssueDetailf(35706, "", "can't order by column type jsonb")) } } diff --git a/pkg/sql/opt/optbuilder/testdata/orderby b/pkg/sql/opt/optbuilder/testdata/orderby index 235e0e24c958..5940a1dd51c2 100644 --- a/pkg/sql/opt/optbuilder/testdata/orderby +++ b/pkg/sql/opt/optbuilder/testdata/orderby @@ -391,22 +391,70 @@ error (42P01): no data source matches prefix: a build SELECT generate_series FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: generate_series:1 [hidden: column2:2] + ├── ordering: +2 + └── project + ├── columns: column2:2 generate_series:1 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=column2:2] build SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY ARRAY[generate_series] ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: array:2 + ├── ordering: +2 + └── project + ├── columns: array:2 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=array:2] build SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY 1 ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: array:2 + ├── ordering: +2 + └── project + ├── columns: array:2 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=array:2] build SELECT ARRAY[generate_series] AS a FROM generate_series(1, 100) ORDER BY a ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: a:2 + ├── ordering: +2 + └── project + ├── columns: a:2 + ├── project-set + │ ├── columns: generate_series:1 + │ ├── values + │ │ └── () + │ └── zip + │ └── generate_series(1, 100) + └── projections + └── ARRAY[generate_series:1] [as=a:2] build SELECT generate_series, ARRAY[generate_series] FROM generate_series(1, 1) ORDER BY 1 @@ -988,4 +1036,12 @@ project build SELECT ARRAY[a] FROM abcd ORDER BY 1 ---- -error (0A000): unimplemented: can't order by column type int[] +sort + ├── columns: array:5!null + ├── ordering: +5 + └── project + ├── columns: array:5!null + ├── scan abcd + │ └── columns: a:1!null b:2 c:3 d:4 + └── projections + └── ARRAY[a:1] [as=array:5] 
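
The kvtrace expectations above show each inverted-index entry carrying the
primary key's composite value (the /BYTES/... payloads). As a minimal sketch of
why DECIMAL (and therefore DECIMAL[]) needs this composite treatment, consider
the apd decimal library CockroachDB uses: two values can compare equal, so
their order-preserving key bytes coincide, yet they still print differently,
which is why the original datum must also be recoverable from the value
encoding. (Illustrative example, not part of the patch.)

package main

import (
	"fmt"

	"github.com/cockroachdb/apd"
)

func main() {
	d1 := apd.New(100, -2) // 1.00
	d2 := apd.New(1, 0)    // 1
	// Equal under comparison, so a purely order-based key encoding
	// cannot distinguish them...
	fmt.Println(d1.Cmp(d2)) // 0
	// ...yet they render differently, so the exact datum must also be
	// stored value-encoded.
	fmt.Println(d1.String(), d2.String()) // 1.00 1
}
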
diff --git a/pkg/sql/rowcontainer/disk_row_container.go b/pkg/sql/rowcontainer/disk_row_container.go
index e7d3a3e3d920..21b39641d9fc 100644
--- a/pkg/sql/rowcontainer/disk_row_container.go
+++ b/pkg/sql/rowcontainer/disk_row_container.go
@@ -114,7 +114,7 @@ func MakeDiskRowContainer(
 	// returns true may not necessarily need to be encoded in the value, so
 	// make this more fine-grained. See IsComposite() methods in
 	// pkg/sql/parser/datum.go.
-	if _, ok := orderingIdxs[i]; !ok || sqlbase.HasCompositeKeyEncoding(d.types[i].Family()) {
+	if _, ok := orderingIdxs[i]; !ok || sqlbase.HasCompositeKeyEncoding(&d.types[i]) {
 		d.valueIdxs = append(d.valueIdxs, i)
 	}
 }
@@ -244,7 +244,7 @@ func (d *DiskRowContainer) Close(ctx context.Context) {
 func (d *DiskRowContainer) keyValToRow(k []byte, v []byte) (sqlbase.EncDatumRow, error) {
 	for i, orderInfo := range d.ordering {
 		// Types with composite key encodings are decoded from the value.
-		if sqlbase.HasCompositeKeyEncoding(d.types[orderInfo.ColIdx].Family()) {
+		if sqlbase.HasCompositeKeyEncoding(&d.types[orderInfo.ColIdx]) {
 			// Skip over the encoded key.
 			encLen, err := encoding.PeekLength(k)
 			if err != nil {
diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go
index 7b60bbb76b39..6eb5827bf1d3 100644
--- a/pkg/sql/sem/tree/datum.go
+++ b/pkg/sql/sem/tree/datum.go
@@ -3517,6 +3517,16 @@ func (d *DArray) ResolvedType() *types.T {
 	return types.MakeArray(d.ParamTyp)
 }
 
+// IsComposite implements the CompositeDatum interface.
+func (d *DArray) IsComposite() bool {
+	for _, elem := range d.Array {
+		if cdatum, ok := elem.(CompositeDatum); ok && cdatum.IsComposite() {
+			return true
+		}
+	}
+	return false
+}
+
 // FirstIndex returns the first index of the array. 1 for normal SQL arrays,
 // which are 1-indexed, and 0 for the special Postgres vector types which are
 // 0-indexed.
diff --git a/pkg/sql/sqlbase/column_type_encoding.go b/pkg/sql/sqlbase/column_type_encoding.go
index 65bde1c29a4c..3886667616f0 100644
--- a/pkg/sql/sqlbase/column_type_encoding.go
+++ b/pkg/sql/sqlbase/column_type_encoding.go
@@ -143,6 +143,8 @@ func EncodeTableKey(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, error) {
 			}
 		}
 		return b, nil
+	case *tree.DArray:
+		return encodeArrayKey(b, t, dir)
 	case *tree.DCollatedString:
 		if dir == encoding.Ascending {
 			return encoding.EncodeBytesAscending(b, t.Key), nil
@@ -187,6 +189,8 @@ func DecodeTableKey(
 	var err error
 	switch valType.Family() {
+	case types.ArrayFamily:
+		return decodeArrayKey(a, valType, key, dir)
 	case types.BitFamily:
 		var r bitarray.BitArray
 		if dir == encoding.Ascending {
@@ -918,6 +922,68 @@ func decodeTuple(a *DatumAlloc, tupTyp *types.T, b []byte) (tree.Datum, []byte,
 	return a.NewDTuple(result), b, nil
 }
 
+// encodeArrayKey generates an ordered key encoding of an array.
+// The encoding format for an array [a, b] is as follows:
+// [arrayMarker, enc(a), enc(b), terminator].
+// The terminator is guaranteed to be less than all encoded values,
+// so two arrays with the same prefix but different lengths will sort
+// correctly. One complication is that NULL values need to be encoded
+// differently, because the standard NULL encoding conflicts with the
+// terminator byte. The NULL-within-array value is chosen to be larger
+// than the terminator but less than all other encoded values.
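+//
+// For example, in the ascending direction:
+//   ARRAY[1]       -> [marker, enc(1), terminator]
+//   ARRAY[1, NULL] -> [marker, enc(1), null-within-array, terminator]
+// Since terminator < null-within-array < enc(d) for any non-NULL datum d,
+// the encodings sort as ARRAY[1] < ARRAY[1, NULL] < ARRAY[1, 1].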
+func encodeArrayKey(b []byte, array *tree.DArray, dir encoding.Direction) ([]byte, error) {
+	var err error
+	b = encoding.EncodeArrayKeyMarker(b, dir)
+	for _, elem := range array.Array {
+		if elem == tree.DNull {
+			b = encoding.EncodeNullWithinArrayKey(b, dir)
+		} else {
+			b, err = EncodeTableKey(b, elem, dir)
+			if err != nil {
+				return nil, err
+			}
+		}
+	}
+	return encoding.EncodeArrayKeyTerminator(b, dir), nil
+}
+
+// decodeArrayKey decodes an array key generated by encodeArrayKey.
+func decodeArrayKey(
+	a *DatumAlloc, t *types.T, buf []byte, dir encoding.Direction,
+) (tree.Datum, []byte, error) {
+	var err error
+	buf, err = encoding.ValidateAndConsumeArrayKeyMarker(buf, dir)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	result := tree.NewDArray(t.ArrayContents())
+
+	for {
+		if len(buf) == 0 {
+			return nil, nil, errors.AssertionFailedf("invalid array encoding (unterminated)")
+		}
+		if encoding.IsArrayKeyDone(buf, dir) {
+			buf = buf[1:]
+			break
+		}
+		var d tree.Datum
+		if encoding.IsNextByteArrayEncodedNull(buf, dir) {
+			d = tree.DNull
+			buf = buf[1:]
+		} else {
+			d, buf, err = DecodeTableKey(a, t.ArrayContents(), buf, dir)
+			if err != nil {
+				return nil, nil, err
+			}
+		}
+		if err := result.Append(d); err != nil {
+			return nil, nil, err
+		}
+	}
+	return result, buf, nil
+}
+
 // encodeArray produces the value encoding for an array.
 func encodeArray(d *tree.DArray, scratch []byte) ([]byte, error) {
 	if err := d.Validate(); err != nil {
diff --git a/pkg/sql/sqlbase/column_type_encoding_test.go b/pkg/sql/sqlbase/column_type_encoding_test.go
index ea11f8064ed7..63cba669795b 100644
--- a/pkg/sql/sqlbase/column_type_encoding_test.go
+++ b/pkg/sql/sqlbase/column_type_encoding_test.go
@@ -34,6 +34,13 @@ func genColumnType() gopter.Gen {
 	}
 }
 
+func genRandomArrayType() gopter.Gen {
+	return func(genParams *gopter.GenParameters) *gopter.GenResult {
+		arrType := RandArrayType(genParams.Rng)
+		return gopter.NewGenResult(arrType, gopter.NoShrinker)
+	}
+}
+
 func genDatum() gopter.Gen {
 	return func(genParams *gopter.GenParameters) *gopter.GenResult {
 		return gopter.NewGenResult(RandDatum(genParams.Rng, RandColumnType(genParams.Rng),
@@ -48,6 +55,14 @@ func genDatumWithType(columnType interface{}) gopter.Gen {
 	}
 }
 
+func genArrayDatumWithType(arrTyp interface{}) gopter.Gen {
+	return func(genParams *gopter.GenParameters) *gopter.GenResult {
+		// Give each array element a 1-in-10 chance of being NULL.
+		datum := RandArray(genParams.Rng, arrTyp.(*types.T), 10)
+		return gopter.NewGenResult(datum, gopter.NoShrinker)
+	}
+}
+
 func genEncodingDirection() gopter.Gen {
 	return func(genParams *gopter.GenParameters) *gopter.GenResult {
 		return gopter.NewGenResult(
@@ -59,9 +74,11 @@ func genEncodingDirection() gopter.Gen {
 
 func hasKeyEncoding(typ *types.T) bool {
 	// Only some types are round-trip key encodable.
switch typ.Family() { - case types.JsonFamily, types.ArrayFamily, types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily, + case types.JsonFamily, types.CollatedStringFamily, types.TupleFamily, types.DecimalFamily, types.GeographyFamily, types.GeometryFamily: return false + case types.ArrayFamily: + return hasKeyEncoding(typ.ArrayContents()) } return true } @@ -102,61 +119,75 @@ func TestEncodeTableKey(t *testing.T) { parameters := gopter.DefaultTestParameters() parameters.MinSuccessfulTests = 10000 properties := gopter.NewProperties(parameters) + roundtripDatum := func(d tree.Datum, dir encoding.Direction) string { + b, err := EncodeTableKey(nil, d, dir) + if err != nil { + return "error: " + err.Error() + } + newD, leftoverBytes, err := DecodeTableKey(a, d.ResolvedType(), b, dir) + if len(leftoverBytes) > 0 { + return "Leftover bytes" + } + if err != nil { + return "error: " + err.Error() + } + if newD.Compare(ctx, d) != 0 { + return "unequal" + } + return "" + } properties.Property("roundtrip", prop.ForAll( - func(d tree.Datum, dir encoding.Direction) string { - b, err := EncodeTableKey(nil, d, dir) - if err != nil { - return "error: " + err.Error() - } - newD, leftoverBytes, err := DecodeTableKey(a, d.ResolvedType(), b, dir) - if len(leftoverBytes) > 0 { - return "Leftover bytes" - } - if err != nil { - return "error: " + err.Error() - } - if newD.Compare(ctx, d) != 0 { - return "unequal" - } - return "" - }, + roundtripDatum, genColumnType(). SuchThat(hasKeyEncoding). FlatMap(genDatumWithType, reflect.TypeOf((*tree.Datum)(nil)).Elem()), genEncodingDirection(), )) - properties.Property("order-preserving", prop.ForAll( - func(datums []tree.Datum, dir encoding.Direction) string { - d1 := datums[0] - d2 := datums[1] - b1, err := EncodeTableKey(nil, d1, dir) - if err != nil { - return "error: " + err.Error() - } - b2, err := EncodeTableKey(nil, d2, dir) - if err != nil { - return "error: " + err.Error() - } - expectedCmp := d1.Compare(ctx, d2) - cmp := bytes.Compare(b1, b2) + // Also run the property on arrays possibly containing NULL values. + // The random generator in the property above does not generate NULLs. + properties.Property("roundtrip-arrays", prop.ForAll( + roundtripDatum, + genRandomArrayType(). + SuchThat(hasKeyEncoding). + FlatMap(genArrayDatumWithType, reflect.TypeOf((*tree.Datum)(nil)).Elem()), + genEncodingDirection(), + )) - if expectedCmp == 0 { - if cmp != 0 { - return fmt.Sprintf("equal inputs produced inequal outputs: \n%v\n%v", b1, b2) - } - // If the inputs are equal and so are the outputs, no more checking to do. - return "" - } + generateAndCompareDatums := func(datums []tree.Datum, dir encoding.Direction) string { + d1 := datums[0] + d2 := datums[1] + b1, err := EncodeTableKey(nil, d1, dir) + if err != nil { + return "error: " + err.Error() + } + b2, err := EncodeTableKey(nil, d2, dir) + if err != nil { + return "error: " + err.Error() + } - cmpsMatch := expectedCmp == cmp - dirIsAscending := dir == encoding.Ascending + expectedCmp := d1.Compare(ctx, d2) + cmp := bytes.Compare(b1, b2) - if cmpsMatch != dirIsAscending { - return fmt.Sprintf("non-order preserving encoding: \n%v\n%v", b1, b2) + if expectedCmp == 0 { + if cmp != 0 { + return fmt.Sprintf("equal inputs produced inequal outputs: \n%v\n%v", b1, b2) } + // If the inputs are equal and so are the outputs, no more checking to do. 
return "" - }, + } + + cmpsMatch := expectedCmp == cmp + dirIsAscending := dir == encoding.Ascending + + if cmpsMatch != dirIsAscending { + return fmt.Sprintf("non-order preserving encoding: \n%v\n%v", b1, b2) + } + return "" + } + + properties.Property("order-preserving", prop.ForAll( + generateAndCompareDatums, // For each column type, generate two datums of that type. genColumnType(). SuchThat(hasKeyEncoding). @@ -176,6 +207,31 @@ func TestEncodeTableKey(t *testing.T) { }), genEncodingDirection(), )) + + // Also run the property on arrays possibly containing NULL values. + // The random generator in the property above does not generate NULLs. + properties.Property("order-preserving-arrays", prop.ForAll( + generateAndCompareDatums, + // For each column type, generate two datums of that type. + genRandomArrayType(). + SuchThat(hasKeyEncoding). + FlatMap( + func(t interface{}) gopter.Gen { + colTyp := t.(*types.T) + return gopter.CombineGens( + genArrayDatumWithType(colTyp), + genArrayDatumWithType(colTyp)) + }, reflect.TypeOf([]interface{}{})). + Map(func(datums []interface{}) []tree.Datum { + ret := make([]tree.Datum, len(datums)) + for i, d := range datums { + ret[i] = d.(tree.Datum) + } + return ret + }), + genEncodingDirection(), + )) + properties.TestingRun(t) } diff --git a/pkg/sql/sqlbase/encoded_datum_test.go b/pkg/sql/sqlbase/encoded_datum_test.go index f8f1ee708cf4..743be19ae850 100644 --- a/pkg/sql/sqlbase/encoded_datum_test.go +++ b/pkg/sql/sqlbase/encoded_datum_test.go @@ -248,7 +248,7 @@ func TestEncDatumCompare(t *testing.T) { // These cases require decoding. Data with a composite key encoding cannot // be decoded from their key part alone. - if !HasCompositeKeyEncoding(typ.Family()) { + if !HasCompositeKeyEncoding(typ) { checkEncDatumCmp(t, a, typ, &v1, &v2, noncmp, noncmp, -1, true) checkEncDatumCmp(t, a, typ, &v2, &v1, desc, noncmp, +1, true) checkEncDatumCmp(t, a, typ, &v1, &v1, asc, desc, 0, true) @@ -277,7 +277,7 @@ func TestEncDatumFromBuffer(t *testing.T) { var buf []byte enc := make([]DatumEncoding, len(ed)) for i := range ed { - if HasCompositeKeyEncoding(typs[i].Family()) { + if HasCompositeKeyEncoding(&typs[i]) { // There's no way to reconstruct data from the key part of a composite // encoding. enc[i] = DatumEncoding_VALUE diff --git a/pkg/sql/sqlbase/index_encoding.go b/pkg/sql/sqlbase/index_encoding.go index e3fa02bbcb80..dfbc95d8c993 100644 --- a/pkg/sql/sqlbase/index_encoding.go +++ b/pkg/sql/sqlbase/index_encoding.go @@ -1042,11 +1042,11 @@ func EncodeSecondaryIndex( key = append(key, extraKey...) } - // We do all computation that affects indexes with families in a separate code path to avoid performance - // regression for tables without column families. if len(tableDesc.Families) == 1 || secondaryIndex.Type == IndexDescriptor_INVERTED || secondaryIndex.Version == BaseIndexFormatVersion { + // We do all computation that affects indexes with families in a separate code path to avoid performance + // regression for tables without column families. entry, err := encodeSecondaryIndexNoFamilies(secondaryIndex, colMap, key, values, extraKey) if err != nil { return []IndexEntry{}, err @@ -1207,6 +1207,11 @@ func encodeSecondaryIndexNoFamilies( cols = append(cols, valueEncodedColumn{id: id, isComposite: false}) } for _, id := range index.CompositeColumnIDs { + // Inverted indexes on a composite type (i.e. an array of composite types) + // should not add the indexed column to the value. 
+ if index.Type == IndexDescriptor_INVERTED && id == index.ColumnIDs[0] { + continue + } cols = append(cols, valueEncodedColumn{id: id, isComposite: true}) } sort.Sort(byID(cols)) diff --git a/pkg/sql/sqlbase/structured.go b/pkg/sql/sqlbase/structured.go index e792bd65e7bf..481e5ede76f0 100644 --- a/pkg/sql/sqlbase/structured.go +++ b/pkg/sql/sqlbase/structured.go @@ -35,6 +35,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/errors" + "github.com/lib/pq/oid" ) // ID, ColumnID, FamilyID, and IndexID are all uint32, but are each given a @@ -1281,12 +1282,14 @@ func (desc *MutableTableDescriptor) ensurePrimaryKey() error { // key, so that different strings that collate equal cannot both be used as // keys. The value part is the usual UTF-8 encoding of the string, stored so // that it can be recovered later for inspection/display. -func HasCompositeKeyEncoding(semanticType types.Family) bool { - switch semanticType { +func HasCompositeKeyEncoding(semanticType *types.T) bool { + switch semanticType.Family() { case types.CollatedStringFamily, types.FloatFamily, types.DecimalFamily: return true + case types.ArrayFamily: + return HasCompositeKeyEncoding(semanticType.ArrayContents()) } return false } @@ -1294,17 +1297,24 @@ func HasCompositeKeyEncoding(semanticType types.Family) bool { // DatumTypeHasCompositeKeyEncoding is a version of HasCompositeKeyEncoding // which works on datum types. func DatumTypeHasCompositeKeyEncoding(typ *types.T) bool { - return HasCompositeKeyEncoding(typ.Family()) + return HasCompositeKeyEncoding(typ) } // MustBeValueEncoded returns true if columns of the given kind can only be value // encoded. -func MustBeValueEncoded(semanticType types.Family) bool { - return semanticType == types.ArrayFamily || - semanticType == types.JsonFamily || - semanticType == types.TupleFamily || - semanticType == types.GeometryFamily || - semanticType == types.GeographyFamily +func MustBeValueEncoded(semanticType *types.T) bool { + switch semanticType.Family() { + case types.ArrayFamily: + switch semanticType.Oid() { + case oid.T_int2vector, oid.T_oidvector: + return true + default: + return MustBeValueEncoded(semanticType.ArrayContents()) + } + case types.JsonFamily, types.TupleFamily, types.GeographyFamily, types.GeometryFamily: + return true + } + return false } // HasOldStoredColumns returns whether the index has stored columns in the old @@ -1346,7 +1356,7 @@ func (desc *MutableTableDescriptor) allocateIndexIDs(columnNames map[string]Colu isCompositeColumn := make(map[ColumnID]struct{}) for i := range desc.Columns { col := &desc.Columns[i] - if HasCompositeKeyEncoding(col.Type.Family()) { + if HasCompositeKeyEncoding(&col.Type) { isCompositeColumn[col.ID] = struct{}{} } } @@ -2273,7 +2283,7 @@ func fitColumnToFamily(desc *MutableTableDescriptor, col ColumnDescriptor) (int, // ColumnTypeIsIndexable returns whether the type t is valid as an indexed column. func ColumnTypeIsIndexable(t *types.T) bool { - return !MustBeValueEncoded(t.Family()) + return !MustBeValueEncoded(t) } // ColumnTypeIsInvertedIndexable returns whether the type t is valid to be indexed diff --git a/pkg/sql/sqlbase/testutils.go b/pkg/sql/sqlbase/testutils.go index 5dbdc6ec0dea..8ec3c7eebdcc 100644 --- a/pkg/sql/sqlbase/testutils.go +++ b/pkg/sql/sqlbase/testutils.go @@ -265,17 +265,7 @@ func RandDatumWithNullChance(rng *rand.Rand, typ *types.T, nullChance int) tree. 
 	case types.UnknownFamily:
 		return tree.DNull
 	case types.ArrayFamily:
-		contents := typ.ArrayContents()
-		if contents.Family() == types.AnyFamily {
-			contents = RandArrayContentsType(rng)
-		}
-		arr := tree.NewDArray(contents)
-		for i := 0; i < rng.Intn(10); i++ {
-			if err := arr.Append(RandDatumWithNullChance(rng, contents, 0)); err != nil {
-				panic(err)
-			}
-		}
-		return arr
+		return RandArray(rng, typ, 0)
 	case types.AnyFamily:
 		return RandDatumWithNullChance(rng, RandType(rng), nullChance)
 	default:
@@ -283,6 +273,22 @@ func RandDatumWithNullChance(rng *rand.Rand, typ *types.T, nullChance int) tree.Datum {
 }
 
+// RandArray generates a random DArray where each element has a
+// 1-in-nullChance chance of being NULL.
+func RandArray(rng *rand.Rand, typ *types.T, nullChance int) tree.Datum {
+	contents := typ.ArrayContents()
+	if contents.Family() == types.AnyFamily {
+		contents = RandArrayContentsType(rng)
+	}
+	arr := tree.NewDArray(contents)
+	for i := 0; i < rng.Intn(10); i++ {
+		if err := arr.Append(RandDatumWithNullChance(rng, contents, nullChance)); err != nil {
+			panic(err)
+		}
+	}
+	return arr
+}
+
 const simpleRange = 10
 
 // RandDatumSimple generates a random Datum of the given type. The generated
@@ -767,6 +773,17 @@ func RandColumnType(rng *rand.Rand) *types.T {
 	}
 }
 
+// RandArrayType generates a random array type.
+func RandArrayType(rng *rand.Rand) *types.T {
+	for {
+		typ := RandColumnType(rng)
+		resTyp := types.MakeArray(typ)
+		if err := ValidateColumnDefType(resTyp); err == nil {
+			return resTyp
+		}
+	}
+}
+
 // RandColumnTypes returns a slice of numCols random types. These types must be
 // legal table column types.
 func RandColumnTypes(rng *rand.Rand, numCols int) []types.T {
@@ -780,7 +797,7 @@ func RandColumnTypes(rng *rand.Rand, numCols int) []types.T {
 // RandSortingType returns a column type which can be key-encoded.
 func RandSortingType(rng *rand.Rand) *types.T {
 	typ := RandType(rng)
-	for MustBeValueEncoded(typ.Family()) {
+	for MustBeValueEncoded(typ) {
 		typ = RandType(rng)
 	}
 	return typ
@@ -1325,7 +1342,7 @@ func randIndexTableDefFromCols(
 
 	indexElemList := make(tree.IndexElemList, 0, len(cols))
 	for i := range cols {
-		semType := cols[i].Type.Family()
+		semType := &cols[i].Type
 		if MustBeValueEncoded(semType) {
 			continue
 		}
diff --git a/pkg/util/encoding/encoding.go b/pkg/util/encoding/encoding.go
index 232947d87792..1738d1fcae88 100644
--- a/pkg/util/encoding/encoding.go
+++ b/pkg/util/encoding/encoding.go
@@ -83,6 +83,20 @@ const (
 	timeTZMarker = bitArrayDescMarker + 1
 	geoMarker    = timeTZMarker + 1
 
+	// Markers and terminators for key encoding Datum arrays in sorted order.
+	arrayKeyMarker           = geoMarker + 1
+	arrayKeyDescendingMarker = arrayKeyMarker + 1
+	arrayKeyTerminator           byte = 0x00
+	arrayKeyDescendingTerminator      = 0xFF
+	// We use different NULL encodings for NULLs within key arrays, so that
+	// the terminator sorts before (ascending) or after (descending) the
+	// NULL value within arrays. These byte values overlap with
+	// encodedNotNull, encodedNotNullDesc, and interleavedSentinel,
+	// but they can only exist within an encoded array key. Because
+	// of the context, they cannot be ambiguous with these other bytes.
+	ascendingNullWithinArrayKey  byte = 0x01
+	descendingNullWithinArrayKey      = 0xFE
+
 	// IntMin is chosen such that the range of int tags does not overlap the
 	// ascii character set that is frequently used in testing.
IntMin = 0x80 // 128 @@ -1277,6 +1291,8 @@ const ( BitArrayDesc Type = 18 // BitArray encoded descendingly TimeTZ Type = 19 Geo Type = 20 + ArrayKeyAsc Type = 21 // Array key encoding + ArrayKeyDesc Type = 22 // Array key encoded descendingly ) // typMap maps an encoded type byte to a decoded Type. It's got 256 slots, one @@ -1309,6 +1325,10 @@ func slowPeekType(b []byte) Type { return Null case m == encodedNotNull, m == encodedNotNullDesc: return NotNull + case m == arrayKeyMarker: + return ArrayKeyAsc + case m == arrayKeyDescendingMarker: + return ArrayKeyDesc case m == bytesMarker: return Bytes case m == bytesDescMarker: @@ -1369,6 +1389,30 @@ func getMultiNonsortingVarintLen(b []byte, num int) (int, error) { return p, nil } +// getArrayLength returns the length of a key encoded array. The input +// must have had the array type marker stripped from the front. +func getArrayLength(buf []byte, dir Direction) (int, error) { + result := 0 + for { + if len(buf) == 0 { + return 0, errors.AssertionFailedf("invalid array encoding (unterminated)") + } + if IsArrayKeyDone(buf, dir) { + // Increment to include the terminator byte. + result++ + break + } + next, err := PeekLength(buf) + if err != nil { + return 0, err + } + // Shift buf over by the encoded data amount. + buf = buf[next:] + result += next + } + return result, nil +} + // PeekLength returns the length of the encoded value at the start of b. Note: // if this function succeeds, it's not a guarantee that decoding the value will // succeed. PeekLength is meant to be used on key encoded data only. @@ -1383,6 +1427,9 @@ func PeekLength(b []byte) (int, error) { // interleavedSentinel also falls into this path. Since it // contains the same byte value as encodedNotNullDesc, it // cannot be included explicitly in the case statement. + // ascendingNullWithinArrayKey and descendingNullWithinArrayKey also + // contain the same byte values as encodedNotNull and encodedNotNullDesc + // respectively. return 1, nil case bitArrayMarker, bitArrayDescMarker: terminator := byte(bitArrayDataTerminator) @@ -1398,6 +1445,13 @@ func PeekLength(b []byte) (int, error) { return 1 + n + m + 1, err } return 1 + n + m + 1, nil + case arrayKeyMarker, arrayKeyDescendingMarker: + dir := Ascending + if m == arrayKeyDescendingMarker { + dir = Descending + } + length, err := getArrayLength(b[1:], dir) + return 1 + length, err case bytesMarker: return getBytesLength(b, ascendingEscapes) case jsonInvertedIndex: @@ -1510,7 +1564,7 @@ func prettyPrintValueImpl(valDirs []Direction, b []byte, sep string) (string, bo // even if we don't have directions for the child index's columns. func prettyPrintFirstValue(dir Direction, b []byte) ([]byte, string, error) { var err error - switch PeekType(b) { + switch typ := PeekType(b); typ { case Null: b, _ = DecodeIfNull(b) return b, "NULL", nil @@ -1520,6 +1574,46 @@ func prettyPrintFirstValue(dir Direction, b []byte) ([]byte, string, error) { return b[1:], "False", nil case Array: return b[1:], "Arr", nil + case ArrayKeyAsc, ArrayKeyDesc: + encDir := Ascending + if typ == ArrayKeyDesc { + encDir = Descending + } + var build strings.Builder + buf, err := ValidateAndConsumeArrayKeyMarker(b, encDir) + if err != nil { + return nil, "", err + } + build.WriteString("ARRAY[") + first := true + // Use the array key decoding logic, but instead of calling out + // to DecodeTableKey, just make a recursive call. 
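+	// (sqlbase.DecodeTableKey cannot be called from this package without an
+	// import cycle: sqlbase depends on util/encoding.)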
+	for {
+		if len(buf) == 0 {
+			return nil, "", errors.AssertionFailedf("invalid array (unterminated)")
+		}
+		if IsArrayKeyDone(buf, encDir) {
+			buf = buf[1:]
+			break
+		}
+		var next string
+		if IsNextByteArrayEncodedNull(buf, encDir) {
+			next = "NULL"
+			buf = buf[1:]
+		} else {
+			buf, next, err = prettyPrintFirstValue(encDir, buf)
+			if err != nil {
+				return nil, "", err
+			}
+		}
+		if !first {
+			build.WriteString(",")
+		}
+		build.WriteString(next)
+		first = false
+	}
+	build.WriteString("]")
+	return buf, build.String(), nil
 	case NotNull:
 		// The tag can be either encodedNotNull or encodedNotNullDesc. The
 		// latter can be an interleaved sentinel.
@@ -2603,3 +2697,76 @@ func getJSONInvertedIndexKeyLength(buf []byte) (int, error) {
 		return len + valLen, nil
 	}
 }
+
+// EncodeArrayKeyMarker adds the array key encoding marker to buf and
+// returns the new buffer.
+func EncodeArrayKeyMarker(buf []byte, dir Direction) []byte {
+	switch dir {
+	case Ascending:
+		return append(buf, arrayKeyMarker)
+	case Descending:
+		return append(buf, arrayKeyDescendingMarker)
+	default:
+		panic("invalid direction")
+	}
+}
+
+// EncodeArrayKeyTerminator adds the array key terminator to buf and
+// returns the new buffer.
+func EncodeArrayKeyTerminator(buf []byte, dir Direction) []byte {
+	switch dir {
+	case Ascending:
+		return append(buf, arrayKeyTerminator)
+	case Descending:
+		return append(buf, arrayKeyDescendingTerminator)
+	default:
+		panic("invalid direction")
+	}
+}
+
+// EncodeNullWithinArrayKey encodes NULL within a key encoded array.
+func EncodeNullWithinArrayKey(buf []byte, dir Direction) []byte {
+	switch dir {
+	case Ascending:
+		return append(buf, ascendingNullWithinArrayKey)
+	case Descending:
+		return append(buf, descendingNullWithinArrayKey)
+	default:
+		panic("invalid direction")
+	}
+}
+
+// IsNextByteArrayEncodedNull returns whether the first byte in the input
+// is the NULL encoded byte within an array key.
+func IsNextByteArrayEncodedNull(buf []byte, dir Direction) bool {
+	expected := ascendingNullWithinArrayKey
+	if dir == Descending {
+		expected = descendingNullWithinArrayKey
+	}
+	return buf[0] == expected
+}
+
+// ValidateAndConsumeArrayKeyMarker checks that the marker at the front
+// of buf is valid for an array of the given direction, and consumes it
+// if so. It returns an error if the tag is invalid.
+func ValidateAndConsumeArrayKeyMarker(buf []byte, dir Direction) ([]byte, error) {
+	typ := PeekType(buf)
+	expected := ArrayKeyAsc
+	if dir == Descending {
+		expected = ArrayKeyDesc
+	}
+	if typ != expected {
+		return nil, errors.Newf("invalid type found %s", typ)
+	}
+	return buf[1:], nil
+}
+
+// IsArrayKeyDone returns whether the first byte in the input is the array
+// terminator for the input direction.
+func IsArrayKeyDone(buf []byte, dir Direction) bool { + expected := arrayKeyTerminator + if dir == Descending { + expected = arrayKeyDescendingTerminator + } + return buf[0] == expected +} diff --git a/pkg/util/encoding/type_string.go b/pkg/util/encoding/type_string.go index 096817a610b9..51ff7aec93a1 100644 --- a/pkg/util/encoding/type_string.go +++ b/pkg/util/encoding/type_string.go @@ -29,11 +29,13 @@ func _() { _ = x[BitArrayDesc-18] _ = x[TimeTZ-19] _ = x[Geo-20] + _ = x[ArrayKeyAsc-21] + _ = x[ArrayKeyDesc-22] } -const _Type_name = "UnknownNullNotNullIntFloatDecimalBytesBytesDescTimeDurationTrueFalseUUIDArrayIPAddrJSONTupleBitArrayBitArrayDescTimeTZGeo" +const _Type_name = "UnknownNullNotNullIntFloatDecimalBytesBytesDescTimeDurationTrueFalseUUIDArrayIPAddrJSONTupleBitArrayBitArrayDescTimeTZGeoArrayKeyAscArrayKeyDesc" -var _Type_index = [...]uint8{0, 7, 11, 18, 21, 26, 33, 38, 47, 51, 59, 63, 68, 72, 77, 83, 87, 92, 100, 112, 118, 121} +var _Type_index = [...]uint8{0, 7, 11, 18, 21, 26, 33, 38, 47, 51, 59, 63, 68, 72, 77, 83, 87, 92, 100, 112, 118, 121, 132, 144} func (i Type) String() string { if i < 0 || i >= Type(len(_Type_index)-1) {
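
To make the ordering argument concrete, here is a self-contained sketch of the
scheme introduced above. The terminator (0x00) and NULL-within-array (0x01)
bytes match the constants in this patch; the marker value and the single-byte
integer encoding are simplified stand-ins, not the real assignments.

package main

import (
	"bytes"
	"fmt"
)

const (
	arrayKeyMarker     byte = 0x22 // stand-in; the real value is geoMarker + 1
	arrayKeyTerminator byte = 0x00
	nullWithinArrayKey byte = 0x01
)

// encodeIntArrayKey mimics encodeArrayKey for ascending int arrays, using
// 0x80+n as a toy order-preserving encoding of a small non-negative int n.
func encodeIntArrayKey(elems []*int) []byte {
	b := []byte{arrayKeyMarker}
	for _, e := range elems {
		if e == nil {
			b = append(b, nullWithinArrayKey)
		} else {
			b = append(b, 0x80+byte(*e))
		}
	}
	return append(b, arrayKeyTerminator)
}

func main() {
	one, two := 1, 2
	a := encodeIntArrayKey([]*int{&one})       // ARRAY[1]
	b := encodeIntArrayKey([]*int{&one, nil})  // ARRAY[1, NULL]
	c := encodeIntArrayKey([]*int{&one, &two}) // ARRAY[1, 2]

	// terminator < null-within-array < every element encoding, so shorter
	// prefixes sort first and NULL elements sort before non-NULL ones:
	fmt.Println(bytes.Compare(a, b) < 0) // true: {1} < {1,NULL}
	fmt.Println(bytes.Compare(b, c) < 0) // true: {1,NULL} < {1,2}
}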