Skip to content

Commit

Permalink
sql: enable indexing and ordering on arrays of orderable and indexabl…
Browse files Browse the repository at this point in the history
…e types

Fixes #17154.
Fixes #35707.

This PR enables arrays to be ordered and indexed by
introducing an ordered key encoding for arrays.
Once this exists, the rest of the SQL infrastructure
is ready to handle indexing and ordering on arrays.

To encode an array of elements `ARRAY[a, b]`,
we create the following encoding.

Let `AM` = a marker byte for arrays and let `AT` be a terminator byte.

`enc(ARRAY[a, b]) = [AM, enc(a), enc(b), AT]`

The key is that the terminator is less than the element marker.
This allows for the "prefix matching" style comparison that
arrays support.

Release note (sql change): This PR adds support for indexing
and ordering of arrays of indexable and orderable inner types.
  • Loading branch information
rohany committed Apr 28, 2020
1 parent ef2b897 commit 2ee1739
Show file tree
Hide file tree
Showing 17 changed files with 726 additions and 153 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/flowinfra/stream_encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func (se *StreamEncoder) AddRow(row sqlbase.EncDatumRow) error {
if !ok {
enc = PreferredEncoding
}
sType := se.infos[i].Type.Family()
sType := &se.infos[i].Type
if enc != sqlbase.DatumEncoding_VALUE &&
(sqlbase.HasCompositeKeyEncoding(sType) || sqlbase.MustBeValueEncoded(sType)) {
// Force VALUE encoding for composite types (key encodings may lose data).
Expand Down
19 changes: 0 additions & 19 deletions pkg/sql/logictest/testdata/logic_test/alter_table
Original file line number Diff line number Diff line change
Expand Up @@ -801,25 +801,6 @@ decomputed_column CREATE TABLE decomputed_column (
statement ok
CREATE TABLE b26483()

statement error unimplemented: column c is of type int\[\] and thus is not indexable
ALTER TABLE b26483 ADD COLUMN c INT[] UNIQUE

# As above, but performed in a transaction
statement ok
BEGIN

statement ok
CREATE TABLE b26483_tx()

statement ok
ALTER TABLE b26483_tx ADD COLUMN c INT[]

statement error unimplemented: column c is of type int\[\] and thus is not indexable
CREATE INDEX on b26483_tx (c)

statement ok
ROLLBACK

# Verify that auditing can be enabled by root, and cannot be disabled by non-root.

statement ok
Expand Down
273 changes: 238 additions & 35 deletions pkg/sql/logictest/testdata/logic_test/array
Original file line number Diff line number Diff line change
Expand Up @@ -429,17 +429,6 @@ SELECT ARRAY[ARRAY[1,2,3]]
query error VECTOR column types are unsupported
CREATE TABLE badtable (b INT2VECTOR)

# Using an array as a primary key should be disallowed. #17154

statement error column b is of type int\[\] and thus is not indexable
CREATE TABLE badtable (b INT[] PRIMARY KEY)

# Indexing an array column should be disallowed. #17154

statement error column b is of type int\[\] and thus is not indexable
CREATE TABLE a (b INT[] UNIQUE)


# Regression test for #18745

statement ok
Expand All @@ -449,18 +438,6 @@ query T
SELECT ARRAY[ROW()] FROM ident
----

statement error column b is of type int\[\] and thus is not indexable
CREATE TABLE a (
b INT[],
CONSTRAINT c UNIQUE (b)
)

statement error column b is of type int\[\] and thus is not indexable
CREATE TABLE a (
b INT[],
INDEX c (b)
)

statement ok
CREATE TABLE a (b INT ARRAY)

Expand All @@ -475,18 +452,6 @@ a CREATE TABLE a (
statement ok
DROP TABLE a

statement ok
CREATE TABLE a (b INT[], c INT[])

statement error column b is of type int\[\] and thus is not indexable
CREATE INDEX idx ON a (b)

statement error the following columns are not indexable due to their type: b \(type int\[\]\), c \(type int\[\]\)
CREATE INDEX idx ON a (b, c)

statement ok
DROP TABLE a

# Int array columns.

statement ok
Expand Down Expand Up @@ -1343,3 +1308,241 @@ SELECT x, y FROM t WHERE x < y
query TT
SELECT x, y FROM t WHERE x > y
----

query TT
SELECT x, y FROM t ORDER BY (x, y)
----
{1} {1,2}
{1,1,1,1} {2}

subtest array_indexes

# Create indexes on arrays.
statement ok
DROP TABLE IF EXISTS t;
CREATE TABLE t (x INT[] PRIMARY KEY)

statement ok
INSERT INTO t VALUES
(ARRAY[1]),
(ARRAY[5]),
(ARRAY[4]),
(ARRAY[1,4,5]),
(ARRAY[1,4,6]),
(ARRAY[1,NULL,10]),
(ARRAY[NULL]),
(ARRAY[NULL, NULL, NULL])

# Test that the unique index rejects bad inserts.
statement error pq: duplicate key value \(x\)=\(ARRAY\[1,NULL,10\]\) violates unique constraint "primary"
INSERT INTO t VALUES (ARRAY[1, NULL, 10])

query T
SELECT x FROM t ORDER BY x
----
{NULL}
{NULL,NULL,NULL}
{1}
{1,NULL,10}
{1,4,5}
{1,4,6}
{4}
{5}

# Use the index for point lookups.
query T
SELECT x FROM t WHERE x = ARRAY[1,4,6]
----
{1,4,6}

# Use the index for bounded scans.
# Note that nulls sort first in CockroachDB, so this ordering is different
# than what postgres will output. In postgres, NULLs in arrays are treated
# as larger than other elements, while we treat them as less.
# TODO (rohany): We have always done this for array comparisons, so I think
# it would be a breaking change + opposite with our other null behavior to
# change it suddenly...
query T
SELECT x FROM t WHERE x < ARRAY[1, 4, 3] ORDER BY x
----
{NULL}
{NULL,NULL,NULL}
{1}
{1,NULL,10}

query T
SELECT x FROM t WHERE x > ARRAY [1, NULL] ORDER BY x DESC
----
{5}
{4}
{1,4,6}
{1,4,5}
{1,NULL,10}

query T
SELECT x FROM t WHERE x > ARRAY[1, 3] AND x < ARRAY[1, 4, 10] ORDER BY x
----
{1,4,5}
{1,4,6}

query T
SELECT x FROM t WHERE x > ARRAY[NULL, NULL]:::INT[] ORDER BY x
----
{NULL,NULL,NULL}
{1}
{1,NULL,10}
{1,4,5}
{1,4,6}
{4}
{5}

# Test some operations on a descending index.
statement ok
CREATE INDEX i ON t(x DESC)

query T
SELECT x FROM t@i WHERE x <= ARRAY[1] ORDER BY x DESC
----
{1}
{NULL,NULL,NULL}
{NULL}

query T
SELECT x FROM t@i WHERE x > ARRAY[1] ORDER BY x
----
{1,NULL,10}
{1,4,5}
{1,4,6}
{4}
{5}

# Ensure that we can order by the arrays without any indexes.
statement ok
DROP TABLE t;
CREATE TABLE t (x INT[]);
INSERT INTO t VALUES
(ARRAY[1]),
(ARRAY[5]),
(ARRAY[4]),
(ARRAY[1,4,5]),
(ARRAY[1,4,6]),
(ARRAY[1,NULL,10]),
(ARRAY[NULL]),
(ARRAY[NULL, NULL, NULL])

query T
SELECT x FROM t ORDER BY x
----
{NULL}
{NULL,NULL,NULL}
{1}
{1,NULL,10}
{1,4,5}
{1,4,6}
{4}
{5}

query T
SELECT x FROM t ORDER BY x DESC
----
{5}
{4}
{1,4,6}
{1,4,5}
{1,NULL,10}
{1}
{NULL,NULL,NULL}
{NULL}

statement ok
CREATE INDEX i ON t (x);
INSERT INTO t VALUES (NULL), (NULL)

# Test that NULL's are differentiated from {NULL}.
query T
SELECT x FROM t@i WHERE x IS NOT NULL ORDER BY x
----
{NULL}
{NULL,NULL,NULL}
{1}
{1,NULL,10}
{1,4,5}
{1,4,6}
{4}
{5}

# Create an indexes on a bad type.
statement error pq: unimplemented: column x is of type geography\[\] and thus is not indexable
CREATE TABLE tbad (x GEOGRAPHY[] PRIMARY KEY)

# Test arrays of composite types.
statement ok
CREATE TABLE tarray(x DECIMAL[] PRIMARY KEY);
INSERT INTO tarray VALUES (ARRAY[1.00]), (ARRAY[1.501])

# Ensure these are round tripped correctly.
query T
SELECT x FROM tarray ORDER BY x
----
{1.00}
{1.501}

# Test indexes on multiple columns with arrays.
statement ok
DROP TABLE t;
CREATE TABLE t (x INT, y INT[], z INT, INDEX i (x, y, z));
INSERT INTO t VALUES
(1, ARRAY[1, 2, 3], 3),
(NULL, ARRAY[1, NULL, 3], NULL),
(2, ARRAY[NULL, NULL, NULL], NULL),
(NULL, ARRAY[NULL, NULL], 3),
(2, ARRAY[4, 5], 7)

query ITI
SELECT x, y, z FROM t WHERE x IS NOT NULL AND y > ARRAY[1] ORDER BY z
----
1 {1,2,3} 3
2 {4,5} 7

query ITI
SELECT x, y, z FROM t WHERE x = 2 AND y < ARRAY[10] ORDER BY y
----
2 {NULL,NULL,NULL} NULL
2 {4,5} 7

# Test that interleaving an array index doesn't lead to problems.
statement ok
DROP TABLE IF EXISTS parent, child CASCADE;
CREATE TABLE parent (x INT, y INT[], PRIMARY KEY (x, y DESC));
CREATE TABLE child (x INT, y INT[], z INT[], PRIMARY KEY (x, y DESC, z)) INTERLEAVE IN PARENT parent (x, y);
INSERT INTO parent VALUES
(1, ARRAY[1, 2, 3]),
(1, ARRAY[1, NULL]),
(2, ARRAY[NULL]),
(3, ARRAY[NULL, 1, NULL]);
INSERT INTO child VALUES
(1, ARRAY[1, 2, 3], ARRAY[4]),
(1, ARRAY[1, 2, 3, 4], ARRAY[5]),
(1, ARRAY[1, NULL], ARRAY[5]),
(1, ARRAY[1, NULL, NULL], ARRAY[10]),
(2, ARRAY[NULL], ARRAY[1]),
(3, ARRAY[NULL, 1, NULL], ARRAY[3]);

# Ensure scans on the parent and child aren't affected.
query IT
SELECT x, y FROM parent ORDER BY x, y DESC
----
1 {1,2,3}
1 {1,NULL}
2 {NULL}
3 {NULL,1,NULL}

query ITT
SELECT x, y, z FROM child ORDER BY x, y DESC, z
----
1 {1,2,3,4} {5}
1 {1,2,3} {4}
1 {1,NULL,NULL} {10}
1 {1,NULL} {5}
2 {NULL} {1}
3 {NULL,1,NULL} {3}
12 changes: 0 additions & 12 deletions pkg/sql/logictest/testdata/logic_test/order_by
Original file line number Diff line number Diff line change
Expand Up @@ -187,18 +187,6 @@ SELECT * FROM t ORDER BY foo
query error no data source matches prefix: a
SELECT a FROM t ORDER BY a.b

query error can't order by column type int\[\]
SELECT generate_series FROM generate_series(1, 100) ORDER BY ARRAY[generate_series]

query error can't order by column type int\[\]
SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY ARRAY[generate_series]

query error can't order by column type int\[\]
SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY 1

query error can't order by column type int\[\]
SELECT ARRAY[generate_series] AS a FROM generate_series(1, 100) ORDER BY a

query IT
SELECT generate_series, ARRAY[generate_series] FROM generate_series(1, 1) ORDER BY 1
----
Expand Down
14 changes: 14 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -578,3 +578,17 @@ lookup-join · ·
└── scan · · () ·
· table e@e_b_idx · ·
· fixedvals 1 column · ·

# Ensure that an inverted index with a composite primary key still encodes
# the primary key data in the composite value.
statement ok
DROP TABLE IF EXISTS t;
CREATE TABLE t (x DECIMAL PRIMARY KEY, y int[], FAMILY (x, y));
CREATE INVERTED INDEX ON t(y)

query T kvtrace
INSERT INTO t VALUES (1.00, ARRAY[1,2])
----
CPut /Table/56/1/1/0 -> /TUPLE/1:1:Decimal/1.00/
InitPut /Table/56/2/1/1/0 -> /BYTES/0x1503348964
InitPut /Table/56/2/2/1/0 -> /BYTES/0x1503348964
6 changes: 2 additions & 4 deletions pkg/sql/opt/optbuilder/orderby.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,10 +258,8 @@ func (b *Builder) analyzeExtraArgument(

func ensureColumnOrderable(e tree.TypedExpr) {
typ := e.ResolvedType()
if typ.Family() == types.ArrayFamily {
panic(unimplementedWithIssueDetailf(35707, "", "can't order by column type %s", typ))
}
if typ.Family() == types.JsonFamily {
if typ.Family() == types.JsonFamily ||
(typ.Family() == types.ArrayFamily && typ.ArrayContents().Family() == types.JsonFamily) {
panic(unimplementedWithIssueDetailf(35706, "", "can't order by column type jsonb"))
}
}
Loading

0 comments on commit 2ee1739

Please sign in to comment.