Skip to content

Commit

Permalink
sql: enable indexing and ordering on arrays of orderable and indexabl…
Browse files Browse the repository at this point in the history
…e types

Fixes #17154.
Fixes #35707.

This PR enables arrays to be ordered and indexed by
introducing an ordered key encoding for arrays.
Once this exists, the rest of the SQL infrastructure
is ready to handle indexing and ordering on arrays.

To encode an array of elements `ARRAY[a, b]`,
we create the following encoding.

Let `AM` = a marker byte for arrays, let `EM` be
an "array element" marker, and let `AT` be a terminator byte.

`enc(ARRAY[a, b]) = [AM, EM, enc(a), EM, enc(b), AT]`

The key is that the terminator is less than the element marker.
This allows for the "prefix matching" style comparison that
arrays support.

Release note (sql change): This PR adds support for indexing
and ordering of arrays of indexable and orderable inner types.
  • Loading branch information
rohany committed Apr 26, 2020
1 parent ef2b897 commit 7811ade
Show file tree
Hide file tree
Showing 11 changed files with 569 additions and 142 deletions.
4 changes: 2 additions & 2 deletions pkg/sql/flowinfra/stream_encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ func (se *StreamEncoder) AddRow(row sqlbase.EncDatumRow) error {
if !ok {
enc = PreferredEncoding
}
sType := se.infos[i].Type.Family()
sType := &se.infos[i].Type
if enc != sqlbase.DatumEncoding_VALUE &&
(sqlbase.HasCompositeKeyEncoding(sType) || sqlbase.MustBeValueEncoded(sType)) {
(sqlbase.HasCompositeKeyEncoding(sType.Family()) || sqlbase.MustBeValueEncoded(sType)) {
// Force VALUE encoding for composite types (key encodings may lose data).
enc = sqlbase.DatumEncoding_VALUE
}
Expand Down
19 changes: 0 additions & 19 deletions pkg/sql/logictest/testdata/logic_test/alter_table
Original file line number Diff line number Diff line change
Expand Up @@ -801,25 +801,6 @@ decomputed_column CREATE TABLE decomputed_column (
statement ok
CREATE TABLE b26483()

statement error unimplemented: column c is of type int\[\] and thus is not indexable
ALTER TABLE b26483 ADD COLUMN c INT[] UNIQUE

# As above, but performed in a transaction
statement ok
BEGIN

statement ok
CREATE TABLE b26483_tx()

statement ok
ALTER TABLE b26483_tx ADD COLUMN c INT[]

statement error unimplemented: column c is of type int\[\] and thus is not indexable
CREATE INDEX on b26483_tx (c)

statement ok
ROLLBACK

# Verify that auditing can be enabled by root, and cannot be disabled by non-root.

statement ok
Expand Down
161 changes: 126 additions & 35 deletions pkg/sql/logictest/testdata/logic_test/array
Original file line number Diff line number Diff line change
Expand Up @@ -429,17 +429,6 @@ SELECT ARRAY[ARRAY[1,2,3]]
query error VECTOR column types are unsupported
CREATE TABLE badtable (b INT2VECTOR)

# Using an array as a primary key should be disallowed. #17154

statement error column b is of type int\[\] and thus is not indexable
CREATE TABLE badtable (b INT[] PRIMARY KEY)

# Indexing an array column should be disallowed. #17154

statement error column b is of type int\[\] and thus is not indexable
CREATE TABLE a (b INT[] UNIQUE)


# Regression test for #18745

statement ok
Expand All @@ -449,18 +438,6 @@ query T
SELECT ARRAY[ROW()] FROM ident
----

statement error column b is of type int\[\] and thus is not indexable
CREATE TABLE a (
b INT[],
CONSTRAINT c UNIQUE (b)
)

statement error column b is of type int\[\] and thus is not indexable
CREATE TABLE a (
b INT[],
INDEX c (b)
)

statement ok
CREATE TABLE a (b INT ARRAY)

Expand All @@ -475,18 +452,6 @@ a CREATE TABLE a (
statement ok
DROP TABLE a

statement ok
CREATE TABLE a (b INT[], c INT[])

statement error column b is of type int\[\] and thus is not indexable
CREATE INDEX idx ON a (b)

statement error the following columns are not indexable due to their type: b \(type int\[\]\), c \(type int\[\]\)
CREATE INDEX idx ON a (b, c)

statement ok
DROP TABLE a

# Int array columns.

statement ok
Expand Down Expand Up @@ -1343,3 +1308,129 @@ SELECT x, y FROM t WHERE x < y
query TT
SELECT x, y FROM t WHERE x > y
----

query TT
SELECT x, y FROM t ORDER BY (x, y)
----
{1} {1,2}
{1,1,1,1} {2}

subtest array_indexes

# Create indexes on arrays.
statement ok
DROP TABLE IF EXISTS t;
CREATE TABLE t (x INT[] PRIMARY KEY)

statement ok
INSERT INTO t VALUES
(ARRAY[1]),
(ARRAY[5]),
(ARRAY[4]),
(ARRAY[1,4,5]),
(ARRAY[1,4,6]),
(ARRAY[1,NULL,10]),
(ARRAY[NULL]),
(ARRAY[NULL, NULL, NULL])

# Test that the unique index rejects bad inserts.
statement error pq: duplicate key value \(x\)=\(ARRAY\[1,NULL,10\]\) violates unique constraint "primary"
INSERT INTO t VALUES (ARRAY[1, NULL, 10])

# Use the index for point lookups.
query T
SELECT x FROM t WHERE x = ARRAY[1,4,6]
----
{1,4,6}

# Use the index for bounded scans.
# Note that nulls sort first in CockroachDB, so this ordering is different
# than what postgres will output. In postgres, NULLs in arrays are treated
# as larger than other elements, while we treat them as less.
# TODO (rohany): We have always done this for array comparisons, so I think
# it would be a breaking change + opposite with our other null behavior to
# change it suddenly...
query T
SELECT x FROM t WHERE x < ARRAY[1, 4, 3] ORDER BY x
----
{NULL}
{NULL,NULL,NULL}
{1}
{1,NULL,10}

query T
SELECT x FROM t WHERE x > ARRAY [1, NULL] ORDER BY x DESC
----
{5}
{4}
{1,4,6}
{1,4,5}
{1,NULL,10}

query T
SELECT x FROM t WHERE x > ARRAY[1, 3] AND x < ARRAY[1, 4, 10] ORDER BY x
----
{1,4,5}
{1,4,6}

# Test some operations on a descending index.
statement ok
CREATE INDEX i ON t(x DESC)

query T
SELECT x FROM t@i WHERE x <= ARRAY[1] ORDER BY x DESC
----
{1}
{NULL,NULL,NULL}
{NULL}

query T
SELECT x FROM t@i WHERE x > ARRAY[1] ORDER BY x
----
{1,NULL,10}
{1,4,5}
{1,4,6}
{4}
{5}

# Ensure that we can order by the arrays without any indexes.
statement ok
DROP TABLE t;
CREATE TABLE t (x INT[]);
INSERT INTO t VALUES
(ARRAY[1]),
(ARRAY[5]),
(ARRAY[4]),
(ARRAY[1,4,5]),
(ARRAY[1,4,6]),
(ARRAY[1,NULL,10]),
(ARRAY[NULL]),
(ARRAY[NULL, NULL, NULL])

query T
SELECT x FROM t ORDER BY x
----
{NULL}
{NULL,NULL,NULL}
{1}
{1,NULL,10}
{1,4,5}
{1,4,6}
{4}
{5}

query T
SELECT x FROM t ORDER BY x DESC
----
{5}
{4}
{1,4,6}
{1,4,5}
{1,NULL,10}
{1}
{NULL,NULL,NULL}
{NULL}

# Create an indexes on a bad type.
statement error pq: unimplemented: column x is of type geography\[\] and thus is not indexable
CREATE TABLE tbad (x GEOGRAPHY[] PRIMARY KEY)
12 changes: 0 additions & 12 deletions pkg/sql/logictest/testdata/logic_test/order_by
Original file line number Diff line number Diff line change
Expand Up @@ -187,18 +187,6 @@ SELECT * FROM t ORDER BY foo
query error no data source matches prefix: a
SELECT a FROM t ORDER BY a.b

query error can't order by column type int\[\]
SELECT generate_series FROM generate_series(1, 100) ORDER BY ARRAY[generate_series]

query error can't order by column type int\[\]
SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY ARRAY[generate_series]

query error can't order by column type int\[\]
SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY 1

query error can't order by column type int\[\]
SELECT ARRAY[generate_series] AS a FROM generate_series(1, 100) ORDER BY a

query IT
SELECT generate_series, ARRAY[generate_series] FROM generate_series(1, 1) ORDER BY 1
----
Expand Down
6 changes: 2 additions & 4 deletions pkg/sql/opt/optbuilder/orderby.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,10 +258,8 @@ func (b *Builder) analyzeExtraArgument(

func ensureColumnOrderable(e tree.TypedExpr) {
typ := e.ResolvedType()
if typ.Family() == types.ArrayFamily {
panic(unimplementedWithIssueDetailf(35707, "", "can't order by column type %s", typ))
}
if typ.Family() == types.JsonFamily {
if typ.Family() == types.JsonFamily ||
(typ.Family() == types.ArrayFamily && typ.ArrayContents().Family() == types.JsonFamily) {
panic(unimplementedWithIssueDetailf(35706, "", "can't order by column type jsonb"))
}
}
66 changes: 61 additions & 5 deletions pkg/sql/opt/optbuilder/testdata/orderby
Original file line number Diff line number Diff line change
Expand Up @@ -391,22 +391,70 @@ error (42P01): no data source matches prefix: a
build
SELECT generate_series FROM generate_series(1, 100) ORDER BY ARRAY[generate_series]
----
error (0A000): unimplemented: can't order by column type int[]
sort
├── columns: generate_series:1 [hidden: column2:2]
├── ordering: +2
└── project
├── columns: column2:2 generate_series:1
├── project-set
│ ├── columns: generate_series:1
│ ├── values
│ │ └── ()
│ └── zip
│ └── generate_series(1, 100)
└── projections
└── ARRAY[generate_series:1] [as=column2:2]

build
SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY ARRAY[generate_series]
----
error (0A000): unimplemented: can't order by column type int[]
sort
├── columns: array:2
├── ordering: +2
└── project
├── columns: array:2
├── project-set
│ ├── columns: generate_series:1
│ ├── values
│ │ └── ()
│ └── zip
│ └── generate_series(1, 100)
└── projections
└── ARRAY[generate_series:1] [as=array:2]

build
SELECT ARRAY[generate_series] FROM generate_series(1, 100) ORDER BY 1
----
error (0A000): unimplemented: can't order by column type int[]
sort
├── columns: array:2
├── ordering: +2
└── project
├── columns: array:2
├── project-set
│ ├── columns: generate_series:1
│ ├── values
│ │ └── ()
│ └── zip
│ └── generate_series(1, 100)
└── projections
└── ARRAY[generate_series:1] [as=array:2]

build
SELECT ARRAY[generate_series] AS a FROM generate_series(1, 100) ORDER BY a
----
error (0A000): unimplemented: can't order by column type int[]
sort
├── columns: a:2
├── ordering: +2
└── project
├── columns: a:2
├── project-set
│ ├── columns: generate_series:1
│ ├── values
│ │ └── ()
│ └── zip
│ └── generate_series(1, 100)
└── projections
└── ARRAY[generate_series:1] [as=a:2]

build
SELECT generate_series, ARRAY[generate_series] FROM generate_series(1, 1) ORDER BY 1
Expand Down Expand Up @@ -988,4 +1036,12 @@ project
build
SELECT ARRAY[a] FROM abcd ORDER BY 1
----
error (0A000): unimplemented: can't order by column type int[]
sort
├── columns: array:5!null
├── ordering: +5
└── project
├── columns: array:5!null
├── scan abcd
│ └── columns: a:1!null b:2 c:3 d:4
└── projections
└── ARRAY[a:1] [as=array:5]
Loading

0 comments on commit 7811ade

Please sign in to comment.