From ebad853c93b55859886c42f1a3278225dafbdd14 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Tue, 23 Oct 2018 11:20:32 +0200 Subject: [PATCH 1/2] sql/parser: re-allow FAMILY, MINVALUE, MAXVALUE, NOTHING and INDEX in table names CockroachDB introduced non-standard extensions to its SQL dialect very early in its history, before concerns of compatibility with existing PostgreSQL clients became a priority. When these features were added, new keywords were liberally marked as "reserved", so as to "make the grammar work", and without noticing / care for the fact that this change would make existing valid SQL queries/clients encounter new errors. An example of this: 1. let's make "column families" a thing 2. the syntax `create table(..., constraint xxx family(a,b,c))` is not good enough (although this would not require reserved keywords), we really want also `create table (..., family (a,b,c))` to be possible. 3. oh, the grammar won't let us because "family" is a possible column name? No matter! let's mark "FAMILY" as a reserved name for column/function names. - No concern for the fact that "family" is a perfectly valid English name for things that people want to make an attribute of in inventory / classification tables. - No concern for the fact that reserved column/function names are also reserved for table names. 4. (much later) Clients complaining about the fact they can't call their columns or tables `family` without quoting. Ditto "INDEX", "MINVALUE", "MAXVALUE", and perhaps others. Moral of the story: DO NOT MAKE NEW RESERVED KEYWORDS UNLESS YOU'RE VERY VERY VERY SURE THAT THERE IS NO LEGITIMATE USE FOR THEM IN CLIENT APPS EVER. (An example perhaps: the word "NOTHING" was also marked as reserved, but it's much more unlikely this word will ever be used for something useful.) This patch restores the use of FAMILY, INDEX, NOTHING, MINVALUE and MAXVALUE in table and function names, by introducing an awkward dance in the grammar of keyword non-terminals and database object names. They remain reserved as colum names because of the non-standard CockroachDB extensions. Release note (sql change): It is now again possible to use the keywords FAMILY, MINVALUE, MAXVALUE, INDEX or NOTHING as table names, for compatibility with PostgreSQL. --- docs/generated/sql/bnf/stmt_block.bnf | 43 ++++++++------ pkg/sql/parser/all_keywords.awk | 6 +- pkg/sql/parser/parse_test.go | 12 ++++ pkg/sql/parser/reserved_keywords.awk | 4 +- pkg/sql/parser/sql.y | 78 +++++++++++++++++++------- pkg/sql/parser/unreserved_keywords.awk | 2 +- 6 files changed, 103 insertions(+), 42 deletions(-) diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf index 1183b850b6a9..3681a03d5bc8 100644 --- a/docs/generated/sql/bnf/stmt_block.bnf +++ b/docs/generated/sql/bnf/stmt_block.bnf @@ -810,8 +810,8 @@ kv_option_list ::= complex_table_pattern ::= complex_db_object_name - | name '.' unrestricted_name '.' '*' - | name '.' '*' + | db_object_name_component '.' unrestricted_name '.' '*' + | db_object_name_component '.' '*' | '*' table_pattern ::= @@ -1095,11 +1095,11 @@ set_clause ::= | multiple_set_clause simple_db_object_name ::= - name + db_object_name_component complex_db_object_name ::= - name '.' unrestricted_name - | name '.' unrestricted_name '.' unrestricted_name + db_object_name_component '.' unrestricted_name + | db_object_name_component '.' unrestricted_name '.' unrestricted_name non_reserved_word ::= 'identifier' @@ -1113,6 +1113,11 @@ kv_option ::= | 'SCONST' '=' string_or_placeholder | 'SCONST' +db_object_name_component ::= + name + | cockroachdb_extra_type_func_name_keyword + | cockroachdb_extra_reserved_keyword + unrestricted_name ::= 'identifier' | unreserved_keyword @@ -1432,7 +1437,6 @@ multiple_set_clause ::= type_func_name_keyword ::= 'COLLATION' | 'CROSS' - | 'FAMILY' | 'FULL' | 'INNER' | 'ILIKE' @@ -1441,14 +1445,22 @@ type_func_name_keyword ::= | 'JOIN' | 'LEFT' | 'LIKE' - | 'MAXVALUE' - | 'MINVALUE' | 'NATURAL' | 'NOTNULL' | 'OUTER' | 'OVERLAPS' | 'RIGHT' | 'SIMILAR' + | cockroachdb_extra_type_func_name_keyword + +cockroachdb_extra_type_func_name_keyword ::= + 'FAMILY' + | 'MAXVALUE' + | 'MINVALUE' + +cockroachdb_extra_reserved_keyword ::= + 'INDEX' + | 'NOTHING' reserved_keyword ::= 'ALL' @@ -1492,7 +1504,6 @@ reserved_keyword ::= | 'GROUP' | 'HAVING' | 'IN' - | 'INDEX' | 'INITIALLY' | 'INTERSECT' | 'INTO' @@ -1502,7 +1513,6 @@ reserved_keyword ::= | 'LOCALTIME' | 'LOCALTIMESTAMP' | 'NOT' - | 'NOTHING' | 'NULL' | 'OFFSET' | 'ON' @@ -1531,6 +1541,7 @@ reserved_keyword ::= | 'WHERE' | 'WINDOW' | 'WITH' + | cockroachdb_extra_reserved_keyword transaction_iso_level ::= 'ISOLATION' 'LEVEL' iso_level @@ -1651,9 +1662,9 @@ interval ::= column_path_with_star ::= column_path - | name '.' unrestricted_name '.' unrestricted_name '.' '*' - | name '.' unrestricted_name '.' '*' - | name '.' '*' + | db_object_name_component '.' unrestricted_name '.' unrestricted_name '.' '*' + | db_object_name_component '.' unrestricted_name '.' '*' + | db_object_name_component '.' '*' func_expr ::= func_application filter_clause over_clause @@ -2050,9 +2061,9 @@ interval_qualifier ::= | 'MINUTE' 'TO' interval_second prefixed_column_path ::= - name '.' unrestricted_name - | name '.' unrestricted_name '.' unrestricted_name - | name '.' unrestricted_name '.' unrestricted_name '.' unrestricted_name + db_object_name_component '.' unrestricted_name + | db_object_name_component '.' unrestricted_name '.' unrestricted_name + | db_object_name_component '.' unrestricted_name '.' unrestricted_name '.' unrestricted_name func_name ::= type_function_name diff --git a/pkg/sql/parser/all_keywords.awk b/pkg/sql/parser/all_keywords.awk index ff6d085b1c63..c167bffdf7e2 100644 --- a/pkg/sql/parser/all_keywords.awk +++ b/pkg/sql/parser/all_keywords.awk @@ -21,9 +21,9 @@ BEGIN { category = "C" } else if ($1 == "unreserved_keyword:") { category = "U" - } else if ($1 == "type_func_name_keyword:") { + } else if ($1 == "type_func_name_keyword:" || $1 == "cockroachdb_extra_type_func_name_keyword:") { category = "T" - } else if ($1 == "reserved_keyword:") { + } else if ($1 == "reserved_keyword:" || $1 == "cockroachdb_extra_reserved_keyword:") { category ="R" } else { print "unknown keyword type:", $1 >>"/dev/stderr" @@ -36,7 +36,7 @@ BEGIN { keyword = 0 } -{ +/^ *\|? *[A-Z]/ { if (keyword && $NF != "") { printf("\"%s\": {%s, \"%s\"},\n", tolower($NF), $NF, category) | sort } diff --git a/pkg/sql/parser/parse_test.go b/pkg/sql/parser/parse_test.go index 08068cb02dd5..29967d08a4a1 100644 --- a/pkg/sql/parser/parse_test.go +++ b/pkg/sql/parser/parse_test.go @@ -1831,6 +1831,18 @@ func TestParse2(t *testing.T) { {`ALTER TABLE a ALTER b DROP NOT NULL`, `ALTER TABLE a ALTER COLUMN b DROP NOT NULL`}, {`ALTER TABLE a ALTER b TYPE INT`, `ALTER TABLE a ALTER COLUMN b SET DATA TYPE INT`}, {`EXPLAIN ANALYZE SELECT 1`, `EXPLAIN ANALYZE (DISTSQL) SELECT 1`}, + + // Regression for #31589 + {`CREATE TABLE FAMILY (x INT)`, + `CREATE TABLE "family" (x INT)`}, + {`CREATE TABLE INDEX (x INT)`, + `CREATE TABLE "index" (x INT)`}, + {`CREATE TABLE NOTHING (x INT)`, + `CREATE TABLE "nothing" (x INT)`}, + {`CREATE TABLE MINVALUE (x INT)`, + `CREATE TABLE "minvalue" (x INT)`}, + {`CREATE TABLE MAXVALUE (x INT)`, + `CREATE TABLE "maxvalue" (x INT)`}, } for _, d := range testData { stmts, err := parser.Parse(d.sql) diff --git a/pkg/sql/parser/reserved_keywords.awk b/pkg/sql/parser/reserved_keywords.awk index 603354c36e47..4ad85589e34a 100644 --- a/pkg/sql/parser/reserved_keywords.awk +++ b/pkg/sql/parser/reserved_keywords.awk @@ -3,12 +3,12 @@ next } -/^type_func_name_keyword:/ { +/^(cockroachdb_extra_)?type_func_name_keyword:/ { reserved_keyword = 1 next } -/^reserved_keyword:/ { +/^(cockroachdb_extra_)?reserved_keyword:/ { reserved_keyword = 1 next } diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index 4ba6d4e5e4d2..ce7ab64a42d6 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -774,6 +774,7 @@ func newNameFromStr(s string) *tree.Name { %type database_name index_name opt_index_name column_name insert_column_item statistics_name window_name %type family_name opt_family_name table_alias_name constraint_name target_name zone_name partition_name collation_name +%type db_object_name_component %type <*tree.UnresolvedName> table_name sequence_name type_name view_name db_object_name simple_db_object_name complex_db_object_name %type <*tree.UnresolvedName> table_pattern complex_table_pattern %type <*tree.UnresolvedName> column_path prefixed_column_path column_path_with_star @@ -919,8 +920,8 @@ func newNameFromStr(s string) *tree.Name { %type string_or_placeholder %type string_or_placeholder_list -%type unreserved_keyword type_func_name_keyword -%type col_name_keyword reserved_keyword +%type unreserved_keyword type_func_name_keyword cockroachdb_extra_type_func_name_keyword +%type col_name_keyword reserved_keyword cockroachdb_extra_reserved_keyword %type table_constraint constraint_elem %type index_def @@ -8045,11 +8046,11 @@ table_pattern: // every pattern not composed of a single identifier. complex_table_pattern: complex_db_object_name -| name '.' unrestricted_name '.' '*' +| db_object_name_component '.' unrestricted_name '.' '*' { $$.val = &tree.UnresolvedName{Star: true, NumParts: 3, Parts: tree.NameParts{"", $3, $1}} } -| name '.' '*' +| db_object_name_component '.' '*' { $$.val = &tree.UnresolvedName{Star: true, NumParts: 2, Parts: tree.NameParts{"", $1}} } @@ -8212,15 +8213,15 @@ column_path: | prefixed_column_path prefixed_column_path: - name '.' unrestricted_name + db_object_name_component '.' unrestricted_name { $$.val = &tree.UnresolvedName{NumParts:2, Parts: tree.NameParts{$3,$1}} } -| name '.' unrestricted_name '.' unrestricted_name +| db_object_name_component '.' unrestricted_name '.' unrestricted_name { $$.val = &tree.UnresolvedName{NumParts:3, Parts: tree.NameParts{$5,$3,$1}} } -| name '.' unrestricted_name '.' unrestricted_name '.' unrestricted_name +| db_object_name_component '.' unrestricted_name '.' unrestricted_name '.' unrestricted_name { $$.val = &tree.UnresolvedName{NumParts:4, Parts: tree.NameParts{$7,$5,$3,$1}} } @@ -8234,15 +8235,15 @@ prefixed_column_path: // The single unqualified star is handled separately by target_elem. column_path_with_star: column_path -| name '.' unrestricted_name '.' unrestricted_name '.' '*' +| db_object_name_component '.' unrestricted_name '.' unrestricted_name '.' '*' { $$.val = &tree.UnresolvedName{Star:true, NumParts:4, Parts: tree.NameParts{"",$5,$3,$1}} } -| name '.' unrestricted_name '.' '*' +| db_object_name_component '.' unrestricted_name '.' '*' { $$.val = &tree.UnresolvedName{Star:true, NumParts:3, Parts: tree.NameParts{"",$3,$1}} } -| name '.' '*' +| db_object_name_component '.' '*' { $$.val = &tree.UnresolvedName{Star:true, NumParts:2, Parts: tree.NameParts{"",$1}} } @@ -8273,7 +8274,7 @@ db_object_name: // simple_db_object_name is the part of db_object_name that recognizes // simple identifiers. simple_db_object_name: - name + db_object_name_component { $$.val = &tree.UnresolvedName{NumParts:1, Parts: tree.NameParts{$1}} } @@ -8283,15 +8284,23 @@ simple_db_object_name: // It is split away from db_object_name in order to enable the definition // of table_pattern. complex_db_object_name: - name '.' unrestricted_name + db_object_name_component '.' unrestricted_name { $$.val = &tree.UnresolvedName{NumParts:2, Parts: tree.NameParts{$3,$1}} } -| name '.' unrestricted_name '.' unrestricted_name +| db_object_name_component '.' unrestricted_name '.' unrestricted_name { $$.val = &tree.UnresolvedName{NumParts:3, Parts: tree.NameParts{$5,$3,$1}} } +// DB object name component -- this cannot not include any reserved +// keyword because of ambiguity after FROM, but we've been too lax +// with reserved keywords and made INDEX and FAMILY reserved, so we're +// trying to gain them back here. +db_object_name_component: + name +| cockroachdb_extra_type_func_name_keyword +| cockroachdb_extra_reserved_keyword // General name --- names that can be column, table, etc names. name: @@ -8660,11 +8669,12 @@ col_name_keyword: // productions in a_expr to support the goofy SQL9x argument syntax. // - thomas 2000-11-28 // -// TODO(dan): see if we can move MAXVALUE and MINVALUE to a less restricted list +// *** DO NOT ADD COCKROACHDB-SPECIFIC KEYWORDS HERE *** +// +// See cockroachdb_extra_type_func_name_keyword below. type_func_name_keyword: COLLATION | CROSS -| FAMILY | FULL | INNER | ILIKE @@ -8673,19 +8683,37 @@ type_func_name_keyword: | JOIN | LEFT | LIKE -| MAXVALUE -| MINVALUE | NATURAL | NOTNULL | OUTER | OVERLAPS | RIGHT | SIMILAR +| cockroachdb_extra_type_func_name_keyword + +// CockroachDB-specific keywords that can be used in type/function +// identifiers. +// +// *** REFRAIN FROM ADDING KEYWORDS HERE *** +// +// Adding keywords here creates non-resolvable incompatibilities with +// postgres clients. +// +// TODO(dan): Make MINVALUE/MAXVALUE less reserved. +cockroachdb_extra_type_func_name_keyword: + FAMILY +| MAXVALUE +| MINVALUE // Reserved keyword --- these keywords are usable only as a unrestricted_name. // // Keywords appear here if they could not be distinguished from variable, type, -// or function names in some contexts. Don't put things here unless forced to. +// or function names in some contexts. +// +// *** NEVER ADD KEYWORDS HERE *** +// +// See cockroachdb_extra_reserved_keyword below. +// reserved_keyword: ALL | ANALYSE @@ -8728,7 +8756,6 @@ reserved_keyword: | GROUP | HAVING | IN -| INDEX | INITIALLY | INTERSECT | INTO @@ -8738,7 +8765,6 @@ reserved_keyword: | LOCALTIME | LOCALTIMESTAMP | NOT -| NOTHING | NULL | OFFSET | ON @@ -8767,5 +8793,17 @@ reserved_keyword: | WHERE | WINDOW | WITH +| cockroachdb_extra_reserved_keyword + +// Reserved keywords in CockroachDB, in addition to those reserved in +// PostgreSQL. +// +// *** REFRAIN FROM ADDING KEYWORDS HERE *** +// +// Adding keywords here creates non-resolvable incompatibilities with +// postgres clients. +cockroachdb_extra_reserved_keyword: + INDEX +| NOTHING %% diff --git a/pkg/sql/parser/unreserved_keywords.awk b/pkg/sql/parser/unreserved_keywords.awk index e6e08f3d568e..a859f942dc13 100644 --- a/pkg/sql/parser/unreserved_keywords.awk +++ b/pkg/sql/parser/unreserved_keywords.awk @@ -1,4 +1,4 @@ -/^reserved_keyword:/ { +/^(cockroachdb_extra_)?reserved_keyword:/ { keyword = 0 next } From b18eeb176b231483a77c934b02c1d29caf4cd180 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Tue, 23 Oct 2018 12:52:47 +0200 Subject: [PATCH 2/2] sql/parser: unreserve INDEX and NOTHING from the RHS of SET statements The SET statement in the pg dialect is special because it auto-converts identifiers on its RHS to symbolic values or strings. In particular it is meant to support a diversity of special keywords as pseudo-values. This patch ensures that INDEX and NOTHING are accepted on the RHS. Release note (sql change): the names "index" and "nothing" are again accepted in the right-hand-side of the assignment in SET statements, for compatibility with PostgreSQL. --- docs/generated/sql/bnf/stmt_block.bnf | 6 +++++- pkg/sql/parser/parse_test.go | 3 +++ pkg/sql/parser/sql.y | 19 +++++++++++++++++-- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf index 3681a03d5bc8..f8412483a450 100644 --- a/docs/generated/sql/bnf/stmt_block.bnf +++ b/docs/generated/sql/bnf/stmt_block.bnf @@ -1052,7 +1052,7 @@ to_or_eq ::= var_value ::= a_expr - | 'ON' + | extra_var_value opt_on_targets_roles ::= 'ON' targets_roles @@ -1421,6 +1421,10 @@ offset_clause ::= generic_set ::= var_name to_or_eq var_list +extra_var_value ::= + 'ON' + | cockroachdb_extra_reserved_keyword + targets_roles ::= 'ROLE' name_list | targets diff --git a/pkg/sql/parser/parse_test.go b/pkg/sql/parser/parse_test.go index 29967d08a4a1..337b74111923 100644 --- a/pkg/sql/parser/parse_test.go +++ b/pkg/sql/parser/parse_test.go @@ -1832,6 +1832,9 @@ func TestParse2(t *testing.T) { {`ALTER TABLE a ALTER b TYPE INT`, `ALTER TABLE a ALTER COLUMN b SET DATA TYPE INT`}, {`EXPLAIN ANALYZE SELECT 1`, `EXPLAIN ANALYZE (DISTSQL) SELECT 1`}, + {`SET a = INDEX`, `SET a = "index"`}, + {`SET a = NOTHING`, `SET a = "nothing"`}, + // Regression for #31589 {`CREATE TABLE FAMILY (x INT)`, `CREATE TABLE "family" (x INT)`}, diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index ce7ab64a42d6..3956fb336146 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -921,7 +921,7 @@ func newNameFromStr(s string) *tree.Name { %type string_or_placeholder_list %type unreserved_keyword type_func_name_keyword cockroachdb_extra_type_func_name_keyword -%type col_name_keyword reserved_keyword cockroachdb_extra_reserved_keyword +%type col_name_keyword reserved_keyword cockroachdb_extra_reserved_keyword extra_var_value %type table_constraint constraint_elem %type index_def @@ -2823,11 +2823,26 @@ attrs: var_value: a_expr -| ON +| extra_var_value { $$.val = tree.Expr(&tree.UnresolvedName{NumParts: 1, Parts: tree.NameParts{$1}}) } +// The RHS of a SET statement can contain any valid expression, which +// themselves can contain identifiers like TRUE, FALSE. These are parsed +// as column names (via a_expr) and later during semantic analysis +// assigned their special value. +// +// In addition, for compatibility with CockroachDB we need to support +// the reserved keyword ON (to go along OFF, which is a valid column name). +// +// Finally, in PostgreSQL the CockroachDB-reserved words "index", +// "nothing", etc. are not special and are valid in SET. These need to +// be allowed here too. +extra_var_value: + ON +| cockroachdb_extra_reserved_keyword + var_list: var_value {