diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5e3abc1ab6..a80a0d11b5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,11 +4,7 @@ Run `yarn` after checkout to install all dependencies. -## Branch Guidelines - -New branches: Please branch off of the `develop` branch. - -### Naming +## Branch Naming Please use one of the following prefixes: (ie. feature/new-feature) diff --git a/README.md b/README.md index 51a602625e..9d9cd43747 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ It started as a port of a [PHP Library][], but has since considerably diverged. It supports various SQL dialects: -GCP BigQuery, IBM DB2, Apache Hive, MariaDB, MySQL, Couchbase N1QL, Oracle PL/SQL, PostgreSQL, Amazon Redshift, Spark, SQL Server Transact-SQL. +GCP BigQuery, IBM DB2, Apache Hive, MariaDB, MySQL, Couchbase N1QL, Oracle PL/SQL, PostgreSQL, Amazon Redshift, Spark, SQL Server Transact-SQL, Trino/Presto. See [language option docs](docs/language.md) for more details. It does not support: @@ -95,7 +95,7 @@ sql-formatter -h ``` usage: sql-formatter [-h] [-o OUTPUT] \ -[-l {bigquery,db2,hive,mariadb,mysql,n1ql,plsql,postgresql,redshift,spark,sql,sqlite,tsql}] [-c CONFIG] [--version] [FILE] +[-l {bigquery,db2,hive,mariadb,mysql,n1ql,plsql,postgresql,redshift,spark,sql,sqlite,trino,tsql}] [-c CONFIG] [--version] [FILE] SQL Formatter @@ -106,7 +106,7 @@ optional arguments: -h, --help show this help message and exit -o, --output OUTPUT File to write SQL output (defaults to stdout) - -l, --language {bigquery,db2,hive,mariadb,mysql,n1ql,plsql,postgresql,redshift,spark,sql,sqlite,tsql} + -l, --language {bigquery,db2,hive,mariadb,mysql,n1ql,plsql,postgresql,redshift,spark,sql,sqlite,trino,tsql} SQL dialect (defaults to standard sql) -c, --config CONFIG Path to config json file (will use default configs if unspecified) diff --git a/docs/language.md b/docs/language.md index 6f8ad1175d..c15d741c27 100644 --- a/docs/language.md +++ b/docs/language.md @@ -16,6 +16,7 @@ Specifies 
the SQL dialect to use. - `"redshift"` - [Amazon Redshift][] - `"spark"` - [Spark][] - `"sqlite"` - [SQLite][sqlite] +- `"trino"` - [Trino][] / [Presto][] - `"tsql"` - [SQL Server Transact-SQL][tsql] The default `"sql"` dialect is meant for cases where you don't know which dialect of SQL you're about to format. @@ -32,7 +33,9 @@ Better to always pick something more specific if possible. [couchbase n1ql]: http://www.couchbase.com/n1ql [oracle pl/sql]: http://www.oracle.com/technetwork/database/features/plsql/index.html [postgresql]: https://www.postgresql.org/ +[presto]: https://prestodb.io/docs/current/ [amazon redshift]: https://docs.aws.amazon.com/redshift/latest/dg/cm_chap_SQLCommandRef.html [spark]: https://spark.apache.org/docs/latest/api/sql/index.html [sqlite]: https://sqlite.org/index.html +[trino]: https://trino.io/docs/current/ [tsql]: https://docs.microsoft.com/en-us/sql/sql-server/ diff --git a/package.json b/package.json index cf40cc68df..3a15d022b4 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,10 @@ "pl/sql", "transact-sql", "db2", - "sqlite" + "sqlite", + "trino", + "presto", + "prestosql" ], "contributors": [ "Adrien Pyke ", diff --git a/sql/arrays-and-maps.md b/sql/arrays-and-maps.md index 6393a77109..d29e8287c2 100644 --- a/sql/arrays-and-maps.md +++ b/sql/arrays-and-maps.md @@ -13,6 +13,7 @@ Array literals `ARRAY[1, 2, 3]`. Supported by: - [BigQuery][bigquery-literals]. - [PostgreSQL][postgres-literals]. +- [Trino][] Map literals in JSON style `{"foo": 1, "bar": "John"}`. Supported by: @@ -28,6 +29,7 @@ Supported by: - [Spark][] - [N1QL][] - [PostgreSQL][] +- [Trino][] Array subscript operator `arr[OFFSET(5)]`. Supported by: @@ -39,5 +41,6 @@ Array subscript operator `arr[OFFSET(5)]`. 
Supported by: [n1ql-literals]: https://docs.couchbase.com/server/current/n1ql/n1ql-language-reference/datatypes.html#arrays [n1ql]: https://docs.couchbase.com/server/current/n1ql/n1ql-language-reference/nestedops.html#field-selection [postgresql]: https://www.postgresql.org/docs/current/arrays.html#ARRAYS-ACCESSING +[trino]: https://trino.io/docs/current/functions/array.html [bigquery-literals]: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#array_literals [postgres-literals]: https://www.postgresql.org/docs/current/arrays.html diff --git a/sql/create-table.md b/sql/create-table.md index c041891205..e7dd3b2b0d 100644 --- a/sql/create-table.md +++ b/sql/create-table.md @@ -61,6 +61,10 @@ _No support for CREATE TABLE._ CREATE TABLE +[Trino][]: + + CREATE TABLE [IF NOT EXISTS] + [sql standard]: https://jakewheat.github.io/sql-overview/sql-2008-foundation-grammar.html#_11_3_table_definition [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_table_statement [db2]: https://www.ibm.com/docs/en/db2/9.7?topic=statements-create-table @@ -74,3 +78,4 @@ _No support for CREATE TABLE._ [spark]: https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table.html [sqlite]: https://www.sqlite.org/lang_createtable.html [transact-sql]: https://docs.microsoft.com/en-us/sql/t-sql/statements/create-table-transact-sql?view=sql-server-ver15 +[trino]: https://trino.io/docs/current/sql/create-table.html diff --git a/sql/create-view.md b/sql/create-view.md index 1edc9f12f7..7d6ccfa183 100644 --- a/sql/create-view.md +++ b/sql/create-view.md @@ -64,6 +64,10 @@ _No support for CREATE VIEW._ CREATE [OR ALTER] VIEW +[Trino][]: + + CREATE [OR REPLACE] VIEW + [sql standard]: https://jakewheat.github.io/sql-overview/sql-2008-foundation-grammar.html#_11_22_view_definition [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_view_statement [db2]: 
https://www.ibm.com/docs/en/db2/9.7?topic=statements-create-view @@ -77,3 +81,4 @@ _No support for CREATE VIEW._ [spark]: https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-view.html [sqlite]: https://www.sqlite.org/lang_createview.html [transact-sql]: https://docs.microsoft.com/en-us/sql/t-sql/statements/create-view-transact-sql?view=sql-server-ver15 +[trino]: https://trino.io/docs/current/sql/create-view.html diff --git a/sql/delete.md b/sql/delete.md index 1441f6c757..603f84d3bc 100644 --- a/sql/delete.md +++ b/sql/delete.md @@ -96,6 +96,10 @@ _No support for DELETE_ [WHERE condition | WHERE CURRENT OF [GLOBAL] cursor_name] [OPTION query_hints] +[Trino][]: + + DELETE FROM table_name [WHERE condition] + [sql standard]: https://jakewheat.github.io/sql-overview/sql-2008-foundation-grammar.html#_14_8_delete_statement_searched [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#delete_statement [db2]: https://www.ibm.com/docs/en/db2/9.7?topic=statements-delete @@ -109,3 +113,4 @@ _No support for DELETE_ [spark]: https://spark.apache.org/docs/latest/sql-ref-syntax.html#dml-statements [sqlite]: https://www.sqlite.org/lang_delete.html [transact-sql]: https://docs.microsoft.com/en-us/sql/t-sql/statements/delete-transact-sql?view=sql-server-ver16 +[trino]: https://trino.io/docs/current/sql/delete.html diff --git a/sql/identifiers.md b/sql/identifiers.md index 2b5f946a49..e03a37d8d7 100644 --- a/sql/identifiers.md +++ b/sql/identifiers.md @@ -18,6 +18,7 @@ The differences from this are listed below: - [SQLite][sqlite-syntax-pdf]: _(no differences)_ - [Transact-SQL][]: `@` and `#` are allowed as first chars plus `$` in the rest. Also unicode letters are allowed. Though the beginning `@` signifies a local variable or parameter and `#` a temporary table or procedure. 
+- [Trino][]: `[a-zA-Z0-9_]+`, no first-letter restrictions ## Delimited identifiers @@ -36,6 +37,7 @@ There is a considerable variation in implementations: - `` `..` `` [Spark][] - `".."`, `` `..` ``, `[..]` [SQLite][sqlite-keywords] - `".."`3, `[..]` [Transact-SQL][] +- `".."`, `` `..` `` [Trino][] Notes: @@ -56,3 +58,4 @@ Notes: [sqlite-keywords]: https://www.sqlite.org/lang_keywords.html [sqlite-syntax-pdf]: https://www.pearsonhighered.com/assets/samplechapter/0/6/7/2/067232685X.pdf [transact-sql]: https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers?view=sql-server-ver15 +[trino]: https://github.com/trinodb/trino/blob/ca7dcaa873b9dd24185e9a69cecdd1dd8717694c/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4#L1175-L1189 diff --git a/sql/insert.md b/sql/insert.md index e8a350d998..f5bbadf984 100644 --- a/sql/insert.md +++ b/sql/insert.md @@ -60,6 +60,10 @@ All dialects (except Hive) suppurt this syntax, plus a bunch of extra stuff: INSERT [TOP ( expression ) [PERCENT]] [INTO] table_name +[Trino][]: + + INSERT INTO table_name + [sql standard]: https://jakewheat.github.io/sql-overview/sql-2008-foundation-grammar.html#insert-statement [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#insert_statement [db2]: https://www.ibm.com/docs/en/db2/9.7?topic=statements-insert @@ -73,3 +77,4 @@ All dialects (except Hive) suppurt this syntax, plus a bunch of extra stuff: [spark]: https://spark.apache.org/docs/latest/sql-ref-syntax-dml-insert-table.html [sqlite]: https://www.sqlite.org/lang_insert.html [transact-sql]: https://docs.microsoft.com/en-us/sql/t-sql/statements/insert-transact-sql?view=sql-server-ver16 +[trino]: https://trino.io/docs/current/sql/insert.html diff --git a/sql/parameters.md b/sql/parameters.md index ac80d6ed32..ebdce40a5a 100644 --- a/sql/parameters.md +++ b/sql/parameters.md @@ -15,6 +15,7 @@ These come in the form of single question mark (`?`), supported by: - 
[N1QL][] - [SQLite][] - [Transact-SQL][]1 +- [Trino][] ## Numbered parameters @@ -51,3 +52,4 @@ These come in the form of single question mark (`?`), supported by: [redshift]: https://docs.aws.amazon.com/redshift/latest/dg/r_PREPARE.html [sqlite]: https://sqlite.org/c3ref/bind_blob.html [transact-sql]: https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/configuring-parameters-and-parameter-data-types +[trino]: https://trino.io/docs/current/sql/prepare.html diff --git a/sql/select.md b/sql/select.md index 7952adceb6..65ef2be593 100644 --- a/sql/select.md +++ b/sql/select.md @@ -66,6 +66,10 @@ All dialects support that, but also quite a bit extra stuff: [ALL | DISTINCT] [TOP ( expression ) [PERCENT] [WITH TIES]] +[Trino][]: + + SELECT [ALL | DISTINCT] + [sql standard]: https://jakewheat.github.io/sql-overview/sql-2008-foundation-grammar.html#query-specification [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax [db2]: https://www.ibm.com/docs/en/db2/9.7?topic=queries-subselect @@ -79,3 +83,4 @@ All dialects support that, but also quite a bit extra stuff: [spark]: https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html [sqlite]: https://www.sqlite.org/lang_select.html [transact-sql]: https://docs.microsoft.com/en-US/sql/t-sql/queries/select-transact-sql?view=sql-server-ver15 +[trino]: https://trino.io/docs/current/sql/select.html diff --git a/sql/strings.md b/sql/strings.md index 4cb4efb584..c07bd5fe5e 100644 --- a/sql/strings.md +++ b/sql/strings.md @@ -46,6 +46,10 @@ The real world implementations have lots of variation: - `'..'` (two single quotes `''` are used for escaping) - (`".."`3) - `N'..'` (`N".."`3) unicode strings +- [Trino][]: + - `'..'` (two single quotes `''` are used for escaping) + - `U&'..'`, `u&'..'` string with unicode escapes (two single quotes `''` are used for escaping) + - `X'..'`, `x'..'` hex string ### Notes: @@ -67,3 +71,4 @@ The real world implementations have lots of variation: 
[spark]: https://spark.apache.org/docs/latest/sql-ref-literals.html#string-literal [sqlite]: https://www.sqlite.org/lang_expr.html#literal_values_constants_ [transact-sql]: https://docs.microsoft.com/en-us/sql/t-sql/data-types/constants-transact-sql?view=sql-server-ver15 +[trino]: https://github.com/trinodb/trino/blob/ca7dcaa873b9dd24185e9a69cecdd1dd8717694c/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4#L1146-L1159 diff --git a/sql/update.md b/sql/update.md index c1f19f3296..9975940178 100644 --- a/sql/update.md +++ b/sql/update.md @@ -108,6 +108,12 @@ _No support for UPDATE_ [WHERE condition | WHERE CURRENT OF [GLOBAL] cursor_name] [OPTION query_hints] +[Trino][]: + + UPDATE table_name + SET [(column = expression [, ... ])] + [WHERE condition] + [sql standard]: https://jakewheat.github.io/sql-overview/sql-2008-foundation-grammar.html#_14_13_update_statement_searched [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#update_statement [db2]: https://www.ibm.com/docs/en/db2/9.7?topic=statements-update @@ -121,3 +127,4 @@ _No support for UPDATE_ [spark]: https://spark.apache.org/docs/latest/sql-ref-syntax.html#dml-statements [sqlite]: https://www.sqlite.org/lang_update.html [transact-sql]: https://docs.microsoft.com/en-us/sql/t-sql/queries/update-transact-sql?view=sql-server-ver16 +[trino]: https://trino.io/docs/current/sql/update.html diff --git a/sql/variables.md b/sql/variables.md index 32582acf9e..527cb364d4 100644 --- a/sql/variables.md +++ b/sql/variables.md @@ -16,6 +16,7 @@ - [Spark][]: `${name}` Like with Hive, these are substitution variables. 
- SQLite: _N/A_ - [Transact-SQL][]: `@name` (using identifier syntax for name) +- Trino: _N/A_ [parameters]: ./parameters.md [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language diff --git a/src/languages/trino.formatter.ts b/src/languages/trino.formatter.ts new file mode 100644 index 0000000000..a5702fa23a --- /dev/null +++ b/src/languages/trino.formatter.ts @@ -0,0 +1,877 @@
+import Formatter from 'src/formatter/Formatter';
+import Tokenizer from 'src/lexer/Tokenizer';
+import { dedupe } from 'src/utils';
+
+/**
+ * Priority 5 (last)
+ * Full list of reserved functions
+ * distinct from Keywords due to interaction with parentheses
+ */
+// https://github.com/trinodb/trino/tree/432d2897bdef99388c1a47188743a061c4ac1f34/docs/src/main/sphinx/functions
+// rg '^\.\. function::' ./docs/src/main/sphinx/functions | cut -d' ' -f 3 | cut -d '(' -f 1 | sort | uniq
+// rg '\* ' ./docs/src/main/sphinx/functions/list-by-topic.rst | grep '\* :func:' | cut -d'`' -f 2
+// rg '\* ' ./docs/src/main/sphinx/functions/list-by-topic.rst | grep -v '\* :func:'
+// grep -e '^- ' ./docs/src/main/sphinx/functions/list.rst | grep -e '^- :func:' | cut -d'`' -f2
+// grep -e '^- ' ./docs/src/main/sphinx/functions/list.rst | grep -ve '^- :func:'
+const reservedFunctions = [
+  'ABS',
+  'ACOS',
+  'ALL_MATCH',
+  'ANY_MATCH',
+  'APPROX_DISTINCT',
+  'APPROX_MOST_FREQUENT',
+  'APPROX_PERCENTILE',
+  'APPROX_SET',
+  'ARBITRARY',
+  'ARRAYS_OVERLAP',
+  'ARRAY_AGG',
+  'ARRAY_DISTINCT',
+  'ARRAY_EXCEPT',
+  'ARRAY_INTERSECT',
+  'ARRAY_JOIN',
+  'ARRAY_MAX',
+  'ARRAY_MIN',
+  'ARRAY_POSITION',
+  'ARRAY_REMOVE',
+  'ARRAY_SORT',
+  'ARRAY_UNION',
+  'ASIN',
+  'ATAN',
+  'ATAN2',
+  'AT_TIMEZONE',
+  'AVG',
+  'BAR',
+  'BETA_CDF',
+  'BING_TILE',
+  'BING_TILES_AROUND',
+  'BING_TILE_AT',
+  'BING_TILE_COORDINATES',
+  'BING_TILE_POLYGON',
+  'BING_TILE_QUADKEY',
+  'BING_TILE_ZOOM_LEVEL',
+  'BITWISE_AND',
+  'BITWISE_AND_AGG',
+  'BITWISE_LEFT_SHIFT',
+  'BITWISE_NOT',
+  'BITWISE_OR',
+  'BITWISE_OR_AGG',
+  'BITWISE_RIGHT_SHIFT',
+  'BITWISE_RIGHT_SHIFT_ARITHMETIC',
+  'BITWISE_XOR',
+  'BIT_COUNT',
+  'BOOL_AND',
+  'BOOL_OR',
+  'CARDINALITY',
+  'CAST',
+  'CBRT',
+  'CEIL',
+  'CEILING',
+  'CHAR2HEXINT',
+  'CHECKSUM',
+  'CHR',
+  'CLASSIFY',
+  'COALESCE',
+  'CODEPOINT',
+  'COLOR',
+  'COMBINATIONS',
+  'CONCAT',
+  'CONCAT_WS',
+  'CONTAINS',
+  'CONTAINS_SEQUENCE',
+  'CONVEX_HULL_AGG',
+  'CORR',
+  'COS',
+  'COSH',
+  'COSINE_SIMILARITY',
+  'COUNT',
+  'COUNT_IF',
+  'COVAR_POP',
+  'COVAR_SAMP',
+  'CRC32',
+  'CUME_DIST',
+  'CURRENT_CATALOG',
+  'CURRENT_DATE',
+  'CURRENT_GROUPS',
+  'CURRENT_SCHEMA',
+  'CURRENT_TIME',
+  'CURRENT_TIMESTAMP',
+  'CURRENT_TIMEZONE',
+  'CURRENT_USER',
+  'DATE',
+  'DATE_ADD',
+  'DATE_DIFF',
+  'DATE_FORMAT',
+  'DATE_PARSE',
+  'DATE_TRUNC',
+  'DAY',
+  'DAY_OF_MONTH',
+  'DAY_OF_WEEK',
+  'DAY_OF_YEAR',
+  'DEGREES',
+  'DENSE_RANK',
+  'DOW',
+  'DOY',
+  'E',
+  'ELEMENT_AT',
+  'EMPTY_APPROX_SET',
+  'EVALUATE_CLASSIFIER_PREDICTIONS',
+  'EVERY',
+  'EXP',
+  'EXTRACT',
+  'FEATURES',
+  'FILTER',
+  'FIRST_VALUE',
+  'FLATTEN',
+  'FLOOR',
+  'FORMAT',
+  'FORMAT_DATETIME',
+  'FORMAT_NUMBER',
+  'FROM_BASE',
+  'FROM_BASE32',
+  'FROM_BASE64',
+  'FROM_BASE64URL',
+  'FROM_BIG_ENDIAN_32',
+  'FROM_BIG_ENDIAN_64',
+  'FROM_ENCODED_POLYLINE',
+  'FROM_GEOJSON_GEOMETRY',
+  'FROM_HEX',
+  'FROM_IEEE754_32',
+  'FROM_IEEE754_64',
+  'FROM_ISO8601_DATE',
+  'FROM_ISO8601_TIMESTAMP',
+  'FROM_ISO8601_TIMESTAMP_NANOS',
+  'FROM_UNIXTIME',
+  'FROM_UNIXTIME_NANOS',
+  'FROM_UTF8',
+  'GEOMETRIC_MEAN',
+  'GEOMETRY_FROM_HADOOP_SHAPE',
+  'GEOMETRY_INVALID_REASON',
+  'GEOMETRY_NEAREST_POINTS',
+  'GEOMETRY_TO_BING_TILES',
+  'GEOMETRY_UNION',
+  'GEOMETRY_UNION_AGG',
+  'GREATEST',
+  'GREAT_CIRCLE_DISTANCE',
+  'HAMMING_DISTANCE',
+  'HASH_COUNTS',
+  'HISTOGRAM',
+  'HMAC_MD5',
+  'HMAC_SHA1',
+  'HMAC_SHA256',
+  'HMAC_SHA512',
+  'HOUR',
+  'HUMAN_READABLE_SECONDS',
+  'IF',
+  'INDEX',
+  'INFINITY',
+  'INTERSECTION_CARDINALITY',
+  'INVERSE_BETA_CDF',
+  'INVERSE_NORMAL_CDF',
+  'IS_FINITE',
+  'IS_INFINITE',
+  'IS_JSON_SCALAR',
+  'IS_NAN',
+  'JACCARD_INDEX',
+  'JSON_ARRAY_CONTAINS',
+  'JSON_ARRAY_GET',
+  'JSON_ARRAY_LENGTH',
+  'JSON_EXISTS',
+  'JSON_EXTRACT',
+  'JSON_EXTRACT_SCALAR',
+  'JSON_FORMAT',
+  'JSON_PARSE',
+  'JSON_QUERY',
+  'JSON_SIZE',
+  'JSON_VALUE',
+  'KURTOSIS',
+  'LAG',
+  'LAST_DAY_OF_MONTH',
+  'LAST_VALUE',
+  'LEAD',
+  'LEARN_CLASSIFIER',
+  'LEARN_LIBSVM_CLASSIFIER',
+  'LEARN_LIBSVM_REGRESSOR',
+  'LEARN_REGRESSOR',
+  'LEAST',
+  'LENGTH',
+  'LEVENSHTEIN_DISTANCE',
+  'LINE_INTERPOLATE_POINT',
+  'LINE_INTERPOLATE_POINTS',
+  'LINE_LOCATE_POINT',
+  'LISTAGG',
+  'LN',
+  'LOCALTIME',
+  'LOCALTIMESTAMP',
+  'LOG',
+  'LOG10',
+  'LOG2',
+  'LOWER',
+  'LPAD',
+  'LTRIM',
+  'LUHN_CHECK',
+  'MAKE_SET_DIGEST',
+  'MAP',
+  'MAP_AGG',
+  'MAP_CONCAT',
+  'MAP_ENTRIES',
+  'MAP_FILTER',
+  'MAP_FROM_ENTRIES',
+  'MAP_KEYS',
+  'MAP_UNION',
+  'MAP_VALUES',
+  'MAP_ZIP_WITH',
+  'MAX',
+  'MAX_BY',
+  'MD5',
+  'MERGE',
+  'MERGE_SET_DIGEST',
+  'MILLISECOND',
+  'MIN',
+  'MINUTE',
+  'MIN_BY',
+  'MOD',
+  'MONTH',
+  'MULTIMAP_AGG',
+  'MULTIMAP_FROM_ENTRIES',
+  'MURMUR3',
+  'NAN',
+  'NGRAMS',
+  'NONE_MATCH',
+  'NORMALIZE',
+  'NORMAL_CDF',
+  'NOW',
+  'NTH_VALUE',
+  'NTILE',
+  'NULLIF',
+  'NUMERIC_HISTOGRAM',
+  'OBJECTID',
+  'OBJECTID_TIMESTAMP',
+  'PARSE_DATA_SIZE',
+  'PARSE_DATETIME',
+  'PARSE_DURATION',
+  'PERCENT_RANK',
+  'PI',
+  'POSITION',
+  'POW',
+  'POWER',
+  'QDIGEST_AGG',
+  'QUARTER',
+  'RADIANS',
+  'RAND',
+  'RANDOM',
+  'RANK',
+  'REDUCE',
+  'REDUCE_AGG',
+  'REGEXP_COUNT',
+  'REGEXP_EXTRACT',
+  'REGEXP_EXTRACT_ALL',
+  'REGEXP_LIKE',
+  'REGEXP_POSITION',
+  'REGEXP_REPLACE',
+  'REGEXP_SPLIT',
+  'REGRESS',
+  'REGR_INTERCEPT',
+  'REGR_SLOPE',
+  'RENDER',
+  'REPEAT',
+  'REPLACE',
+  'REVERSE',
+  'RGB',
+  'ROUND',
+  'ROW_NUMBER',
+  'RPAD',
+  'RTRIM',
+  'SECOND',
+  'SEQUENCE',
+  'SHA1',
+  'SHA256',
+  'SHA512',
+  'SHUFFLE',
+  'SIGN',
+  'SIMPLIFY_GEOMETRY',
+  'SIN',
+  'SKEWNESS',
+  'SLICE',
+  'SOUNDEX',
+  'SPATIAL_PARTITIONING',
+  'SPATIAL_PARTITIONS',
+  'SPLIT',
+  'SPLIT_PART',
+  'SPLIT_TO_MAP',
+  'SPLIT_TO_MULTIMAP',
+  'SPOOKY_HASH_V2_32',
+  'SPOOKY_HASH_V2_64',
+  'SQRT',
+  'STARTS_WITH',
+  'STDDEV',
+  'STDDEV_POP',
+  'STDDEV_SAMP',
+  'STRPOS',
+  'ST_AREA',
+  'ST_ASBINARY',
+  'ST_ASTEXT',
+  'ST_BOUNDARY',
+  'ST_BUFFER',
+  'ST_CENTROID',
+  'ST_CONTAINS',
+  'ST_CONVEXHULL',
+  'ST_COORDDIM',
+  'ST_CROSSES',
+  'ST_DIFFERENCE',
+  'ST_DIMENSION',
+  'ST_DISJOINT',
+  'ST_DISTANCE',
+  'ST_ENDPOINT',
+  'ST_ENVELOPE',
+  'ST_ENVELOPEASPTS',
+  'ST_EQUALS',
+  'ST_EXTERIORRING',
+  'ST_GEOMETRIES',
+  'ST_GEOMETRYFROMTEXT',
+  'ST_GEOMETRYN',
+  'ST_GEOMETRYTYPE',
+  'ST_GEOMFROMBINARY',
+  'ST_INTERIORRINGN',
+  'ST_INTERIORRINGS',
+  'ST_INTERSECTION',
+  'ST_INTERSECTS',
+  'ST_ISCLOSED',
+  'ST_ISEMPTY',
+  'ST_ISRING',
+  'ST_ISSIMPLE',
+  'ST_ISVALID',
+  'ST_LENGTH',
+  'ST_LINEFROMTEXT',
+  'ST_LINESTRING',
+  'ST_MULTIPOINT',
+  'ST_NUMGEOMETRIES',
+  'ST_NUMINTERIORRING',
+  'ST_NUMPOINTS',
+  'ST_OVERLAPS',
+  'ST_POINT',
+  'ST_POINTN',
+  'ST_POINTS',
+  'ST_POLYGON',
+  'ST_RELATE',
+  'ST_STARTPOINT',
+  'ST_SYMDIFFERENCE',
+  'ST_TOUCHES',
+  'ST_UNION',
+  'ST_WITHIN',
+  'ST_X',
+  'ST_XMAX',
+  'ST_XMIN',
+  'ST_Y',
+  'ST_YMAX',
+  'ST_YMIN',
+  'SUBSTR',
+  'SUBSTRING',
+  'SUM',
+  'TAN',
+  'TANH',
+  'TDIGEST_AGG',
+  'TIMESTAMP_OBJECTID',
+  'TIMEZONE_HOUR',
+  'TIMEZONE_MINUTE',
+  'TO_BASE',
+  'TO_BASE32',
+  'TO_BASE64',
+  'TO_BASE64URL',
+  'TO_BIG_ENDIAN_32',
+  'TO_BIG_ENDIAN_64',
+  'TO_CHAR',
+  'TO_DATE',
+  'TO_ENCODED_POLYLINE',
+  'TO_GEOJSON_GEOMETRY',
+  'TO_GEOMETRY',
+  'TO_HEX',
+  'TO_IEEE754_32',
+  'TO_IEEE754_64',
+  'TO_ISO8601',
+  'TO_MILLISECONDS',
+  'TO_SPHERICAL_GEOGRAPHY',
+  'TO_TIMESTAMP',
+  'TO_UNIXTIME',
+  'TO_UTF8',
+  'TRANSFORM',
+  'TRANSFORM_KEYS',
+  'TRANSFORM_VALUES',
+  'TRANSLATE',
+  'TRIM',
+  'TRIM_ARRAY',
+  'TRUNCATE',
+  'TRY',
+  'TRY_CAST',
+  'TYPEOF',
+  'UPPER',
+  'URL_DECODE',
+  'URL_ENCODE',
+  'URL_EXTRACT_FRAGMENT',
+  'URL_EXTRACT_HOST',
+  'URL_EXTRACT_PARAMETER',
+  'URL_EXTRACT_PATH',
+  'URL_EXTRACT_PORT',
+  'URL_EXTRACT_PROTOCOL',
+  'URL_EXTRACT_QUERY',
+  'UUID',
+  'VALUES_AT_QUANTILES',
+  'VALUE_AT_QUANTILE',
+  'VARIANCE',
+  'VAR_POP',
+  'VAR_SAMP',
+  'VERSION',
+  'WEEK',
+  'WEEK_OF_YEAR',
+  'WIDTH_BUCKET',
+  'WILSON_INTERVAL_LOWER',
+  'WILSON_INTERVAL_UPPER',
+  'WITH_TIMEZONE',
+  'WORD_STEM',
+  'XXHASH64',
+  'YEAR',
+  'YEAR_OF_WEEK',
+  'YOW',
+  'ZIP',
+  'ZIP_WITH',
+];
+
+/**
+ * Priority 5 (last)
+ * Full list of reserved words
+ * any words that are in a higher priority are removed
+ */
+// https://github.com/trinodb/trino/blob/432d2897bdef99388c1a47188743a061c4ac1f34/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4#L858-L1128
+// +SKIP
+//
+const reservedKeywords = [
+  'ABSENT',
+  'ADD',
+  'ADMIN',
+  'AFTER',
+  'ALL',
+  'ALTER',
+  'ANALYZE',
+  // 'AND',
+  'ANY',
+  'ARRAY',
+  'AS',
+  'ASC',
+  'AT',
+  'AUTHORIZATION',
+  'BERNOULLI',
+  'BETWEEN',
+  'BOTH',
+  'BY',
+  'CALL',
+  'CASCADE',
+  'CASE',
+  'CATALOGS',
+  'COLUMN',
+  'COLUMNS',
+  'COMMENT',
+  'COMMIT',
+  'COMMITTED',
+  'CONDITIONAL',
+  'CONSTRAINT',
+  'COPARTITION',
+  'CREATE',
+  'CROSS',
+  'CUBE',
+  'CURRENT',
+  'CURRENT_PATH',
+  'CURRENT_ROLE',
+  'DATA',
+  'DEALLOCATE',
+  'DEFAULT',
+  'DEFINE',
+  'DEFINER',
+  'DELETE',
+  'DENY',
+  'DESC',
+  'DESCRIBE',
+  'DESCRIPTOR',
+  'DISTINCT',
+  'DISTRIBUTED',
+  'DOUBLE',
+  'DROP',
+  'ELSE',
+  'EMPTY',
+  'ENCODING',
+  'END',
+  'ERROR',
+  'ESCAPE',
+  'EXCEPT',
+  'EXCLUDING',
+  'EXECUTE',
+  'EXISTS',
+  'EXPLAIN',
+  'FALSE',
+  'FETCH',
+  'FINAL',
+  'FIRST',
+  'FOLLOWING',
+  'FOR',
+  'FROM',
+  'FULL',
+  'FUNCTIONS',
+  'GRANT',
+  'GRANTED',
+  'GRANTS',
+  'GRAPHVIZ',
+  'GROUP',
+  'GROUPING',
+  'GROUPS',
+  'HAVING',
+  'IGNORE',
+  'IN',
+  'INCLUDING',
+  'INITIAL',
+  'INNER',
+  'INPUT',
+  'INSERT',
+  'INTERSECT',
+  'INTERVAL',
+  'INTO',
+  'INVOKER',
+  'IO',
+  'IS',
+  'ISOLATION',
+  'JOIN',
+  'JSON',
+  'JSON_ARRAY',
+  'JSON_OBJECT',
+  'KEEP',
+  'KEY',
+  'KEYS',
+  'LAST',
+  'LATERAL',
+  'LEADING',
+  'LEFT',
+  'LEVEL',
+  'LIKE',
+  'LIMIT',
+  'LOCAL',
+  'LOGICAL',
+  'MATCH',
+  'MATCHED',
+  'MATCHES',
+  'MATCH_RECOGNIZE',
+  'MATERIALIZED',
+  'MEASURES',
+  'NATURAL',
+  'NEXT',
+  'NFC',
+  'NFD',
+  'NFKC',
+  'NFKD',
+  'NO',
+  'NONE',
+  'NOT',
+  'NULL',
+  'NULLS',
+  'OBJECT',
+  'OF',
+  'OFFSET',
+  'OMIT',
+  'ON',
+  'ONE',
+  'ONLY',
+  'OPTION',
+  // 'OR',
+  'ORDER',
+  'ORDINALITY',
+  'OUTER',
+  'OUTPUT',
+  'OVER',
+  'OVERFLOW',
+  'PARTITION',
+  'PARTITIONS',
+  'PASSING',
+  'PAST',
+  'PATH',
+  'PATTERN',
+  'PER',
+  'PERMUTE',
+  'PRECEDING',
+  'PRECISION',
+  'PREPARE',
+  'PRIVILEGES',
+  'PROPERTIES',
+  'PRUNE',
+  'QUOTES',
+  'RANGE',
+  'READ',
+  'RECURSIVE',
+  'REFRESH',
+  'RENAME',
+  'REPEATABLE',
+  'RESET',
+  'RESPECT',
+  'RESTRICT',
+  'RETURNING',
+  'REVOKE',
+  'RIGHT',
+  'ROLE',
+  'ROLES',
+  'ROLLBACK',
+  'ROLLUP',
+  'ROW',
+  'ROWS',
+  'RUNNING',
+  'SCALAR',
+  'SCHEMA',
+  'SCHEMAS',
+  'SECURITY',
+  'SEEK',
+  'SELECT',
+  'SERIALIZABLE',
+  'SESSION',
+  'SET',
+  'SETS',
+  'SHOW',
+  'SKIP',
+  'SOME',
+  'START',
+  'STATS',
+  'STRING',
+  'SUBSET',
+  'SYSTEM',
+  'TABLE',
+  'TABLES',
+  'TABLESAMPLE',
+  'TEXT',
+  'THEN',
+  'TIES',
+  'TIME',
+  'TIMESTAMP',
+  'TO',
+  'TRAILING',
+  'TRANSACTION',
+  'TRUE',
+  'TYPE',
+  'UESCAPE',
+  'UNBOUNDED',
+  'UNCOMMITTED',
+  'UNCONDITIONAL',
+  'UNION',
+  'UNIQUE',
+  'UNKNOWN',
+  'UNMATCHED',
+  'UNNEST',
+  'UPDATE',
+  'USE',
+  'USER',
+  'USING',
+  'UTF16',
+  'UTF32',
+  'UTF8',
+  'VALIDATE',
+  'VALUE',
+  'VALUES',
+  'VERBOSE',
+  'VIEW',
+  'WHEN',
+  'WHERE',
+  'WINDOW',
+  'WITH',
+  'WITHIN',
+  'WITHOUT',
+  'WORK',
+  'WRAPPER',
+  'WRITE',
+  'ZONE',
+  // https://github.com/trinodb/trino/blob/432d2897bdef99388c1a47188743a061c4ac1f34/core/trino-main/src/main/java/io/trino/metadata/TypeRegistry.java#L131-L168
+  // or https://trino.io/docs/current/language/types.html
+  // data types; DOUBLE, INTERVAL, TIMESTAMP, TIME, ROW, ARRAY and JSON repeat keywords listed above
+  'BIGINT',
+  'INT',
+  'INTEGER',
+  'SMALLINT',
+  'TINYINT',
+  'BOOLEAN',
+  'DATE',
+  'DECIMAL',
+  'REAL',
+  'DOUBLE',
+  'HYPERLOGLOG',
+  'QDIGEST',
+  'TDIGEST',
+  'P4HYPERLOGLOG',
+  'INTERVAL',
+  'TIMESTAMP',
+  'TIME',
+  'VARBINARY',
+  'VARCHAR',
+  'CHAR',
+  'ROW',
+  'ARRAY',
+  'MAP',
+  'JSON',
+  'JSON2016',
+  'IPADDRESS',
+  'GEOMETRY',
+  'UUID',
+  'SETDIGEST',
+  'JONIREGEXP',
+  'RE2JREGEXP',
+  'LIKEPATTERN',
+  'COLOR',
+  'CODEPOINTS',
+  'FUNCTION',
+  'JSONPATH',
+];
+
+/**
+ * Priority 1 (first)
+ * keywords that begin a new statement
+ * will begin new indented block
+ */
+// TODO
+// https://github.com/trinodb/trino/blob/432d2897bdef99388c1a47188743a061c4ac1f34/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4#L41
+const reservedCommands = [
+  // DDL
+  'ALTER SCHEMA',
+  'ALTER TABLE',
+  'ALTER MATERIALIZED VIEW',
+  'ALTER VIEW',
+  'CREATE SCHEMA',
+  'CREATE TABLE',
+  'CREATE VIEW',
+  'CREATE OR REPLACE VIEW',
+  'CREATE MATERIALIZED VIEW',
+  'CREATE OR REPLACE MATERIALIZED VIEW',
+  'CREATE ROLE',
+  'DROP SCHEMA',
+  'DROP TABLE',
+  'DROP COLUMN',
+  'DROP MATERIALIZED VIEW',
+  'DROP VIEW',
+  'DROP ROLE',
+  'TRUNCATE TABLE',
+  'USE', // TODO?
+  // DML
+  'INSERT INTO',
+  'DELETE FROM',
+  // Data Retrieval
+  'WITH',
+  'SELECT',
+  'FROM',
+  'WHERE',
+  'GROUP BY',
+  'HAVING',
+  'WINDOW', // verify
+
+  'VALUES',
+
+  'ORDER BY',
+  'OFFSET',
+
+  'LIMIT',
+  'FETCH',
+
+  'PARTITION BY', // verify
+  'TABLESAMPLE',
+  'EXPLAIN',
+  // Auxiliary
+  'ANALYZE',
+
+  'COMMENT ON TABLE',
+  'COMMENT ON COLUMN',
+  'DESCRIBE INPUT',
+  'DESCRIBE OUTPUT',
+
+  'REFRESH MATERIALIZED VIEW',
+  'RESET SESSION',
+  // 'SET SESSION', // TODO
+
+  'SHOW GRANTS',
+  'SHOW CREATE TABLE',
+  'SHOW CREATE SCHEMA',
+  'SHOW CREATE VIEW',
+  'SHOW CREATE MATERIALIZED VIEW',
+  'SHOW TABLES',
+  'SHOW SCHEMAS',
+  'SHOW CATALOGS',
+  'SHOW COLUMNS',
+  'SHOW STATS FOR',
+  'SHOW ROLES',
+  'SHOW CURRENT ROLES',
+  'SHOW ROLE GRANTS',
+  'SHOW FUNCTIONS',
+  'SHOW SESSION',
+
+  // other
+  'LATERAL', // verify
+  'UPDATE',
+];
+
+// https://github.com/trinodb/trino/blob/432d2897bdef99388c1a47188743a061c4ac1f34/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4#L231-L235
+// https://github.com/trinodb/trino/blob/432d2897bdef99388c1a47188743a061c4ac1f34/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4#L288-L291
+const reservedBinaryCommands = [
+  // set booleans
+  'INTERSECT',
+  'INTERSECT ALL',
+  'INTERSECT DISTINCT',
+  'UNION',
+  'UNION ALL',
+  'UNION DISTINCT',
+  'EXCEPT',
+  'EXCEPT ALL',
+  'EXCEPT DISTINCT',
+];
+
+// https://github.com/trinodb/trino/blob/432d2897bdef99388c1a47188743a061c4ac1f34/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4#L299-L313
+const reservedJoins = [
+  'JOIN',
+  'INNER JOIN',
+  'LEFT JOIN',
+  'LEFT OUTER JOIN',
+  'RIGHT JOIN',
+  'RIGHT OUTER JOIN',
+  'FULL JOIN',
+  'FULL OUTER JOIN',
+  'CROSS JOIN',
+  'NATURAL JOIN',
+  'NATURAL INNER JOIN',
+  'NATURAL LEFT JOIN',
+  'NATURAL LEFT OUTER JOIN',
+  'NATURAL RIGHT JOIN',
+  'NATURAL RIGHT OUTER JOIN',
+  'NATURAL FULL JOIN',
+  'NATURAL FULL OUTER JOIN',
+];
+
+/**
+ * Priority 3
+ * keywords that follow a previous Statement, must be attached to subsequent data
+ * can be fully inline or on newline with optional indent
+ */
+// TODO
+const reservedDependentClauses = ['WHEN', 'ELSE'];
+
+/**
+ * Formatter for the Trino / Presto dialect (registered as `trino` in src/sqlFormatter.ts).
+ * Hands the word lists above plus the dialect's operators, quoting and parameter rules to the Tokenizer.
+ */
+export default class TrinoFormatter extends Formatter {
+  // https://trino.io/docs/current/functions/list.html#id1
+  // TODO: [] substring operator?
+  static operators = ['||', '<', '>', '<=', '>=', '=', '<>', '!=', '->', '+', '-', '*', '/', '%'];
+
+  tokenizer() {
+    return new Tokenizer({
+      reservedCommands,
+      reservedBinaryCommands,
+      reservedJoins,
+      reservedDependentClauses,
+      // keywords and functions are merged; names such as DATE, MAP, UUID and COLOR occur in both, hence dedupe()
+      reservedKeywords: dedupe([...reservedKeywords, ...reservedFunctions]),
+      openParens: ['(', '['],
+      closeParens: [')', ']'],
+      stringTypes: [{ quote: "''", prefixes: ['U&', 'X'] }],
+      identTypes: ['""', '``'],
+      positionalParams: true,
+      operators: TrinoFormatter.operators,
+    });
+  }
+}
diff --git a/src/sqlFormatter.ts b/src/sqlFormatter.ts index 409b6ab9d5..d272488d38 100644 --- a/src/sqlFormatter.ts +++ b/src/sqlFormatter.ts @@ -10,6 +10,7 @@ import RedshiftFormatter from 'src/languages/redshift.formatter'; import SparkFormatter from 'src/languages/spark.formatter'; import SqliteFormatter from 'src/languages/sqlite.formatter'; import SqlFormatter from 'src/languages/sql.formatter'; +import TrinoFormatter from 'src/languages/trino.formatter'; import TSqlFormatter from 'src/languages/tsql.formatter'; import type { FormatOptions } from './types'; @@ -28,6 +29,7 @@ export const formatters = { spark: SparkFormatter, sql: SqlFormatter, sqlite: SqliteFormatter, + trino: TrinoFormatter, tsql: TSqlFormatter, }; export type SqlLanguage = keyof typeof formatters; diff --git a/static/index.html b/static/index.html index 288ae85fa8..a642b829b9 100644 --- a/static/index.html +++ b/static/index.html @@ -69,6 +69,7 @@

Options

+
diff --git a/test/behavesLikeSqlFormatter.ts b/test/behavesLikeSqlFormatter.ts index 68e379b248..3835993560 100644 --- a/test/behavesLikeSqlFormatter.ts +++ b/test/behavesLikeSqlFormatter.ts @@ -286,13 +286,13 @@ export default function behavesLikeSqlFormatter(format: FormatFn) { }); it('formats top-level and newline multi-word reserved words with inconsistent spacing', () => { - const result = format('SELECT * FROM foo LEFT \t \n JOIN bar ORDER \n BY blah'); + const result = format('SELECT * FROM foo LEFT \t \n JOIN mycol ORDER \n BY blah'); expect(result).toBe(dedent` SELECT * FROM foo - LEFT JOIN bar + LEFT JOIN mycol ORDER BY blah `); diff --git a/test/options/keywordCase.ts b/test/options/keywordCase.ts index a0257b0380..5637f295dd 100644 --- a/test/options/keywordCase.ts +++ b/test/options/keywordCase.ts @@ -18,7 +18,7 @@ export default function supportsKeywordCase(format: FormatFn) { }); it('converts keywords to uppercase', () => { - const result = format('select distinct * frOM foo left JOIN bar WHERe cola > 1 and colb = 3', { + const result = format('select distinct * frOM foo left JOIN mycol WHERe cola > 1 and colb = 3', { keywordCase: 'upper', }); expect(result).toBe(dedent` @@ -26,7 +26,7 @@ export default function supportsKeywordCase(format: FormatFn) { DISTINCT * FROM foo - LEFT JOIN bar + LEFT JOIN mycol WHERE cola > 1 AND colb = 3 diff --git a/test/trino.test.ts b/test/trino.test.ts new file mode 100644 index 0000000000..32fbb3863c --- /dev/null +++ b/test/trino.test.ts @@ -0,0 +1,59 @@ +import dedent from 'dedent-js'; + +import { format as originalFormat, FormatFn } from 'src/sqlFormatter'; +import TrinoFormatter from 'src/languages/trino.formatter'; +// import behavesLikeSqlFormatter from './behavesLikeSqlFormatter'; + +// import supportsAlterTable from './features/alterTable'; +import supportsArrayLiterals from './features/arrayLiterals'; +import supportsBetween from './features/between'; +import supportsCreateTable from './features/createTable'; 
+import supportsDeleteFrom from './features/deleteFrom'; +import supportsJoin from './features/join'; +import supportsOperators from './features/operators'; +import supportsStrings from './features/strings'; +import supportsArrayAndMapAccessors from './features/arrayAndMapAccessors'; +import supportsComments from './features/comments'; +import supportsIdentifiers from './features/identifiers'; +import supportsParams from './options/param'; + +describe('TrinoFormatter', () => { + const language = 'trino'; + const format: FormatFn = (query, cfg = {}) => originalFormat(query, { ...cfg, language }); + + // behavesLikeSqlFormatter(format); // NOTE(review): shared suite disabled for this dialect, reason not recorded; confirm + supportsComments(format); + supportsCreateTable(format); + // supportsAlterTable(format); // NOTE(review): disabled although 'ALTER TABLE' is a reserved command; confirm + supportsDeleteFrom(format); + supportsStrings(format, ["''", "X''"]); + supportsIdentifiers(format, ['""', '``']); + supportsBetween(format); + supportsOperators(format, TrinoFormatter.operators, ['AND', 'OR']); + supportsArrayLiterals(format); + supportsArrayAndMapAccessors(format); + supportsJoin(format, { + additionally: [ + 'NATURAL INNER JOIN', + 'NATURAL LEFT JOIN', + 'NATURAL LEFT OUTER JOIN', + 'NATURAL RIGHT JOIN', + 'NATURAL RIGHT OUTER JOIN', + 'NATURAL FULL JOIN', + 'NATURAL FULL OUTER JOIN', + ], + }); + supportsParams(format, { positional: true }); + + it('formats SET SESSION lines as their own statement', () => { + const result = format('SET SESSION foo = 444; SELECT * FROM tbl'); + expect(result).toBe(dedent` + SET SESSION foo = 444; + + SELECT + * + FROM + tbl + `); + }); +}); diff --git a/vscode/package.json b/vscode/package.json index 7a5819e987..23368252ac 100644 --- a/vscode/package.json +++ b/vscode/package.json @@ -37,6 +37,8 @@ "pl/sql", "postgres", "postgresql", + "presto", + "prestosql", "prettier", "redshift", "spark", @@ -44,6 +46,7 @@ "sql", "sqlite", "sql server", + "trino", "tsql" ], "activationEvents": [ @@ -122,6 +125,7 @@ "redshift", "spark", "sqlite", + "trino", "tsql" ], "default": "sql",