From e851205b43fd2e77a7422c85d9a39273750cd1a4 Mon Sep 17 00:00:00 2001 From: Charles Givre Date: Wed, 9 Nov 2022 20:47:46 -0500 Subject: [PATCH 1/6] Initial commit, some UTs working --- sqlglot/dialects/__init__.py | 1 + sqlglot/dialects/dialect.py | 1 + sqlglot/dialects/drill.py | 110 +++++++++++++++++++++++++++++++++ tests/dialects/test_dialect.py | 23 +++++++ tests/dialects/test_drill.py | 45 ++++++++++++++ 5 files changed, 180 insertions(+) create mode 100644 sqlglot/dialects/drill.py create mode 100644 tests/dialects/test_drill.py diff --git a/sqlglot/dialects/__init__.py b/sqlglot/dialects/__init__.py index 0816831379..2e42e7df89 100644 --- a/sqlglot/dialects/__init__.py +++ b/sqlglot/dialects/__init__.py @@ -2,6 +2,7 @@ from sqlglot.dialects.clickhouse import ClickHouse from sqlglot.dialects.databricks import Databricks from sqlglot.dialects.dialect import Dialect, Dialects +from sqlglot.dialects.drill import Drill from sqlglot.dialects.duckdb import DuckDB from sqlglot.dialects.hive import Hive from sqlglot.dialects.mysql import MySQL diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py index 3af08bb1ec..a942305927 100644 --- a/sqlglot/dialects/dialect.py +++ b/sqlglot/dialects/dialect.py @@ -32,6 +32,7 @@ class Dialects(str, Enum): TRINO = "trino" TSQL = "tsql" DATABRICKS = "databricks" + DRILL = "drill" class _Dialect(type): diff --git a/sqlglot/dialects/drill.py b/sqlglot/dialects/drill.py new file mode 100644 index 0000000000..4d5a3f9c81 --- /dev/null +++ b/sqlglot/dialects/drill.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +from sqlglot import exp, parser, tokens, generator +from sqlglot.dialects.dialect import ( + Dialect, + create_with_partitions_sql, + format_time_lambda, + rename_func, str_position_sql, + no_trycast_sql +) +from sqlglot.dialects.postgres import _lateral_sql +from sqlglot.tokens import TokenType + + +def _to_timestamp(args): + # TO_TIMESTAMP accepts either a single double argument or (text, text) + if len(args) == 1 and args[0].is_number: + # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE + return exp.UnixToTime.from_arg_list(args) + # https://www.postgresql.org/docs/current/functions-formatting.html + return format_time_lambda(exp.StrToTime, "postgres")(args) + + +def if_sql(self, expression): + expressions = self.format_args( + expression.this, expression.args.get("true"), expression.args.get("false") + ) + return f"`IF`({expressions})" + + +class Drill(Dialect): + normalize_functions = None + null_ordering = "nulls_are_last" + date_format = "'yyyy-MM-dd'" + + class Tokenizer(tokens.Tokenizer): + QUOTES = ["'"] + IDENTIFIERS = ["`"] + ESCAPES = ["\\"] + ENCODE = "utf-8" + + KEYWORDS = { + **tokens.Tokenizer.KEYWORDS, + "VARBINARY": TokenType.BINARY + } + + time_mapping = { + "y": "%Y", + "Y": "%Y", + "YYYY": "%Y", + "yyyy": "%Y", + "YY": "%y", + "yy": "%y", + "MMMM": "%B", + "MMM": "%b", + "MM": "%m", + "M": "%-m", + "dd": "%d", + "d": "%-d", + "HH": "%H", + "H": "%-H", + "hh": "%I", + "h": "%-I", + "mm": "%M", + "m": "%-M", + "ss": "%S", + "s": "%-S", + "SSSSSS": "%f", + "a": "%p", + "DD": "%j", + "D": "%-j", + "E": "%a", + "EE": "%a", + "EEE": "%a", + "EEEE": "%A", + } + + class Parser(parser.Parser): + FUNCTIONS = { + **parser.Parser.FUNCTIONS, + } + + class Generator(generator.Generator): + TYPE_MAPPING = { + **generator.Generator.TYPE_MAPPING, + exp.DataType.Type.INT: "INTEGER", + exp.DataType.Type.SMALLINT: "INTEGER", + exp.DataType.Type.TINYINT: "INTEGER", + exp.DataType.Type.BINARY: "VARBINARY", + exp.DataType.Type.TEXT: "VARCHAR", + exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", + exp.DataType.Type.DATETIME: "TIMESTAMP" + } + + TRANSFORMS = { + **generator.Generator.TRANSFORMS, + exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", + exp.Lateral: _lateral_sql, + exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})", + + exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})", + exp.ArrayContains: rename_func("REPEATED_CONTAINS"), + exp.ArraySize: rename_func("REPEATED_COUNT"), + exp.If: if_sql, + exp.ILike: lambda self, e: f" {self.sql(e, 'this')} `ILIKE` {self.sql(e, 'expression')}", + exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), + exp.StrPosition: str_position_sql, + exp.TryCast: no_trycast_sql, + } diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index e24b54eaee..40267817e0 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -69,6 +69,7 @@ def test_cast(self): write={ "bigquery": "CAST(a AS STRING)", "clickhouse": "CAST(a AS TEXT)", + "drill": "CAST(a AS VARCHAR)", "duckdb": "CAST(a AS TEXT)", "mysql": "CAST(a AS TEXT)", "hive": "CAST(a AS STRING)", @@ -86,6 +87,7 @@ def test_cast(self): write={ "bigquery": "CAST(a AS BINARY(4))", "clickhouse": "CAST(a AS BINARY(4))", + "drill": "CAST(a AS VARBINARY(4))", "duckdb": "CAST(a AS BINARY(4))", "mysql": "CAST(a AS BINARY(4))", "hive": "CAST(a AS BINARY(4))", @@ -146,6 +148,7 @@ def test_cast(self): "CAST(a AS STRING)", write={ "bigquery": "CAST(a AS STRING)", + "drill": "CAST(a AS VARCHAR)", "duckdb": "CAST(a AS TEXT)", "mysql": "CAST(a AS TEXT)", "hive": "CAST(a AS STRING)", @@ -162,6 +165,7 @@ def test_cast(self): "CAST(a AS VARCHAR)", write={ "bigquery": "CAST(a AS STRING)", + "drill": "CAST(a AS VARCHAR)", "duckdb": "CAST(a AS TEXT)", "mysql": "CAST(a AS VARCHAR)", "hive": "CAST(a AS STRING)", @@ -178,6 +182,7 @@ def test_cast(self): "CAST(a AS VARCHAR(3))", write={ "bigquery": "CAST(a AS STRING(3))", + "drill": "CAST(a AS VARCHAR(3))", "duckdb": "CAST(a AS TEXT(3))", "mysql": "CAST(a AS VARCHAR(3))", "hive": "CAST(a AS VARCHAR(3))", @@ -194,6 +199,7 @@ def test_cast(self): "CAST(a AS SMALLINT)", write={ "bigquery": "CAST(a AS INT64)", + "drill": "CAST(a AS INTEGER)", "duckdb": "CAST(a AS SMALLINT)", "mysql": "CAST(a AS SMALLINT)", "hive": "CAST(a AS SMALLINT)", @@ -215,6 +221,7 @@ def test_cast(self): }, write={ "duckdb": "TRY_CAST(a AS DOUBLE)", + "drill" : "CAST(a AS DOUBLE)", "postgres": "CAST(a AS DOUBLE PRECISION)", "redshift": "CAST(a AS DOUBLE PRECISION)", }, @@ -225,6 +232,7 @@ def test_cast(self): write={ "bigquery": "CAST(a AS FLOAT64)", "clickhouse": "CAST(a AS Float64)", + "drill": "CAST(a AS DOUBLE)", "duckdb": "CAST(a AS DOUBLE)", "mysql": "CAST(a AS DOUBLE)", "hive": "CAST(a AS DOUBLE)", @@ -279,6 +287,7 @@ def test_time(self): "duckdb": "STRPTIME(x, '%Y-%m-%dT%H:%M:%S')", "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)", "presto": "DATE_PARSE(x, '%Y-%m-%dT%H:%i:%S')", + "drill": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')", "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')", "spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')", }, @@ -649,6 +658,7 @@ def test_array(self): write={ "bigquery": "ARRAY_LENGTH(x)", "duckdb": "ARRAY_LENGTH(x)", + "drill": "REPEATED_COUNT(x)", "presto": "CARDINALITY(x)", "spark": "SIZE(x)", }, @@ -775,6 +785,7 @@ def test_set_operators(self): }, write={ "bigquery": "SELECT * FROM a UNION DISTINCT SELECT * FROM b", + "drill": "SELECT * FROM a UNION SELECT * FROM b", "duckdb": "SELECT * FROM a UNION SELECT * FROM b", "presto": "SELECT * FROM a UNION SELECT * FROM b", "spark": "SELECT * FROM a UNION SELECT * FROM b", @@ -887,6 +898,7 @@ def test_operators(self): write={ "bigquery": "LOWER(x) LIKE '%y'", "clickhouse": "x ILIKE '%y'", + "drill": "x `ILIKE` '%y'", "duckdb": "x ILIKE '%y'", "hive": "LOWER(x) LIKE '%y'", "mysql": "LOWER(x) LIKE '%y'", @@ -910,6 +922,7 @@ def test_operators(self): self.validate_all( "POSITION(' ' in x)", write={ + "drill": "STRPOS(x, ' ')", "duckdb": "STRPOS(x, ' ')", "postgres": "STRPOS(x, ' ')", "presto": "STRPOS(x, ' ')", @@ -921,6 +934,7 @@ def test_operators(self): self.validate_all( "STR_POSITION('a', x)", write={ + "drill": "STRPOS(x, 'a')", "duckdb": "STRPOS(x, 'a')", "postgres": "STRPOS(x, 'a')", "presto": "STRPOS(x, 'a')", @@ -932,6 +946,7 @@ def test_operators(self): self.validate_all( "POSITION('a', x, 3)", write={ + "drill": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1", "presto": "STRPOS(SUBSTR(x, 3), 'a') + 3 - 1", "spark": "LOCATE('a', x, 3)", "clickhouse": "position(x, 'a', 3)", @@ -960,6 +975,7 @@ def test_operators(self): self.validate_all( "IF(x > 1, 1, 0)", write={ + "drill": "`IF`(x > 1, 1, 0)", "duckdb": "CASE WHEN x > 1 THEN 1 ELSE 0 END", "presto": "IF(x > 1, 1, 0)", "hive": "IF(x > 1, 1, 0)", @@ -970,6 +986,7 @@ def test_operators(self): self.validate_all( "CASE WHEN 1 THEN x ELSE 0 END", write={ + "drill": "CASE WHEN 1 THEN x ELSE 0 END", "duckdb": "CASE WHEN 1 THEN x ELSE 0 END", "presto": "CASE WHEN 1 THEN x ELSE 0 END", "hive": "CASE WHEN 1 THEN x ELSE 0 END", @@ -980,6 +997,7 @@ def test_operators(self): self.validate_all( "x[y]", write={ + "drill": "x[y]", "duckdb": "x[y]", "presto": "x[y]", "hive": "x[y]", @@ -1000,6 +1018,7 @@ def test_operators(self): 'true or null as "foo"', write={ "bigquery": "TRUE OR NULL AS `foo`", + "drill": "TRUE OR NULL AS `foo`", "duckdb": 'TRUE OR NULL AS "foo"', "presto": 'TRUE OR NULL AS "foo"', "hive": "TRUE OR NULL AS `foo`", @@ -1020,6 +1039,7 @@ def test_operators(self): "LEVENSHTEIN(col1, col2)", write={ "duckdb": "LEVENSHTEIN(col1, col2)", + "drill": "LEVENSHTEIN_DISTANCE(col1, col2)", "presto": "LEVENSHTEIN_DISTANCE(col1, col2)", "hive": "LEVENSHTEIN(col1, col2)", "spark": "LEVENSHTEIN(col1, col2)", @@ -1029,6 +1049,7 @@ def test_operators(self): "LEVENSHTEIN(coalesce(col1, col2), coalesce(col2, col1))", write={ "duckdb": "LEVENSHTEIN(COALESCE(col1, col2), COALESCE(col2, col1))", + "drill": "LEVENSHTEIN_DISTANCE(COALESCE(col1, col2), COALESCE(col2, col1))", "presto": "LEVENSHTEIN_DISTANCE(COALESCE(col1, col2), COALESCE(col2, col1))", "hive": "LEVENSHTEIN(COALESCE(col1, col2), COALESCE(col2, col1))", "spark": "LEVENSHTEIN(COALESCE(col1, col2), COALESCE(col2, col1))", @@ -1152,6 +1173,7 @@ def test_alias(self): self.validate_all( "SELECT a AS b FROM x GROUP BY b", write={ + "drill": "SELECT a AS b FROM x GROUP BY b", "duckdb": "SELECT a AS b FROM x GROUP BY b", "presto": "SELECT a AS b FROM x GROUP BY 1", "hive": "SELECT a AS b FROM x GROUP BY 1", @@ -1162,6 +1184,7 @@ def test_alias(self): self.validate_all( "SELECT y x FROM my_table t", write={ + "drill": "SELECT y AS x FROM my_table AS t", "hive": "SELECT y AS x FROM my_table AS t", "oracle": "SELECT y AS x FROM my_table t", "postgres": "SELECT y AS x FROM my_table AS t", diff --git a/tests/dialects/test_drill.py b/tests/dialects/test_drill.py new file mode 100644 index 0000000000..41d4695367 --- /dev/null +++ b/tests/dialects/test_drill.py @@ -0,0 +1,45 @@ +from tests.dialects.test_dialect import Validator + + +class TestDrill(Validator): + dialect = "drill" + + def test_string_literals(self): + self.validate_all( + "SELECT '2021-01-01' + INTERVAL 1 MONTH", + write={ + "mysql": "SELECT '2021-01-01' + INTERVAL 1 MONTH", + }, + ) + + def test_quotes(self): + self.validate_all( + "'\\''", + write={ + "duckdb": "''''", + "presto": "''''", + "hive": "'\\''", + "spark": "'\\''", + }, + ) + self.validate_all( + "'\"x\"'", + write={ + "duckdb": "'\"x\"'", + "presto": "'\"x\"'", + "hive": "'\"x\"'", + "spark": "'\"x\"'", + } + ) + self.validate_all( + "'\\\\a'", + read={ + "presto": "'\\a'", + }, + write={ + "duckdb": "'\\a'", + "presto": "'\\a'", + "hive": "'\\\\a'", + "spark": "'\\\\a'", + }, + ) \ No newline at end of file From cbbc8aea49673489bc2051b5f79eef5f8787d73e Mon Sep 17 00:00:00 2001 From: Charles Givre Date: Thu, 10 Nov 2022 07:33:11 -0500 Subject: [PATCH 2/6] Various additions --- sqlglot/dialects/drill.py | 19 ++++++++++++++++++- tests/dialects/test_drill.py | 8 ++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/sqlglot/dialects/drill.py b/sqlglot/dialects/drill.py index 4d5a3f9c81..99ea2a99c6 100644 --- a/sqlglot/dialects/drill.py +++ b/sqlglot/dialects/drill.py @@ -22,6 +22,16 @@ def _to_timestamp(args): def if_sql(self, expression): + """ + Drill requires backticks around certain SQL reserved words, IF being one of them, This function + adds the backticks around the keyword IF. + Args: + self: The Drill dialect + expression: The input IF expression + + Returns: The expression with IF in backticks. + + """ expressions = self.format_args( expression.this, expression.args.get("true"), expression.args.get("false") ) @@ -88,23 +98,30 @@ class Generator(generator.Generator): exp.DataType.Type.TINYINT: "INTEGER", exp.DataType.Type.BINARY: "VARBINARY", exp.DataType.Type.TEXT: "VARCHAR", + exp.DataType.Type.NCHAR: "VARCHAR", + exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", exp.DataType.Type.DATETIME: "TIMESTAMP" } + ROOT_PROPERTIES = { + exp.PartitionedByProperty + } + TRANSFORMS = { **generator.Generator.TRANSFORMS, exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", exp.Lateral: _lateral_sql, exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})", - exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})", exp.ArrayContains: rename_func("REPEATED_CONTAINS"), exp.ArraySize: rename_func("REPEATED_COUNT"), + exp.Create: create_with_partitions_sql, exp.If: if_sql, exp.ILike: lambda self, e: f" {self.sql(e, 'this')} `ILIKE` {self.sql(e, 'expression')}", exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), + exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'value')}", exp.StrPosition: str_position_sql, exp.TryCast: no_trycast_sql, } diff --git a/tests/dialects/test_drill.py b/tests/dialects/test_drill.py index 41d4695367..cf1e09114d 100644 --- a/tests/dialects/test_drill.py +++ b/tests/dialects/test_drill.py @@ -42,4 +42,12 @@ def test_quotes(self): "hive": "'\\\\a'", "spark": "'\\\\a'", }, + ) + + def test_table_function(self): + self.validate_all( + "SELECT * FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))", + write={ + "drill": "SELECT * FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))", + }, ) \ No newline at end of file From 9284950a96fc7e721bbbd2c0d57663d31aab1684 Mon Sep 17 00:00:00 2001 From: Charles Givre Date: Thu, 10 Nov 2022 16:29:59 -0500 Subject: [PATCH 3/6] Date formatting working --- sqlglot/dialects/drill.py | 91 ++++++++++++++++++++-------------- tests/dialects/test_dialect.py | 9 +++- 2 files changed, 63 insertions(+), 37 deletions(-) diff --git a/sqlglot/dialects/drill.py b/sqlglot/dialects/drill.py index 99ea2a99c6..cb7afe307f 100644 --- a/sqlglot/dialects/drill.py +++ b/sqlglot/dialects/drill.py @@ -15,10 +15,18 @@ def _to_timestamp(args): # TO_TIMESTAMP accepts either a single double argument or (text, text) if len(args) == 1 and args[0].is_number: - # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE return exp.UnixToTime.from_arg_list(args) - # https://www.postgresql.org/docs/current/functions-formatting.html - return format_time_lambda(exp.StrToTime, "postgres")(args) + return format_time_lambda(exp.StrToTime, "drill")(args) + +def _str_to_time_sql(self, expression): + return f"STRPTIME({self.sql(expression, 'this')}, {self.format_time(expression)})" + + +def _ts_or_ds_to_date_sql(self, expression): + time_format = self.format_time(expression) + if time_format and time_format not in (DuckDB.time_format, DuckDB.date_format): + return f"CAST({_str_to_time_sql(self, expression)} AS DATE)" + return f"CAST({self.sql(expression, 'this')} AS DATE)" def if_sql(self, expression): @@ -43,6 +51,37 @@ class Drill(Dialect): null_ordering = "nulls_are_last" date_format = "'yyyy-MM-dd'" + time_mapping = { + "y": "%Y", + "Y": "%Y", + "YYYY": "%Y", + "yyyy": "%Y", + "YY": "%y", + "yy": "%y", + "MMMM": "%B", + "MMM": "%b", + "MM": "%m", + "M": "%-m", + "dd": "%d", + "d": "%-d", + "HH": "%H", + "H": "%-H", + "hh": "%I", + "h": "%-I", + "mm": "%M", + "m": "%-M", + "ss": "%S", + "s": "%-S", + "SSSSSS": "%f", + "a": "%p", + "DD": "%j", + "D": "%-j", + "E": "%a", + "EE": "%a", + "EEE": "%a", + "EEEE": "%A", + "''T''": "T" + } class Tokenizer(tokens.Tokenizer): QUOTES = ["'"] IDENTIFIERS = ["`"] @@ -54,40 +93,17 @@ class Tokenizer(tokens.Tokenizer): "VARBINARY": TokenType.BINARY } - time_mapping = { - "y": "%Y", - "Y": "%Y", - "YYYY": "%Y", - "yyyy": "%Y", - "YY": "%y", - "yy": "%y", - "MMMM": "%B", - "MMM": "%b", - "MM": "%m", - "M": "%-m", - "dd": "%d", - "d": "%-d", - "HH": "%H", - "H": "%-H", - "hh": "%I", - "h": "%-I", - "mm": "%M", - "m": "%-M", - "ss": "%S", - "s": "%-S", - "SSSSSS": "%f", - "a": "%p", - "DD": "%j", - "D": "%-j", - "E": "%a", - "EE": "%a", - "EEE": "%a", - "EEEE": "%A", - } + date_format = "'yyyy-MM-dd'" + dateint_format = "'yyyyMMdd'" + time_format = "'yyyy-MM-dd HH:mm:ss'" class Parser(parser.Parser): + STRICT_CAST = False + FUNCTIONS = { **parser.Parser.FUNCTIONS, + "TO_TIMESTAMP": exp.TimeStrToTime.from_arg_list, + "TO_CHAR": format_time_lambda(exp.TimeToStr, "drill"), } class Generator(generator.Generator): @@ -112,9 +128,6 @@ class Generator(generator.Generator): **generator.Generator.TRANSFORMS, exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", exp.Lateral: _lateral_sql, - exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", - exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})", - exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})", exp.ArrayContains: rename_func("REPEATED_CONTAINS"), exp.ArraySize: rename_func("REPEATED_COUNT"), exp.Create: create_with_partitions_sql, @@ -123,5 +136,11 @@ class Generator(generator.Generator): exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'value')}", exp.StrPosition: str_position_sql, + exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", + exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", + exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)", + exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})", exp.TryCast: no_trycast_sql, + exp.TsOrDsToDate: _ts_or_ds_to_date_sql, } diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 40267817e0..28d82eccb7 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -287,7 +287,7 @@ def test_time(self): "duckdb": "STRPTIME(x, '%Y-%m-%dT%H:%M:%S')", "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)", "presto": "DATE_PARSE(x, '%Y-%m-%dT%H:%i:%S')", - "drill": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')", + "drill": "TO_TIMESTAMP(x, 'yyyy-MM-dd''T''HH:mm:ss')", "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH:MI:SS')", "spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')", }, @@ -295,6 +295,7 @@ def test_time(self): self.validate_all( "STR_TO_TIME('2020-01-01', '%Y-%m-%d')", write={ + "drill": "TO_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')", "duckdb": "STRPTIME('2020-01-01', '%Y-%m-%d')", "hive": "CAST('2020-01-01' AS TIMESTAMP)", "oracle": "TO_TIMESTAMP('2020-01-01', 'YYYY-MM-DD')", @@ -307,6 +308,7 @@ def test_time(self): self.validate_all( "STR_TO_TIME(x, '%y')", write={ + "drill": "TO_TIMESTAMP(x, 'yy')", "duckdb": "STRPTIME(x, '%y')", "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yy')) AS TIMESTAMP)", "presto": "DATE_PARSE(x, '%y')", @@ -328,6 +330,7 @@ def test_time(self): self.validate_all( "TIME_STR_TO_DATE('2020-01-01')", write={ + "drill": "CAST('2020-01-01' AS DATE)", "duckdb": "CAST('2020-01-01' AS DATE)", "hive": "TO_DATE('2020-01-01')", "presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%s')", @@ -337,6 +340,7 @@ def test_time(self): self.validate_all( "TIME_STR_TO_TIME('2020-01-01')", write={ + "drill": "CAST('2020-01-01' AS TIMESTAMP)", "duckdb": "CAST('2020-01-01' AS TIMESTAMP)", "hive": "CAST('2020-01-01' AS TIMESTAMP)", "presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d %H:%i:%s')", @@ -353,6 +357,7 @@ def test_time(self): self.validate_all( "TIME_TO_STR(x, '%Y-%m-%d')", write={ + "drill": "TO_CHAR(x, 'yyyy-MM-dd')", "duckdb": "STRFTIME(x, '%Y-%m-%d')", "hive": "DATE_FORMAT(x, 'yyyy-MM-dd')", "oracle": "TO_CHAR(x, 'YYYY-MM-DD')", @@ -746,6 +751,7 @@ def test_cross_join(self): self.validate_all( "SELECT a FROM x CROSS JOIN UNNEST(y) AS t (a)", write={ + "drill": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)", "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)", "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a", }, @@ -753,6 +759,7 @@ def test_cross_join(self): self.validate_all( "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t (a, b)", write={ + "drill": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)", "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)", "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) t AS b", }, From 4cfa7ce2a7a1309b5b472d59cdea58acfd2da450 Mon Sep 17 00:00:00 2001 From: Charles Givre Date: Mon, 14 Nov 2022 23:35:42 -0500 Subject: [PATCH 4/6] Time functions now working --- sqlglot/dialects/drill.py | 39 ++++++++++++++++++++++++++++------ tests/dialects/test_dialect.py | 15 +++++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/sqlglot/dialects/drill.py b/sqlglot/dialects/drill.py index cb7afe307f..1048b718d9 100644 --- a/sqlglot/dialects/drill.py +++ b/sqlglot/dialects/drill.py @@ -6,7 +6,7 @@ create_with_partitions_sql, format_time_lambda, rename_func, str_position_sql, - no_trycast_sql + no_trycast_sql, no_pivot_sql ) from sqlglot.dialects.postgres import _lateral_sql from sqlglot.tokens import TokenType @@ -24,11 +24,20 @@ def _str_to_time_sql(self, expression): def _ts_or_ds_to_date_sql(self, expression): time_format = self.format_time(expression) - if time_format and time_format not in (DuckDB.time_format, DuckDB.date_format): + if time_format and time_format not in (Drill.time_format, Drill.date_format): return f"CAST({_str_to_time_sql(self, expression)} AS DATE)" return f"CAST({self.sql(expression, 'this')} AS DATE)" +def _date_add_sql(kind): + def func(self, expression): + this = self.sql(expression, "this") + unit = expression.text("unit").upper() or "DAY" + expression = self.sql(expression, "expression") + return f"DATE_{kind}({this}, INTERVAL '{expression}' {unit})" + + return func + def if_sql(self, expression): """ Drill requires backticks around certain SQL reserved words, IF being one of them, This function @@ -46,10 +55,19 @@ def if_sql(self, expression): return f"`IF`({expressions})" +def _str_to_date(self, expression): + this = self.sql(expression, "this") + time_format = self.format_time(expression) + if time_format == Drill.date_format: + return f"CAST({this} AS DATE)" + return f"TO_DATE({this}, {time_format})" + class Drill(Dialect): normalize_functions = None null_ordering = "nulls_are_last" date_format = "'yyyy-MM-dd'" + dateint_format = "'yyyyMMdd'" + time_format = "'yyyy-MM-dd HH:mm:ss'" time_mapping = { "y": "%Y", @@ -93,9 +111,7 @@ class Tokenizer(tokens.Tokenizer): "VARBINARY": TokenType.BINARY } - date_format = "'yyyy-MM-dd'" - dateint_format = "'yyyyMMdd'" - time_format = "'yyyy-MM-dd HH:mm:ss'" + normalize_functions = None class Parser(parser.Parser): STRICT_CAST = False @@ -131,16 +147,27 @@ class Generator(generator.Generator): exp.ArrayContains: rename_func("REPEATED_CONTAINS"), exp.ArraySize: rename_func("REPEATED_COUNT"), exp.Create: create_with_partitions_sql, + exp.DateAdd: _date_add_sql("ADD"), + exp.DateStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", + exp.DateSub: _date_add_sql("SUB"), + exp.DateToDi: lambda self, e: f"CAST(TO_DATE({self.sql(e, 'this')}, {Drill.dateint_format}) AS INT)", + exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS VARCHAR), {Drill.dateint_format})", exp.If: if_sql, exp.ILike: lambda self, e: f" {self.sql(e, 'this')} `ILIKE` {self.sql(e, 'expression')}", exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'value')}", + exp.Pivot: no_pivot_sql, + exp.RegexpLike: rename_func("REGEXP_MATCHES"), exp.StrPosition: str_position_sql, - exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", + exp.StrToDate: _str_to_date, exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeStrToDate: lambda self, e: f"CAST({self.sql(e, 'this')} AS DATE)", exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)", + exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})", + exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), exp.TryCast: no_trycast_sql, + exp.TsOrDsAdd: lambda self, e: f"DATE_ADD(CAST({self.sql(e,'this')} AS DATE), INTERVAL '{self.sql(e,'expression')}' DAY)", exp.TsOrDsToDate: _ts_or_ds_to_date_sql, + exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", } diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 28d82eccb7..1f0ff923ac 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -369,6 +369,7 @@ def test_time(self): self.validate_all( "TIME_TO_TIME_STR(x)", write={ + "drill": "CAST(x AS VARCHAR)", "duckdb": "CAST(x AS TEXT)", "hive": "CAST(x AS STRING)", "presto": "CAST(x AS VARCHAR)", @@ -378,6 +379,7 @@ def test_time(self): self.validate_all( "TIME_TO_UNIX(x)", write={ + "drill": "UNIX_TIMESTAMP(x)", "duckdb": "EPOCH(x)", "hive": "UNIX_TIMESTAMP(x)", "presto": "TO_UNIXTIME(x)", @@ -439,6 +441,7 @@ def test_time(self): self.validate_all( "DATE_TO_DATE_STR(x)", write={ + "drill": "CAST(x AS VARCHAR)", "duckdb": "CAST(x AS TEXT)", "hive": "CAST(x AS STRING)", "presto": "CAST(x AS VARCHAR)", @@ -447,6 +450,7 @@ def test_time(self): self.validate_all( "DATE_TO_DI(x)", write={ + "drill": "CAST(TO_DATE(x, 'yyyyMMdd') AS INT)", "duckdb": "CAST(STRFTIME(x, '%Y%m%d') AS INT)", "hive": "CAST(DATE_FORMAT(x, 'yyyyMMdd') AS INT)", "presto": "CAST(DATE_FORMAT(x, '%Y%m%d') AS INT)", @@ -455,6 +459,7 @@ def test_time(self): self.validate_all( "DI_TO_DATE(x)", write={ + "drill": "TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')", "duckdb": "CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)", "hive": "TO_DATE(CAST(x AS STRING), 'yyyyMMdd')", "presto": "CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)", @@ -477,6 +482,7 @@ def test_time(self): }, write={ "bigquery": "DATE_ADD(x, INTERVAL 1 'day')", + "drill": "DATE_ADD(x, INTERVAL '1' DAY)", "duckdb": "x + INTERVAL 1 day", "hive": "DATE_ADD(x, 1)", "mysql": "DATE_ADD(x, INTERVAL 1 DAY)", @@ -491,6 +497,7 @@ def test_time(self): "DATE_ADD(x, 1)", write={ "bigquery": "DATE_ADD(x, INTERVAL 1 'day')", + "drill": "DATE_ADD(x, INTERVAL '1' DAY)", "duckdb": "x + INTERVAL 1 DAY", "hive": "DATE_ADD(x, 1)", "mysql": "DATE_ADD(x, INTERVAL 1 DAY)", @@ -560,6 +567,7 @@ def test_time(self): "starrocks": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')", }, write={ + "drill": "TO_DATE(x, 'yyyy-MM-dd''T''HH:mm:ss')", "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')", "starrocks": "STR_TO_DATE(x, '%Y-%m-%dT%H:%i:%S')", "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS DATE)", @@ -570,6 +578,7 @@ def test_time(self): self.validate_all( "STR_TO_DATE(x, '%Y-%m-%d')", write={ + "drill": "CAST(x AS DATE)", "mysql": "STR_TO_DATE(x, '%Y-%m-%d')", "starrocks": "STR_TO_DATE(x, '%Y-%m-%d')", "hive": "CAST(x AS DATE)", @@ -580,6 +589,7 @@ def test_time(self): self.validate_all( "DATE_STR_TO_DATE(x)", write={ + "drill": "CAST(x AS DATE)", "duckdb": "CAST(x AS DATE)", "hive": "TO_DATE(x)", "presto": "CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)", @@ -589,6 +599,7 @@ def test_time(self): self.validate_all( "TS_OR_DS_ADD('2021-02-01', 1, 'DAY')", write={ + "drill": "DATE_ADD(CAST('2021-02-01' AS DATE), INTERVAL '1' DAY)", "duckdb": "CAST('2021-02-01' AS DATE) + INTERVAL 1 DAY", "hive": "DATE_ADD('2021-02-01', 1)", "presto": "DATE_ADD('DAY', 1, DATE_PARSE(SUBSTR('2021-02-01', 1, 10), '%Y-%m-%d'))", @@ -598,6 +609,7 @@ def test_time(self): self.validate_all( "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", write={ + "drill": "DATE_ADD(CAST('2020-01-01' AS DATE), INTERVAL '1' DAY)", "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY", "hive": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", "presto": "DATE_ADD('day', 1, CAST('2020-01-01' AS DATE))", @@ -607,6 +619,7 @@ def test_time(self): self.validate_all( "TIMESTAMP '2022-01-01'", write={ + "drill": "CAST('2022-01-01' AS TIMESTAMP)", "mysql": "CAST('2022-01-01' AS TIMESTAMP)", "starrocks": "CAST('2022-01-01' AS DATETIME)", "hive": "CAST('2022-01-01' AS TIMESTAMP)", @@ -628,6 +641,7 @@ def test_time(self): dialect: f"{unit}(x)" for dialect in ( "bigquery", + "drill", "duckdb", "mysql", "presto", @@ -638,6 +652,7 @@ def test_time(self): dialect: f"{unit}(x)" for dialect in ( "bigquery", + "drill", "duckdb", "mysql", "presto", From 69f28734756f26f1c01434fb586c3cf8f984dbf7 Mon Sep 17 00:00:00 2001 From: Charles Givre Date: Mon, 14 Nov 2022 23:47:24 -0500 Subject: [PATCH 5/6] Fixed style issues --- sqlglot/dialects/drill.py | 25 +++++++++++++------------ tests/dialects/test_dialect.py | 2 +- tests/dialects/test_drill.py | 4 ++-- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/sqlglot/dialects/drill.py b/sqlglot/dialects/drill.py index 1048b718d9..6dca6eff79 100644 --- a/sqlglot/dialects/drill.py +++ b/sqlglot/dialects/drill.py @@ -1,12 +1,14 @@ from __future__ import annotations -from sqlglot import exp, parser, tokens, generator +from sqlglot import exp, generator, parser, tokens from sqlglot.dialects.dialect import ( Dialect, create_with_partitions_sql, format_time_lambda, - rename_func, str_position_sql, - no_trycast_sql, no_pivot_sql + no_pivot_sql, + no_trycast_sql, + rename_func, + str_position_sql, ) from sqlglot.dialects.postgres import _lateral_sql from sqlglot.tokens import TokenType @@ -18,6 +20,7 @@ def _to_timestamp(args): return exp.UnixToTime.from_arg_list(args) return format_time_lambda(exp.StrToTime, "drill")(args) + def _str_to_time_sql(self, expression): return f"STRPTIME({self.sql(expression, 'this')}, {self.format_time(expression)})" @@ -38,6 +41,7 @@ def func(self, expression): return func + def if_sql(self, expression): """ Drill requires backticks around certain SQL reserved words, IF being one of them, This function @@ -62,6 +66,7 @@ def _str_to_date(self, expression): return f"CAST({this} AS DATE)" return f"TO_DATE({this}, {time_format})" + class Drill(Dialect): normalize_functions = None null_ordering = "nulls_are_last" @@ -98,18 +103,16 @@ class Drill(Dialect): "EE": "%a", "EEE": "%a", "EEEE": "%A", - "''T''": "T" + "''T''": "T", } + class Tokenizer(tokens.Tokenizer): QUOTES = ["'"] IDENTIFIERS = ["`"] ESCAPES = ["\\"] ENCODE = "utf-8" - KEYWORDS = { - **tokens.Tokenizer.KEYWORDS, - "VARBINARY": TokenType.BINARY - } + KEYWORDS = {**tokens.Tokenizer.KEYWORDS, "VARBINARY": TokenType.BINARY} normalize_functions = None @@ -133,12 +136,10 @@ class Generator(generator.Generator): exp.DataType.Type.NCHAR: "VARCHAR", exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", - exp.DataType.Type.DATETIME: "TIMESTAMP" + exp.DataType.Type.DATETIME: "TIMESTAMP", } - ROOT_PROPERTIES = { - exp.PartitionedByProperty - } + ROOT_PROPERTIES = {exp.PartitionedByProperty} TRANSFORMS = { **generator.Generator.TRANSFORMS, diff --git a/tests/dialects/test_dialect.py b/tests/dialects/test_dialect.py index 1f0ff923ac..0c95de33b0 100644 --- a/tests/dialects/test_dialect.py +++ b/tests/dialects/test_dialect.py @@ -221,7 +221,7 @@ def test_cast(self): }, write={ "duckdb": "TRY_CAST(a AS DOUBLE)", - "drill" : "CAST(a AS DOUBLE)", + "drill": "CAST(a AS DOUBLE)", "postgres": "CAST(a AS DOUBLE PRECISION)", "redshift": "CAST(a AS DOUBLE PRECISION)", }, diff --git a/tests/dialects/test_drill.py b/tests/dialects/test_drill.py index cf1e09114d..ccecbbe9e2 100644 --- a/tests/dialects/test_drill.py +++ b/tests/dialects/test_drill.py @@ -29,7 +29,7 @@ def test_quotes(self): "presto": "'\"x\"'", "hive": "'\"x\"'", "spark": "'\"x\"'", - } + }, ) self.validate_all( "'\\\\a'", @@ -50,4 +50,4 @@ def test_table_function(self): write={ "drill": "SELECT * FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))", }, - ) \ No newline at end of file + ) From f1f4fa2dfd9e20277e8496623c339ef9ee34b687 Mon Sep 17 00:00:00 2001 From: Charles Givre Date: Tue, 15 Nov 2022 07:29:34 -0500 Subject: [PATCH 6/6] Disabled table test and code cleanup --- sqlglot/dialects/drill.py | 5 +---- tests/dialects/test_drill.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/sqlglot/dialects/drill.py b/sqlglot/dialects/drill.py index 6dca6eff79..c651957640 100644 --- a/sqlglot/dialects/drill.py +++ b/sqlglot/dialects/drill.py @@ -11,7 +11,6 @@ str_position_sql, ) from sqlglot.dialects.postgres import _lateral_sql -from sqlglot.tokens import TokenType def _to_timestamp(args): @@ -112,8 +111,6 @@ class Tokenizer(tokens.Tokenizer): ESCAPES = ["\\"] ENCODE = "utf-8" - KEYWORDS = {**tokens.Tokenizer.KEYWORDS, "VARBINARY": TokenType.BINARY} - normalize_functions = None class Parser(parser.Parser): @@ -168,7 +165,7 @@ class Generator(generator.Generator): exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})", exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), exp.TryCast: no_trycast_sql, - exp.TsOrDsAdd: lambda self, e: f"DATE_ADD(CAST({self.sql(e,'this')} AS DATE), INTERVAL '{self.sql(e,'expression')}' DAY)", + exp.TsOrDsAdd: lambda self, e: f"DATE_ADD(CAST({self.sql(e, 'this')} AS DATE), INTERVAL '{self.sql(e, 'expression')}' DAY)", exp.TsOrDsToDate: _ts_or_ds_to_date_sql, exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", } diff --git a/tests/dialects/test_drill.py b/tests/dialects/test_drill.py index ccecbbe9e2..30639efb4a 100644 --- a/tests/dialects/test_drill.py +++ b/tests/dialects/test_drill.py @@ -44,10 +44,11 @@ def test_quotes(self): }, ) - def test_table_function(self): - self.validate_all( - "SELECT * FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))", - write={ - "drill": "SELECT * FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))", - }, - ) + # TODO Add support for Drill's table() function + # def test_table_function(self): + # self.validate_all( + # "SELECT * FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))", + # write={ + # "drill": "SELECT * FROM table( dfs.`test_data.xlsx` (type => 'excel', sheetName => 'secondSheet'))", + # }, + # )