Improve Snowflake array and map support (#780)
* improve sf array and map support

* add parser support object_construct

* move to snowflake parser
eakmanrq committed Nov 29, 2022
1 parent 26b1da1 · commit 0506657
Showing 6 changed files with 29 additions and 3 deletions.
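
In practice, this change lets the Hive/Spark-style variadic MAP constructor transpile to Snowflake's OBJECT_CONSTRUCT, and typed ARRAY/MAP columns transpile to Snowflake's untyped ARRAY and OBJECT. A minimal usage sketch (not part of the diff; it assumes sqlglot is importable and mirrors the updated test_hive case below):

import sqlglot

# Hive's MAP(k1, v1, k2, v2, ...) becomes Snowflake's OBJECT_CONSTRUCT with the
# same interleaved key/value arguments.
print(sqlglot.transpile("MAP(a, b, c, d)", read="hive", write="snowflake")[0])
# Expected output, per the updated tests: OBJECT_CONSTRUCT(a, b, c, d)
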
6 changes: 3 additions & 3 deletions sqlglot/dialects/dialect.py
@@ -289,19 +289,19 @@ def struct_extract_sql(self, expression):
return f"{this}.{struct_key}"


def var_map_sql(self, expression):
def var_map_sql(self, expression, map_func_name="MAP"):
keys = expression.args["keys"]
values = expression.args["values"]

if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
self.unsupported("Cannot convert array columns into map.")
return f"MAP({self.format_args(keys, values)})"
return f"{map_func_name}({self.format_args(keys, values)})"

args = []
for key, value in zip(keys.expressions, values.expressions):
args.append(self.sql(key))
args.append(self.sql(value))
return f"MAP({self.format_args(*args)})"
return f"{map_func_name}({self.format_args(*args)})"


def format_time_lambda(exp_class, dialect, default=None):
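
The new map_func_name parameter lets a dialect reuse var_map_sql with its own map constructor; Snowflake passes "OBJECT_CONSTRUCT" in the next file. Note the two code paths: literal key/value arrays are zipped into one interleaved argument list, while anything else (for example array-typed columns) triggers self.unsupported and falls back to passing both arrays through unchanged. A hedged sketch against the public transpile API, with inputs adapted from the Presto tests below:

import sqlglot
from sqlglot.errors import ErrorLevel, UnsupportedError

# Literal arrays: keys and values are interleaved pairwise.
print(sqlglot.transpile("MAP(ARRAY[a, b], ARRAY[c, d])", read="presto", write="snowflake")[0])
# Expected output, per the updated tests: OBJECT_CONSTRUCT(a, c, b, d)

# Array-typed columns: no pairwise form exists, so generation is flagged as unsupported.
try:
    sqlglot.transpile("MAP(a, b)", read="presto", write="snowflake", unsupported_level=ErrorLevel.RAISE)
except UnsupportedError as e:
    print(e)  # Cannot convert array columns into map.
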
13 changes: 13 additions & 0 deletions sqlglot/dialects/snowflake.py
@@ -6,6 +6,7 @@
format_time_lambda,
inline_array_sql,
rename_func,
var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import seq_get
@@ -100,6 +101,14 @@ def _parse_date_part(self):
return self.expression(exp.Extract, this=this, expression=expression)


def _datatype_sql(self, expression):
if expression.this == exp.DataType.Type.ARRAY:
return "ARRAY"
elif expression.this == exp.DataType.Type.MAP:
return "OBJECT"
return self.datatype_sql(expression)


class Snowflake(Dialect):
null_ordering = "nulls_are_large"
time_format = "'yyyy-mm-dd hh24:mi:ss'"
@@ -143,6 +152,7 @@ class Parser(parser.Parser):
"ARRAY_CONSTRUCT": exp.Array.from_arg_list,
"RLIKE": exp.RegexpLike.from_arg_list,
"DECODE": exp.Matches.from_arg_list,
"OBJECT_CONSTRUCT": parser.parse_var_map,
}

FUNCTION_PARSERS = {
@@ -198,7 +208,10 @@ class Generator(generator.Generator):
**generator.Generator.TRANSFORMS,
exp.Array: inline_array_sql,
exp.ArrayConcat: rename_func("ARRAY_CAT"),
exp.DataType: _datatype_sql,
exp.If: rename_func("IFF"),
exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
exp.Parameter: lambda self, e: f"${self.sql(e, 'this')}",
exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
exp.Matches: rename_func("DECODE"),
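
Taken together, the Snowflake changes cover both directions: _datatype_sql collapses parameterized ARRAY/MAP types to Snowflake's untyped ARRAY and OBJECT, and the new OBJECT_CONSTRUCT parser entry reads Snowflake maps back into a VarMap expression. A hedged sketch with inputs adapted from the test updates below (the last expectation is an assumption, not asserted by these tests):

import sqlglot

# Parameterized container types lose their element types, since Snowflake's
# ARRAY and OBJECT are untyped.
print(sqlglot.transpile("CAST(a AS ARRAY<INT>)", read="spark", write="snowflake")[0])
# Expected output, per the updated tests: CAST(a AS ARRAY)
print(sqlglot.transpile("CAST(MAP(1, 1) AS MAP<INT, INT>)", read="hive", write="snowflake")[0])
# Expected output, per the updated tests: CAST(OBJECT_CONSTRUCT(1, 1) AS OBJECT)

# Reading Snowflake: OBJECT_CONSTRUCT now parses as a variadic map, so it can be
# rendered in another dialect.
print(sqlglot.transpile("OBJECT_CONSTRUCT('a', 'b')", read="snowflake", write="hive")[0])
# Assumed output, based on the hive/presto test pairs: MAP('a', 'b')
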
1 change: 1 addition & 0 deletions tests/dialects/test_duckdb.py
@@ -90,6 +90,7 @@ def test_duckdb(self):
"hive": "CAST(COL AS ARRAY<BIGINT>)",
"spark": "CAST(COL AS ARRAY<LONG>)",
"postgres": "CAST(COL AS BIGINT[])",
"snowflake": "CAST(COL AS ARRAY)",
},
)

3 changes: 3 additions & 0 deletions tests/dialects/test_hive.py
@@ -459,6 +459,7 @@ def test_hive(self):
"hive": "MAP(a, b, c, d)",
"presto": "MAP(ARRAY[a, c], ARRAY[b, d])",
"spark": "MAP(a, b, c, d)",
"snowflake": "OBJECT_CONSTRUCT(a, b, c, d)",
},
write={
"": "MAP(ARRAY(a, c), ARRAY(b, d))",
@@ -467,6 +468,7 @@
"presto": "MAP(ARRAY[a, c], ARRAY[b, d])",
"hive": "MAP(a, b, c, d)",
"spark": "MAP(a, b, c, d)",
"snowflake": "OBJECT_CONSTRUCT(a, b, c, d)",
},
)
self.validate_all(
@@ -476,6 +478,7 @@
"presto": "MAP(ARRAY[a], ARRAY[b])",
"hive": "MAP(a, b)",
"spark": "MAP(a, b)",
"snowflake": "OBJECT_CONSTRUCT(a, b)",
},
)
self.validate_all(
7 changes: 7 additions & 0 deletions tests/dialects/test_presto.py
@@ -13,6 +13,7 @@ def test_cast(self):
"duckdb": "CAST(a AS INT[])",
"presto": "CAST(a AS ARRAY(INTEGER))",
"spark": "CAST(a AS ARRAY<INT>)",
"snowflake": "CAST(a AS ARRAY)",
},
)
self.validate_all(
@@ -31,6 +32,7 @@ def test_cast(self):
"duckdb": "CAST(LIST_VALUE(1, 2) AS BIGINT[])",
"presto": "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))",
"spark": "CAST(ARRAY(1, 2) AS ARRAY<LONG>)",
"snowflake": "CAST([1, 2] AS ARRAY)",
},
)
self.validate_all(
@@ -41,6 +43,7 @@
"presto": "CAST(MAP(ARRAY[1], ARRAY[1]) AS MAP(INTEGER, INTEGER))",
"hive": "CAST(MAP(1, 1) AS MAP<INT, INT>)",
"spark": "CAST(MAP_FROM_ARRAYS(ARRAY(1), ARRAY(1)) AS MAP<INT, INT>)",
"snowflake": "CAST(OBJECT_CONSTRUCT(1, 1) AS OBJECT)",
},
)
self.validate_all(
@@ -51,6 +54,7 @@
"presto": "CAST(MAP(ARRAY['a', 'b', 'c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INTEGER)))",
"hive": "CAST(MAP('a', ARRAY(1), 'b', ARRAY(2), 'c', ARRAY(3)) AS MAP<STRING, ARRAY<INT>>)",
"spark": "CAST(MAP_FROM_ARRAYS(ARRAY('a', 'b', 'c'), ARRAY(ARRAY(1), ARRAY(2), ARRAY(3))) AS MAP<STRING, ARRAY<INT>>)",
"snowflake": "CAST(OBJECT_CONSTRUCT('a', [1], 'b', [2], 'c', [3]) AS OBJECT)",
},
)
self.validate_all(
@@ -393,6 +397,7 @@ def test_presto(self):
write={
"hive": UnsupportedError,
"spark": "MAP_FROM_ARRAYS(a, b)",
"snowflake": UnsupportedError,
},
)
self.validate_all(
@@ -401,6 +406,7 @@
"hive": "MAP(a, c, b, d)",
"presto": "MAP(ARRAY[a, b], ARRAY[c, d])",
"spark": "MAP_FROM_ARRAYS(ARRAY(a, b), ARRAY(c, d))",
"snowflake": "OBJECT_CONSTRUCT(a, c, b, d)",
},
)
self.validate_all(
@@ -409,6 +415,7 @@
"hive": "MAP('a', 'b')",
"presto": "MAP(ARRAY['a'], ARRAY['b'])",
"spark": "MAP_FROM_ARRAYS(ARRAY('a'), ARRAY('b'))",
"snowflake": "OBJECT_CONSTRUCT('a', 'b')",
},
)
self.validate_all(
2 changes: 2 additions & 0 deletions tests/dialects/test_spark.py
@@ -32,6 +32,7 @@ def test_ddl(self):
"presto": "CREATE TABLE db.example_table (col_a ARRAY(INTEGER), col_b ARRAY(ARRAY(INTEGER)))",
"hive": "CREATE TABLE db.example_table (col_a ARRAY<INT>, col_b ARRAY<ARRAY<INT>>)",
"spark": "CREATE TABLE db.example_table (col_a ARRAY<INT>, col_b ARRAY<ARRAY<INT>>)",
"snowflake": "CREATE TABLE db.example_table (col_a ARRAY, col_b ARRAY)",
},
)
self.validate_all(
@@ -278,6 +279,7 @@ def test_spark(self):
"presto": "MAP(ARRAY[1], c)",
"hive": "MAP(ARRAY(1), c)",
"spark": "MAP_FROM_ARRAYS(ARRAY(1), c)",
"snowflake": "OBJECT_CONSTRUCT([1], c)",
},
)
self.validate_all(
