diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part1.sql index 33b61666ca4d9..5b97d3d726415 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part1.sql @@ -12,9 +12,9 @@ -- Note that currently registered UDF returns a string. So there are some differences, for instance -- in string cast within UDF in Scala and Python. -SELECT avg(udf(four)) AS avg_1 FROM onek; +SELECT CAST(avg(udf(four)) AS decimal(10,3)) AS avg_1 FROM onek; -SELECT udf(avg(a)) AS avg_32 FROM aggtest WHERE a < 100; +SELECT CAST(udf(avg(a)) AS decimal(10,3)) AS avg_32 FROM aggtest WHERE a < 100; -- In 7.1, avg(float4) is computed using float8 arithmetic. -- Round the result to 3 digits to avoid platform-specific results. @@ -23,32 +23,32 @@ select CAST(avg(udf(b)) AS Decimal(10,3)) AS avg_107_943 FROM aggtest; -- `student` has a column with data type POINT, which is not supported by Spark [SPARK-27766] -- SELECT avg(gpa) AS avg_3_4 FROM ONLY student; -SELECT sum(udf(four)) AS sum_1500 FROM onek; +SELECT CAST(sum(udf(four)) AS int) AS sum_1500 FROM onek; SELECT udf(sum(a)) AS sum_198 FROM aggtest; -SELECT udf(udf(sum(b))) AS avg_431_773 FROM aggtest; +SELECT CAST(udf(udf(sum(b))) AS decimal(10,3)) AS avg_431_773 FROM aggtest; -- `student` has a column with data type POINT, which is not supported by Spark [SPARK-27766] -- SELECT sum(gpa) AS avg_6_8 FROM ONLY student; SELECT udf(max(four)) AS max_3 FROM onek; -SELECT max(udf(a)) AS max_100 FROM aggtest; -SELECT CAST(udf(udf(max(aggtest.b))) AS int) AS max_324_78 FROM aggtest; +SELECT max(CAST(udf(a) AS int)) AS max_100 FROM aggtest; +SELECT CAST(udf(udf(max(aggtest.b))) AS decimal(10,3)) AS max_324_78 FROM aggtest; -- `student` has a column with data type POINT, which is not supported by Spark [SPARK-27766] -- SELECT max(student.gpa) AS max_3_7 FROM student; -SELECT CAST(stddev_pop(udf(b)) AS int) FROM aggtest; -SELECT udf(stddev_samp(b)) FROM aggtest; -SELECT CAST(var_pop(udf(b)) as int) FROM aggtest; -SELECT udf(var_samp(b)) FROM aggtest; +SELECT CAST(stddev_pop(udf(b)) AS decimal(10,3)) FROM aggtest; +SELECT CAST(udf(stddev_samp(b)) AS decimal(10,3)) FROM aggtest; +SELECT CAST(var_pop(udf(b)) AS decimal(10,3)) FROM aggtest; +SELECT CAST(udf(var_samp(b)) AS decimal(10,3)) FROM aggtest; -SELECT udf(stddev_pop(CAST(b AS Decimal(38,0)))) FROM aggtest; -SELECT stddev_samp(CAST(udf(b) AS Decimal(38,0))) FROM aggtest; -SELECT udf(var_pop(CAST(b AS Decimal(38,0)))) FROM aggtest; -SELECT var_samp(udf(CAST(b AS Decimal(38,0)))) FROM aggtest; +SELECT CAST(udf(stddev_pop(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest; +SELECT CAST(stddev_samp(CAST(udf(b) AS Decimal(38,0))) AS decimal(10,3)) FROM aggtest; +SELECT CAST(udf(var_pop(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest; +SELECT CAST(var_samp(udf(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest; -- population variance is defined for a single tuple, sample variance -- is not -SELECT udf(var_pop(1.0)), var_samp(udf(2.0)); -SELECT stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))), stddev_samp(CAST(udf(4.0) AS Decimal(38,0))); +SELECT CAST(udf(var_pop(1.0)) AS int), var_samp(udf(2.0)); +SELECT CAST(stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))) AS int), stddev_samp(CAST(udf(4.0) AS Decimal(38,0))); -- verify correct results for null and NaN inputs @@ -76,9 +76,9 @@ FROM (VALUES ('-Infinity'), ('Infinity')) v(x); -- test accuracy with a large input offset -SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE))) +SELECT CAST(avg(udf(CAST(x AS DOUBLE))) AS int), CAST(udf(var_pop(CAST(x AS DOUBLE))) AS decimal(10,3)) FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x); -SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE))) +SELECT CAST(avg(udf(CAST(x AS DOUBLE))) AS long), CAST(udf(var_pop(CAST(x AS DOUBLE))) AS decimal(10,3)) FROM (VALUES (7000000000005), (7000000000007)) v(x); -- SQL2003 binary aggregates [SPARK-23907] @@ -89,8 +89,8 @@ FROM (VALUES (7000000000005), (7000000000007)) v(x); -- SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest; -- SELECT regr_r2(b, a) FROM aggtest; -- SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest; -SELECT CAST(udf(covar_pop(b, udf(a))) AS int), CAST(covar_samp(udf(b), a) as int) FROM aggtest; -SELECT corr(b, udf(a)) FROM aggtest; +SELECT CAST(udf(covar_pop(b, udf(a))) AS decimal(10,3)), CAST(covar_samp(udf(b), a) as decimal(10,3)) FROM aggtest; +SELECT CAST(corr(b, udf(a)) AS decimal(10,3)) FROM aggtest; -- test accum and combine functions directly [SPARK-23907] @@ -122,7 +122,7 @@ SELECT corr(b, udf(a)) FROM aggtest; SELECT count(udf(four)) AS cnt_1000 FROM onek; SELECT udf(count(DISTINCT four)) AS cnt_4 FROM onek; -select ten, udf(count(*)), sum(udf(four)) from onek +select ten, udf(count(*)), CAST(sum(udf(four)) AS int) from onek group by ten order by ten; select ten, count(udf(four)), udf(sum(DISTINCT four)) from onek diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part1.sql.out index 32be362d87cad..98e04b4e24506 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part1.sql.out @@ -3,19 +3,19 @@ -- !query 0 -SELECT avg(udf(four)) AS avg_1 FROM onek +SELECT CAST(avg(udf(four)) AS decimal(10,3)) AS avg_1 FROM onek -- !query 0 schema -struct +struct -- !query 0 output 1.5 -- !query 1 -SELECT udf(avg(a)) AS avg_32 FROM aggtest WHERE a < 100 +SELECT CAST(udf(avg(a)) AS decimal(10,3)) AS avg_32 FROM aggtest WHERE a < 100 -- !query 1 schema -struct +struct -- !query 1 output -32.666666666666664 +32.667 -- !query 2 @@ -27,11 +27,11 @@ struct -- !query 3 -SELECT sum(udf(four)) AS sum_1500 FROM onek +SELECT CAST(sum(udf(four)) AS int) AS sum_1500 FROM onek -- !query 3 schema -struct +struct -- !query 3 output -1500.0 +1500 -- !query 4 @@ -43,11 +43,11 @@ struct -- !query 5 -SELECT udf(udf(sum(b))) AS avg_431_773 FROM aggtest +SELECT CAST(udf(udf(sum(b))) AS decimal(10,3)) AS avg_431_773 FROM aggtest -- !query 5 schema -struct +struct -- !query 5 output -431.77260909229517 +431.773 -- !query 6 @@ -59,99 +59,99 @@ struct -- !query 7 -SELECT max(udf(a)) AS max_100 FROM aggtest +SELECT max(CAST(udf(a) AS int)) AS max_100 FROM aggtest -- !query 7 schema -struct +struct -- !query 7 output -56 +100 -- !query 8 -SELECT CAST(udf(udf(max(aggtest.b))) AS int) AS max_324_78 FROM aggtest +SELECT CAST(udf(udf(max(aggtest.b))) AS decimal(10,3)) AS max_324_78 FROM aggtest -- !query 8 schema -struct +struct -- !query 8 output -324 +324.78 -- !query 9 -SELECT CAST(stddev_pop(udf(b)) AS int) FROM aggtest +SELECT CAST(stddev_pop(udf(b)) AS decimal(10,3)) FROM aggtest -- !query 9 schema -struct +struct -- !query 9 output -131 +131.107 -- !query 10 -SELECT udf(stddev_samp(b)) FROM aggtest +SELECT CAST(udf(stddev_samp(b)) AS decimal(10,3)) FROM aggtest -- !query 10 schema -struct +struct -- !query 10 output -151.38936080399804 +151.389 -- !query 11 -SELECT CAST(var_pop(udf(b)) as int) FROM aggtest +SELECT CAST(var_pop(udf(b)) AS decimal(10,3)) FROM aggtest -- !query 11 schema -struct +struct -- !query 11 output -17189 +17189.054 -- !query 12 -SELECT udf(var_samp(b)) FROM aggtest +SELECT CAST(udf(var_samp(b)) AS decimal(10,3)) FROM aggtest -- !query 12 schema -struct +struct -- !query 12 output -22918.738564643096 +22918.739 -- !query 13 -SELECT udf(stddev_pop(CAST(b AS Decimal(38,0)))) FROM aggtest +SELECT CAST(udf(stddev_pop(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest -- !query 13 schema -struct +struct -- !query 13 output -131.18117242958306 +131.181 -- !query 14 -SELECT stddev_samp(CAST(udf(b) AS Decimal(38,0))) FROM aggtest +SELECT CAST(stddev_samp(CAST(udf(b) AS Decimal(38,0))) AS decimal(10,3)) FROM aggtest -- !query 14 schema -struct +struct -- !query 14 output -151.47497042966097 +151.475 -- !query 15 -SELECT udf(var_pop(CAST(b AS Decimal(38,0)))) FROM aggtest +SELECT CAST(udf(var_pop(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest -- !query 15 schema -struct +struct -- !query 15 output 17208.5 -- !query 16 -SELECT var_samp(udf(CAST(b AS Decimal(38,0)))) FROM aggtest +SELECT CAST(var_samp(udf(CAST(b AS Decimal(38,0)))) AS decimal(10,3)) FROM aggtest -- !query 16 schema -struct +struct -- !query 16 output -22944.666666666668 +22944.667 -- !query 17 -SELECT udf(var_pop(1.0)), var_samp(udf(2.0)) +SELECT CAST(udf(var_pop(1.0)) AS int), var_samp(udf(2.0)) -- !query 17 schema -struct +struct -- !query 17 output -0.0 NaN +0 NaN -- !query 18 -SELECT stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))), stddev_samp(CAST(udf(4.0) AS Decimal(38,0))) +SELECT CAST(stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))) AS int), stddev_samp(CAST(udf(4.0) AS Decimal(38,0))) -- !query 18 schema -struct +struct -- !query 18 output -0.0 NaN +0 NaN -- !query 19 @@ -262,37 +262,37 @@ NaN NaN -- !query 32 -SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE))) +SELECT CAST(avg(udf(CAST(x AS DOUBLE))) AS int), CAST(udf(var_pop(CAST(x AS DOUBLE))) AS decimal(10,3)) FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x) -- !query 32 schema -struct +struct -- !query 32 output -1.00000005E8 2.5 +100000005 2.5 -- !query 33 -SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE))) +SELECT CAST(avg(udf(CAST(x AS DOUBLE))) AS long), CAST(udf(var_pop(CAST(x AS DOUBLE))) AS decimal(10,3)) FROM (VALUES (7000000000005), (7000000000007)) v(x) -- !query 33 schema -struct +struct -- !query 33 output -7.000000000006E12 1.0 +7000000000006 1 -- !query 34 -SELECT CAST(udf(covar_pop(b, udf(a))) AS int), CAST(covar_samp(udf(b), a) as int) FROM aggtest +SELECT CAST(udf(covar_pop(b, udf(a))) AS decimal(10,3)), CAST(covar_samp(udf(b), a) as decimal(10,3)) FROM aggtest -- !query 34 schema -struct +struct -- !query 34 output -653 871 +653.629 871.505 -- !query 35 -SELECT corr(b, udf(a)) FROM aggtest +SELECT CAST(corr(b, udf(a)) AS decimal(10,3)) FROM aggtest -- !query 35 schema -struct +struct -- !query 35 output -0.1396345165178734 +0.14 -- !query 36 @@ -312,21 +312,21 @@ struct -- !query 38 -select ten, udf(count(*)), sum(udf(four)) from onek +select ten, udf(count(*)), CAST(sum(udf(four)) AS int) from onek group by ten order by ten -- !query 38 schema -struct +struct -- !query 38 output -0 100 100.0 -1 100 200.0 -2 100 100.0 -3 100 200.0 -4 100 100.0 -5 100 200.0 -6 100 100.0 -7 100 200.0 -8 100 100.0 -9 100 200.0 +0 100 100 +1 100 200 +2 100 100 +3 100 200 +4 100 100 +5 100 200 +6 100 100 +7 100 200 +8 100 100 +9 100 200 -- !query 39