From 5c19505c5f1670ac81cf7de93f734455dc8f7e9d Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 25 Jul 2024 17:39:11 +0800 Subject: [PATCH] [SPARK-48844][FOLLOWUP][TESTS] Cleanup duplicated data resource files in hive-thriftserver test ### What changes were proposed in this pull request? A follow up of SPARK-48844 to cleanup duplicated data resource files in hive-thriftserver test ### Why are the changes needed? code refactoring ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #47480 from yaooqinn/SPARK-48844-F. Authored-by: Kent Yao Signed-off-by: Kent Yao --- .../analyzer-results/sql-on-files.sql.out | 106 ++++++++++++++---- .../sql-tests/inputs/sql-on-files.sql | 19 +++- .../sql-tests/results/sql-on-files.sql.out | 104 +++++++++++++++-- .../before_1582_date_v2_4.snappy.orc | Bin 201 -> 0 bytes .../src/test/resources/test-data/cars.csv | 7 -- .../test-data/dec-in-fixed-len.parquet | Bin 460 -> 0 bytes .../resources/test-data/with-map-fields.json | 5 - 7 files changed, 193 insertions(+), 48 deletions(-) delete mode 100644 sql/hive-thriftserver/src/test/resources/test-data/before_1582_date_v2_4.snappy.orc delete mode 100644 sql/hive-thriftserver/src/test/resources/test-data/cars.csv delete mode 100644 sql/hive-thriftserver/src/test/resources/test-data/dec-in-fixed-len.parquet delete mode 100644 sql/hive-thriftserver/src/test/resources/test-data/with-map-fields.json diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out index 78e2a876da861..b098a9758fe4e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out @@ -1,4 +1,19 @@ -- Automatically generated by SQLQueryTestSuite +-- !query +CREATE DATABASE IF NOT EXISTS sql_on_files +-- !query analysis +CreateNamespace true ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_on_files] + + +-- !query +CREATE TABLE sql_on_files.test_parquet USING PARQUET AS SELECT 1 +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_on_files`.`test_parquet`, ErrorIfExists, [1] + +- Project [1 AS 1#x] + +- OneRowRelation + + -- !query SELECT * FROM parquet.`` -- !query analysis @@ -33,12 +48,25 @@ org.apache.spark.sql.AnalysisException -- !query -SELECT * FROM parquet.`src/test/resources/test-data/dec-in-fixed-len.parquet` LIMIT 1 +SELECT * FROM parquet.`${spark.sql.warehouse.dir}/sql_on_files.db/test_parquet` +-- !query analysis +Project [1#x] ++- Relation [1#x] parquet + + +-- !query +DROP TABLE sql_on_files.test_parquet +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_on_files.test_parquet + + +-- !query +CREATE TABLE sql_on_files.test_orc USING ORC AS SELECT 1 -- !query analysis -GlobalLimit 1 -+- LocalLimit 1 - +- Project [fixed_len_dec#x] - +- Relation [fixed_len_dec#x] parquet +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_on_files`.`test_orc`, ErrorIfExists, [1] + +- Project [1 AS 1#x] + +- OneRowRelation -- !query @@ -75,12 +103,25 @@ org.apache.spark.sql.AnalysisException -- !query -SELECT * FROM orc.`src/test/resources/test-data/before_1582_date_v2_4.snappy.orc` LIMIT 1 +SELECT * FROM orc.`${spark.sql.warehouse.dir}/sql_on_files.db/test_orc` +-- !query analysis +Project [1#x] ++- Relation [1#x] orc + + +-- !query +DROP TABLE sql_on_files.test_orc -- !query analysis -GlobalLimit 1 -+- LocalLimit 1 - +- Project [dt#x] - +- Relation [dt#x] orc +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_on_files.test_orc + + +-- !query +CREATE TABLE sql_on_files.test_csv USING CSV AS SELECT 1 +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_on_files`.`test_csv`, ErrorIfExists, [1] + +- Project [1 AS 1#x] + +- OneRowRelation -- !query @@ -117,12 +158,25 @@ org.apache.spark.sql.AnalysisException -- !query -SELECT * FROM csv.`src/test/resources/test-data/cars.csv` LIMIT 1 +SELECT * FROM csv.`${spark.sql.warehouse.dir}/sql_on_files.db/test_csv` +-- !query analysis +Project [_c0#x] ++- Relation [_c0#x] csv + + +-- !query +DROP TABLE sql_on_files.test_csv +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_on_files.test_csv + + +-- !query +CREATE TABLE sql_on_files.test_json USING JSON AS SELECT 1 -- !query analysis -GlobalLimit 1 -+- LocalLimit 1 - +- Project [_c0#x, _c1#x, _c2#x, _c3#x, _c4#x] - +- Relation [_c0#x,_c1#x,_c2#x,_c3#x,_c4#x] csv +CreateDataSourceTableAsSelectCommand `spark_catalog`.`sql_on_files`.`test_json`, ErrorIfExists, [1] + +- Project [1 AS 1#x] + +- OneRowRelation -- !query @@ -159,9 +213,21 @@ org.apache.spark.sql.AnalysisException -- !query -SELECT * FROM json.`src/test/resources/test-data/with-map-fields.json` LIMIT 1 +SELECT * FROM json.`${spark.sql.warehouse.dir}/sql_on_files.db/test_json` +-- !query analysis +Project [1#xL] ++- Relation [1#xL] json + + +-- !query +DROP TABLE sql_on_files.test_json +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), sql_on_files.test_json + + +-- !query +DROP DATABASE sql_on_files -- !query analysis -GlobalLimit 1 -+- LocalLimit 1 - +- Project [id#xL, intervals#x] - +- Relation [id#xL,intervals#x] json +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_on_files] diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql index aee8aaa4d195b..8a00e4400e6b0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql @@ -1,19 +1,30 @@ +CREATE DATABASE IF NOT EXISTS sql_on_files; -- Parquet +CREATE TABLE sql_on_files.test_parquet USING PARQUET AS SELECT 1; SELECT * FROM parquet.``; SELECT * FROM parquet.`/file/not/found`; -SELECT * FROM parquet.`src/test/resources/test-data/dec-in-fixed-len.parquet` LIMIT 1; +SELECT * FROM parquet.`${spark.sql.warehouse.dir}/sql_on_files.db/test_parquet`; +DROP TABLE sql_on_files.test_parquet; -- ORC +CREATE TABLE sql_on_files.test_orc USING ORC AS SELECT 1; SELECT * FROM orc.``; SELECT * FROM orc.`/file/not/found`; -SELECT * FROM orc.`src/test/resources/test-data/before_1582_date_v2_4.snappy.orc` LIMIT 1; +SELECT * FROM orc.`${spark.sql.warehouse.dir}/sql_on_files.db/test_orc`; +DROP TABLE sql_on_files.test_orc; -- CSV +CREATE TABLE sql_on_files.test_csv USING CSV AS SELECT 1; SELECT * FROM csv.``; SELECT * FROM csv.`/file/not/found`; -SELECT * FROM csv.`src/test/resources/test-data/cars.csv` LIMIT 1; +SELECT * FROM csv.`${spark.sql.warehouse.dir}/sql_on_files.db/test_csv`; +DROP TABLE sql_on_files.test_csv; -- JSON +CREATE TABLE sql_on_files.test_json USING JSON AS SELECT 1; SELECT * FROM json.``; SELECT * FROM json.`/file/not/found`; -SELECT * FROM json.`src/test/resources/test-data/with-map-fields.json` LIMIT 1; +SELECT * FROM json.`${spark.sql.warehouse.dir}/sql_on_files.db/test_json`; +DROP TABLE sql_on_files.test_json; + +DROP DATABASE sql_on_files; diff --git a/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out index 5c1e5697d029d..fc8f44bc22fee 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out @@ -1,4 +1,20 @@ -- Automatically generated by SQLQueryTestSuite +-- !query +CREATE DATABASE IF NOT EXISTS sql_on_files +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_on_files.test_parquet USING PARQUET AS SELECT 1 +-- !query schema +struct<> +-- !query output + + + -- !query SELECT * FROM parquet.`` -- !query schema @@ -37,11 +53,27 @@ org.apache.spark.sql.AnalysisException -- !query -SELECT * FROM parquet.`src/test/resources/test-data/dec-in-fixed-len.parquet` LIMIT 1 +SELECT * FROM parquet.`${spark.sql.warehouse.dir}/sql_on_files.db/test_parquet` -- !query schema -struct +struct<1:int> -- !query output -0.00 +1 + + +-- !query +DROP TABLE sql_on_files.test_parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_on_files.test_orc USING ORC AS SELECT 1 +-- !query schema +struct<> +-- !query output + -- !query @@ -82,11 +114,27 @@ org.apache.spark.sql.AnalysisException -- !query -SELECT * FROM orc.`src/test/resources/test-data/before_1582_date_v2_4.snappy.orc` LIMIT 1 +SELECT * FROM orc.`${spark.sql.warehouse.dir}/sql_on_files.db/test_orc` +-- !query schema +struct<1:int> +-- !query output +1 + + +-- !query +DROP TABLE sql_on_files.test_orc +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE sql_on_files.test_csv USING CSV AS SELECT 1 -- !query schema -struct +struct<> -- !query output -1200-01-01 + -- !query @@ -127,11 +175,27 @@ org.apache.spark.sql.AnalysisException -- !query -SELECT * FROM csv.`src/test/resources/test-data/cars.csv` LIMIT 1 +SELECT * FROM csv.`${spark.sql.warehouse.dir}/sql_on_files.db/test_csv` +-- !query schema +struct<_c0:string> +-- !query output +1 + + +-- !query +DROP TABLE sql_on_files.test_csv -- !query schema -struct<_c0:string,_c1:string,_c2:string,_c3:string,_c4:string> +struct<> -- !query output -year make model comment blank + + + +-- !query +CREATE TABLE sql_on_files.test_json USING JSON AS SELECT 1 +-- !query schema +struct<> +-- !query output + -- !query @@ -172,8 +236,24 @@ org.apache.spark.sql.AnalysisException -- !query -SELECT * FROM json.`src/test/resources/test-data/with-map-fields.json` LIMIT 1 +SELECT * FROM json.`${spark.sql.warehouse.dir}/sql_on_files.db/test_json` -- !query schema -struct,b:struct>> +struct<1:bigint> +-- !query output +1 + + +-- !query +DROP TABLE sql_on_files.test_json +-- !query schema +struct<> -- !query output -1 {"a":{"endTime":211,"startTime":111},"b":{"endTime":221,"startTime":121}} + + + +-- !query +DROP DATABASE sql_on_files +-- !query schema +struct<> +-- !query output + diff --git a/sql/hive-thriftserver/src/test/resources/test-data/before_1582_date_v2_4.snappy.orc b/sql/hive-thriftserver/src/test/resources/test-data/before_1582_date_v2_4.snappy.orc deleted file mode 100644 index ebe01743b2e2043d9374630c5f24586b6e5df3cb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 201 zcmeYdau#G@;9?VE;b074&<8R_xtJLk7=-vZ7_B%sX0K8b0O9}!ZU%-t29BxHjv&Px zYyu1t++dnfLJCMT3NT7=2r+Rm05Ov(P>BQ=*gP%)R8t~2Y8eT&8yPVu5U4^0 z3hK5WJW4SNaflr_N}BXrgbo(V<<*j?`)dokQEarvSr7`N-{ZFH k+I`P