From e8f84063d6c059b650d6529bcc33f2e46e19b055 Mon Sep 17 00:00:00 2001 From: Kunal Kundu Date: Thu, 21 Mar 2024 19:59:55 +0530 Subject: [PATCH 1/4] make format prefix optional for format options in COPY --- datafusion/sql/src/statement.rs | 11 ++++++- datafusion/sqllogictest/test_files/copy.slt | 32 +++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index e50aceb757df..007f356cae66 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -850,7 +850,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { return plan_err!("Unsupported Value in COPY statement {}", value); } }; - options.insert(key.to_lowercase(), value_string.to_lowercase()); + if !(&key.contains(".")) { + // If config does not belong to any namespace, assume it is + // a format option and apply the format prefix for backwards + // compatibility. + + let renamed_key = format!("format.{}", key); + options.insert(renamed_key.to_lowercase(), value_string.to_lowercase()); + } else { + options.insert(key.to_lowercase(), value_string.to_lowercase()); + } } let file_type = if let Some(file_type) = statement.stored_as { diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index 7884bece1f39..5a770f34d4a8 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -474,6 +474,38 @@ select * from validate_arrow; 1 Foo 2 Bar +# Legacy Format Options Support + +# Copy with legacy format options for Parquet +query IT +COPY source_table TO 'test_files/scratch/copy/format_table.parquet' +OPTIONS ( + compression snappy, + 'compression::col1' 'zstd(5)' +); +---- +2 + +# Copy with legacy format options for JSON +query IT +COPY source_table to 'test_files/scratch/copy/format_table' +STORED AS JSON OPTIONS (compression gzip); +---- +2 + +# Copy with legacy format options for CSV +query IT +COPY source_table to 'test_files/scratch/copy/format_table.csv' +OPTIONS ( + has_header false, + compression xz, + datetime_format '%FT%H:%M:%S.%9f', + delimiter ';', + null_value 'NULLVAL' +); +---- +2 + # Error cases: From fb54f05f096af529e1f8a767ea5303b9c2bb95a9 Mon Sep 17 00:00:00 2001 From: Kunal Kundu Date: Thu, 21 Mar 2024 20:39:32 +0530 Subject: [PATCH 2/4] fix clippy lint error --- datafusion/sql/src/statement.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 007f356cae66..4cca4c114a91 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -850,7 +850,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { return plan_err!("Unsupported Value in COPY statement {}", value); } }; - if !(&key.contains(".")) { + if !(&key.contains('.')) { // If config does not belong to any namespace, assume it is // a format option and apply the format prefix for backwards // compatibility. From 3dc6c4baefd89d1e54e019a08eaacc50495a0bd9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 21 Mar 2024 15:02:45 -0400 Subject: [PATCH 3/4] Add negative test case for unknown option --- datafusion/sqllogictest/test_files/copy.slt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index 5a770f34d4a8..b094ef1a73c0 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -506,6 +506,13 @@ OPTIONS ( ---- 2 +# Use unknown option with legacy format options to ensure error is sensible +query error DataFusion error: Invalid or Unsupported Configuration: Config value "unknown_option" not found on CsvOptions +COPY source_table to 'test_files/scratch/copy/format_table2.csv' +OPTIONS ( + unknown_option false, +); + # Error cases: From 6bb5bd4900c89c48cbc4f02e243c2ac7f29c3ce2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 21 Mar 2024 15:05:44 -0400 Subject: [PATCH 4/4] Improve test comments --- datafusion/sqllogictest/test_files/copy.slt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index b094ef1a73c0..75f1ccb07aac 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -474,9 +474,9 @@ select * from validate_arrow; 1 Foo 2 Bar -# Legacy Format Options Support +# Format Options Support without the 'format.' prefix -# Copy with legacy format options for Parquet +# Copy with format options for Parquet without the 'format.' prefix query IT COPY source_table TO 'test_files/scratch/copy/format_table.parquet' OPTIONS ( @@ -486,14 +486,14 @@ OPTIONS ( ---- 2 -# Copy with legacy format options for JSON +# Copy with format options for JSON without the 'format.' prefix query IT COPY source_table to 'test_files/scratch/copy/format_table' STORED AS JSON OPTIONS (compression gzip); ---- 2 -# Copy with legacy format options for CSV +# Copy with format options for CSV without the 'format.' prefix query IT COPY source_table to 'test_files/scratch/copy/format_table.csv' OPTIONS ( @@ -506,7 +506,7 @@ OPTIONS ( ---- 2 -# Use unknown option with legacy format options to ensure error is sensible +# Copy with unknown format options without the 'format.' prefix to ensure error is sensible query error DataFusion error: Invalid or Unsupported Configuration: Config value "unknown_option" not found on CsvOptions COPY source_table to 'test_files/scratch/copy/format_table2.csv' OPTIONS (