From af5eb085b0ac01eec2bb5db206b7d499d462abc4 Mon Sep 17 00:00:00 2001
From: wforget <643348094@qq.com>
Date: Fri, 19 Jul 2024 07:58:22 +0000
Subject: [PATCH] [SPARK-47307][SQL][FOLLOWUP] Promote spark.sql.legacy.chunkBase64String.enabled from a legacy/internal config to a regular/public one

### What changes were proposed in this pull request?

+ Promote `spark.sql.legacy.chunkBase64String.enabled` from a legacy/internal config to a regular/public one, `spark.sql.chunkBase64String.enabled`.
+ Add test cases for `unbase64`.

### Why are the changes needed?

To keep the same default behavior as before: output of the `base64` function is still chunked into lines of at most 76 characters unless the config is disabled.

More details: https://github.com/apache/spark/pull/47303#issuecomment-2237785431

### Does this PR introduce _any_ user-facing change?

Yes, it reverts the behavior change introduced in #47303.

### How was this patch tested?

Existing unit tests, plus new `unbase64` assertions in `StringExpressionsSuite`.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #47410 from wForget/SPARK-47307_followup.

Lead-authored-by: wforget <643348094@qq.com>
Co-authored-by: Kent Yao
Signed-off-by: Kent Yao
---
 .../query-tests/explain-results/function_base64.explain    | 2 +-
 .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 5 ++---
 .../sql/catalyst/expressions/StringExpressionsSuite.scala  | 4 ++++
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/connect/common/src/test/resources/query-tests/explain-results/function_base64.explain b/connect/common/src/test/resources/query-tests/explain-results/function_base64.explain
index d3a250919ea5d..d7e8e4ffb3fc5 100644
--- a/connect/common/src/test/resources/query-tests/explain-results/function_base64.explain
+++ b/connect/common/src/test/resources/query-tests/explain-results/function_base64.explain
@@ -1,2 +1,2 @@
-Project [static_invoke(Base64.encode(cast(g#0 as binary), false)) AS base64(CAST(g AS BINARY))#0]
+Project [static_invoke(Base64.encode(cast(g#0 as binary), true)) AS base64(CAST(g AS BINARY))#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index f50eb9b121589..04fdc7655bb36 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -3525,14 +3525,13 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)
 
-  val CHUNK_BASE64_STRING_ENABLED = buildConf("spark.sql.legacy.chunkBase64String.enabled")
-    .internal()
+  val CHUNK_BASE64_STRING_ENABLED = buildConf("spark.sql.chunkBase64String.enabled")
     .doc("Whether to truncate string generated by the `Base64` function. When true, base64" +
       " strings generated by the base64 function are chunked into lines of at most 76" +
       " characters. When false, the base64 strings are not chunked.")
     .version("3.5.2")
     .booleanConf
-    .createWithDefault(false)
+    .createWithDefault(true)
 
   val ENABLE_DEFAULT_COLUMNS = buildConf("spark.sql.defaultColumn.enabled")
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index 2ad8652f2b314..847c783e19369 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -520,6 +520,10 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     withSQLConf(SQLConf.CHUNK_BASE64_STRING_ENABLED.key -> "true") {
       checkEvaluation(Base64(Literal(longString.getBytes)), chunkEncoded)
     }
+
+    // check if unbase64 works well for chunked and non-chunked encoded strings
+    checkEvaluation(StringDecode(UnBase64(Literal(encoded)), Literal("utf-8")), longString)
+    checkEvaluation(StringDecode(UnBase64(Literal(chunkEncoded)), Literal("utf-8")), longString)
   }
 
   test("initcap unit test") {
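To make the user-facing effect of the promoted config concrete, here is a minimal Scala sketch of how `spark.sql.chunkBase64String.enabled` is expected to behave after this change. It is illustrative only and not part of the patch; the object name `ChunkBase64Demo`, the local-mode SparkSession setup, and the sample payload are assumptions.

```scala
import org.apache.spark.sql.SparkSession

// Illustrative sketch, not part of the patch: shows the default (chunked) and
// opted-out (unchunked) output of the base64 function after this change.
object ChunkBase64Demo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")          // assumption: local demo session
      .appName("chunk-base64-demo")
      .getOrCreate()

    // Long enough that the base64 output exceeds one 76-character line.
    val payload = "a" * 100

    // Default after this patch: spark.sql.chunkBase64String.enabled=true,
    // so the encoded string is split into lines of at most 76 characters.
    spark.sql(s"SELECT base64(cast('$payload' AS BINARY)) AS b").show(truncate = false)

    // Disabling the (now public) config yields a single unbroken base64 string.
    spark.conf.set("spark.sql.chunkBase64String.enabled", "false")
    spark.sql(s"SELECT base64(cast('$payload' AS BINARY)) AS b").show(truncate = false)

    // unbase64 decodes both forms back to the original value.
    spark.sql(s"SELECT decode(unbase64(base64(cast('$payload' AS BINARY))), 'utf-8') AS s")
      .show(truncate = false)

    spark.stop()
  }
}
```

The `decode(unbase64(...), 'utf-8')` round trip at the end mirrors, at the SQL level, the `StringDecode(UnBase64(...))` assertions added to `StringExpressionsSuite` for chunked and non-chunked input.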