From 310f8ea2456dad7cec0f22bfed05a679764c3d7e Mon Sep 17 00:00:00 2001 From: wforget <643348094@qq.com> Date: Thu, 4 Jul 2024 22:27:01 +0800 Subject: [PATCH] [SPARK-48806][SQL] Pass actual exception when url_decode fails ### What changes were proposed in this pull request? Pass the actual exception as the cause of the error raised when url_decode fails. Follow-up to https://issues.apache.org/jira/browse/SPARK-40156 ### Why are the changes needed? Currently, the url_decode function discards the actual exception, which contains information that is useful for quickly locating the problem. For example, executing this SQL: ``` select url_decode('https%3A%2F%2spark.apache.org'); ``` We only get the error message: ``` org.apache.spark.SparkIllegalArgumentException: [CANNOT_DECODE_URL] The provided URL cannot be decoded: https%3A%2F%2spark.apache.org. Please ensure that the URL is properly formatted and try again. at org.apache.spark.sql.errors.QueryExecutionErrors$.illegalUrlError(QueryExecutionErrors.scala:376) at org.apache.spark.sql.catalyst.expressions.UrlCodec$.decode(urlExpressions.scala:118) at org.apache.spark.sql.catalyst.expressions.UrlCodec.decode(urlExpressions.scala) ``` However, the actual useful exception information is ignored: ``` java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - Error at index 1 in: "2s" ``` After this PR, we get: ``` org.apache.spark.SparkIllegalArgumentException: [CANNOT_DECODE_URL] The provided URL cannot be decoded: https%3A%2F%2spark.apache.org. Please ensure that the URL is properly formatted and try again. SQLSTATE: 22546 at org.apache.spark.sql.errors.QueryExecutionErrors$.illegalUrlError(QueryExecutionErrors.scala:372) at org.apache.spark.sql.catalyst.expressions.UrlCodec$.decode(urlExpressions.scala:119) at org.apache.spark.sql.catalyst.expressions.UrlCodec.decode(urlExpressions.scala) ... 
Caused by: java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - Error at index 1 in: "2s" at java.base/java.net.URLDecoder.decode(URLDecoder.java:237) at java.base/java.net.URLDecoder.decode(URLDecoder.java:147) at org.apache.spark.sql.catalyst.expressions.UrlCodec$.decode(urlExpressions.scala:116) ... 135 more ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? unit test ### Was this patch authored or co-authored using generative AI tooling? No Closes #47211 from wForget/SPARK-48806. Lead-authored-by: wforget <643348094@qq.com> Co-authored-by: Kent Yao Signed-off-by: Kent Yao --- .../sql/catalyst/expressions/urlExpressions.scala | 2 +- .../spark/sql/errors/QueryExecutionErrors.scala | 5 +++-- .../org/apache/spark/sql/StringFunctionsSuite.scala | 11 ++++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala index ef8f2ea96eb0b..65fce955b8f87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala @@ -116,7 +116,7 @@ object UrlCodec { UTF8String.fromString(URLDecoder.decode(src.toString, enc.toString)) } catch { case e: IllegalArgumentException => - throw QueryExecutionErrors.illegalUrlError(src) + throw QueryExecutionErrors.illegalUrlError(src, e) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 6fb09bdeffc51..40679348e96a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala 
@@ -374,10 +374,11 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE cause = e) } - def illegalUrlError(url: UTF8String): Throwable = { + def illegalUrlError(url: UTF8String, e: IllegalArgumentException): Throwable = { new SparkIllegalArgumentException( errorClass = "CANNOT_DECODE_URL", - messageParameters = Map("url" -> url.toString) + messageParameters = Map("url" -> url.toString), + cause = e ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 31c1cac9fb718..fd2661003a151 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql -import org.apache.spark.{SPARK_DOC_ROOT, SparkRuntimeException} +import org.apache.spark.{SPARK_DOC_ROOT, SparkIllegalArgumentException, SparkRuntimeException} import org.apache.spark.sql.catalyst.expressions.Cast._ import org.apache.spark.sql.execution.FormattedMode import org.apache.spark.sql.functions._ @@ -1273,4 +1273,13 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { ) ) } + + test("SPARK-48806: url_decode exception") { + val e = intercept[SparkIllegalArgumentException] { + sql("select url_decode('https%3A%2F%2spark.apache.org')").collect() + } + assert(e.getCause.isInstanceOf[IllegalArgumentException] && + e.getCause.getMessage + .startsWith("URLDecoder: Illegal hex characters in escape (%) pattern - ")) + } }