Skip to content

Commit

Permalink
[SPARK-49987][SQL] Fix the error prompt when seedExpression is non-…
Browse files Browse the repository at this point in the history
…foldable in `randstr`

### What changes were proposed in this pull request?
The pr aims to
- fix the `error prompt` when `seedExpression` is `non-foldable` in `randstr`.
- use `toSQLId` to set the parameter value `inputName` for `randstr ` and `uniform` of `NON_FOLDABLE_INPUT`.

### Why are the changes needed?
- Let me take an example
```scala
val df = Seq(1.1).toDF("a")
df.createOrReplaceTempView("t")
sql("SELECT randstr(1, a) from t").show(false)
```

- Before
<img width="1255" alt="image" src="https://github.com/user-attachments/assets/4e14af7f-19ab-41e7-bdef-836ce86c8ecc">

```shell
[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "randstr(1, a)" due to data type mismatch: the input seedExpression should be a foldable INT or SMALLINT expression; however, got "a". SQLSTATE: 42K09; line 1 pos 7;
'Project [unresolvedalias(randstr(1, a#5, false))]
+- SubqueryAlias t
   +- View (`t`, [a#5])
      +- Project [value#1 AS a#5]
         +- LocalRelation [value#1]
```

- After
```shell
[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "randstr(1, a)" due to data type mismatch: the input seed should be a foldable INT or SMALLINT expression; however, got "a". SQLSTATE: 42K09; line 1 pos 7;
'Project [unresolvedalias(randstr(1, a#5, false))]
+- SubqueryAlias t
   +- View (`t`, [a#5])
      +- Project [value#1 AS a#5]
         +- LocalRelation [value#1]
```

- The `parameter` name (`seedExpression`) in the error message does not match the `parameter` name (`seed`) seen  in docs by the end-user.
<img width="787" alt="image" src="https://github.com/user-attachments/assets/5c03acab-f57d-40b7-b5aa-8d37fd2cc557">

### Does this PR introduce _any_ user-facing change?
Yes, When `seed` is `non-foldable `, the end-user will get a consistent experience in the error prompt.

### How was this patch tested?
Update existed UT.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #48490 from panbingkun/SPARK-49987.

Authored-by: panbingkun <panbingkun@baidu.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
  • Loading branch information
panbingkun authored and MaxGekk committed Oct 16, 2024
1 parent 39112e4 commit 8d1cb76
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult, UnresolvedSeed}
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
import org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes.{ordinalNumber, toSQLExpr, toSQLType}
import org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes.{ordinalNumber, toSQLExpr, toSQLId, toSQLType}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.trees.{BinaryLike, TernaryLike, UnaryLike}
Expand Down Expand Up @@ -263,7 +263,7 @@ case class Uniform(min: Expression, max: Expression, seedExpression: Expression,
result = DataTypeMismatch(
errorSubClass = "NON_FOLDABLE_INPUT",
messageParameters = Map(
"inputName" -> name,
"inputName" -> toSQLId(name),
"inputType" -> requiredType,
"inputExpr" -> toSQLExpr(expr)))
} else expr.dataType match {
Expand Down Expand Up @@ -374,14 +374,14 @@ case class RandStr(
var result: TypeCheckResult = TypeCheckResult.TypeCheckSuccess
def requiredType = "INT or SMALLINT"
Seq((length, "length", 0),
(seedExpression, "seedExpression", 1)).foreach {
(seedExpression, "seed", 1)).foreach {
case (expr: Expression, name: String, index: Int) =>
if (result == TypeCheckResult.TypeCheckSuccess) {
if (!expr.foldable) {
result = DataTypeMismatch(
errorSubClass = "NON_FOLDABLE_INPUT",
messageParameters = Map(
"inputName" -> name,
"inputName" -> toSQLId(name),
"inputType" -> requiredType,
"inputExpr" -> toSQLExpr(expr)))
} else expr.dataType match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"sqlState" : "42K09",
"messageParameters" : {
"inputExpr" : "\"col\"",
"inputName" : "seed",
"inputName" : "`seed`",
"inputType" : "integer or floating-point",
"sqlExpr" : "\"uniform(10, 20, col)\""
},
Expand All @@ -211,7 +211,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"sqlState" : "42K09",
"messageParameters" : {
"inputExpr" : "\"col\"",
"inputName" : "min",
"inputName" : "`min`",
"inputType" : "integer or floating-point",
"sqlExpr" : "\"uniform(col, 10, 0)\""
},
Expand Down Expand Up @@ -436,7 +436,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"sqlState" : "42K09",
"messageParameters" : {
"inputExpr" : "\"col\"",
"inputName" : "length",
"inputName" : "`length`",
"inputType" : "INT or SMALLINT",
"sqlExpr" : "\"randstr(col, 0)\""
},
Expand All @@ -459,7 +459,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"sqlState" : "42K09",
"messageParameters" : {
"inputExpr" : "\"col\"",
"inputName" : "seedExpression",
"inputName" : "`seed`",
"inputType" : "INT or SMALLINT",
"sqlExpr" : "\"randstr(10, col)\""
},
Expand Down
8 changes: 4 additions & 4 deletions sql/core/src/test/resources/sql-tests/results/random.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"sqlState" : "42K09",
"messageParameters" : {
"inputExpr" : "\"col\"",
"inputName" : "seed",
"inputName" : "`seed`",
"inputType" : "integer or floating-point",
"sqlExpr" : "\"uniform(10, 20, col)\""
},
Expand All @@ -265,7 +265,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"sqlState" : "42K09",
"messageParameters" : {
"inputExpr" : "\"col\"",
"inputName" : "min",
"inputName" : "`min`",
"inputType" : "integer or floating-point",
"sqlExpr" : "\"uniform(col, 10, 0)\""
},
Expand Down Expand Up @@ -520,7 +520,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"sqlState" : "42K09",
"messageParameters" : {
"inputExpr" : "\"col\"",
"inputName" : "length",
"inputName" : "`length`",
"inputType" : "INT or SMALLINT",
"sqlExpr" : "\"randstr(col, 0)\""
},
Expand All @@ -545,7 +545,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"sqlState" : "42K09",
"messageParameters" : {
"inputExpr" : "\"col\"",
"inputName" : "seedExpression",
"inputName" : "`seed`",
"inputType" : "INT or SMALLINT",
"sqlExpr" : "\"randstr(10, col)\""
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
intercept[AnalysisException](df.select(expr)),
condition = "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
parameters = Map(
"inputName" -> "length",
"inputName" -> "`length`",
"inputType" -> "INT or SMALLINT",
"inputExpr" -> "\"a\"",
"sqlExpr" -> "\"randstr(a, 10)\""),
Expand Down Expand Up @@ -530,7 +530,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
intercept[AnalysisException](df.select(expr)),
condition = "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
parameters = Map(
"inputName" -> "min",
"inputName" -> "`min`",
"inputType" -> "integer or floating-point",
"inputExpr" -> "\"a\"",
"sqlExpr" -> "\"uniform(a, 10)\""),
Expand Down

0 comments on commit 8d1cb76

Please sign in to comment.