[SPARK-47504][SQL] Resolve AbstractDataType simpleStrings for StringTypeCollated

### What changes were proposed in this pull request?
Rename the simpleString of StringTypeAnyCollation (and the other StringTypeCollated subtypes) to "string" by defining it once in the StringTypeCollated base class instead of per subtype. This PR should only be merged after #45383 is merged.

### Why are the changes needed?
[SPARK-47296](#45422) introduced a change that fails all unsupported functions. As a consequence, the expected inputTypes in ExpectsInputTypes had to be changed, which altered user-facing error messages: they now print "STRING_ANY_COLLATION" in places where "STRING" was printed before. Concretely, if an Int is passed where StringTypeAnyCollation is expected, users get this faulty message.
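
For illustration, a minimal sketch of the user-visible difference, reusing the `collate` call from the CollationSuite test below; the session setup is assumed, and the error text is a paraphrase of the DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE message rather than output copied from a run:

```scala
import org.apache.spark.sql.SparkSession

// Assumption: a local Spark build with collation support enabled.
val spark = SparkSession.builder().master("local[1]").getOrCreate()

// Passing an INT where a collated string type is expected:
spark.sql("SELECT collate(1, 'UTF8_BINARY')")
// Before this PR, the DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE error read roughly:
//   ... requires the "STRING_ANY_COLLATION" type, however "1" has the type "INT".
// After this PR it reads:
//   ... requires the "STRING" type, however "1" has the type "INT".
```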

### Does this PR introduce _any_ user-facing change?
Yes

### How was this patch tested?
Existing tests were changed back to the "STRING" notation.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #45694 from mihailom-db/SPARK-47504.

Authored-by: Mihailo Milosevic <mihailo.milosevic@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
mihailom-db authored and cloud-fan committed Apr 8, 2024
1 parent 134a139 commit abb7b04
Showing 4 changed files with 8 additions and 10 deletions.
@@ -24,13 +24,13 @@ import org.apache.spark.sql.types.{AbstractDataType, DataType, StringType}
  */
 abstract class StringTypeCollated extends AbstractDataType {
   override private[sql] def defaultConcreteType: DataType = StringType
+  override private[sql] def simpleString: String = "string"
 }
 
 /**
  * Use StringTypeBinary for expressions supporting only binary collation.
  */
 case object StringTypeBinary extends StringTypeCollated {
-  override private[sql] def simpleString: String = "string_binary"
   override private[sql] def acceptsType(other: DataType): Boolean =
     other.isInstanceOf[StringType] && other.asInstanceOf[StringType].supportsBinaryEquality
 }
@@ -39,7 +39,6 @@ case object StringTypeBinary extends StringTypeCollated {
  * Use StringTypeBinaryLcase for expressions supporting only binary and lowercase collation.
  */
 case object StringTypeBinaryLcase extends StringTypeCollated {
-  override private[sql] def simpleString: String = "string_binary_lcase"
   override private[sql] def acceptsType(other: DataType): Boolean =
     other.isInstanceOf[StringType] && (other.asInstanceOf[StringType].supportsBinaryEquality ||
       other.asInstanceOf[StringType].isUTF8BinaryLcaseCollation)
@@ -49,6 +48,5 @@ case object StringTypeBinaryLcase extends StringTypeCollated {
  * Use StringTypeAnyCollation for expressions supporting all possible collation types.
  */
 case object StringTypeAnyCollation extends StringTypeCollated {
-  override private[sql] def simpleString: String = "string_any_collation"
   override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType]
 }
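
The mechanism is plain Scala inheritance: simpleString is defined once on the base type and the subtypes no longer override it. A standalone sketch of the pattern, with simplified stand-in names rather than Spark's actual classes:

```scala
// Sketch only: the *Sketch names are hypothetical stand-ins for the types above.
abstract class StringTypeCollatedSketch {
  // Single definition; every subtype now reports "string" in error messages.
  def simpleString: String = "string"
}

case object StringTypeBinarySketch extends StringTypeCollatedSketch
case object StringTypeAnyCollationSketch extends StringTypeCollatedSketch

object SimpleStringDemo extends App {
  println(StringTypeBinarySketch.simpleString)       // string (was "string_binary")
  println(StringTypeAnyCollationSketch.simpleString) // string (was "string_any_collation")
}
```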
@@ -70,7 +70,7 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       errorSubClass = "UNEXPECTED_INPUT_TYPE",
       messageParameters = Map(
         "paramIndex" -> ordinalNumber(0),
-        "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"BINARY\" or \"ARRAY\")",
+        "requiredType" -> "(\"STRING\" or \"BINARY\" or \"ARRAY\")",
         "inputSql" -> "\"1\"",
         "inputType" -> "\"INT\""
       )
@@ -127,7 +127,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
         "paramIndex" -> "first",
         "inputSql" -> "\"1\"",
         "inputType" -> "\"INT\"",
-        "requiredType" -> "\"STRING_ANY_COLLATION\""),
+        "requiredType" -> "\"STRING\""),
       context = ExpectedContext(
         fragment = s"collate(1, 'UTF8_BINARY')", start = 7, stop = 31))
   }
@@ -1713,7 +1713,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
         "paramIndex" -> "second",
         "inputSql" -> "\"1\"",
         "inputType" -> "\"INT\"",
-        "requiredType" -> "\"STRING_ANY_COLLATION\""
+        "requiredType" -> "\"STRING\""
       ),
       queryContext = Array(ExpectedContext("", "", 0, 15, "array_join(x, 1)"))
     )
@@ -1727,7 +1727,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
         "paramIndex" -> "third",
         "inputSql" -> "\"1\"",
         "inputType" -> "\"INT\"",
-        "requiredType" -> "\"STRING_ANY_COLLATION\""
+        "requiredType" -> "\"STRING\""
       ),
       queryContext = Array(ExpectedContext("", "", 0, 21, "array_join(x, ', ', 1)"))
     )
@@ -1987,7 +1987,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
         "paramIndex" -> "first",
         "inputSql" -> "\"struct(1, a)\"",
         "inputType" -> "\"STRUCT<col1: INT NOT NULL, col2: STRING NOT NULL>\"",
-        "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"ARRAY\")"
+        "requiredType" -> "(\"STRING\" or \"ARRAY\")"
       ),
       queryContext = Array(ExpectedContext("", "", 7, 29, "reverse(struct(1, 'a'))"))
     )
@@ -2002,7 +2002,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
         "paramIndex" -> "first",
         "inputSql" -> "\"map(1, a)\"",
         "inputType" -> "\"MAP<INT, STRING>\"",
-        "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"ARRAY\")"
+        "requiredType" -> "(\"STRING\" or \"ARRAY\")"
       ),
       queryContext = Array(ExpectedContext("", "", 7, 26, "reverse(map(1, 'a'))"))
     )
@@ -2552,7 +2552,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
       parameters = Map(
         "sqlExpr" -> "\"concat(map(1, 2), map(3, 4))\"",
         "paramIndex" -> "first",
-        "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"BINARY\" or \"ARRAY\")",
+        "requiredType" -> "(\"STRING\" or \"BINARY\" or \"ARRAY\")",
         "inputSql" -> "\"map(1, 2)\"",
         "inputType" -> "\"MAP<INT, INT>\""
       ),
