Skip to content

Commit

Permalink
[SPARK-47357][SQL] Add support for Upper, Lower, InitCap (all collations)
Browse files Browse the repository at this point in the history

### What changes were proposed in this pull request?
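Add collation support to `InitCap` (it now accepts strings of any collation and returns its child's data type), and add tests covering `Upper`, `Lower`, and `InitCap` with collated inputs.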

### Why are the changes needed?
String functions need to accept collated string inputs and return a result whose type keeps the input's collation.

### Does this PR introduce _any_ user-facing change?
Yes, `initcap` now accepts collated string inputs and returns a string with the same collation as its input.

### How was this patch tested?
Test in `CollationStringExpressionsSuite`.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#46008 from mihailom-db/SPARK-47357.

Authored-by: Mihailo Milosevic <mihailo.milosevic@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
mihailom-db authored and cloud-fan committed Apr 15, 2024
1 parent b8354bb commit f3a6ca9
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1810,8 +1810,8 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC
case class InitCap(child: Expression)
extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant {

override def inputTypes: Seq[DataType] = Seq(StringType)
override def dataType: DataType = StringType
override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation)
override def dataType: DataType = child.dataType

override def nullSafeEval(string: Any): Any = {
// scalastyle:off caselocale
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,13 @@ package org.apache.spark.sql
import scala.collection.immutable.Seq

import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.{BooleanType, StringType}

class CollationStringExpressionsSuite
extends QueryTest
with SharedSparkSession
with ExpressionEvalHelper {
with SharedSparkSession {

test("Support ConcatWs string expression with collation") {
// Supported collations
Expand Down Expand Up @@ -163,6 +161,57 @@ class CollationStringExpressionsSuite
})
}

test("SPARK-47357: Support Upper string expression with collation") {
  // Each tuple is (input string, collation name, expected upper-cased result).
  // One entry per collation exercised by this test.
  Seq(
    ("aBc", "UTF8_BINARY", "ABC"),
    ("aBc", "UTF8_BINARY_LCASE", "ABC"),
    ("aBc", "UNICODE", "ABC"),
    ("aBc", "UNICODE_CI", "ABC")
  ).foreach { case (input, collation, expected) =>
    val query = s"SELECT upper(collate('$input', '$collation'))"
    // The query must return the expected value ...
    checkAnswer(sql(query), Row(expected))
    // ... and the output column must have the same collated string type as the input.
    val resultType = sql(query).schema.fields.head.dataType
    assert(resultType.sameType(StringType(collation)))
  }
}

test("SPARK-47357: Support Lower string expression with collation") {
  // Each tuple is (input string, collation name, expected lower-cased result).
  // One entry per collation exercised by this test.
  Seq(
    ("aBc", "UTF8_BINARY", "abc"),
    ("aBc", "UTF8_BINARY_LCASE", "abc"),
    ("aBc", "UNICODE", "abc"),
    ("aBc", "UNICODE_CI", "abc")
  ).foreach { case (input, collation, expected) =>
    val query = s"SELECT lower(collate('$input', '$collation'))"
    // The query must return the expected value ...
    checkAnswer(sql(query), Row(expected))
    // ... and the output column must have the same collated string type as the input.
    val resultType = sql(query).schema.fields.head.dataType
    assert(resultType.sameType(StringType(collation)))
  }
}

test("SPARK-47357: Support InitCap string expression with collation") {
  // Each tuple is (input string, collation name, expected result of initcap).
  // One entry per collation exercised by this test.
  Seq(
    ("aBc ABc", "UTF8_BINARY", "Abc Abc"),
    ("aBc ABc", "UTF8_BINARY_LCASE", "Abc Abc"),
    ("aBc ABc", "UNICODE", "Abc Abc"),
    ("aBc ABc", "UNICODE_CI", "Abc Abc")
  ).foreach { case (input, collation, expected) =>
    val query = s"SELECT initcap(collate('$input', '$collation'))"
    // The query must return the expected value ...
    checkAnswer(sql(query), Row(expected))
    // ... and the output column must have the same collated string type as the input.
    val resultType = sql(query).schema.fields.head.dataType
    assert(resultType.sameType(StringType(collation)))
  }
}

// TODO: Add more tests for other string expressions

}
Expand Down

0 comments on commit f3a6ca9

Please sign in to comment.