Skip to content

Commit

Permalink
[SPARK-6851] [SQL] function least/greatest follow up
Browse files Browse the repository at this point in the history
This is a follow-up that addresses the remaining review comments from apache#6851.

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes apache#7387 from adrian-wang/udflgfollow and squashes the following commits:

6163e62 [Daoyuan Wang] add skipping null values
e8c2e09 [Daoyuan Wang] use seq
8362966 [Daoyuan Wang] pr6851 follow up
  • Loading branch information
adrian-wang authored and rxin committed Jul 14, 2015
1 parent c1feebd commit 257236c
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,11 @@ case class CaseKeyWhen(key: Expression, branches: Seq[Expression]) extends CaseW
}
}

case class Least(children: Expression*) extends Expression {
/**
* A function that returns the least value of all parameters, skipping null values.
* It takes at least 2 parameters, and returns null iff all parameters are null.
*/
case class Least(children: Seq[Expression]) extends Expression {
require(children.length > 1, "LEAST requires at least 2 arguments, got " + children.length)

override def nullable: Boolean = children.forall(_.nullable)
Expand Down Expand Up @@ -356,12 +360,16 @@ case class Least(children: Expression*) extends Expression {
${evalChildren.map(_.code).mkString("\n")}
boolean ${ev.isNull} = true;
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
${(0 until children.length).map(updateEval).mkString("\n")}
${children.indices.map(updateEval).mkString("\n")}
"""
}
}

case class Greatest(children: Expression*) extends Expression {
/**
* A function that returns the greatest value of all parameters, skipping null values.
* It takes at least 2 parameters, and returns null iff all parameters are null.
*/
case class Greatest(children: Seq[Expression]) extends Expression {
require(children.length > 1, "GREATEST requires at least 2 arguments, got " + children.length)

override def nullable: Boolean = children.forall(_.nullable)
Expand Down Expand Up @@ -406,7 +414,7 @@ case class Greatest(children: Expression*) extends Expression {
${evalChildren.map(_.code).mkString("\n")}
boolean ${ev.isNull} = true;
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
${(0 until children.length).map(updateEval).mkString("\n")}
${children.indices.map(updateEval).mkString("\n")}
"""
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -144,35 +144,35 @@ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper
val c3 = 'a.string.at(2)
val c4 = 'a.string.at(3)
val c5 = 'a.string.at(4)
checkEvaluation(Least(c4, c3, c5), "a", row)
checkEvaluation(Least(c1, c2), 1, row)
checkEvaluation(Least(c1, c2, Literal(-1)), -1, row)
checkEvaluation(Least(c4, c5, c3, c3, Literal("a")), "a", row)

checkEvaluation(Least(Literal(null), Literal(null)), null, InternalRow.empty)
checkEvaluation(Least(Literal(-1.0), Literal(2.5)), -1.0, InternalRow.empty)
checkEvaluation(Least(Literal(-1), Literal(2)), -1, InternalRow.empty)
checkEvaluation(Least(Seq(c4, c3, c5)), "a", row)
checkEvaluation(Least(Seq(c1, c2)), 1, row)
checkEvaluation(Least(Seq(c1, c2, Literal(-1))), -1, row)
checkEvaluation(Least(Seq(c4, c5, c3, c3, Literal("a"))), "a", row)

checkEvaluation(Least(Seq(Literal(null), Literal(null))), null, InternalRow.empty)
checkEvaluation(Least(Seq(Literal(-1.0), Literal(2.5))), -1.0, InternalRow.empty)
checkEvaluation(Least(Seq(Literal(-1), Literal(2))), -1, InternalRow.empty)
checkEvaluation(
Least(Literal((-1.0).toFloat), Literal(2.5.toFloat)), (-1.0).toFloat, InternalRow.empty)
Least(Seq(Literal((-1.0).toFloat), Literal(2.5.toFloat))), (-1.0).toFloat, InternalRow.empty)
checkEvaluation(
Least(Literal(Long.MaxValue), Literal(Long.MinValue)), Long.MinValue, InternalRow.empty)
checkEvaluation(Least(Literal(1.toByte), Literal(2.toByte)), 1.toByte, InternalRow.empty)
Least(Seq(Literal(Long.MaxValue), Literal(Long.MinValue))), Long.MinValue, InternalRow.empty)
checkEvaluation(Least(Seq(Literal(1.toByte), Literal(2.toByte))), 1.toByte, InternalRow.empty)
checkEvaluation(
Least(Literal(1.toShort), Literal(2.toByte.toShort)), 1.toShort, InternalRow.empty)
checkEvaluation(Least(Literal("abc"), Literal("aaaa")), "aaaa", InternalRow.empty)
checkEvaluation(Least(Literal(true), Literal(false)), false, InternalRow.empty)
Least(Seq(Literal(1.toShort), Literal(2.toByte.toShort))), 1.toShort, InternalRow.empty)
checkEvaluation(Least(Seq(Literal("abc"), Literal("aaaa"))), "aaaa", InternalRow.empty)
checkEvaluation(Least(Seq(Literal(true), Literal(false))), false, InternalRow.empty)
checkEvaluation(
Least(
Least(Seq(
Literal(BigDecimal("1234567890987654321123456")),
Literal(BigDecimal("1234567890987654321123458"))),
Literal(BigDecimal("1234567890987654321123458")))),
BigDecimal("1234567890987654321123456"), InternalRow.empty)
checkEvaluation(
Least(Literal(Date.valueOf("2015-01-01")), Literal(Date.valueOf("2015-07-01"))),
Least(Seq(Literal(Date.valueOf("2015-01-01")), Literal(Date.valueOf("2015-07-01")))),
Date.valueOf("2015-01-01"), InternalRow.empty)
checkEvaluation(
Least(
Least(Seq(
Literal(Timestamp.valueOf("2015-07-01 08:00:00")),
Literal(Timestamp.valueOf("2015-07-01 10:00:00"))),
Literal(Timestamp.valueOf("2015-07-01 10:00:00")))),
Timestamp.valueOf("2015-07-01 08:00:00"), InternalRow.empty)
}

Expand All @@ -183,35 +183,36 @@ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper
val c3 = 'a.string.at(2)
val c4 = 'a.string.at(3)
val c5 = 'a.string.at(4)
checkEvaluation(Greatest(c4, c5, c3), "c", row)
checkEvaluation(Greatest(c2, c1), 2, row)
checkEvaluation(Greatest(c1, c2, Literal(2)), 2, row)
checkEvaluation(Greatest(c4, c5, c3, Literal("ccc")), "ccc", row)

checkEvaluation(Greatest(Literal(null), Literal(null)), null, InternalRow.empty)
checkEvaluation(Greatest(Literal(-1.0), Literal(2.5)), 2.5, InternalRow.empty)
checkEvaluation(Greatest(Literal(-1), Literal(2)), 2, InternalRow.empty)
checkEvaluation(Greatest(Seq(c4, c5, c3)), "c", row)
checkEvaluation(Greatest(Seq(c2, c1)), 2, row)
checkEvaluation(Greatest(Seq(c1, c2, Literal(2))), 2, row)
checkEvaluation(Greatest(Seq(c4, c5, c3, Literal("ccc"))), "ccc", row)

checkEvaluation(Greatest(Seq(Literal(null), Literal(null))), null, InternalRow.empty)
checkEvaluation(Greatest(Seq(Literal(-1.0), Literal(2.5))), 2.5, InternalRow.empty)
checkEvaluation(Greatest(Seq(Literal(-1), Literal(2))), 2, InternalRow.empty)
checkEvaluation(
Greatest(Literal((-1.0).toFloat), Literal(2.5.toFloat)), 2.5.toFloat, InternalRow.empty)
Greatest(Seq(Literal((-1.0).toFloat), Literal(2.5.toFloat))), 2.5.toFloat, InternalRow.empty)
checkEvaluation(Greatest(
Seq(Literal(Long.MaxValue), Literal(Long.MinValue))), Long.MaxValue, InternalRow.empty)
checkEvaluation(
Greatest(Literal(Long.MaxValue), Literal(Long.MinValue)), Long.MaxValue, InternalRow.empty)
checkEvaluation(Greatest(Literal(1.toByte), Literal(2.toByte)), 2.toByte, InternalRow.empty)
Greatest(Seq(Literal(1.toByte), Literal(2.toByte))), 2.toByte, InternalRow.empty)
checkEvaluation(
Greatest(Literal(1.toShort), Literal(2.toByte.toShort)), 2.toShort, InternalRow.empty)
checkEvaluation(Greatest(Literal("abc"), Literal("aaaa")), "abc", InternalRow.empty)
checkEvaluation(Greatest(Literal(true), Literal(false)), true, InternalRow.empty)
Greatest(Seq(Literal(1.toShort), Literal(2.toByte.toShort))), 2.toShort, InternalRow.empty)
checkEvaluation(Greatest(Seq(Literal("abc"), Literal("aaaa"))), "abc", InternalRow.empty)
checkEvaluation(Greatest(Seq(Literal(true), Literal(false))), true, InternalRow.empty)
checkEvaluation(
Greatest(
Greatest(Seq(
Literal(BigDecimal("1234567890987654321123456")),
Literal(BigDecimal("1234567890987654321123458"))),
Literal(BigDecimal("1234567890987654321123458")))),
BigDecimal("1234567890987654321123458"), InternalRow.empty)
checkEvaluation(
Greatest(Literal(Date.valueOf("2015-01-01")), Literal(Date.valueOf("2015-07-01"))),
checkEvaluation(Greatest(
Seq(Literal(Date.valueOf("2015-01-01")), Literal(Date.valueOf("2015-07-01")))),
Date.valueOf("2015-07-01"), InternalRow.empty)
checkEvaluation(
Greatest(
Greatest(Seq(
Literal(Timestamp.valueOf("2015-07-01 08:00:00")),
Literal(Timestamp.valueOf("2015-07-01 10:00:00"))),
Literal(Timestamp.valueOf("2015-07-01 10:00:00")))),
Timestamp.valueOf("2015-07-01 10:00:00"), InternalRow.empty)
}

Expand Down
16 changes: 10 additions & 6 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,8 @@ object functions {
def floor(columnName: String): Column = floor(Column(columnName))

/**
* Returns the greatest value of the list of values.
* Returns the greatest value of the list of values, skipping null values.
* This function takes at least 2 parameters. It will return null iff all parameters are null.
*
* @group normal_funcs
* @since 1.5.0
Expand All @@ -1082,11 +1083,12 @@ object functions {
def greatest(exprs: Column*): Column = if (exprs.length < 2) {
sys.error("GREATEST takes at least 2 parameters")
} else {
Greatest(exprs.map(_.expr): _*)
Greatest(exprs.map(_.expr))
}

/**
* Returns the greatest value of the list of column names.
* Returns the greatest value of the list of column names, skipping null values.
* This function takes at least 2 parameters. It will return null iff all parameters are null.
*
* @group normal_funcs
* @since 1.5.0
Expand Down Expand Up @@ -1198,7 +1200,8 @@ object functions {
def hypot(l: Double, rightName: String): Column = hypot(l, Column(rightName))

/**
* Returns the least value of the list of values.
* Returns the least value of the list of values, skipping null values.
* This function takes at least 2 parameters. It will return null iff all parameters are null.
*
* @group normal_funcs
* @since 1.5.0
Expand All @@ -1207,11 +1210,12 @@ object functions {
def least(exprs: Column*): Column = if (exprs.length < 2) {
sys.error("LEAST takes at least 2 parameters")
} else {
Least(exprs.map(_.expr): _*)
Least(exprs.map(_.expr))
}

/**
* Returns the least value of the list of column names.
* Returns the least value of the list of column names, skipping null values.
* This function takes at least 2 parameters. It will return null iff all parameters are null.
*
* @group normal_funcs
* @since 1.5.0
Expand Down

0 comments on commit 257236c

Please sign in to comment.