Skip to content

Commit

Permalink
[SPARK-3582][SQL] not limit argument type for hive simple udf
Browse files Browse the repository at this point in the history
Since we have moved to `ConventionHelper`, it is quite easy to avoid calling `javaClassToDataType` in a Hive simple UDF. This will solve SPARK-3582.

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes #2506 from adrian-wang/spark3582 and squashes the following commits:

450c28e [Daoyuan Wang] not limit argument type for hive simple udf
  • Loading branch information
adrian-wang authored and marmbrus committed Sep 23, 2014
1 parent 66bc0f2 commit 116016b
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,15 @@ private[hive] trait HiveInspectors {

/** Converts native catalyst types to the types expected by Hive */
def wrap(a: Any): AnyRef = a match {
case s: String => new hadoopIo.Text(s) // TODO why should be Text?
case s: String => s: java.lang.String
case i: Int => i: java.lang.Integer
case b: Boolean => b: java.lang.Boolean
case f: Float => f: java.lang.Float
case d: Double => d: java.lang.Double
case l: Long => l: java.lang.Long
case l: Short => l: java.lang.Short
case l: Byte => l: java.lang.Byte
case b: BigDecimal => b.bigDecimal
case b: BigDecimal => new HiveDecimal(b.underlying())
case b: Array[Byte] => b
case t: java.sql.Timestamp => t
case s: Seq[_] => seqAsJavaList(s.map(wrap))
Expand Down
22 changes: 2 additions & 20 deletions sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,7 @@ private[hive] abstract class HiveFunctionRegistry
val functionClassName = functionInfo.getFunctionClass.getName

if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) {
val function = functionInfo.getFunctionClass.newInstance().asInstanceOf[UDF]
val method = function.getResolver.getEvalMethod(children.map(_.dataType.toTypeInfo))

val expectedDataTypes = method.getParameterTypes.map(javaClassToDataType)

HiveSimpleUdf(
functionClassName,
children.zip(expectedDataTypes).map {
case (e, NullType) => e
case (e, t) if (e.dataType == t) => e
case (e, t) => Cast(e, t)
}
)
HiveSimpleUdf(functionClassName, children)
} else if (classOf[GenericUDF].isAssignableFrom(functionInfo.getFunctionClass)) {
HiveGenericUdf(functionClassName, children)
} else if (
Expand Down Expand Up @@ -117,15 +105,9 @@ private[hive] case class HiveSimpleUdf(functionClassName: String, children: Seq[
@transient
lazy val dataType = javaClassToDataType(method.getReturnType)

def catalystToHive(value: Any): Object = value match {
// TODO need more types here? or can we use wrap()
case bd: BigDecimal => new HiveDecimal(bd.underlying())
case d => d.asInstanceOf[Object]
}

// TODO: Finish input output types.
override def eval(input: Row): Any = {
val evaluatedChildren = children.map(c => catalystToHive(c.eval(input)))
val evaluatedChildren = children.map(c => wrap(c.eval(input)))

unwrap(FunctionRegistry.invoke(method, function, conversionHelper
.convertIfNecessary(evaluatedChildren: _*): _*))
Expand Down

0 comments on commit 116016b

Please sign in to comment.