apache · culler · Nov 3, 2014 · Nov 3, 2014 · Nov 3, 2014 · Nov 3, 2014
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -124,9 +124,9 @@ object ScalaReflection {
     case obj: LongType.JvmType => LongType
     case obj: FloatType.JvmType => FloatType
     case obj: DoubleType.JvmType => DoubleType
-    case obj: DateType.JvmType => DateType
     case obj: BigDecimal => DecimalType.Unlimited
     case obj: Decimal => DecimalType.Unlimited
+    case obj: DateType.JvmType => DateType
     case obj: TimestampType.JvmType => TimestampType
     case null => NullType
     // For other cases, there is no obvious mapping from the type of the given object to a

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -125,9 +125,9 @@ package object dsl {
     implicit def floatToLiteral(f: Float) = Literal(f)
     implicit def doubleToLiteral(d: Double) = Literal(d)
     implicit def stringToLiteral(s: String) = Literal(s)
-    implicit def dateToLiteral(d: Date) = Literal(d)
     implicit def bigDecimalToLiteral(d: BigDecimal) = Literal(d)
     implicit def decimalToLiteral(d: Decimal) = Literal(d)
+    implicit def dateToLiteral(d: Date) = Literal(d)
     implicit def timestampToLiteral(t: Timestamp) = Literal(t)
     implicit def binaryToLiteral(a: Array[Byte]) = Literal(a)
 
@@ -146,6 +146,31 @@ package object dsl {
     def upper(e: Expression) = Upper(e)
     def lower(e: Expression) = Lower(e)
 
+    /*
+     * Extension operators for the case where a plain Scala value appears on the
+     * left-hand side of a symbol: the value is wrapped in a Literal first, so
+     * that an expression such as 0 < 'x resolves to a catalyst Expression.
+     */
+    implicit class InitialLiteral(x: Any) {
+      val literal = Literal(x)
+      def + (other: Symbol): Expression = literal + other
+      def - (other: Symbol): Expression = literal - other
+      def * (other: Symbol): Expression = literal * other
+      def / (other: Symbol): Expression = literal / other
+      def % (other: Symbol): Expression = literal % other
+
+      def && (other: Symbol): Expression = literal && other
+      def || (other: Symbol): Expression = literal || other
+
+      def < (other: Symbol): Expression = literal < other
+      def <= (other: Symbol): Expression = literal <= other
+      def > (other: Symbol): Expression = literal > other
+      def >= (other: Symbol): Expression = literal >= other
+      def === (other: Symbol): Expression = literal === other
+      def <=> (other: Symbol): Expression = literal <=> other
+      def !== (other: Symbol): Expression = literal !== other
+    }
+
     implicit class DslSymbol(sym: Symbol) extends ImplicitAttribute { def s = sym.name }
     // TODO more implicit class for literal?
     implicit class DslString(val s: String) extends ImplicitOperators {
@@ -182,16 +207,16 @@ package object dsl {
       /** Creates a new AttributeReference of type string */
       def string = AttributeReference(s, StringType, nullable = true)()
 
-      /** Creates a new AttributeReference of type date */
-      def date = AttributeReference(s, DateType, nullable = true)()
-
       /** Creates a new AttributeReference of type decimal */
       def decimal = AttributeReference(s, DecimalType.Unlimited, nullable = true)()
 
       /** Creates a new AttributeReference of type decimal */
       def decimal(precision: Int, scale: Int) =
         AttributeReference(s, DecimalType(precision, scale), nullable = true)()
 
+      /** Creates a new nullable AttributeReference of type date */
+      def date = AttributeReference(s, DateType, nullable = true)()
+
       /** Creates a new AttributeReference of type timestamp */
       def timestamp = AttributeReference(s, TimestampType, nullable = true)()
 

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -31,8 +31,8 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
 
   override def nullable = (child.dataType, dataType) match {
     case (StringType, _: NumericType) => true
-    case (StringType, TimestampType)  => true
     case (StringType, DateType)       => true
+    case (StringType, TimestampType)  => true
     case (_: NumericType, DateType)   => true
     case (BooleanType, DateType)      => true
     case (DateType, _: NumericType)   => true
@@ -333,8 +333,8 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
     case dt if dt == child.dataType => identity[Any]
     case StringType    => castToString
     case BinaryType    => castToBinary
-    case DateType      => castToDate
     case decimal: DecimalType => castToDecimal(decimal)
+    case DateType      => castToDate
     case TimestampType => castToTimestamp
     case BooleanType   => castToBoolean
     case ByteType      => castToByte

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
+import java.sql.{Date, Timestamp}
 
 /**
  * A [[Projection]] that is calculated by calling the `eval` of each of the specified expressions.
@@ -139,6 +140,12 @@ class JoinedRow extends Row {
   def getString(i: Int): String =
     if (i < row1.size) row1.getString(i) else row2.getString(i - row1.size)
 
+  def getDate(i: Int): Date = // ordinals below row1.size come from row1, the rest from row2
+    if (i >= row1.size) row2.getDate(i - row1.size) else row1.getDate(i)
+
+  def getTimestamp(i: Int): Timestamp = // ordinals past row1 are re-based before asking row2
+    if (i >= row1.size) row2.getTimestamp(i - row1.size) else row1.getTimestamp(i)
+
   override def getAs[T](i: Int): T =
     if (i < row1.size) row1.getAs[T](i) else row2.getAs[T](i - row1.size)
 
@@ -231,6 +238,13 @@ class JoinedRow2 extends Row {
   def getString(i: Int): String =
     if (i < row1.size) row1.getString(i) else row2.getString(i - row1.size)
 
+
+  def getDate(i: Int): Date = // ordinals below row1.size come from row1, the rest from row2
+    if (i >= row1.size) row2.getDate(i - row1.size) else row1.getDate(i)
+
+  def getTimestamp(i: Int): Timestamp = // ordinals past row1 are re-based before asking row2
+    if (i >= row1.size) row2.getTimestamp(i - row1.size) else row1.getTimestamp(i)
+
   override def getAs[T](i: Int): T =
     if (i < row1.size) row1.getAs[T](i) else row2.getAs[T](i - row1.size)
 
@@ -317,6 +331,13 @@ class JoinedRow3 extends Row {
   def getString(i: Int): String =
     if (i < row1.size) row1.getString(i) else row2.getString(i - row1.size)
 
+
+  def getDate(i: Int): Date = // ordinals below row1.size come from row1, the rest from row2
+    if (i >= row1.size) row2.getDate(i - row1.size) else row1.getDate(i)
+
+  def getTimestamp(i: Int): Timestamp = // ordinals past row1 are re-based before asking row2
+    if (i >= row1.size) row2.getTimestamp(i - row1.size) else row1.getTimestamp(i)
+
   override def getAs[T](i: Int): T =
     if (i < row1.size) row1.getAs[T](i) else row2.getAs[T](i - row1.size)
 
@@ -403,6 +424,13 @@ class JoinedRow4 extends Row {
   def getString(i: Int): String =
     if (i < row1.size) row1.getString(i) else row2.getString(i - row1.size)
 
+
+  def getDate(i: Int): Date = // ordinals below row1.size come from row1, the rest from row2
+    if (i >= row1.size) row2.getDate(i - row1.size) else row1.getDate(i)
+
+  def getTimestamp(i: Int): Timestamp = // ordinals past row1 are re-based before asking row2
+    if (i >= row1.size) row2.getTimestamp(i - row1.size) else row1.getTimestamp(i)
+
   override def getAs[T](i: Int): T =
     if (i < row1.size) row1.getAs[T](i) else row2.getAs[T](i - row1.size)
 
@@ -489,6 +517,13 @@ class JoinedRow5 extends Row {
   def getString(i: Int): String =
     if (i < row1.size) row1.getString(i) else row2.getString(i - row1.size)
 
+
+  def getDate(i: Int): Date = // ordinals below row1.size come from row1, the rest from row2
+    if (i >= row1.size) row2.getDate(i - row1.size) else row1.getDate(i)
+
+  def getTimestamp(i: Int): Timestamp = // ordinals past row1 are re-based before asking row2
+    if (i >= row1.size) row2.getTimestamp(i - row1.size) else row1.getTimestamp(i)
+
   override def getAs[T](i: Int): T =
     if (i < row1.size) row1.getAs[T](i) else row2.getAs[T](i - row1.size)
 

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala
@@ -17,7 +17,9 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import org.apache.spark.sql.catalyst.types.NativeType
+import org.apache.spark.sql.catalyst.types._
+import java.sql.{Date, Timestamp}
+import java.math.BigDecimal
 
 object Row {
   /**
@@ -42,6 +44,31 @@ object Row {
    * This method can be used to construct a [[Row]] from a [[Seq]] of values.
    */
   def fromSeq(values: Seq[Any]): Row = new GenericRow(values.toArray)
+
+  /**
+   * Builds a [[Row]] by parsing each string as the data type of the [[StructType]] field at the
+   * same position; pairing stops at the shorter of the two sequences. Only the primitive types
+   * matched below are supported: any other type fails with an explicit error message.
+   */
+  def fromStringsBySchema(strings: Seq[String], schema: StructType): Row = {
+    val values = schema.fields.zip(strings).map { case (field, str) =>
+      field.dataType match {
+        case IntegerType    => str.toInt
+        case LongType       => str.toLong
+        case DoubleType     => str.toDouble
+        case FloatType      => str.toFloat
+        case ByteType       => str.toByte
+        case ShortType      => str.toShort
+        case StringType     => str
+        case BooleanType    => str != "" // NOTE(review): "false" is also true -- confirm
+        case DateType       => Date.valueOf(str)
+        case TimestampType  => Timestamp.valueOf(str)
+        case DecimalType()  => new BigDecimal(str)
+        case other          => sys.error(s"Cannot convert a string to unsupported type $other")
+      }
+    }
+    new GenericRow(values.toArray)
+  }
 }
 
 /**
@@ -64,6 +91,8 @@ trait Row extends Seq[Any] with Serializable {
   def getShort(i: Int): Short
   def getByte(i: Int): Byte
   def getString(i: Int): String
+  def getDate(i: Int): Date // value at ordinal i, typed as java.sql.Date
+  def getTimestamp(i: Int): Timestamp // value at ordinal i, typed as java.sql.Timestamp
   def getAs[T](i: Int): T = apply(i).asInstanceOf[T]
 
   override def toString() =
@@ -99,6 +128,8 @@ trait MutableRow extends Row {
   def setByte(ordinal: Int, value: Byte)
   def setFloat(ordinal: Int, value: Float)
   def setString(ordinal: Int, value: String)
+  def setDate(ordinal: Int, value: Date) // stores a java.sql.Date at the given ordinal
+  def setTimestamp(ordinal: Int, value: Timestamp) // stores a java.sql.Timestamp at the ordinal
 }
 
 /**
@@ -119,6 +150,9 @@ object EmptyRow extends Row {
   def getShort(i: Int): Short = throw new UnsupportedOperationException
   def getByte(i: Int): Byte = throw new UnsupportedOperationException
   def getString(i: Int): String = throw new UnsupportedOperationException
+  // Date and timestamp access is unsupported here, like the other typed getters of EmptyRow.
+  def getDate(i: Int): Date = throw new UnsupportedOperationException()
+  def getTimestamp(i: Int): Timestamp = throw new UnsupportedOperationException()
   override def getAs[T](i: Int): T = throw new UnsupportedOperationException
 
   def copy() = this
@@ -183,6 +217,16 @@ class GenericRow(protected[sql] val values: Array[Any]) extends Row {
     values(i).asInstanceOf[String]
   }
 
+  def getDate(i: Int): Date = { // a null slot means the caller skipped the null check
+    if (values(i) == null) sys.error("Failed to check null bit for primitive Date value.")
+    values(i).asInstanceOf[Date]
+  }
+
+  def getTimestamp(i: Int): Timestamp = { // a null slot means the caller skipped the null check
+    if (values(i) == null) sys.error("Failed to check null bit for primitive Timestamp value.")
+    values(i).asInstanceOf[Timestamp]
+  }
+
   // Custom hashCode function that matches the efficient code generated version.
   override def hashCode(): Int = {
     var result: Int = 37
@@ -226,6 +270,8 @@ class GenericMutableRow(size: Int) extends GenericRow(size) with MutableRow {
   override def setInt(ordinal: Int, value: Int): Unit = { values(ordinal) = value }
   override def setLong(ordinal: Int, value: Long): Unit = { values(ordinal) = value }
   override def setString(ordinal: Int, value: String): Unit = { values(ordinal) = value }
+  override def setDate(ordinal: Int, value: Date): Unit = { values(ordinal) = value }
+  override def setTimestamp(ordinal: Int, value: Timestamp): Unit = { values(ordinal) = value }
 
   override def setNullAt(i: Int): Unit = { values(i) = null }
 

diff --git a/...atalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificMutableRow.scala b/...atalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificMutableRow.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import org.apache.spark.sql.catalyst.types._
+import java.sql.{Date, Timestamp}
 
 /**
  * A parent class for mutable container objects that are reused when the values are changed,
@@ -169,6 +170,35 @@ final class MutableByte extends MutableValue {
     newCopy.asInstanceOf[this.type]
   }
 }
+final class MutableDate extends MutableValue { // reusable nullable holder for a java.sql.Date
+  var value: Date = new Date(0) // placeholder (epoch) until the first update
+  def boxed = if (isNull) null else value
+  def update(v: Any) = {
+    isNull = false
+    value = v.asInstanceOf[Date]
+  }
+  def copy() = {
+    val duplicate = new MutableDate
+    duplicate.isNull = isNull
+    duplicate.value = value // the same Date instance is shared with the copy
+    duplicate.asInstanceOf[this.type]
+  }
+}
+
+final class MutableTimestamp extends MutableValue { // reusable nullable java.sql.Timestamp holder
+  var value: Timestamp = new Timestamp(0) // placeholder (epoch) until the first update
+  def boxed = if (isNull) null else value
+  def update(v: Any) = {
+    isNull = false
+    value = v.asInstanceOf[Timestamp]
+  }
+  def copy() = {
+    val duplicate = new MutableTimestamp
+    duplicate.isNull = isNull
+    duplicate.value = value // the same Timestamp instance is shared with the copy
+    duplicate.asInstanceOf[this.type]
+  }
+}
 
 final class MutableAny extends MutableValue {
   var value: Any = _
@@ -307,6 +337,25 @@ final class SpecificMutableRow(val values: Array[MutableValue]) extends MutableR
     values(i).asInstanceOf[MutableByte].value
   }
 
+  override def setDate(ordinal: Int, value: Date): Unit = {
+    val slot = values(ordinal).asInstanceOf[MutableDate]
+    slot.isNull = false // writing a value clears the null flag
+    slot.value = value
+  }
+
+  // Reads the raw stored value; the holder's isNull flag is not consulted here.
+  override def getDate(i: Int): Date =
+    values(i).asInstanceOf[MutableDate].value
+  override def setTimestamp(ordinal: Int, value: Timestamp): Unit = {
+    val slot = values(ordinal).asInstanceOf[MutableTimestamp]
+    slot.isNull = false // writing a value clears the null flag
+    slot.value = value
+  }
+
+  // Reads the raw stored value; the holder's isNull flag is not consulted here.
+  override def getTimestamp(i: Int): Timestamp =
+    values(i).asInstanceOf[MutableTimestamp].value
+
   override def getAs[T](i: Int): T = {
     values(i).boxed.asInstanceOf[T]
   }

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -34,8 +34,8 @@ object Literal {
     case b: Boolean => Literal(b, BooleanType)
     case d: BigDecimal => Literal(Decimal(d), DecimalType.Unlimited)
     case d: Decimal => Literal(d, DecimalType.Unlimited)
-    case t: Timestamp => Literal(t, TimestampType)
     case d: Date => Literal(d, DateType)
+    case t: Timestamp => Literal(t, TimestampType)
     case a: Array[Byte] => Literal(a, BinaryType)
     case null => Literal(null, NullType)
   }

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala
@@ -92,9 +92,9 @@ object DataType {
       | "LongType" ^^^ LongType
       | "BinaryType" ^^^ BinaryType
       | "BooleanType" ^^^ BooleanType
-      | "DateType" ^^^ DateType
       | "DecimalType()" ^^^ DecimalType.Unlimited
       | fixedDecimalType
+      | "DateType" ^^^ DateType
       | "TimestampType" ^^^ TimestampType
       )
 
@@ -187,7 +187,8 @@ case object NullType extends DataType
 
 object NativeType {
   val all = Seq(
-    IntegerType, BooleanType, LongType, DoubleType, FloatType, ShortType, ByteType, StringType)
+    IntegerType, BooleanType, LongType, DoubleType, FloatType, ShortType,
+    ByteType, StringType, DateType, TimestampType)
 
   def unapply(dt: DataType): Boolean = all.contains(dt)