Skip to content

Commit

Permalink
Inferring date type
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxGekk committed Dec 2, 2018
1 parent a8d27d6 commit fa915fd
Showing 1 changed file with 18 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.catalyst.csv

import java.text.ParsePosition

import scala.util.control.Exception.allCatch

import org.apache.spark.rdd.RDD
Expand Down Expand Up @@ -98,6 +100,7 @@ class CSVInferSchema(options: CSVOptions) extends Serializable {
compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType)
case DoubleType => tryParseDouble(field)
case TimestampType => tryParseTimestamp(field)
case DateType => tryParseDate(field)
case BooleanType => tryParseBoolean(field)
case StringType => StringType
case other: DataType =>
Expand Down Expand Up @@ -159,6 +162,21 @@ class CSVInferSchema(options: CSVOptions) extends Serializable {
} else if ((allCatch opt DateTimeUtils.stringToTime(field)).isDefined) {
// Keep the DateTimeUtils.stringToTime fallback for backwards compatibility.
TimestampType
} else {
tryParseDate(field)
}
}

private def tryParseDate(field: String): DataType = {
val dateTry = allCatch opt {
val pos = new ParsePosition(0)
options.dateFormat.parse(field, pos)
if (pos.getErrorIndex != -1 || pos.getIndex != field.length) {
throw new IllegalArgumentException(s"${field} cannot be parsed as ${DateType.simpleString}")
}
}
if (dateTry.isDefined) {
DateType
} else {
tryParseBoolean(field)
}
Expand Down

0 comments on commit fa915fd

Please sign in to comment.