[SPARK-44280][SQL] Add convertJavaTimestampToTimestamp in JDBCDialect API #41843

Status: Closed. Wants to merge 30 commits.

Changes from 5 commits.

Commits:
bdb2be3  add support for fromJavaTimeStamptoTimeStamp (mingkangli-db, Jul 4, 2023)
2ed6130  retrigger workflow (mingkangli-db, Jul 4, 2023)
6f9c93a  remove requires in fromJavaTimestampToTimestamp (mingkangli-db, Jul 6, 2023)
adf23ce  Update PostgresDialect.scala (mingkangli-db, Jul 6, 2023)
9445d37  Update PostgresDialect.scala (mingkangli-db, Jul 6, 2023)
2e05df7  further clarify why clamping is needed in PostgresDialect.scala (mingkangli-db, Jul 7, 2023)
f2f77e8  fix style (mingkangli-db, Jul 7, 2023)
a2da4ff  Update PostgresDialect.scala (mingkangli-db, Jul 9, 2023)
304de8f  Update PostgresDialect.scala (mingkangli-db, Jul 10, 2023)
f29b4de  fix formatting (mingkangli-db, Jul 18, 2023)
d287b29  Merge branch 'master' into SPARK-44280 (mingkangli-db, Jul 18, 2023)
6041a43  format once more (mingkangli-db, Jul 18, 2023)
9471d6a  Update PostgresIntegrationSuite.scala (mingkangli-db, Jul 18, 2023)
40597af  Update PostgresIntegrationSuite.scala (mingkangli-db, Jul 19, 2023)
4b07018  Merge branch 'apache:master' into SPARK-44280 (mingkangli-db, Jul 19, 2023)
2916930  add fix attempt (mingkangli-db, Jul 20, 2023)
700fc72  refactor (mingkangli-db, Jul 20, 2023)
34ecd58  explicit import UTC (mingkangli-db, Jul 20, 2023)
c7c84d7  remove duplicate import (mingkangli-db, Jul 20, 2023)
bf88f33  import ZoneOffset (mingkangli-db, Jul 20, 2023)
a4ea4b5  Update PostgresDialect.scala (mingkangli-db, Jul 21, 2023)
ffb9ea9  Update PostgresDialect.scala (mingkangli-db, Jul 21, 2023)
18bb802  Merge branch 'apache:master' into SPARK-44280 (mingkangli-db, Jul 21, 2023)
ec204c4  Empty-Commit (mingkangli-db, Jul 21, 2023)
ace6182  Update PostgresIntegrationSuite.scala (mingkangli-db, Jul 21, 2023)
d1f5578  Update PostgresIntegrationSuite.scala (mingkangli-db, Jul 21, 2023)
d752feb  Update PostgresIntegrationSuite.scala (mingkangli-db, Jul 21, 2023)
fa52e05  address comments (mingkangli-db, Jul 25, 2023)
e3fd600  Merge branch 'SPARK-44280' of https://github.com/mingkangli-db/spark … (mingkangli-db, Jul 25, 2023)
30db0bb  Merge branch 'master' into SPARK-44280 (mingkangli-db, Jul 31, 2023)
PostgresIntegrationSuite.scala
@@ -148,6 +148,11 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite {
|('2013-04-05 18:01:02.123'),
|('2013-04-05 18:01:02.123456')""".stripMargin).executeUpdate()

conn.prepareStatement(s"CREATE TABLE infinity_timestamp" +
"(id SERIAL PRIMARY KEY, timestamp_column TIMESTAMP").executeUpdate();
conn.prepareStatement(s"INSERT INTO infinity_timestamp (timestamp_column)" +
" VALUES ('infinity'), ('-infinity');").executeUpdate()

conn.prepareStatement("CREATE DOMAIN not_null_text AS TEXT DEFAULT ''").executeUpdate()
conn.prepareStatement("create table custom_type(type_array not_null_text[]," +
"type not_null_text)").executeUpdate()
@@ -432,4 +437,15 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite {
assert(row(0).getSeq[String](0) == Seq("1", "fds", "fdsa"))
assert(row(0).getString(1) == "fdasfasdf")
}

test("SPARK-44280: infinity timestamp test") {
val df = sqlContext.read.jdbc(jdbcUrl, "infinity_timestamp", new Properties)
Member:
Can we also use the write API to play a roundtrip here?

mingkangli-db (Contributor, Author):
The goal here is that, upon reading one of the infinity timestamps from PostgreSQL, it is cast to a reasonable value in Spark SQL instead of throwing an overflow error. In the other direction, since there are no built-in "infinity" values in Spark SQL, we can't write an infinity timestamp value back to PostgreSQL, so I don't think a roundtrip test is possible here.

val row = df.collect()
assert(row.length == 2)

val infinity = row(0).getAs[Timestamp]("timestamp_column")
val negativeInfinity = row(1).getAs[Timestamp]("timestamp_column")
assert(infinity.getTime == Long.MaxValue)
assert(negativeInfinity.getTime == Long.MinValue)
}
}
JdbcUtils.scala
@@ -484,7 +484,7 @@ object JdbcUtils extends Logging with SQLConfHelper {
(rs: ResultSet, row: InternalRow, pos: Int) =>
val t = rs.getTimestamp(pos + 1)
if (t != null) {
-        row.setLong(pos, DateTimeUtils.fromJavaTimestamp(t))
+        row.setLong(pos, dialect.convertJavaTimestampToTimestamp(t))
} else {
row.update(pos, null)
}
JdbcDialect.scala
@@ -105,10 +105,29 @@ abstract class JdbcDialect extends Serializable with Logging {
*/
def getJDBCType(dt: DataType): Option[JdbcType] = None

  /**
   * Converts an instance of `java.sql.Timestamp` to the number of microseconds since
   * 1970-01-01T00:00:00.000000Z. It extracts date-time fields from the input, builds
   * a local timestamp in the Proleptic Gregorian calendar from those fields, and binds
   * the timestamp to the system time zone. The resulting instant is converted to
   * microseconds since the epoch.
   * JDBC dialects can override this function to provide implementations that suit their
   * JDBC drivers (e.g. if there are special "infinity" values that would overflow).
   *
   * @param t represents a specific instant in time based on the hybrid calendar
   *          which combines Julian and Gregorian calendars.
   * @return The number of micros since epoch from `java.sql.Timestamp`.
   * @throws IllegalArgumentException if t is null
   */
  def convertJavaTimestampToTimestamp(t: Timestamp): Long = {
    DateTimeUtils.fromJavaTimestamp(t)
  }

/**
* Convert java.sql.Timestamp to a LocalDateTime representing the same wall-clock time as the
* value stored in a remote database.
-   * JDBC dialects should override this function to provide implementations that suite their
+   * JDBC dialects should override this function to provide implementations that suit their
* JDBC drivers.
* @param t Timestamp returned from JDBC driver getTimestamp method.
* @return A LocalDateTime representing the same wall clock time as the timestamp in database.
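As an aside, a minimal sketch of how a third-party dialect could override the new hook (MyDialect, its URL prefix, and its sentinel value are all hypothetical; the real override added by this PR is the PostgresDialect change below):

    import java.sql.Timestamp
    import org.apache.spark.sql.catalyst.util.DateTimeUtils
    import org.apache.spark.sql.jdbc.JdbcDialect

    // Hypothetical dialect: clamps one made-up driver sentinel and otherwise
    // delegates to the default conversion.
    private object MyDialect extends JdbcDialect {
      override def canHandle(url: String): Boolean = url.startsWith("jdbc:mydb")

      override def convertJavaTimestampToTimestamp(t: Timestamp): Long = {
        val MY_DRIVER_INFINITY_MILLIS = 9000000000000000000L // assumed sentinel
        if (t.getTime == MY_DRIVER_INFINITY_MILLIS) Long.MaxValue
        else DateTimeUtils.fromJavaTimestamp(t)
      }
    }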
PostgresDialect.scala
@@ -25,6 +25,7 @@ import java.util.Locale
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.SQLConfHelper
import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NonEmptyNamespaceException, NoSuchIndexException}
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.connector.expressions.NamedReference
import org.apache.spark.sql.errors.QueryCompilationErrors
@@ -281,4 +282,28 @@ private object PostgresDialect extends JdbcDialect with SQLConfHelper {
}
s"ALTER TABLE ${getFullyQualifiedQuotedTableName(oldTable)} RENAME TO ${newTable.name()}"
}

  /**
   * PostgreSQL has four special "infinity values" that we need to clamp to avoid overflow.
   * If the value is not one of these infinity values, fall back to the default behavior.
   */
override def convertJavaTimestampToTimestamp(t: Timestamp): Long = {
// Variable names come from PostgreSQL "constant field docs":
// https://jdbc.postgresql.org/documentation/publicapi/index.html?constant-values.html
val POSTGRESQL_DATE_NEGATIVE_INFINITY = -9223372036832400000L
val POSTGRESQL_DATE_NEGATIVE_SMALLER_INFINITY = -185543533774800000L
val POSTGRESQL_DATE_POSITIVE_INFINITY = 9223372036825200000L
val POSTGRESQL_DATE_POSITIVE_SMALLER_INFINITY = 185543533774800000L

val time = t.getTime
Contributor:
So the range of java Timestamp is larger than Spark SQL timestamp?

Contributor:
What is the Spark SQL timestamp range?

Contributor:
From Long.MinValue microseconds before UTC epoch to Long.MaxValue microseconds after UTC epoch.

Contributor:
Then it is the same range as java Timestamp.

Contributor:
Then how does the overflow happen? Because the calendar is different?

mingkangli-db (Contributor, Author), Jul 7, 2023:
The problem is that java.sql timestamps are measured with millisecond accuracy (from Long.MinValue milliseconds to Long.MaxValue milliseconds, see here), while Spark timestamps are measured with microsecond accuracy. So we get an overflow exception when Math.multiplyExact is called to multiply by 1000 in Java.

The stack trace would look something like this:
    at java.lang.Math.multiplyExact(Math.java:892)
    at org.apache.spark.sql.catalyst.util.DateTimeUtils$.millisToMicros(DateTimeUtils.scala:xxx)
    at org.apache.spark.sql.catalyst.util.DateTimeUtils$.fromJavaTimestampNoRebase(DateTimeUtils.scala:xxx)
    at org.apache.spark.sql.catalyst.util.DateTimeUtils$.fromJavaTimestamp(DateTimeUtils.scala:xxx)
    at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$makeGetter$15(JdbcUtils.scala:xxx)
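A minimal sketch (illustrative, not from the PR) reproducing that overflow; the constant is PostgreSQL's 'infinity' sentinel in epoch milliseconds, quoted later in this diff:

    // Converting PostgreSQL's 'infinity' sentinel from millis to micros
    // overflows a Long, which is exactly what multiplyExact guards against.
    object OverflowDemo extends App {
      val posInfinityMillis = 9223372036825200000L // 'infinity' as epoch millis
      try Math.multiplyExact(posInfinityMillis, 1000L)
      catch { case e: ArithmeticException => println(s"overflow: ${e.getMessage}") }
    }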

Contributor:
To simplify, Long.MaxValue should be the min value in microseconds to not overflow, is that right @mingkangli-db?

mingkangli-db (Contributor, Author), Jul 7, 2023:
Yes, if you mean that Long.MaxValue itself is the maximum value in microseconds that can be stored without causing overflow. I added some comments in PostgresDialect.scala; hopefully this makes it clearer.
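For concreteness, a quick boundary check (illustrative, not from the PR):

    // The largest java.sql.Timestamp millisecond value that still converts
    // to microseconds without overflow is Long.MaxValue / 1000.
    val maxSafeMillis = Long.MaxValue / 1000        // 9223372036854775 ms
    Math.multiplyExact(maxSafeMillis, 1000L)        // ok: 9223372036854775000
    // Math.multiplyExact(maxSafeMillis + 1, 1000L) // throws ArithmeticException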


if (time == POSTGRESQL_DATE_POSITIVE_INFINITY ||
time == POSTGRESQL_DATE_POSITIVE_SMALLER_INFINITY) {
Long.MaxValue
Contributor:
If we query an infinite timestamp column in pgsql, what does pgsql display?

mingkangli-db (Contributor, Author):
It will be displayed as "+infinity" or "-infinity". See here: "The values infinity and -infinity are specially represented inside the system and will be displayed unchanged."
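A small illustrative check of that rendering (assumes a live PostgreSQL connection named conn; not part of the PR):

    // Casting to text shows exactly how PostgreSQL renders the special values.
    val rs = conn.prepareStatement(
      "SELECT 'infinity'::timestamp::text, '-infinity'::timestamp::text").executeQuery()
    rs.next()
    println(rs.getString(1)) // infinity
    println(rs.getString(2)) // -infinity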

} else if (time == POSTGRESQL_DATE_NEGATIVE_INFINITY ||
time == POSTGRESQL_DATE_NEGATIVE_SMALLER_INFINITY) {
Long.MinValue
} else {
DateTimeUtils.fromJavaTimestamp(t)
}
}
}