From 1abfd490d072850ae40c46c1a0f1791a8aaa5698 Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Sun, 13 Oct 2024 19:01:42 +0800
Subject: [PATCH] [SPARK-49943][PS] Remove `timestamp_ntz_to_long` from
 `PythonSQLUtils`

### What changes were proposed in this pull request?
Remove `timestamp_ntz_to_long` from `PythonSQLUtils`

### Why are the changes needed?
we no longer need to add internal functions in `PythonSQLUtils` for PySpark Classic

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
existing tests

### Was this patch authored or co-authored using generative AI tooling?
no

Closes #48437 from zhengruifeng/fun_cat_nzt.

Authored-by: Ruifeng Zheng
Signed-off-by: Ruifeng Zheng
---
 python/pyspark/pandas/data_type_ops/datetime_ops.py       | 6 ++----
 python/pyspark/pandas/spark/functions.py                  | 4 ++++
 .../org/apache/spark/sql/api/python/PythonSQLUtils.scala  | 3 ---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/python/pyspark/pandas/data_type_ops/datetime_ops.py b/python/pyspark/pandas/data_type_ops/datetime_ops.py
index 9b4cc72fa2e45..dc2f68232e730 100644
--- a/python/pyspark/pandas/data_type_ops/datetime_ops.py
+++ b/python/pyspark/pandas/data_type_ops/datetime_ops.py
@@ -34,6 +34,7 @@
 )
 from pyspark.sql.utils import pyspark_column_op
 from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
+from pyspark.pandas.spark import functions as SF
 from pyspark.pandas.base import IndexOpsMixin
 from pyspark.pandas.data_type_ops.base import (
     DataTypeOps,
@@ -150,10 +151,7 @@ class DatetimeNTZOps(DatetimeOps):
     """
 
     def _cast_spark_column_timestamp_to_long(self, scol: Column) -> Column:
-        from pyspark import SparkContext
-
-        jvm = SparkContext._active_spark_context._jvm
-        return Column(jvm.PythonSQLUtils.castTimestampNTZToLong(scol._jc))
+        return SF.timestamp_ntz_to_long(scol)
 
     def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
         dtype, spark_type = pandas_on_spark_type(dtype)
diff --git a/python/pyspark/pandas/spark/functions.py b/python/pyspark/pandas/spark/functions.py
index 4d95466a98e12..bdd11559df3b6 100644
--- a/python/pyspark/pandas/spark/functions.py
+++ b/python/pyspark/pandas/spark/functions.py
@@ -39,6 +39,10 @@ def _invoke_internal_function_over_columns(name: str, *cols: "ColumnOrName") ->
         return Column(sc._jvm.PythonSQLUtils.internalFn(name, _to_seq(sc, cols, _to_java_column)))
 
 
+def timestamp_ntz_to_long(col: Column) -> Column:
+    return _invoke_internal_function_over_columns("timestamp_ntz_to_long", col)
+
+
 def product(col: Column, dropna: bool) -> Column:
     return _invoke_internal_function_over_columns("pandas_product", col, F.lit(dropna))
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
index 3504f6e76f79d..08395ef4c347c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
@@ -143,9 +143,6 @@ private[sql] object PythonSQLUtils extends Logging {
     }
   }
 
-  def castTimestampNTZToLong(c: Column): Column =
-    Column.internalFn("timestamp_ntz_to_long", c)
-
   def unresolvedNamedLambdaVariable(name: String): Column =
     Column(internal.UnresolvedNamedLambdaVariable.apply(name))
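
Illustrative note (not part of the patch): the sketch below shows how the relocated helper could be exercised on Spark Classic after this change. It assumes an active SparkSession, a Spark version providing `pyspark.sql.functions.to_timestamp_ntz` (3.5+), and that `pyspark.pandas.spark.functions` remains an internal pandas-on-Spark module intended for demonstration only.

```python
# Minimal sketch, assuming an active Spark Classic session (not Spark Connect).
from pyspark.sql import SparkSession, functions as F
from pyspark.pandas.spark import functions as SF

spark = SparkSession.builder.getOrCreate()

# Build a single-row DataFrame with a TIMESTAMP_NTZ column.
df = spark.range(1).select(
    F.to_timestamp_ntz(F.lit("2024-10-13 19:01:42")).alias("ts")
)

# timestamp_ntz_to_long now routes through the generic PythonSQLUtils.internalFn
# dispatch instead of the removed castTimestampNTZToLong wrapper; the result is
# the column's internal long representation (microseconds since the epoch).
df.select(SF.timestamp_ntz_to_long(F.col("ts")).alias("micros")).show()
```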