[SPARK-46419][PS][TESTS] Reorganize DatetimeIndexTests: Factor out 3 slow tests

### What changes were proposed in this pull request?
Reorganize `DatetimeIndexTests`: Factor out 3 slow tests

### Why are the changes needed?
Its parity test is slow, sometimes taking more than 10 minutes, so this PR moves 3 slow tests out of it.
(The remaining slow tests will be moved in a follow-up to keep this change small.)

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
test-only
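
For illustration only, a minimal sketch of running the factored-out modules locally with the standard `unittest` loader (this invocation is an assumption, not part of the patch; CI schedules these modules through `dev/sparktestsupport/modules.py`):

```python
import unittest

# Load only the DatetimeIndex tests split out in this PR.
# Assumes a PySpark development environment is importable
# (e.g. SPARK_HOME/python on PYTHONPATH).
suite = unittest.defaultTestLoader.loadTestsFromNames(
    [
        "pyspark.pandas.tests.indexes.test_datetime_at",
        "pyspark.pandas.tests.indexes.test_datetime_between",
        "pyspark.pandas.tests.indexes.test_datetime_ceil",
    ]
)
unittest.TextTestRunner(verbosity=2).run(suite)
```

Each new file also defines an `if __name__ == "__main__"` block, so it can be run directly as a script as well.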

### Was this patch authored or co-authored using generative AI tooling?
no

Closes apache#44369 from zhengruifeng/ps_test_idx_dt_I.

Authored-by: Ruifeng Zheng <ruifengz@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
zhengruifeng authored and HyukjinKwon committed Dec 15, 2023
1 parent a1b0da2 commit 3d447c2
Showing 11 changed files with 343 additions and 107 deletions.
8 changes: 7 additions & 1 deletion dev/sparktestsupport/modules.py
@@ -797,6 +797,9 @@ def __hash__(self):
"pyspark.pandas.tests.indexes.test_base",
"pyspark.pandas.tests.indexes.test_base_slow",
"pyspark.pandas.tests.indexes.test_datetime",
"pyspark.pandas.tests.indexes.test_datetime_at",
"pyspark.pandas.tests.indexes.test_datetime_between",
"pyspark.pandas.tests.indexes.test_datetime_ceil",
"pyspark.pandas.tests.indexes.test_datetime_property",
"pyspark.pandas.tests.indexes.test_align",
"pyspark.pandas.tests.indexes.test_indexing",
@@ -1135,7 +1138,6 @@ def __hash__(self):
"pyspark.pandas.tests.connect.computation.test_parity_pivot",
"pyspark.pandas.tests.connect.computation.test_parity_stats",
"pyspark.pandas.tests.connect.indexes.test_parity_base_slow",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_property",
"pyspark.pandas.tests.connect.frame.test_parity_interpolate",
"pyspark.pandas.tests.connect.frame.test_parity_interpolate_error",
"pyspark.pandas.tests.connect.series.test_parity_interpolate",
@@ -1186,6 +1188,10 @@ def __hash__(self):
python_test_goals=[
# pandas-on-Spark unittests
"pyspark.pandas.tests.connect.indexes.test_parity_datetime",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_at",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_between",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_ceil",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_property",
"pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames",
"pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames_groupby",
],
python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py
@@ -18,11 +18,13 @@

from pyspark.pandas.tests.indexes.test_datetime import DatetimeIndexTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
from pyspark.testing.pandasutils import PandasOnSparkTestUtils


class DatetimeIndexParityTests(
DatetimeIndexTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
DatetimeIndexTestsMixin,
PandasOnSparkTestUtils,
ReusedConnectTestCase,
):
pass

41 changes: 41 additions & 0 deletions python/pyspark/pandas/tests/connect/indexes/test_parity_datetime_at.py
@@ -0,0 +1,41 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import unittest

from pyspark.pandas.tests.indexes.test_datetime_at import DatetimeIndexAtMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils


class DatetimeIndexAtParityTests(
DatetimeIndexAtMixin,
PandasOnSparkTestUtils,
ReusedConnectTestCase,
):
pass


if __name__ == "__main__":
from pyspark.pandas.tests.connect.indexes.test_parity_datetime_at import * # noqa: F401

try:
import xmlrunner # type: ignore[import]

testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
except ImportError:
testRunner = None
unittest.main(testRunner=testRunner, verbosity=2)
41 changes: 41 additions & 0 deletions python/pyspark/pandas/tests/connect/indexes/test_parity_datetime_between.py
@@ -0,0 +1,41 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import unittest

from pyspark.pandas.tests.indexes.test_datetime_between import DatetimeIndexBetweenMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils


class DatetimeIndexBetweenParityTests(
DatetimeIndexBetweenMixin,
PandasOnSparkTestUtils,
ReusedConnectTestCase,
):
pass


if __name__ == "__main__":
from pyspark.pandas.tests.connect.indexes.test_parity_datetime_between import * # noqa: F401

try:
import xmlrunner # type: ignore[import]

testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
except ImportError:
testRunner = None
unittest.main(testRunner=testRunner, verbosity=2)
41 changes: 41 additions & 0 deletions python/pyspark/pandas/tests/connect/indexes/test_parity_datetime_ceil.py
@@ -0,0 +1,41 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import unittest

from pyspark.pandas.tests.indexes.test_datetime_ceil import DatetimeIndexCeilMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils


class DatetimeIndexCeilParityTests(
DatetimeIndexCeilMixin,
PandasOnSparkTestUtils,
ReusedConnectTestCase,
):
pass


if __name__ == "__main__":
from pyspark.pandas.tests.connect.indexes.test_parity_datetime_ceil import * # noqa: F401

try:
import xmlrunner # type: ignore[import]

testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
except ImportError:
testRunner = None
unittest.main(testRunner=testRunner, verbosity=2)
python/pyspark/pandas/tests/connect/indexes/test_parity_datetime_property.py
@@ -18,11 +18,13 @@

from pyspark.pandas.tests.indexes.test_datetime_property import DatetimeIndexPropertyTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
from pyspark.testing.pandasutils import PandasOnSparkTestUtils


class DatetimeIndexParityTests(
DatetimeIndexPropertyTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
DatetimeIndexPropertyTestsMixin,
PandasOnSparkTestUtils,
ReusedConnectTestCase,
):
pass

67 changes: 3 additions & 64 deletions python/pyspark/pandas/tests/indexes/test_datetime.py
@@ -24,7 +24,7 @@
from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils


class DatetimeIndexTestsMixin:
class DatetimeIndexTestingFuncMixin:
@property
def fixed_freqs(self):
return [
@@ -63,6 +63,8 @@ def _disallow_nanoseconds(self, f):
self.assertRaises(ValueError, lambda: f(freq="ns"))
self.assertRaises(ValueError, lambda: f(freq="N"))


class DatetimeIndexTestsMixin(DatetimeIndexTestingFuncMixin):
def test_datetime_index(self):
with self.assertRaisesRegex(TypeError, "Index.name must be a hashable type"):
ps.DatetimeIndex(["2004-01-01", "2002-12-31", "2000-04-01"], name=[(1, 2)])
@@ -71,13 +73,6 @@ def test_datetime_index(self):
):
ps.DatetimeIndex(["2004-01-01", "2002-12-31", "2000-04-01"]).all()

def test_ceil(self):
for psidx, pidx in self.idx_pairs:
for freq in self.fixed_freqs:
self.assert_eq(psidx.ceil(freq), pidx.ceil(freq))

self._disallow_nanoseconds(self.psidxs[0].ceil)

def test_floor(self):
for psidx, pidx in self.idx_pairs:
for freq in self.fixed_freqs:
@@ -110,62 +105,6 @@ def test_strftime(self):
psidx.strftime(date_format="%B %d, %Y"), pidx.strftime(date_format="%B %d, %Y")
)

def test_indexer_between_time(self):
for psidx, pidx in self.idx_pairs:
self.assert_eq(
psidx.indexer_between_time("00:00:00", "00:01:00").sort_values(),
pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00")),
)

self.assert_eq(
psidx.indexer_between_time(
datetime.time(0, 0, 0), datetime.time(0, 1, 0)
).sort_values(),
pd.Index(pidx.indexer_between_time(datetime.time(0, 0, 0), datetime.time(0, 1, 0))),
)

self.assert_eq(
psidx.indexer_between_time("00:00:00", "00:01:00", True, False).sort_values(),
pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", True, False)),
)

self.assert_eq(
psidx.indexer_between_time("00:00:00", "00:01:00", False, True).sort_values(),
pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", False, True)),
)

self.assert_eq(
psidx.indexer_between_time("00:00:00", "00:01:00", False, False).sort_values(),
pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", False, False)),
)

self.assert_eq(
psidx.indexer_between_time("00:00:00", "00:01:00", True, True).sort_values(),
pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", True, True)),
)

def test_indexer_at_time(self):
for psidx, pidx in self.idx_pairs:
self.assert_eq(
psidx.indexer_at_time("00:00:00").sort_values(),
pd.Index(pidx.indexer_at_time("00:00:00")),
)

self.assert_eq(
psidx.indexer_at_time(datetime.time(0, 1, 0)).sort_values(),
pd.Index(pidx.indexer_at_time(datetime.time(0, 1, 0))),
)

self.assert_eq(
psidx.indexer_at_time("00:00:01").sort_values(),
pd.Index(pidx.indexer_at_time("00:00:01")),
)

self.assertRaises(
NotImplementedError,
lambda: ps.DatetimeIndex([0]).indexer_at_time("00:00:00", asof=True),
)

def test_arithmetic_op_exceptions(self):
for psidx, pidx in self.idx_pairs:
py_datetime = pidx.to_pydatetime()
68 changes: 68 additions & 0 deletions python/pyspark/pandas/tests/indexes/test_datetime_at.py
@@ -0,0 +1,68 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import datetime

import pandas as pd

import pyspark.pandas as ps
from pyspark.testing.pandasutils import PandasOnSparkTestCase
from pyspark.pandas.tests.indexes.test_datetime import DatetimeIndexTestingFuncMixin


class DatetimeIndexAtMixin(DatetimeIndexTestingFuncMixin):
def test_indexer_at_time(self):
for psidx, pidx in self.idx_pairs:
self.assert_eq(
psidx.indexer_at_time("00:00:00").sort_values(),
pd.Index(pidx.indexer_at_time("00:00:00")),
)

self.assert_eq(
psidx.indexer_at_time(datetime.time(0, 1, 0)).sort_values(),
pd.Index(pidx.indexer_at_time(datetime.time(0, 1, 0))),
)

self.assert_eq(
psidx.indexer_at_time("00:00:01").sort_values(),
pd.Index(pidx.indexer_at_time("00:00:01")),
)

self.assertRaises(
NotImplementedError,
lambda: ps.DatetimeIndex([0]).indexer_at_time("00:00:00", asof=True),
)


class DatetimeIndexAtTests(
DatetimeIndexAtMixin,
PandasOnSparkTestCase,
):
pass


if __name__ == "__main__":
import unittest
from pyspark.pandas.tests.indexes.test_datetime_at import * # noqa: F401

try:
import xmlrunner

testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
except ImportError:
testRunner = None
unittest.main(testRunner=testRunner, verbosity=2)