[SPARK-46419][PS][TESTS] Reorganize DatetimeIndexTests: Factor out 3 slow tests

### What changes were proposed in this pull request?
Reorganize `DatetimeIndexTests`: Factor out 3 slow tests

### Why are the changes needed?
Its parity test is slow, sometimes taking more than 10 minutes, so this PR moves 3 slow tests out of it.
(The remaining slow tests will be moved in a follow-up to keep this change small.)

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
test-only
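
For illustration only, a minimal sketch of running the factored-out modules locally with the standard `unittest` loader (this invocation is an assumption, not part of the patch; CI schedules these modules through `dev/sparktestsupport/modules.py`):

```python
import unittest

# Load only the DatetimeIndex tests split out in this PR.
# Assumes a PySpark development environment is importable
# (e.g. SPARK_HOME/python on PYTHONPATH).
suite = unittest.defaultTestLoader.loadTestsFromNames(
    [
        "pyspark.pandas.tests.indexes.test_datetime_at",
        "pyspark.pandas.tests.indexes.test_datetime_between",
        "pyspark.pandas.tests.indexes.test_datetime_ceil",
    ]
)
unittest.TextTestRunner(verbosity=2).run(suite)
```

Each new file also defines an `if __name__ == "__main__"` block, so it can be run directly as a script as well.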

### Was this patch authored or co-authored using generative AI tooling?
no

Closes apache#44369 from zhengruifeng/ps_test_idx_dt_I.

Authored-by: Ruifeng Zheng <ruifengz@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
zhengruifeng authored and HyukjinKwon committed Dec 15, 2023
1 parent a1b0da2 commit 3d447c2
Showing 11 changed files with 343 additions and 107 deletions.
8 changes: 7 additions & 1 deletion dev/sparktestsupport/modules.py
@@ -797,6 +797,9 @@ def __hash__(self):
"pyspark.pandas.tests.indexes.test_base",
"pyspark.pandas.tests.indexes.test_base_slow",
"pyspark.pandas.tests.indexes.test_datetime",
"pyspark.pandas.tests.indexes.test_datetime_at",
"pyspark.pandas.tests.indexes.test_datetime_between",
"pyspark.pandas.tests.indexes.test_datetime_ceil",
"pyspark.pandas.tests.indexes.test_datetime_property",
"pyspark.pandas.tests.indexes.test_align",
"pyspark.pandas.tests.indexes.test_indexing",
@@ -1135,7 +1138,6 @@ def __hash__(self):
"pyspark.pandas.tests.connect.computation.test_parity_pivot",
"pyspark.pandas.tests.connect.computation.test_parity_stats",
"pyspark.pandas.tests.connect.indexes.test_parity_base_slow",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_property",
"pyspark.pandas.tests.connect.frame.test_parity_interpolate",
"pyspark.pandas.tests.connect.frame.test_parity_interpolate_error",
"pyspark.pandas.tests.connect.series.test_parity_interpolate",
@@ -1186,6 +1188,10 @@ def __hash__(self):
python_test_goals=[
# pandas-on-Spark unittests
"pyspark.pandas.tests.connect.indexes.test_parity_datetime",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_at",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_between",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_ceil",
"pyspark.pandas.tests.connect.indexes.test_parity_datetime_property",
"pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames",
"pyspark.pandas.tests.connect.test_parity_ops_on_diff_frames_groupby",
],
python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py
@@ -18,11 +18,13 @@

from pyspark.pandas.tests.indexes.test_datetime import DatetimeIndexTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
from pyspark.testing.pandasutils import PandasOnSparkTestUtils


class DatetimeIndexParityTests(
DatetimeIndexTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
DatetimeIndexTestsMixin,
PandasOnSparkTestUtils,
ReusedConnectTestCase,
):
pass

41 changes: 41 additions & 0 deletions python/pyspark/pandas/tests/connect/indexes/test_parity_datetime_at.py
@@ -0,0 +1,41 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import unittest

from pyspark.pandas.tests.indexes.test_datetime_at import DatetimeIndexAtMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils


class DatetimeIndexAtParityTests(
DatetimeIndexAtMixin,
PandasOnSparkTestUtils,
ReusedConnectTestCase,
):
pass


if __name__ == "__main__":
from pyspark.pandas.tests.connect.indexes.test_parity_datetime_at import * # noqa: F401

try:
import xmlrunner # type: ignore[import]

testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
except ImportError:
testRunner = None
unittest.main(testRunner=testRunner, verbosity=2)
41 changes: 41 additions & 0 deletions python/pyspark/pandas/tests/connect/indexes/test_parity_datetime_between.py
@@ -0,0 +1,41 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import unittest

from pyspark.pandas.tests.indexes.test_datetime_between import DatetimeIndexBetweenMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils


class DatetimeIndexBetweenParityTests(
DatetimeIndexBetweenMixin,
PandasOnSparkTestUtils,
ReusedConnectTestCase,
):
pass


if __name__ == "__main__":
from pyspark.pandas.tests.connect.indexes.test_parity_datetime_between import * # noqa: F401

try:
import xmlrunner # type: ignore[import]

testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
except ImportError:
testRunner = None
unittest.main(testRunner=testRunner, verbosity=2)
41 changes: 41 additions & 0 deletions python/pyspark/pandas/tests/connect/indexes/test_parity_datetime_ceil.py
@@ -0,0 +1,41 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import unittest

from pyspark.pandas.tests.indexes.test_datetime_ceil import DatetimeIndexCeilMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils


class DatetimeIndexCeilParityTests(
DatetimeIndexCeilMixin,
PandasOnSparkTestUtils,
ReusedConnectTestCase,
):
pass


if __name__ == "__main__":
from pyspark.pandas.tests.connect.indexes.test_parity_datetime_ceil import * # noqa: F401

try:
import xmlrunner # type: ignore[import]

testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
except ImportError:
testRunner = None
unittest.main(testRunner=testRunner, verbosity=2)
python/pyspark/pandas/tests/connect/indexes/test_parity_datetime_property.py
@@ -18,11 +18,13 @@

from pyspark.pandas.tests.indexes.test_datetime_property import DatetimeIndexPropertyTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils
from pyspark.testing.pandasutils import PandasOnSparkTestUtils


class DatetimeIndexParityTests(
DatetimeIndexPropertyTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase
DatetimeIndexPropertyTestsMixin,
PandasOnSparkTestUtils,
ReusedConnectTestCase,
):
pass

67 changes: 3 additions & 64 deletions python/pyspark/pandas/tests/indexes/test_datetime.py
@@ -24,7 +24,7 @@
from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils


class DatetimeIndexTestsMixin:
class DatetimeIndexTestingFuncMixin:
@property
def fixed_freqs(self):
return [
@@ -63,6 +63,8 @@ def _disallow_nanoseconds(self, f):
self.assertRaises(ValueError, lambda: f(freq="ns"))
self.assertRaises(ValueError, lambda: f(freq="N"))


class DatetimeIndexTestsMixin(DatetimeIndexTestingFuncMixin):
def test_datetime_index(self):
with self.assertRaisesRegex(TypeError, "Index.name must be a hashable type"):
ps.DatetimeIndex(["2004-01-01", "2002-12-31", "2000-04-01"], name=[(1, 2)])
@@ -71,13 +73,6 @@ def test_datetime_index(self):
):
ps.DatetimeIndex(["2004-01-01", "2002-12-31", "2000-04-01"]).all()

def test_ceil(self):
for psidx, pidx in self.idx_pairs:
for freq in self.fixed_freqs:
self.assert_eq(psidx.ceil(freq), pidx.ceil(freq))

self._disallow_nanoseconds(self.psidxs[0].ceil)

def test_floor(self):
for psidx, pidx in self.idx_pairs:
for freq in self.fixed_freqs:
@@ -110,62 +105,6 @@ def test_strftime(self):
psidx.strftime(date_format="%B %d, %Y"), pidx.strftime(date_format="%B %d, %Y")
)

def test_indexer_between_time(self):
for psidx, pidx in self.idx_pairs:
self.assert_eq(
psidx.indexer_between_time("00:00:00", "00:01:00").sort_values(),
pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00")),
)

self.assert_eq(
psidx.indexer_between_time(
datetime.time(0, 0, 0), datetime.time(0, 1, 0)
).sort_values(),
pd.Index(pidx.indexer_between_time(datetime.time(0, 0, 0), datetime.time(0, 1, 0))),
)

self.assert_eq(
psidx.indexer_between_time("00:00:00", "00:01:00", True, False).sort_values(),
pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", True, False)),
)

self.assert_eq(
psidx.indexer_between_time("00:00:00", "00:01:00", False, True).sort_values(),
pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", False, True)),
)

self.assert_eq(
psidx.indexer_between_time("00:00:00", "00:01:00", False, False).sort_values(),
pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", False, False)),
)

self.assert_eq(
psidx.indexer_between_time("00:00:00", "00:01:00", True, True).sort_values(),
pd.Index(pidx.indexer_between_time("00:00:00", "00:01:00", True, True)),
)

def test_indexer_at_time(self):
for psidx, pidx in self.idx_pairs:
self.assert_eq(
psidx.indexer_at_time("00:00:00").sort_values(),
pd.Index(pidx.indexer_at_time("00:00:00")),
)

self.assert_eq(
psidx.indexer_at_time(datetime.time(0, 1, 0)).sort_values(),
pd.Index(pidx.indexer_at_time(datetime.time(0, 1, 0))),
)

self.assert_eq(
psidx.indexer_at_time("00:00:01").sort_values(),
pd.Index(pidx.indexer_at_time("00:00:01")),
)

self.assertRaises(
NotImplementedError,
lambda: ps.DatetimeIndex([0]).indexer_at_time("00:00:00", asof=True),
)

def test_arithmetic_op_exceptions(self):
for psidx, pidx in self.idx_pairs:
py_datetime = pidx.to_pydatetime()
68 changes: 68 additions & 0 deletions python/pyspark/pandas/tests/indexes/test_datetime_at.py
@@ -0,0 +1,68 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import datetime

import pandas as pd

import pyspark.pandas as ps
from pyspark.testing.pandasutils import PandasOnSparkTestCase
from pyspark.pandas.tests.indexes.test_datetime import DatetimeIndexTestingFuncMixin


class DatetimeIndexAtMixin(DatetimeIndexTestingFuncMixin):
def test_indexer_at_time(self):
for psidx, pidx in self.idx_pairs:
self.assert_eq(
psidx.indexer_at_time("00:00:00").sort_values(),
pd.Index(pidx.indexer_at_time("00:00:00")),
)

self.assert_eq(
psidx.indexer_at_time(datetime.time(0, 1, 0)).sort_values(),
pd.Index(pidx.indexer_at_time(datetime.time(0, 1, 0))),
)

self.assert_eq(
psidx.indexer_at_time("00:00:01").sort_values(),
pd.Index(pidx.indexer_at_time("00:00:01")),
)

self.assertRaises(
NotImplementedError,
lambda: ps.DatetimeIndex([0]).indexer_at_time("00:00:00", asof=True),
)


class DatetimeIndexAtTests(
DatetimeIndexAtMixin,
PandasOnSparkTestCase,
):
pass


if __name__ == "__main__":
import unittest
from pyspark.pandas.tests.indexes.test_datetime_at import * # noqa: F401

try:
import xmlrunner

testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
except ImportError:
testRunner = None
unittest.main(testRunner=testRunner, verbosity=2)