[SPARK-26036][PYTHON] Break large tests.py files into smaller files
## What changes were proposed in this pull request?

This PR continues the effort of breaking large files down into smaller ones. See #23021. It aims to follow the layout of https://github.com/numpy/numpy/tree/master/numpy.

Basically, this PR proposes to break `pyspark/tests.py` down into the following layout:

```
pyspark
...
├── testing
...
│   └── utils.py
├── tests
│   ├── __init__.py
│   ├── test_appsubmit.py
│   ├── test_broadcast.py
│   ├── test_conf.py
│   ├── test_context.py
│   ├── test_daemon.py
│   ├── test_join.py
│   ├── test_profiler.py
│   ├── test_rdd.py
│   ├── test_readwrite.py
│   ├── test_serializers.py
│   ├── test_shuffle.py
│   ├── test_taskcontext.py
│   ├── test_util.py
│   └── test_worker.py
...
```

## How was this patch tested?

Existing tests should cover the changes.

`cd python` and `./run-tests-with-coverage`. Manually checked that the tests are actually being run.

Each test module can (unofficially) be run on its own via:

```bash
SPARK_TESTING=1 ./bin/pyspark pyspark.tests.test_context
```

Note that if you're using macOS and Python 3, you might have to set `OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES`.
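For example, the two can be combined into a single ad-hoc invocation (a sketch based on the command above; the environment variable only matters on macOS with Python 3):

```bash
# Work around macOS's Objective-C fork-safety check, then run one of the
# relocated test modules directly (not the official way to run the suite).
OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES SPARK_TESTING=1 ./bin/pyspark pyspark.tests.test_context
```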

Closes #23033 from HyukjinKwon/SPARK-26036.

Authored-by: hyukjinkwon <gurwls223@apache.org>
Signed-off-by: hyukjinkwon <gurwls223@apache.org>
HyukjinKwon committed Nov 15, 2018
1 parent f6255d7 commit 03306a6
Showing 43 changed files with 3,093 additions and 2,666 deletions.
19 changes: 16 additions & 3 deletions dev/sparktestsupport/modules.py
@@ -310,6 +310,7 @@ def __hash__(self):
         "python/(?!pyspark/(ml|mllib|sql|streaming))"
     ],
     python_test_goals=[
+        # doctests
         "pyspark.rdd",
         "pyspark.context",
         "pyspark.conf",
@@ -318,10 +319,22 @@ def __hash__(self):
         "pyspark.serializers",
         "pyspark.profiler",
         "pyspark.shuffle",
-        "pyspark.tests",
-        "pyspark.test_broadcast",
-        "pyspark.test_serializers",
         "pyspark.util",
+        # unittests
+        "pyspark.tests.test_appsubmit",
+        "pyspark.tests.test_broadcast",
+        "pyspark.tests.test_conf",
+        "pyspark.tests.test_context",
+        "pyspark.tests.test_daemon",
+        "pyspark.tests.test_join",
+        "pyspark.tests.test_profiler",
+        "pyspark.tests.test_rdd",
+        "pyspark.tests.test_readwrite",
+        "pyspark.tests.test_serializers",
+        "pyspark.tests.test_shuffle",
+        "pyspark.tests.test_taskcontext",
+        "pyspark.tests.test_util",
+        "pyspark.tests.test_worker",
     ]
 )
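The `python_test_goals` listed above are what the Python test runner picks up, so the relocated unittest modules still run through the regular dev tooling. A minimal sketch, assuming the pre-existing `--modules` option of `python/run-tests` (not something introduced by this PR):

```bash
# Run every goal registered for the pyspark-core module, which now includes
# the new pyspark.tests.* files (option name assumed from existing tooling).
cd python
./run-tests --modules=pyspark-core
```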
2 changes: 1 addition & 1 deletion python/pyspark/ml/tests.py
@@ -72,7 +72,7 @@
 from pyspark.sql.functions import rand
 from pyspark.sql.types import DoubleType, IntegerType
 from pyspark.storagelevel import *
-from pyspark.tests import QuietTest, ReusedPySparkTestCase as PySparkTestCase
+from pyspark.testing.utils import QuietTest, ReusedPySparkTestCase as PySparkTestCase

 ser = PickleSerializer()
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_appsubmit.py
@@ -22,7 +22,7 @@
 import py4j

 from pyspark import SparkContext
-from pyspark.tests import SparkSubmitTests
+from pyspark.tests.test_appsubmit import SparkSubmitTests


 class HiveSparkSubmitTests(SparkSubmitTests):
@@ -91,6 +91,7 @@ def test_hivecontext(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
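The same rewrite is applied to the runner block of every SQL test file below: the `testRunner` is chosen up front and falls back to the default text runner when `xmlrunner` is unavailable, so a single `unittest.main(...)` call handles both cases. To exercise the XML-report branch, the optional dependency has to be installed first (a sketch; the PyPI package name is an assumption, not stated in this PR):

```bash
# xmlrunner is assumed to come from the unittest-xml-reporting package;
# without it the test files fall back to the plain text runner.
pip install unittest-xml-reporting
```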
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_arrow.py
@@ -26,7 +26,7 @@
 from pyspark.sql.types import *
 from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
     pandas_requirement_message, pyarrow_requirement_message
-from pyspark.tests import QuietTest
+from pyspark.testing.utils import QuietTest
 from pyspark.util import _exception_message


@@ -394,6 +394,7 @@ def conf(cls):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_catalog.py
@@ -194,6 +194,7 @@ def test_list_columns(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_column.py
@@ -152,6 +152,7 @@ def test_bitwise_operations(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_conf.py
@@ -50,6 +50,7 @@ def test_conf(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_context.py
@@ -25,7 +25,7 @@
 from pyspark import HiveContext, Row
 from pyspark.sql.types import *
 from pyspark.sql.window import Window
-from pyspark.tests import ReusedPySparkTestCase
+from pyspark.testing.utils import ReusedPySparkTestCase


 class HiveContextSQLTests(ReusedPySparkTestCase):
@@ -258,6 +258,7 @@ def range_frame_match():

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_dataframe.py
@@ -25,7 +25,7 @@
 from pyspark.sql.utils import AnalysisException, IllegalArgumentException
 from pyspark.testing.sqlutils import ReusedSQLTestCase, SQLTestUtils, have_pyarrow, have_pandas, \
     pandas_requirement_message, pyarrow_requirement_message
-from pyspark.tests import QuietTest
+from pyspark.testing.utils import QuietTest


 class DataFrameTests(ReusedSQLTestCase):
@@ -732,6 +732,7 @@ def test_query_execution_listener_on_collect_with_arrow(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_datasources.py
@@ -165,6 +165,7 @@ def test_ignore_column_of_all_nulls(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_functions.py
@@ -273,6 +273,7 @@ def test_sort_with_nulls_order(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_group.py
@@ -40,6 +40,7 @@ def test_aggregator(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_pandas_udf.py
@@ -21,7 +21,7 @@
 from pyspark.sql.utils import ParseException
 from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
     pandas_requirement_message, pyarrow_requirement_message
-from pyspark.tests import QuietTest
+from pyspark.testing.utils import QuietTest


 @unittest.skipIf(
@@ -211,6 +211,7 @@ def foofoo(x, y):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py
@@ -21,7 +21,7 @@
 from pyspark.sql.utils import AnalysisException
 from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
     pandas_requirement_message, pyarrow_requirement_message
-from pyspark.tests import QuietTest
+from pyspark.testing.utils import QuietTest


 @unittest.skipIf(
@@ -498,6 +498,7 @@ def test_register_vectorized_udf_basic(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_pandas_udf_grouped_map.py
@@ -22,7 +22,7 @@
 from pyspark.sql.types import *
 from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
     pandas_requirement_message, pyarrow_requirement_message
-from pyspark.tests import QuietTest
+from pyspark.testing.utils import QuietTest


 @unittest.skipIf(
@@ -525,6 +525,7 @@ def test_mixed_scalar_udfs_followed_by_grouby_apply(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_pandas_udf_scalar.py
@@ -28,7 +28,7 @@
 from pyspark.testing.sqlutils import ReusedSQLTestCase, test_compiled,\
     test_not_compiled_message, have_pandas, have_pyarrow, pandas_requirement_message, \
     pyarrow_requirement_message
-from pyspark.tests import QuietTest
+from pyspark.testing.utils import QuietTest


 @unittest.skipIf(
@@ -802,6 +802,7 @@ def test_datasource_with_udf(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_pandas_udf_window.py
@@ -21,7 +21,7 @@
 from pyspark.sql.window import Window
 from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \
     pandas_requirement_message, pyarrow_requirement_message
-from pyspark.tests import QuietTest
+from pyspark.testing.utils import QuietTest


 @unittest.skipIf(
@@ -257,6 +257,7 @@ def test_invalid_args(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_readwriter.py
@@ -148,6 +148,7 @@ def count_bucketed_cols(names, table="pyspark_bucket"):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_serde.py
@@ -133,6 +133,7 @@ def test_BinaryType_serialization(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_session.py
@@ -21,7 +21,7 @@
 from pyspark import SparkConf, SparkContext
 from pyspark.sql import SparkSession, SQLContext, Row
 from pyspark.testing.sqlutils import ReusedSQLTestCase
-from pyspark.tests import PySparkTestCase
+from pyspark.testing.utils import PySparkTestCase


 class SparkSessionTests(ReusedSQLTestCase):
@@ -315,6 +315,7 @@ def test_use_custom_class_for_extensions(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_streaming.py
@@ -561,6 +561,7 @@ def collectBatch(df, id):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_types.py
@@ -939,6 +939,7 @@ def __init__(self, **kwargs):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
7 changes: 4 additions & 3 deletions python/pyspark/sql/tests/test_udf.py
@@ -27,7 +27,7 @@
 from pyspark.sql.types import *
 from pyspark.sql.utils import AnalysisException
 from pyspark.testing.sqlutils import ReusedSQLTestCase, test_compiled, test_not_compiled_message
-from pyspark.tests import QuietTest
+from pyspark.testing.utils import QuietTest


 class UDFTests(ReusedSQLTestCase):
@@ -649,6 +649,7 @@ def test_udf_init_shouldnt_initialize_context(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
5 changes: 3 additions & 2 deletions python/pyspark/sql/tests/test_utils.py
@@ -49,6 +49,7 @@ def test_capture_illegalargument_exception(self):

 try:
     import xmlrunner
-    unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    testRunner = xmlrunner.XMLTestRunner(output='target/test-reports')
 except ImportError:
-    unittest.main(verbosity=2)
+    testRunner = None
+unittest.main(testRunner=testRunner, verbosity=2)
