Skip to content

Commit

Permalink
[SPARK-44961][PYTHON][CONNECT][TESTS] Make PySpark (pyspark-connect module) tests passing without any dependency
Browse files Browse the repository at this point in the history

### What changes were proposed in this pull request?

This PR proposes to fix the tests to properly run or skip when optional dependencies are not installed.

### Why are the changes needed?

Currently, it fails as below:

```
./python/run-tests --python-executables=python3 --modules=pyspark-connect
...
2c5289024a/python3__pyspark.sql.connect.window__nvbbzy7q.log)
Finished test(python3): pyspark.sql.connect.session (0s)
Traceback (most recent call last):
  File "/Users/hyukjin.kwon/workspace/forked/spark/python/pyspark/sql/pandas/utils.py", line 27, in require_minimum_pandas_version
    import pandas
ModuleNotFoundError: No module named 'pandas'
```

PySpark tests should pass without optional dependencies.

### Does this PR introduce _any_ user-facing change?

No, test-only.

### How was this patch tested?

Manually ran as described above.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#42676 from HyukjinKwon/spark-connect-test.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
  • Loading branch information
HyukjinKwon authored and dongjoon-hyun committed Aug 25, 2023
1 parent 632dd33 commit 12f3c81
Show file tree
Hide file tree
Showing 9 changed files with 26 additions and 24 deletions.
4 changes: 4 additions & 0 deletions python/pyspark/sql/connect/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__)

from typing import Any, Optional, Union, cast
import warnings

Expand Down
3 changes: 2 additions & 1 deletion python/pyspark/sql/connect/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@
CachedRemoteRelation,
)
from pyspark.sql.connect.readwriter import DataFrameReader
from pyspark.sql.connect.streaming import DataStreamReader, StreamingQueryManager
from pyspark.sql.connect.streaming.readwriter import DataStreamReader
from pyspark.sql.connect.streaming.query import StreamingQueryManager
from pyspark.sql.pandas.serializers import ArrowStreamPandasSerializer
from pyspark.sql.pandas.types import to_arrow_schema, to_arrow_type, _deduplicate_field_names
from pyspark.sql.session import classproperty, SparkSession as PySparkSession
Expand Down
6 changes: 0 additions & 6 deletions python/pyspark/sql/connect/streaming/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

from pyspark.sql.connect.streaming.query import StreamingQuery # noqa: F401
from pyspark.sql.connect.streaming.readwriter import DataStreamReader # noqa: F401
from pyspark.sql.connect.streaming.readwriter import DataStreamWriter # noqa: F401
from pyspark.sql.connect.streaming.query import StreamingQueryManager # noqa: F401
from pyspark.errors import StreamingQueryException # noqa: F401
5 changes: 3 additions & 2 deletions python/pyspark/sql/connect/streaming/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__)

import json
import sys
Expand All @@ -35,8 +38,6 @@
)
from pyspark.errors import PySparkPicklingError

__all__ = ["StreamingQuery", "StreamingQueryManager"]

if TYPE_CHECKING:
from pyspark.sql.connect.session import SparkSession

Expand Down
3 changes: 0 additions & 3 deletions python/pyspark/sql/connect/streaming/readwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__)
Expand Down Expand Up @@ -42,8 +41,6 @@
from pyspark.sql.connect.dataframe import DataFrame
from pyspark.sql._typing import SupportsProcess

__all__ = ["DataStreamReader", "DataStreamWriter"]


class DataStreamReader(OptionUtils):
def __init__(self, client: "SparkSession") -> None:
Expand Down
9 changes: 4 additions & 5 deletions python/pyspark/sql/tests/connect/client/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,15 @@
import uuid
from typing import Optional

from pyspark.sql.connect.client import SparkConnectClient, ChannelBuilder
import pyspark.sql.connect.proto as proto
from pyspark.testing.connectutils import should_test_connect, connect_requirement_message

from pyspark.sql.connect.client.core import Retrying
from pyspark.sql.connect.client.reattach import RetryException

if should_test_connect:
import pandas as pd
import pyarrow as pa
from pyspark.sql.connect.client import SparkConnectClient, ChannelBuilder
from pyspark.sql.connect.client.core import Retrying
from pyspark.sql.connect.client.reattach import RetryException
import pyspark.sql.connect.proto as proto


@unittest.skipIf(not should_test_connect, connect_requirement_message)
Expand Down
8 changes: 5 additions & 3 deletions python/pyspark/sql/tests/connect/test_parity_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@

import unittest

from pyspark.errors.exceptions.connect import SparkConnectException
from pyspark.sql.connect.column import Column
from pyspark.sql.tests.test_functions import FunctionsTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.connectutils import should_test_connect, ReusedConnectTestCase

if should_test_connect:
from pyspark.errors.exceptions.connect import SparkConnectException
from pyspark.sql.connect.column import Column


class FunctionsParityTests(FunctionsTestsMixin, ReusedConnectTestCase):
Expand Down
6 changes: 4 additions & 2 deletions python/pyspark/sql/tests/connect/test_parity_pandas_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect.types import UnparsedDataType
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.tests.pandas.test_pandas_udf import PandasUDFTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.connectutils import should_test_connect, ReusedConnectTestCase

if should_test_connect:
from pyspark.sql.connect.types import UnparsedDataType


class PandasUDFParityTests(PandasUDFTestsMixin, ReusedConnectTestCase):
Expand Down
6 changes: 4 additions & 2 deletions python/pyspark/sql/tests/connect/test_parity_readwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
#
import unittest

from pyspark.sql.connect.readwriter import DataFrameWriterV2
from pyspark.sql.tests.test_readwriter import ReadwriterTestsMixin, ReadwriterV2TestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
from pyspark.testing.connectutils import should_test_connect, ReusedConnectTestCase

if should_test_connect:
from pyspark.sql.connect.readwriter import DataFrameWriterV2


class ReadwriterParityTests(ReadwriterTestsMixin, ReusedConnectTestCase):
Expand Down

0 comments on commit 12f3c81

Please sign in to comment.