From 44eba46cc8b90be990177450141c48746fa5b67d Mon Sep 17 00:00:00 2001 From: Patrick Marx <6949483+codesorcery@users.noreply.github.com> Date: Wed, 3 Jul 2024 09:22:11 +0900 Subject: [PATCH] [SPARK-48710][PYTHON][3.5] Limit NumPy version to supported range (>=1.15,<2) ### What changes were proposed in this pull request? * Add a constraint for `numpy<2` to the PySpark package ### Why are the changes needed? PySpark references some code which was removed with NumPy 2.0. Thus, if `numpy>=2` is installed, executing PySpark may fail. https://github.com/apache/spark/pull/47083 updates the `master` branch to be compatible with NumPy 2. This PR adds a version bound for older releases, where that change won't be applied. ### Does this PR introduce _any_ user-facing change? NumPy will be limited to `numpy<2` when installing `pyspark` with extras `ml`, `mllib`, `sql`, `pandas_on_spark` or `connect`. ### How was this patch tested? Via existing CI jobs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47175 from codesorcery/SPARK-48710-numpy-upper-bound. Authored-by: Patrick Marx <6949483+codesorcery@users.noreply.github.com> Signed-off-by: Hyukjin Kwon --- python/setup.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/setup.py b/python/setup.py index 2110c2839ca70..ddd961d0412b5 100755 --- a/python/setup.py +++ b/python/setup.py @@ -307,17 +307,17 @@ def run(self): # if you're updating the versions or dependencies. 
install_requires=["py4j==0.10.9.7"], extras_require={ - "ml": ["numpy>=1.15"], - "mllib": ["numpy>=1.15"], + "ml": ["numpy>=1.15,<2"], + "mllib": ["numpy>=1.15,<2"], "sql": [ "pandas>=%s" % _minimum_pandas_version, "pyarrow>=%s" % _minimum_pyarrow_version, - "numpy>=1.15", + "numpy>=1.15,<2", ], "pandas_on_spark": [ "pandas>=%s" % _minimum_pandas_version, "pyarrow>=%s" % _minimum_pyarrow_version, - "numpy>=1.15", + "numpy>=1.15,<2", ], "connect": [ "pandas>=%s" % _minimum_pandas_version, @@ -325,7 +325,7 @@ def run(self): "grpcio>=%s" % _minimum_grpc_version, "grpcio-status>=%s" % _minimum_grpc_version, "googleapis-common-protos>=%s" % _minimum_googleapis_common_protos_version, - "numpy>=1.15", + "numpy>=1.15,<2", ], }, python_requires=">=3.8",