Skip to content

Commit

Permalink
ENH: Compatible with NumPy 2.x (#817)
Browse files Browse the repository at this point in the history
Co-authored-by: hucorz <chaohui6014@gmail.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 14, 2024
1 parent 5b471fb commit 9700db7
Show file tree
Hide file tree
Showing 52 changed files with 330 additions and 220 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ jobs:
pip install --upgrade --upgrade-strategy only-if-needed --no-cache-dir ".[doc]"
else
pip install -e "git+https://github.com/xorbitsai/xoscar.git@main#subdirectory=python&egg=xoscar"
pip install "numpy<2.0.0" scipy cython pyftpdlib coverage flaky numexpr
pip install -U numpy scipy cython pyftpdlib coverage flaky numexpr
if [[ "$MODULE" == "xorbits/pandas" ]]; then
pip install openpyxl
Expand Down Expand Up @@ -360,7 +360,7 @@ jobs:
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR="$HADOOP_HOME/lib/native"
export PATH="$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin"
pytest --timeout=1500 -W ignore::PendingDeprecationWarning xorbits/_mars -m hadoop
pytest --ignore xorbits/_mars/learn --timeout=1500 -W ignore::PendingDeprecationWarning xorbits/_mars -m hadoop
elif [[ "$MODULE" == "vineyard" ]]; then
pytest --timeout=1500 -W ignore::PendingDeprecationWarning \
--cov-config=setup.cfg --cov-report=xml --cov=xorbits xorbits/_mars/storage/tests/test_libs.py
Expand Down
12 changes: 6 additions & 6 deletions doc/source/user_guide/storage_backend.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ create a YAML configuration file named `file.yml` which specify `backends` and `
"@inherits": "@default"
storage:
backends: [disk]
disk:
root_dirs: "/tmp"
backends: [disk]
disk:
root_dirs: "/tmp"
Start the worker using the :code:`-f file.yml` option:

Expand Down Expand Up @@ -86,9 +86,9 @@ and start the worker by adding :code:`-f file.yml` option.
"@inherits": "@default"
storage:
backends: [disk]
disk:
root_dirs: "/mnt/xorbits"
backends: [disk]
disk:
root_dirs: "/mnt/xorbits"
GPU
Expand Down
2 changes: 1 addition & 1 deletion python/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ include_package_data = True
packages = find:
install_requires =
xoscar>=0.0.8
numpy>=1.14.0,<2.0.0
numpy>=1.14.0
pandas>=1.0.0
scipy>=1.0.0; sys_platform!="win32" or python_version>="3.10"
scipy>=1.0.0,<=1.9.1; sys_platform=="win32" and python_version<"3.10"
Expand Down
7 changes: 6 additions & 1 deletion python/xorbits/_mars/core/operand/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,13 @@
import numpy as np
from xoscar.metrics import Metrics

from ....utils import is_numpy_2

try:
from numpy.core._exceptions import UFuncTypeError
if is_numpy_2():
from numpy._core._exceptions import UFuncTypeError
else:
from numpy.core._exceptions import UFuncTypeError
except ImportError: # pragma: no cover
UFuncTypeError = None

Expand Down
2 changes: 1 addition & 1 deletion python/xorbits/_mars/dataframe/indexing/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def _call_dataframe_series(self, lhs: TileableType, rhs: TileableType):
series_index = rhs.index_value.to_pandas()
dtypes = lhs.dtypes.reindex(
lhs.dtypes.index.join(series_index, how=self.join)
).fillna(np.dtype(np.float_))
).fillna(np.dtype(np.float64))
l_shape[1] = r_size = len(dtypes)
col_val = r_idx_val = parse_index(dtypes.index, store_data=True)

Expand Down
24 changes: 12 additions & 12 deletions python/xorbits/_mars/dataframe/missing/tests/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,36 +241,36 @@ def test_isna(setup):
isna(midx)

# list
l = [1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT]
l = [1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT]
actual = isna(l).execute().fetch()
expected = pd.isna(l)
np.testing.assert_array_equal(expected, actual)

# tuple
t = (1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT)
t = (1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT)
assert not isna(t)

# numpy ndarray
narr = np.array((1, 2, 3, np.Inf, np.NaN))
narr = np.array((1, 2, 3, np.inf, np.nan))
actual = isna(narr).execute().fetch()
expected = pd.isna(narr)
np.testing.assert_array_equal(expected, actual)

# pandas index
pi = pd.Index((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT))
pi = pd.Index((1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT))
actual = isna(pi).execute().fetch()
expected = pd.isna(pi)
np.testing.assert_array_equal(expected, actual)

# pandas series
ps = pd.Series((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT))
ps = pd.Series((1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT))
actual = isna(ps).execute().fetch()
expected = pd.isna(ps)
pd.testing.assert_series_equal(expected, actual)

# pandas dataframe
pdf = pd.DataFrame(
{"foo": (1, 2, 3, np.Inf, pd.NA), "bar": (4, 5, 6, np.NaN, pd.NaT)}
{"foo": (1, 2, 3, np.inf, pd.NA), "bar": (4, 5, 6, np.nan, pd.NaT)}
)
actual = isna(pdf).execute().fetch()
expected = pd.isna(pdf)
Expand Down Expand Up @@ -324,36 +324,36 @@ def test_notna(setup):
notna(midx)

# list
l = [1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT]
l = [1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT]
actual = notna(l).execute().fetch()
expected = pd.notna(l)
np.testing.assert_array_equal(expected, actual)

# tuple
t = (1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT)
t = (1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT)
assert notna(t)

# numpy ndarray
narr = np.array((1, 2, 3, np.Inf, np.NaN))
narr = np.array((1, 2, 3, np.inf, np.nan))
actual = notna(narr).execute().fetch()
expected = pd.notna(narr)
np.testing.assert_array_equal(expected, actual)

# pandas index
pi = pd.Index((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT))
pi = pd.Index((1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT))
actual = notna(pi).execute().fetch()
expected = pd.notna(pi)
np.testing.assert_array_equal(expected, actual)

# pandas series
ps = pd.Series((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT))
ps = pd.Series((1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT))
actual = notna(ps).execute().fetch()
expected = pd.notna(ps)
pd.testing.assert_series_equal(expected, actual)

# pandas dataframe
pdf = pd.DataFrame(
{"foo": (1, 2, 3, np.Inf, pd.NA), "bar": (4, 5, 6, np.NaN, pd.NaT)}
{"foo": (1, 2, 3, np.inf, pd.NA), "bar": (4, 5, 6, np.nan, pd.NaT)}
)
actual = notna(pdf).execute().fetch()
expected = pd.notna(pdf)
Expand Down
2 changes: 1 addition & 1 deletion python/xorbits/_mars/dataframe/reduction/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ def _call_dataframe(self, df):
# handle pandas Dtypes in the future more carefully.
reduced_dtype = np.dtype("O")
else:
reduced_dtype = np.find_common_type(dtypes, [])
reduced_dtype = np.result_type(*dtypes)

if level is not None:
return self._call_groupby_level(df[reduced_cols], level)
Expand Down
14 changes: 7 additions & 7 deletions python/xorbits/_mars/dataframe/statistics/corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def _set_inputs(self, inputs):
def __call__(self, df_or_series):
if isinstance(df_or_series, SERIES_TYPE):
inputs = filter_inputs([df_or_series, self.other])
return self.new_scalar(inputs, dtype=np.dtype(np.float_))
return self.new_scalar(inputs, dtype=np.dtype(np.float64))
else:

def _filter_numeric(obj):
Expand All @@ -63,7 +63,7 @@ def _filter_numeric(obj):
inputs = filter_inputs([df_or_series, self.other])
if self.axis is None:
dtypes = pd.Series(
[np.dtype(np.float_)] * len(df_or_series.dtypes),
[np.dtype(np.float64)] * len(df_or_series.dtypes),
index=df_or_series.dtypes.index,
)
return self.new_dataframe(
Expand All @@ -88,7 +88,7 @@ def _filter_numeric(obj):
return self.new_series(
inputs,
shape=shape,
dtype=np.dtype(np.float_),
dtype=np.dtype(np.float64),
index_value=new_index_value,
)

Expand All @@ -115,8 +115,8 @@ def _tile_pearson_cross(left, right, min_periods):
right.fillna(0).to_tensor(),
)

nna_left = left.notna().to_tensor().astype(np.float_)
nna_right = right.notna().to_tensor().astype(np.float_)
nna_left = left.notna().to_tensor().astype(np.float64)
nna_right = right.notna().to_tensor().astype(np.float64)

sum_left = left_tensor.T.dot(nna_right)
sum_right = right_tensor.T.dot(nna_left)
Expand All @@ -143,8 +143,8 @@ def _tile_pearson_align(cls, left, right, axis):
if has_unknown_shape(left, right):
yield left.chunks + right.chunks + [left, right]

nna_left = left.notna().astype(np.float_)
nna_right = right.notna().astype(np.float_)
nna_left = left.notna().astype(np.float64)
nna_right = right.notna().astype(np.float64)

left, right = left.fillna(0), right.fillna(0)

Expand Down
2 changes: 1 addition & 1 deletion python/xorbits/_mars/learn/contrib/lightgbm/_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __call__(self):
shape = (self.data.shape[0],)

if self._proba:
dtype = np.dtype(np.float_)
dtype = np.dtype(np.float64)
elif hasattr(self.model, "classes_"):
dtype = np.array(self.model.classes_).dtype
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,10 @@
shape = 60, 55
n_samples, n_features = shape
rng = check_random_state(42)
X = rng.randint(-100, 20, np.product(shape)).reshape(shape)
X = rng.randint(-100, 20, np.prod(shape)).reshape(shape)
X = sp.csr_matrix(np.maximum(X, 0), dtype=np.float64)
X.data[:] = 1 + np.log(X.data)
Xdense = X.A
n_samples = n_samples
n_features = n_features
Xdense = X.toarray()


def test_attributes(setup):
Expand Down
8 changes: 7 additions & 1 deletion python/xorbits/_mars/learn/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@
import warnings

import numpy as np
from numpy.core.numeric import ComplexWarning

from ....utils import is_numpy_2

if is_numpy_2():
from numpy.exceptions import ComplexWarning
else:
from numpy.core.numeric import ComplexWarning

try:
from sklearn.exceptions import DataConversionWarning
Expand Down
4 changes: 2 additions & 2 deletions python/xorbits/_mars/tensor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,9 @@
# noinspection PyUnresolvedReferences
from ..core import ExecutableTuple

from numpy import __version__ as np_ver
from .utils import is_numpy_2

if np_ver >= "2.0.0":
if is_numpy_2():
from numpy.exceptions import AxisError
else:
from numpy import AxisError
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ def test_unify_chunk_add():

def test_frexp():
t1 = ones((3, 4, 5), chunk_size=2)
t2 = empty((3, 4, 5), dtype=np.float_, chunk_size=2)
t2 = empty((3, 4, 5), dtype=np.float64, chunk_size=2)
op_type = type(t1.op)

o1, o2 = frexp(t1)
Expand Down
16 changes: 14 additions & 2 deletions python/xorbits/_mars/tensor/array_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from ..lib import sparse
from ..lib.sparse.core import get_dense_module, issparse
from ..utils import is_same_module, lazy_import
from .utils import is_numpy_2

cp = lazy_import("cupy", rename="cp")

Expand All @@ -43,6 +44,18 @@ def is_cupy(x):
return False


def get_device_id(input_data):
    """Return the GPU device id that holds ``input_data``, or ``-1`` if none.

    Parameters
    ----------
    input_data : object
        Typically a ``numpy.ndarray`` or ``cupy.ndarray``; any object is
        accepted.

    Returns
    -------
    int
        ``input_data.device.id`` when that attribute chain exists,
        otherwise ``-1``.
    """
    # NumPy 2.x added a `device` attribute to `np.ndarray`, but it is the
    # plain string "cpu" and has no `id`, whereas `cupy.ndarray.device.id`
    # is the GPU device id.  Probing for `.id` directly (instead of checking
    # the NumPy version and the exact array type) gives the same answer for
    # both libraries and also returns -1, rather than raising AttributeError,
    # for any other object whose `device` carries no `id`.
    device = getattr(input_data, "device", None)
    return getattr(device, "id", -1)


def get_array_module(x, nosparse=False):
if issparse(x):
if nosparse:
Expand Down Expand Up @@ -117,8 +130,7 @@ def as_same_device(inputs, device=None, ret_extra=False, copy_if_not_writeable=F
if device is None:
try:
device = _most_nbytes_device(
(i.device.id if hasattr(i, "device") else -1, i.nbytes)
for i in input_tensors
(get_device_id(i), i.nbytes) for i in input_tensors
)
except ValueError:
device = -1
Expand Down
4 changes: 2 additions & 2 deletions python/xorbits/_mars/tensor/base/broadcast_to.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from ... import opcodes as OperandDef
from ...serialization.serializables import KeyField, TupleField
from ..array_utils import device, get_array_module
from ..array_utils import device, get_array_module, get_device_id
from ..datasource import tensor as astensor
from ..operands import TensorHasInput, TensorOperandMixin

Expand Down Expand Up @@ -80,7 +80,7 @@ def tile(cls, op):
def execute(cls, ctx, op):
xp = get_array_module(ctx[op.input.key])
input_data = ctx[op.input.key]
device_id = input_data.device.id if hasattr(input_data, "device") else -1
device_id = get_device_id(input_data)

with device(device_id):
shape = op.shape
Expand Down
10 changes: 7 additions & 3 deletions python/xorbits/_mars/tensor/base/expand_dims.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
import numpy as np

from ..datasource import tensor as astensor
from ..utils import is_numpy_2

if is_numpy_2():
from numpy.exceptions import AxisError
else:
from numpy import AxisError


def expand_dims(a, axis):
Expand Down Expand Up @@ -77,9 +83,7 @@ def expand_dims(a, axis):
a = astensor(a)

if axis > a.ndim or axis < -a.ndim - 1:
raise np.AxisError(
f"Axis must be between -{a.ndim + 1} and {a.ndim}, got {axis}"
)
raise AxisError(f"Axis must be between -{a.ndim + 1} and {a.ndim}, got {axis}")

axis = axis if axis >= 0 else axis + a.ndim + 1
indexes = (slice(None),) * axis + (np.newaxis,) + (slice(None),) * (a.ndim - axis)
Expand Down
9 changes: 6 additions & 3 deletions python/xorbits/_mars/tensor/base/rollaxis.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from ..utils import is_numpy_2, validate_axis

from ..utils import validate_axis
if is_numpy_2():
from numpy.exceptions import AxisError
else:
from numpy import AxisError


def rollaxis(tensor, axis, start=0):
Expand Down Expand Up @@ -66,7 +69,7 @@ def rollaxis(tensor, axis, start=0):
start += n
msg = "'%s' arg requires %d <= %s < %d, but %d was passed in"
if not (0 <= start < n + 1):
raise np.AxisError(msg % ("start", -n, "start", n + 1, start))
raise AxisError(msg % ("start", -n, "start", n + 1, start))
if axis < start:
# it's been removed
start -= 1
Expand Down
Loading

0 comments on commit 9700db7

Please sign in to comment.