Skip to content

Commit

Permalink
Support dask arrays in datetime_to_numeric (#6556)
Browse files Browse the repository at this point in the history
Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
  • Loading branch information
dcherian and max-sixty authored May 31, 2022
1 parent 4615074 commit 95a47af
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 13 deletions.
22 changes: 19 additions & 3 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,14 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
# Compute timedelta object.
# For np.datetime64, this can silently yield garbage due to overflow.
# One option is to enforce 1970-01-01 as the universal offset.
array = array - offset

# This map_blocks call is for backwards compatibility.
# dask == 2021.04.1 does not support subtracting object arrays
# which is required for cftime
# The builtin ``object`` is used for the dtype check because the
# ``np.object`` alias was deprecated in NumPy 1.20 and removed in 1.24.
if is_duck_dask_array(array) and np.issubdtype(array.dtype, object):
    array = array.map_blocks(lambda a, b: a - b, offset, meta=array._meta)
else:
    array = array - offset

# Scalar is converted to 0d-array
if not hasattr(array, "dtype"):
Expand Down Expand Up @@ -517,10 +524,19 @@ def pd_timedelta_to_float(value, datetime_unit):
return np_timedelta64_to_float(value, datetime_unit)


def _timedelta_to_seconds(array):
    """Convert an array of timedelta objects to floats in microseconds.

    Despite the function name, the result is expressed in *microseconds*:
    each element's ``total_seconds()`` value is multiplied by ``1e6``.

    Parameters
    ----------
    array : numpy.ndarray
        Array whose elements provide ``total_seconds()`` (for example
        ``datetime.timedelta``). It must support ``ravel()`` and ``shape``.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``array``.
    """
    # Element-wise loop: the elements are Python objects, so there is no
    # vectorised accessor for total_seconds().
    seconds = [delta.total_seconds() for delta in array.ravel()]
    return np.reshape(seconds, array.shape) * 1e6


def py_timedelta_to_float(array, datetime_unit):
    """Convert a timedelta object to a float, possibly at a loss of resolution.

    Parameters
    ----------
    array : array-like of timedelta objects
        Values to convert. Dask arrays are converted lazily, block by block.
    datetime_unit : str
        Unit code accepted by ``np.timedelta64`` in which the result is
        expressed.

    Returns
    -------
    array
        Float values in ``datetime_unit``; a dask array if the input was one.
    """
    # NOTE(review): ``asarray`` is this module's helper, presumably leaving
    # dask arrays lazy (unlike ``np.asarray``) -- confirm against its definition.
    array = asarray(array)
    if is_duck_dask_array(array):
        # Apply the object-wise conversion per block so nothing is computed
        # eagerly; ``meta`` declares the float64 output type to dask.
        array = array.map_blocks(
            _timedelta_to_seconds, meta=np.array([], dtype=np.float64)
        )
    else:
        array = _timedelta_to_seconds(array)
    # _timedelta_to_seconds yields microseconds; rescale to the requested unit.
    conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit)
    return conversion_factor * array

Expand Down
49 changes: 39 additions & 10 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,39 +675,68 @@ def test_multiple_dims(dtype, dask, skipna, func):
assert_allclose(actual, expected)


@pytest.mark.parametrize("dask", [True, False])
def test_datetime_to_numeric_datetime64(dask):
    """Check datetime_to_numeric on datetime64 input, with and without dask.

    Every call runs inside ``raise_if_dask_computes()`` so that the dask
    variant fails if the conversion triggers a compute.
    """
    if dask and not has_dask:
        pytest.skip("requires dask")

    times = pd.date_range("2000", periods=5, freq="7D").values
    if dask:
        import dask.array

        times = dask.array.from_array(times, chunks=-1)

    # Default offset: hours elapsed since the first timestamp.
    with raise_if_dask_computes():
        result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h")
    expected = 24 * np.arange(0, 35, 7)
    np.testing.assert_array_equal(result, expected)

    # Explicit offset: values are relative to the second timestamp.
    offset = times[1]
    with raise_if_dask_computes():
        result = duck_array_ops.datetime_to_numeric(
            times, offset=offset, datetime_unit="h"
        )
    expected = 24 * np.arange(-7, 28, 7)
    np.testing.assert_array_equal(result, expected)

    # Explicit output dtype.
    dtype = np.float32
    with raise_if_dask_computes():
        result = duck_array_ops.datetime_to_numeric(
            times, datetime_unit="h", dtype=dtype
        )
    expected = 24 * np.arange(0, 35, 7).astype(dtype)
    np.testing.assert_array_equal(result, expected)


@requires_cftime
@pytest.mark.parametrize("dask", [True, False])
def test_datetime_to_numeric_cftime(dask):
    """Check datetime_to_numeric on cftime input, with and without dask.

    Every call runs inside ``raise_if_dask_computes()`` so that the dask
    variant fails if the conversion triggers a compute.
    """
    if dask and not has_dask:
        pytest.skip("requires dask")

    times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values
    if dask:
        import dask.array

        times = dask.array.from_array(times, chunks=-1)

    # Default offset, integer output: hours since the first timestamp.
    with raise_if_dask_computes():
        result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int)
    expected = 24 * np.arange(0, 35, 7)
    np.testing.assert_array_equal(result, expected)

    # Explicit offset: values are relative to the second timestamp.
    offset = times[1]
    with raise_if_dask_computes():
        result = duck_array_ops.datetime_to_numeric(
            times, offset=offset, datetime_unit="h", dtype=int
        )
    expected = 24 * np.arange(-7, 28, 7)
    np.testing.assert_array_equal(result, expected)

    # Explicit float output dtype.
    dtype = np.float32
    with raise_if_dask_computes():
        result = duck_array_ops.datetime_to_numeric(
            times, datetime_unit="h", dtype=dtype
        )
    expected = 24 * np.arange(0, 35, 7).astype(dtype)
    np.testing.assert_array_equal(result, expected)

Expand Down

0 comments on commit 95a47af

Please sign in to comment.