Merge remote-tracking branch 'upstream/main' into soft_import

Resolved the conflict stemming from #9505 by accepting this branch's changes (#9505 was just a slight edit to the old `try: import ... except` pattern).
pydata · Oct 3, 2024 · 3c5d35d · 3c5d35d
2 parents bcba465 + 8c1c31a
commit 3c5d35d
Show file tree

Hide file tree

Showing 82 changed files with 1,326 additions and 430 deletions.
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
@@ -1,2 +1,2 @@
 github: numfocus
-custom: http://numfocus.org/donate-to-xarray
+custom: https://numfocus.org/donate-to-xarray
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -58,6 +58,9 @@ jobs:
             python-version: "3.10"
             os: ubuntu-latest
           # Latest python version:
+          - env: "all-but-numba"
+            python-version: "3.12"
+            os: ubuntu-latest
           - env: "all-but-dask"
             # Not 3.12 because of pint
             python-version: "3.11"

diff --git a/.github/workflows/nightly-wheels.yml b/.github/workflows/nightly-wheels.yml
@@ -13,7 +13,7 @@ jobs:
           fetch-depth: 0
       - uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"
 
       - name: Install dependencies
         run: |
@@ -38,7 +38,7 @@ jobs:
           fi
 
       - name: Upload wheel
-        uses: scientific-python/upload-nightly-action@b67d7fcc0396e1128a474d1ab2b48aa94680f9fc # 0.5.0
+        uses: scientific-python/upload-nightly-action@82396a2ed4269ba06c6b2988bb4fd568ef3c3d6b # 0.6.1
         with:
           anaconda_nightly_upload_token: ${{ secrets.ANACONDA_NIGHTLY }}
           artifacts_path: dist
diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml
@@ -88,7 +88,7 @@ jobs:
           path: dist
       - name: Publish package to TestPyPI
         if: github.event_name == 'push'
-        uses: pypa/gh-action-pypi-publish@v1.10.1
+        uses: pypa/gh-action-pypi-publish@v1.10.2
         with:
           repository_url: https://test.pypi.org/legacy/
           verbose: true
@@ -111,6 +111,6 @@ jobs:
           name: releases
           path: dist
       - name: Publish package to PyPI
-        uses: pypa/gh-action-pypi-publish@v1.10.1
+        uses: pypa/gh-action-pypi-publish@v1.10.2
         with:
           verbose: true
diff --git a/README.md b/README.md
@@ -88,7 +88,7 @@ Xarray is a fiscally sponsored project of
 [NumFOCUS](https://numfocus.org), a nonprofit dedicated to supporting
 the open source scientific computing community. If you like Xarray and
 want to support our mission, please consider making a
-[donation](https://numfocus.salsalabs.org/donate-to-xarray/) to support
+[donation](https://numfocus.org/donate-to-xarray) to support
 our efforts.
 
 ## History

diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py
@@ -18,15 +18,15 @@ def decorator(func):
 def requires_dask():
     try:
         import dask  # noqa: F401
-    except ImportError:
-        raise NotImplementedError()
+    except ImportError as err:
+        raise NotImplementedError() from err
 
 
 def requires_sparse():
     try:
         import sparse  # noqa: F401
-    except ImportError:
-        raise NotImplementedError()
+    except ImportError as err:
+        raise NotImplementedError() from err
 
 
 def randn(shape, frac_nan=None, chunks=None, seed=0):

diff --git a/asv_bench/benchmarks/accessors.py b/asv_bench/benchmarks/accessors.py
@@ -16,10 +16,10 @@ def setup(self, calendar):
         self.da = xr.DataArray(data, dims="time", coords={"time": time})
 
     def time_dayofyear(self, calendar):
-        self.da.time.dt.dayofyear
+        _ = self.da.time.dt.dayofyear
 
     def time_year(self, calendar):
-        self.da.time.dt.year
+        _ = self.da.time.dt.year
 
     def time_floor(self, calendar):
-        self.da.time.dt.floor("D")
+        _ = self.da.time.dt.floor("D")
diff --git a/asv_bench/benchmarks/dataset_io.py b/asv_bench/benchmarks/dataset_io.py
@@ -606,8 +606,8 @@ def setup(self):
 
         try:
             import distributed
-        except ImportError:
-            raise NotImplementedError()
+        except ImportError as err:
+            raise NotImplementedError() from err
 
         self.client = distributed.Client()
         self.write = create_delayed_write()

diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py
@@ -68,8 +68,8 @@ def parse_requirements(fname) -> Iterator[tuple[str, int, int, int | None]]:
 
         try:
             version_tup = tuple(int(x) for x in version.split("."))
-        except ValueError:
-            raise ValueError("non-numerical version: " + row)
+        except ValueError as err:
+            raise ValueError("non-numerical version: " + row) from err
 
         if len(version_tup) == 2:
             yield (pkg, *version_tup, None)  # type: ignore[misc]

diff --git a/ci/requirements/all-but-numba.yml b/ci/requirements/all-but-numba.yml
@@ -0,0 +1,54 @@
+name: xarray-tests
+channels:
+  - conda-forge
+  - nodefaults
+dependencies:
+  # Pin a "very new numpy" (updated Sept 24, 2024)
+  - numpy>=2.1.1
+  - aiobotocore
+  - array-api-strict
+  - boto3
+  - bottleneck
+  - cartopy
+  - cftime
+  - dask-core
+  - dask-expr # dask raises a deprecation warning without this, breaking doctests
+  - distributed
+  - flox
+  - fsspec
+  - h5netcdf
+  - h5py
+  - hdf5
+  - hypothesis
+  - iris
+  - lxml # Optional dep of pydap
+  - matplotlib-base
+  - nc-time-axis
+  - netcdf4
+  # numba, sparse, numbagg, numexpr often conflict with newer versions of numpy.
+  # This environment helps us test xarray with the latest versions
+  # of numpy
+  # - numba
+  # - numbagg
+  # - numexpr
+  # - sparse
+  - opt_einsum
+  - packaging
+  - pandas
+  # - pint>=0.22
+  - pip
+  - pooch
+  - pre-commit
+  - pyarrow # pandas raises a deprecation warning without this, breaking doctests
+  - pydap
+  - pytest
+  - pytest-cov
+  - pytest-env
+  - pytest-xdist
+  - pytest-timeout
+  - rasterio
+  - scipy
+  - seaborn
+  - toolz
+  - typing_extensions
+  - zarr
diff --git a/doc/ecosystem.rst b/doc/ecosystem.rst
@@ -26,7 +26,7 @@ Geosciences
 - `OGGM <https://oggm.org/>`_: Open Global Glacier Model
 - `Oocgcm <https://oocgcm.readthedocs.io/>`_: Analysis of large gridded geophysical datasets
 - `Open Data Cube <https://www.opendatacube.org/>`_: Analysis toolkit of continental scale Earth Observation data from satellites.
-- `Pangaea: <https://pangaea.readthedocs.io/en/latest/>`_: xarray extension for gridded land surface & weather model output).
+- `Pangaea <https://pangaea.readthedocs.io/en/latest/>`_: xarray extension for gridded land surface & weather model output.
 - `Pangeo <https://pangeo.io>`_: A community effort for big data geoscience in the cloud.
 - `PyGDX <https://pygdx.readthedocs.io/en/latest/>`_: Python 3 package for
   accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom

diff --git a/doc/howdoi.rst b/doc/howdoi.rst
@@ -58,7 +58,7 @@ How do I ...
    * - apply a function on all data variables in a Dataset
      - :py:meth:`Dataset.map`
    * - write xarray objects with complex values to a netCDF file
-     - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True``
+     - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf"`` or :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="netCDF4", auto_complex=True``
    * - make xarray objects look like other xarray objects
      - :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interp_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interp_like`, :py:meth:`DataArray.broadcast_like`
    * - Make sure my datasets have values at the same coordinate locations

diff --git a/doc/user-guide/hierarchical-data.rst b/doc/user-guide/hierarchical-data.rst
@@ -200,7 +200,7 @@ and even the distinguishing feature of the common ancestor of any two species (t
 
 .. ipython:: python
 
-    [node.name for node in primates.ancestors]
+    [node.name for node in reversed(primates.parents)]
     primates.root.name
     primates.find_common_ancestor(dinosaurs).name
 

diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst
@@ -566,29 +566,12 @@ This is not CF-compliant but again facilitates roundtripping of xarray datasets.
 Invalid netCDF files
 ~~~~~~~~~~~~~~~~~~~~
 
-The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't
+The library ``h5netcdf`` allows writing some dtypes that aren't
 allowed in netCDF4 (see
-`h5netcdf documentation <https://github.com/shoyer/h5netcdf#invalid-netcdf-files>`_).
+`h5netcdf documentation <https://github.com/h5netcdf/h5netcdf#invalid-netcdf-files>`_).
 This feature is available through :py:meth:`DataArray.to_netcdf` and
 :py:meth:`Dataset.to_netcdf` when used with ``engine="h5netcdf"``
-and currently raises a warning unless ``invalid_netcdf=True`` is set:
-
-.. ipython:: python
-    :okwarning:
-
-    # Writing complex valued data
-    da = xr.DataArray([1.0 + 1.0j, 2.0 + 2.0j, 3.0 + 3.0j])
-    da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True)
-
-    # Reading it back
-    reopened = xr.open_dataarray("complex.nc", engine="h5netcdf")
-    reopened
-
-.. ipython:: python
-    :suppress:
-
-    reopened.close()
-    os.remove("complex.nc")
+and currently raises a warning unless ``invalid_netcdf=True`` is set.
 
 .. warning::
 

diff --git a/doc/user-guide/pandas.rst b/doc/user-guide/pandas.rst
@@ -103,7 +103,7 @@ DataFrames:
     xr.DataArray.from_series(s)
 
 Both the ``from_series`` and ``from_dataframe`` methods use reindexing, so they
-work even if not the hierarchical index is not a full tensor product:
+work even if the hierarchical index is not a full tensor product:
 
 .. ipython:: python
 
@@ -126,7 +126,7 @@ Particularly after a roundtrip, the following deviations are noted:
 To avoid these problems, the third-party `ntv-pandas <https://github.com/loco-philippe/ntv-pandas>`__ library offers lossless and reversible conversions between
 ``Dataset``/ ``DataArray`` and pandas ``DataFrame`` objects.
 
-This solution is particularly interesting for converting any ``DataFrame`` into a ``Dataset`` (the converter find the multidimensional structure hidden by the tabular structure).
+This solution is particularly interesting for converting any ``DataFrame`` into a ``Dataset`` (the converter finds the multidimensional structure hidden by the tabular structure).
 
 The `ntv-pandas examples <https://github.com/loco-philippe/ntv-pandas/tree/main/example>`__ show how to improve the conversion for the previous ``Dataset`` example and for more complex examples.
 

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -32,6 +32,11 @@ New Features
   `Tom Nicholas <https://github.com/TomNicholas>`_.
 - Added zarr backends for :py:func:`open_groups` (:issue:`9430`, :pull:`9469`).
   By `Eni Awowale <https://github.com/eni-awowale>`_.
+- Added support for vectorized interpolation using additional interpolators
+  from the ``scipy.interpolate`` module (:issue:`9049`, :pull:`9526`).
+  By `Holly Mandel <https://github.com/hollymandel>`_.
+- Implement handling of complex numbers (netcdf4/h5netcdf) and enums (h5netcdf) (:issue:`9246`, :issue:`3297`, :pull:`9509`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 Breaking changes
 ~~~~~~~~~~~~~~~~
@@ -47,13 +52,20 @@ Bug fixes
 - Make illegal path-like variable names when constructing a DataTree from a Dataset
   (:issue:`9339`, :pull:`9378`)
   By `Etienne Schalk <https://github.com/etienneschalk>`_.
+- Work around `upstream pandas issue
+  <https://github.com/pandas-dev/pandas/issues/56996>`_ to ensure that we can
+  decode times encoded with small integer dtype values (e.g. ``np.int32``) in
+  environments with NumPy 2.0 or greater without needing to fall back to cftime
+  (:pull:`9518`). By `Spencer Clark <https://github.com/spencerkclark>`_.
 - Fix bug when encoding times with missing values as floats in the case when
   the non-missing times could in theory be encoded with integers
   (:issue:`9488`, :pull:`9497`). By `Spencer Clark
   <https://github.com/spencerkclark>`_.
 - Fix a few bugs affecting groupby reductions with `flox`. (:issue:`8090`, :issue:`9398`).
   By `Deepak Cherian <https://github.com/dcherian>`_.
-
+- Fix the safe_chunks validation option on the to_zarr method
+  (:issue:`5511`, :pull:`9559`). By `Joseph Nowak
+  <https://github.com/josephnowak>`_.
 
 Documentation
 ~~~~~~~~~~~~~

diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py
@@ -132,3 +132,12 @@ def test_roundtrip_pandas_dataframe_datetime(df) -> None:
     roundtripped.columns.name = "cols"  # why?
     pd.testing.assert_frame_equal(df, roundtripped)
     xr.testing.assert_identical(dataset, roundtripped.to_xarray())
+
+
+def test_roundtrip_1d_pandas_extension_array() -> None:
+    df = pd.DataFrame({"cat": pd.Categorical(["a", "b", "c"])})
+    arr = xr.Dataset.from_dataframe(df)["cat"]
+    roundtripped = arr.to_pandas()
+    assert (df["cat"] == roundtripped).all()
+    assert df["cat"].dtype == roundtripped.dtype
+    xr.testing.assert_identical(arr, roundtripped.to_xarray())
diff --git a/pyproject.toml b/pyproject.toml
@@ -27,9 +27,14 @@ dependencies = [
   "pandas>=2.1",
 ]
 
+# We don't encode minimum requirements here (though if we can write a script to
+# generate the text from `min_deps_check.py`, that's welcome...). We do add
+# `numba>=0.54` here because of https://github.com/astral-sh/uv/issues/7881;
+# note that it's not a direct dependency of xarray.
+
 [project.optional-dependencies]
-accel = ["scipy", "bottleneck", "numbagg", "flox", "opt_einsum"]
-complete = ["xarray[accel,io,parallel,viz,dev]"]
+accel = ["scipy", "bottleneck", "numbagg", "numba>=0.54", "flox", "opt_einsum"]
+complete = ["xarray[accel,etc,io,parallel,viz]"]
 dev = [
   "hypothesis",
   "mypy",
@@ -40,11 +45,14 @@ dev = [
   "pytest-xdist",
   "pytest-timeout",
   "ruff",
+  "sphinx",
+  "sphinx_autosummary_accessors",
   "xarray[complete]",
 ]
 io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr<3", "fsspec", "cftime", "pooch"]
+etc = ["sparse"]
 parallel = ["dask[complete]"]
-viz = ["matplotlib", "seaborn", "nc-time-axis"]
+viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"]
 
 [project.urls]
 Documentation = "https://docs.xarray.dev"
@@ -207,32 +215,13 @@ warn_return_any = true
 module = ["xarray.namedarray.*", "xarray.tests.test_namedarray"]
 
 [tool.pyright]
-# include = ["src"]
-# exclude = ["**/node_modules",
-# "**/__pycache__",
-# "src/experimental",
-# "src/typestubs"
-# ]
-# ignore = ["src/oldstuff"]
 defineConstant = {DEBUG = true}
-# stubPath = "src/stubs"
-# venv = "env367"
 
 # Enabling this means that developers who have disabled the warning locally —
 # because not all dependencies are installable — are overridden
 # reportMissingImports = true
 reportMissingTypeStubs = false
 
-# pythonVersion = "3.6"
-# pythonPlatform = "Linux"
-
-# executionEnvironments = [
-# { root = "src/web", pythonVersion = "3.5", pythonPlatform = "Windows", extraPaths = [ "src/service_libs" ] },
-# { root = "src/sdk", pythonVersion = "3.0", extraPaths = [ "src/backend" ] },
-# { root = "src/tests", extraPaths = ["src/tests/e2e", "src/sdk" ]},
-# { root = "src" }
-# ]
-
 [tool.ruff]
 extend-exclude = [
   "doc",
@@ -246,13 +235,14 @@ extend-exclude = [
 extend-safe-fixes = [
   "TID252", # absolute imports
 ]
-extend-ignore = [
+ignore = [
   "E402",
   "E501",
   "E731",
   "UP007",
 ]
 extend-select = [
+  "B", # flake8-bugbear
   "F", # Pyflakes
   "E", # Pycodestyle
   "W",
@@ -322,7 +312,6 @@ filterwarnings = [
   "default:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning",
   "default:Duplicate dimension names present:UserWarning:xarray.namedarray.core",
   "default:::xarray.tests.test_strategies", # TODO: remove once we know how to deal with a changed signature in protocols
-  "ignore:__array__ implementation doesn't accept a copy keyword, so passing copy=False failed.",
 ]
 
 log_cli_level = "INFO"