Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into soft_import
Browse files Browse the repository at this point in the history
Resolved conflict stemming from #9505 by accepting this branch's changes (#9505 was just a slight edit to the old try import ... except pattern)
  • Loading branch information
scott-huberty committed Oct 3, 2024
2 parents bcba465 + 8c1c31a commit 3c5d35d
Show file tree
Hide file tree
Showing 82 changed files with 1,326 additions and 430 deletions.
2 changes: 1 addition & 1 deletion .github/FUNDING.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
github: numfocus
custom: http://numfocus.org/donate-to-xarray
custom: https://numfocus.org/donate-to-xarray
3 changes: 3 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ jobs:
python-version: "3.10"
os: ubuntu-latest
# Latest python version:
- env: "all-but-numba"
python-version: "3.12"
os: ubuntu-latest
- env: "all-but-dask"
# Not 3.12 because of pint
python-version: "3.11"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/nightly-wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
fetch-depth: 0
- uses: actions/setup-python@v5
with:
python-version: "3.11"
python-version: "3.12"

- name: Install dependencies
run: |
Expand All @@ -38,7 +38,7 @@ jobs:
fi
- name: Upload wheel
uses: scientific-python/upload-nightly-action@b67d7fcc0396e1128a474d1ab2b48aa94680f9fc # 0.5.0
uses: scientific-python/upload-nightly-action@82396a2ed4269ba06c6b2988bb4fd568ef3c3d6b # 0.6.1
with:
anaconda_nightly_upload_token: ${{ secrets.ANACONDA_NIGHTLY }}
artifacts_path: dist
4 changes: 2 additions & 2 deletions .github/workflows/pypi-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
path: dist
- name: Publish package to TestPyPI
if: github.event_name == 'push'
uses: pypa/gh-action-pypi-publish@v1.10.1
uses: pypa/gh-action-pypi-publish@v1.10.2
with:
repository_url: https://test.pypi.org/legacy/
verbose: true
Expand All @@ -111,6 +111,6 @@ jobs:
name: releases
path: dist
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@v1.10.1
uses: pypa/gh-action-pypi-publish@v1.10.2
with:
verbose: true
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ Xarray is a fiscally sponsored project of
[NumFOCUS](https://numfocus.org), a nonprofit dedicated to supporting
the open source scientific computing community. If you like Xarray and
want to support our mission, please consider making a
[donation](https://numfocus.salsalabs.org/donate-to-xarray/) to support
[donation](https://numfocus.org/donate-to-xarray) to support
our efforts.

## History
Expand Down
8 changes: 4 additions & 4 deletions asv_bench/benchmarks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ def decorator(func):
def requires_dask():
try:
import dask # noqa: F401
except ImportError:
raise NotImplementedError()
except ImportError as err:
raise NotImplementedError() from err


def requires_sparse():
try:
import sparse # noqa: F401
except ImportError:
raise NotImplementedError()
except ImportError as err:
raise NotImplementedError() from err


def randn(shape, frac_nan=None, chunks=None, seed=0):
Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ def setup(self, calendar):
self.da = xr.DataArray(data, dims="time", coords={"time": time})

def time_dayofyear(self, calendar):
self.da.time.dt.dayofyear
_ = self.da.time.dt.dayofyear

def time_year(self, calendar):
self.da.time.dt.year
_ = self.da.time.dt.year

def time_floor(self, calendar):
self.da.time.dt.floor("D")
_ = self.da.time.dt.floor("D")
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/dataset_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,8 +606,8 @@ def setup(self):

try:
import distributed
except ImportError:
raise NotImplementedError()
except ImportError as err:
raise NotImplementedError() from err

self.client = distributed.Client()
self.write = create_delayed_write()
Expand Down
4 changes: 2 additions & 2 deletions ci/min_deps_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def parse_requirements(fname) -> Iterator[tuple[str, int, int, int | None]]:

try:
version_tup = tuple(int(x) for x in version.split("."))
except ValueError:
raise ValueError("non-numerical version: " + row)
except ValueError as err:
raise ValueError("non-numerical version: " + row) from err

if len(version_tup) == 2:
yield (pkg, *version_tup, None) # type: ignore[misc]
Expand Down
54 changes: 54 additions & 0 deletions ci/requirements/all-but-numba.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: xarray-tests
channels:
- conda-forge
- nodefaults
dependencies:
# Pin a "very new numpy" (updated Sept 24, 2024)
- numpy>=2.1.1
- aiobotocore
- array-api-strict
- boto3
- bottleneck
- cartopy
- cftime
- dask-core
- dask-expr # dask raises a deprecation warning without this, breaking doctests
- distributed
- flox
- fsspec
- h5netcdf
- h5py
- hdf5
- hypothesis
- iris
- lxml # Optional dep of pydap
- matplotlib-base
- nc-time-axis
- netcdf4
# numba, sparse, numbagg, numexpr often conflict with newer versions of numpy.
# This environment helps us test xarray with the latest versions
# of numpy
# - numba
# - numbagg
# - numexpr
# - sparse
- opt_einsum
- packaging
- pandas
# - pint>=0.22
- pip
- pooch
- pre-commit
- pyarrow # pandas raises a deprecation warning without this, breaking doctests
- pydap
- pytest
- pytest-cov
- pytest-env
- pytest-xdist
- pytest-timeout
- rasterio
- scipy
- seaborn
- toolz
- typing_extensions
- zarr
2 changes: 1 addition & 1 deletion doc/ecosystem.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Geosciences
- `OGGM <https://oggm.org/>`_: Open Global Glacier Model
- `Oocgcm <https://oocgcm.readthedocs.io/>`_: Analysis of large gridded geophysical datasets
- `Open Data Cube <https://www.opendatacube.org/>`_: Analysis toolkit of continental scale Earth Observation data from satellites.
- `Pangaea: <https://pangaea.readthedocs.io/en/latest/>`_: xarray extension for gridded land surface & weather model output).
- `Pangaea <https://pangaea.readthedocs.io/en/latest/>`_: xarray extension for gridded land surface & weather model output.
- `Pangeo <https://pangeo.io>`_: A community effort for big data geoscience in the cloud.
- `PyGDX <https://pygdx.readthedocs.io/en/latest/>`_: Python 3 package for
accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom
Expand Down
2 changes: 1 addition & 1 deletion doc/howdoi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ How do I ...
* - apply a function on all data variables in a Dataset
- :py:meth:`Dataset.map`
* - write xarray objects with complex values to a netCDF file
- :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True``
- :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf"`` or :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="netCDF4", auto_complex=True``
* - make xarray objects look like other xarray objects
- :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interp_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interp_like`, :py:meth:`DataArray.broadcast_like`
* - Make sure my datasets have values at the same coordinate locations
Expand Down
2 changes: 1 addition & 1 deletion doc/user-guide/hierarchical-data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ and even the distinguishing feature of the common ancestor of any two species (t

.. ipython:: python
[node.name for node in primates.ancestors]
[node.name for node in reversed(primates.parents)]
primates.root.name
primates.find_common_ancestor(dinosaurs).name
Expand Down
23 changes: 3 additions & 20 deletions doc/user-guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -566,29 +566,12 @@ This is not CF-compliant but again facilitates roundtripping of xarray datasets.
Invalid netCDF files
~~~~~~~~~~~~~~~~~~~~

The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't
The library ``h5netcdf`` allows writing some dtypes that aren't
allowed in netCDF4 (see
`h5netcdf documentation <https://github.com/shoyer/h5netcdf#invalid-netcdf-files>`_).
`h5netcdf documentation <https://github.com/h5netcdf/h5netcdf#invalid-netcdf-files>`_).
This feature is available through :py:meth:`DataArray.to_netcdf` and
:py:meth:`Dataset.to_netcdf` when used with ``engine="h5netcdf"``
and currently raises a warning unless ``invalid_netcdf=True`` is set:

.. ipython:: python
:okwarning:
# Writing complex valued data
da = xr.DataArray([1.0 + 1.0j, 2.0 + 2.0j, 3.0 + 3.0j])
da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True)
# Reading it back
reopened = xr.open_dataarray("complex.nc", engine="h5netcdf")
reopened
.. ipython:: python
:suppress:
reopened.close()
os.remove("complex.nc")
and currently raises a warning unless ``invalid_netcdf=True`` is set.

.. warning::

Expand Down
4 changes: 2 additions & 2 deletions doc/user-guide/pandas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ DataFrames:
xr.DataArray.from_series(s)
Both the ``from_series`` and ``from_dataframe`` methods use reindexing, so they
work even if not the hierarchical index is not a full tensor product:
work even if the hierarchical index is not a full tensor product:

.. ipython:: python
Expand All @@ -126,7 +126,7 @@ Particularly after a roundtrip, the following deviations are noted:
To avoid these problems, the third-party `ntv-pandas <https://github.com/loco-philippe/ntv-pandas>`__ library offers lossless and reversible conversions between
``Dataset``/ ``DataArray`` and pandas ``DataFrame`` objects.

This solution is particularly interesting for converting any ``DataFrame`` into a ``Dataset`` (the converter find the multidimensional structure hidden by the tabular structure).
This solution is particularly interesting for converting any ``DataFrame`` into a ``Dataset`` (the converter finds the multidimensional structure hidden by the tabular structure).

The `ntv-pandas examples <https://github.com/loco-philippe/ntv-pandas/tree/main/example>`__ show how to improve the conversion for the previous ``Dataset`` example and for more complex examples.

Expand Down
14 changes: 13 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ New Features
`Tom Nicholas <https://github.com/TomNicholas>`_.
- Added zarr backends for :py:func:`open_groups` (:issue:`9430`, :pull:`9469`).
By `Eni Awowale <https://github.com/eni-awowale>`_.
- Added support for vectorized interpolation using additional interpolators
from the ``scipy.interpolate`` module (:issue:`9049`, :pull:`9526`).
By `Holly Mandel <https://github.com/hollymandel>`_.
- Implement handling of complex numbers (netcdf4/h5netcdf) and enums (h5netcdf) (:issue:`9246`, :issue:`3297`, :pull:`9509`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand All @@ -47,13 +52,20 @@ Bug fixes
- Make path-like variable names illegal when constructing a DataTree from a Dataset
(:issue:`9339`, :pull:`9378`).
By `Etienne Schalk <https://github.com/etienneschalk>`_.
- Work around `upstream pandas issue
<https://github.com/pandas-dev/pandas/issues/56996>`_ to ensure that we can
decode times encoded with small integer dtype values (e.g. ``np.int32``) in
environments with NumPy 2.0 or greater without needing to fall back to cftime
(:pull:`9518`). By `Spencer Clark <https://github.com/spencerkclark>`_.
- Fix bug when encoding times with missing values as floats in the case when
the non-missing times could in theory be encoded with integers
(:issue:`9488`, :pull:`9497`). By `Spencer Clark
<https://github.com/spencerkclark>`_.
- Fix a few bugs affecting groupby reductions with ``flox`` (:issue:`8090`, :issue:`9398`).
By `Deepak Cherian <https://github.com/dcherian>`_.

- Fix the safe_chunks validation option on the to_zarr method
(:issue:`5511`, :pull:`9559`). By `Joseph Nowak
<https://github.com/josephnowak>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
9 changes: 9 additions & 0 deletions properties/test_pandas_roundtrip.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,12 @@ def test_roundtrip_pandas_dataframe_datetime(df) -> None:
roundtripped.columns.name = "cols" # why?
pd.testing.assert_frame_equal(df, roundtripped)
xr.testing.assert_identical(dataset, roundtripped.to_xarray())


def test_roundtrip_1d_pandas_extension_array() -> None:
df = pd.DataFrame({"cat": pd.Categorical(["a", "b", "c"])})
arr = xr.Dataset.from_dataframe(df)["cat"]
roundtripped = arr.to_pandas()
assert (df["cat"] == roundtripped).all()
assert df["cat"].dtype == roundtripped.dtype
xr.testing.assert_identical(arr, roundtripped.to_xarray())
37 changes: 13 additions & 24 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,14 @@ dependencies = [
"pandas>=2.1",
]

# We don't encode minimum requirements here (though if we can write a script to
# generate the text from `min_deps_check.py`, that's welcome...). We do add
# `numba>=0.54` here because of https://github.com/astral-sh/uv/issues/7881;
# note that it's not a direct dependency of xarray.

[project.optional-dependencies]
accel = ["scipy", "bottleneck", "numbagg", "flox", "opt_einsum"]
complete = ["xarray[accel,io,parallel,viz,dev]"]
accel = ["scipy", "bottleneck", "numbagg", "numba>=0.54", "flox", "opt_einsum"]
complete = ["xarray[accel,etc,io,parallel,viz]"]
dev = [
"hypothesis",
"mypy",
Expand All @@ -40,11 +45,14 @@ dev = [
"pytest-xdist",
"pytest-timeout",
"ruff",
"sphinx",
"sphinx_autosummary_accessors",
"xarray[complete]",
]
io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr<3", "fsspec", "cftime", "pooch"]
etc = ["sparse"]
parallel = ["dask[complete]"]
viz = ["matplotlib", "seaborn", "nc-time-axis"]
viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"]

[project.urls]
Documentation = "https://docs.xarray.dev"
Expand Down Expand Up @@ -207,32 +215,13 @@ warn_return_any = true
module = ["xarray.namedarray.*", "xarray.tests.test_namedarray"]

[tool.pyright]
# include = ["src"]
# exclude = ["**/node_modules",
# "**/__pycache__",
# "src/experimental",
# "src/typestubs"
# ]
# ignore = ["src/oldstuff"]
defineConstant = {DEBUG = true}
# stubPath = "src/stubs"
# venv = "env367"

# Enabling this means that developers who have disabled the warning locally —
# because not all dependencies are installable — are overridden
# reportMissingImports = true
reportMissingTypeStubs = false

# pythonVersion = "3.6"
# pythonPlatform = "Linux"

# executionEnvironments = [
# { root = "src/web", pythonVersion = "3.5", pythonPlatform = "Windows", extraPaths = [ "src/service_libs" ] },
# { root = "src/sdk", pythonVersion = "3.0", extraPaths = [ "src/backend" ] },
# { root = "src/tests", extraPaths = ["src/tests/e2e", "src/sdk" ]},
# { root = "src" }
# ]

[tool.ruff]
extend-exclude = [
"doc",
Expand All @@ -246,13 +235,14 @@ extend-exclude = [
extend-safe-fixes = [
"TID252", # absolute imports
]
extend-ignore = [
ignore = [
"E402",
"E501",
"E731",
"UP007",
]
extend-select = [
"B", # flake8-bugbear
"F", # Pyflakes
"E", # Pycodestyle
"W",
Expand Down Expand Up @@ -322,7 +312,6 @@ filterwarnings = [
"default:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning",
"default:Duplicate dimension names present:UserWarning:xarray.namedarray.core",
"default:::xarray.tests.test_strategies", # TODO: remove once we know how to deal with a changed signature in protocols
"ignore:__array__ implementation doesn't accept a copy keyword, so passing copy=False failed.",
]

log_cli_level = "INFO"
Expand Down
Loading

0 comments on commit 3c5d35d

Please sign in to comment.