diff --git a/.binder/environment.yml b/.binder/environment.yml new file mode 100644 index 00000000000..13b6b99e6fc --- /dev/null +++ b/.binder/environment.yml @@ -0,0 +1,39 @@ +name: xarray-examples +channels: + - conda-forge +dependencies: + - python=3.7 + - boto3 + - bottleneck + - cartopy + - cdms2 + - cfgrib + - cftime + - coveralls + - dask + - distributed + - dask_labextension + - h5netcdf + - h5py + - hdf5 + - iris + - lxml # Optional dep of pydap + - matplotlib + - nc-time-axis + - netcdf4 + - numba + - numpy + - pandas + - pint + - pip + - pydap + - pynio + - rasterio + - scipy + - seaborn + - sparse + - toolz + - xarray + - zarr + - pip: + - numbagg diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000000..30c1e18f33c --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: numfocus +custom: http://numfocus.org/donate-to-xarray diff --git a/HOW_TO_RELEASE b/HOW_TO_RELEASE.md similarity index 74% rename from HOW_TO_RELEASE rename to HOW_TO_RELEASE.md index 5bf9bf38ded..cdeb0e19a3e 100644 --- a/HOW_TO_RELEASE +++ b/HOW_TO_RELEASE.md @@ -1,9 +1,11 @@ -How to issue an xarray release in 15 easy steps +How to issue an xarray release in 14 easy steps Time required: about an hour. 1. Ensure your master branch is synced to upstream: - git pull upstream master + ``` + git pull upstream master + ``` 2. Look over whats-new.rst and the docs. Make sure "What's New" is complete (check the date!) and consider adding a brief summary note describing the release at the top. @@ -12,37 +14,53 @@ Time required: about an hour. - Function/method references should include links to the API docs. - Sometimes notes get added in the wrong section of whats-new, typically due to a bad merge. Check for these before a release by using git diff, - e.g., ``git diff v0.X.Y whats-new.rst`` where 0.X.Y is the previous + e.g., `git diff v0.X.Y whats-new.rst` where 0.X.Y is the previous release. 3. 
If you have any doubts, run the full test suite one final time! - py.test + ``` + pytest + ``` 4. On the master branch, commit the release in git: + ``` git commit -a -m 'Release v0.X.Y' + ``` 5. Tag the release: + ``` git tag -a v0.X.Y -m 'v0.X.Y' + ``` 6. Build source and binary wheels for pypi: + ``` git clean -xdf # this deletes all uncommited changes! python setup.py bdist_wheel sdist + ``` 7. Use twine to register and upload the release on pypi. Be careful, you can't take this back! + ``` twine upload dist/xarray-0.X.Y* + ``` You will need to be listed as a package owner at https://pypi.python.org/pypi/xarray for this to work. 8. Push your changes to master: + ``` git push upstream master git push upstream --tags + ``` 9. Update the stable branch (used by ReadTheDocs) and switch back to master: + ``` git checkout stable git rebase master git push upstream stable git checkout master - It's OK to force push to 'stable' if necessary. - We also update the stable branch with `git cherrypick` for documentation - only fixes that apply the current released version. + ``` + It's OK to force push to 'stable' if necessary. (We also update the stable + branch with `git cherrypick` for documentation only fixes that apply the + current released version.) 10. Add a section for the next release (v.X.(Y+1)) to doc/whats-new.rst. 11. Commit your changes and push to master again: - git commit -a -m 'Revert to dev version' + ``` + git commit -a -m 'New whatsnew section' git push upstream master + ``` You're done pushing to master! 12. Issue the release on GitHub. Click on "Draft a new release" at https://github.com/pydata/xarray/releases. Type in the version number, but @@ -53,11 +71,22 @@ Time required: about an hour. 14. Issue the release announcement! For bug fix releases, I usually only email xarray@googlegroups.com. 
For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com, xarray@googlegroups.com, - numpy-discussion@scipy.org, scipy-user@scipy.org, - pyaos@lists.johnny-lin.com + - pydata@googlegroups.com + - xarray@googlegroups.com + - numpy-discussion@scipy.org + - scipy-user@scipy.org + - pyaos@lists.johnny-lin.com + Google search will turn up examples of prior release announcements (look for "ANN xarray"). + You can get a list of contributors with: + ``` + git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format="%aN" | sort -u + ``` + or by replacing `v0.X.Y` with the _previous_ release in: + ``` + git log v0.X.Y.. --format="%aN" | sort -u + ``` Note on version numbering: diff --git a/ci/azure/install.yml b/ci/azure/install.yml index fee886ba804..e4f3a0b9e16 100644 --- a/ci/azure/install.yml +++ b/ci/azure/install.yml @@ -16,16 +16,18 @@ steps: --pre \ --upgrade \ matplotlib \ + numpy \ pandas \ scipy - # numpy \ # FIXME https://github.com/pydata/xarray/issues/3409 pip install \ --no-deps \ --upgrade \ git+https://github.com/dask/dask \ git+https://github.com/dask/distributed \ git+https://github.com/zarr-developers/zarr \ - git+https://github.com/Unidata/cftime + git+https://github.com/Unidata/cftime \ + git+https://github.com/mapbox/rasterio \ + git+https://github.com/pydata/bottleneck condition: eq(variables['UPSTREAM_DEV'], 'true') displayName: Install upstream dev dependencies diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index e521ee4a4b8..97488e7f581 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -6,9 +6,13 @@ dependencies: - python=3.7 - bottleneck - cartopy + - cfgrib - h5netcdf + - ipykernel - ipython - iris + - jupyter_client + - nbsphinx - netcdf4 - numpy - numpydoc @@ -16,6 +20,6 @@ dependencies: - rasterio - seaborn - sphinx - - sphinx-gallery - sphinx_rtd_theme + - xarray - zarr diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml 
index 10fe69253e8..820160b19cc 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -25,7 +25,7 @@ dependencies: - nc-time-axis - netcdf4 - numba - - numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409 + - numpy - pandas - pint - pip diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index 827c664a222..4a7aaf7d32b 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -25,7 +25,7 @@ dependencies: - nc-time-axis - netcdf4 - numba - - numpy<1.18 # FIXME https://github.com/pydata/xarray/issues/3409 + - numpy - pandas - pint - pip diff --git a/doc/README.rst b/doc/README.rst index af7bc96092c..0579f85d85f 100644 --- a/doc/README.rst +++ b/doc/README.rst @@ -1,3 +1,5 @@ +:orphan: + xarray ------ diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 8f82b30a442..027c732697f 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -2,6 +2,8 @@ .. This extra page is a work around for sphinx not having any support for .. hiding an autosummary table. +:orphan: + .. currentmodule:: xarray .. 
autosummary:: @@ -30,9 +32,11 @@ core.groupby.DatasetGroupBy.first core.groupby.DatasetGroupBy.last core.groupby.DatasetGroupBy.fillna + core.groupby.DatasetGroupBy.quantile core.groupby.DatasetGroupBy.where Dataset.argsort + Dataset.astype Dataset.clip Dataset.conj Dataset.conjugate @@ -71,6 +75,7 @@ core.groupby.DataArrayGroupBy.first core.groupby.DataArrayGroupBy.last core.groupby.DataArrayGroupBy.fillna + core.groupby.DataArrayGroupBy.quantile core.groupby.DataArrayGroupBy.where DataArray.argsort diff --git a/doc/api.rst b/doc/api.rst index d2309f28226..a1fae3deb03 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -675,3 +675,12 @@ arguments for the ``from_store`` and ``dump_to_store`` Dataset methods: backends.FileManager backends.CachingFileManager backends.DummyFileManager + +Deprecated / Pending Deprecation +================================ + + Dataset.drop + DataArray.drop + Dataset.apply + core.groupby.DataArrayGroupBy.apply + core.groupby.DatasetGroupBy.apply diff --git a/doc/combining.rst b/doc/combining.rst index 4593d410d23..05b7f2efc50 100644 --- a/doc/combining.rst +++ b/doc/combining.rst @@ -255,11 +255,11 @@ Combining along multiple dimensions ``combine_nested``. For combining many objects along multiple dimensions xarray provides -:py:func:`~xarray.combine_nested`` and :py:func:`~xarray.combine_by_coords`. These +:py:func:`~xarray.combine_nested` and :py:func:`~xarray.combine_by_coords`. These functions use a combination of ``concat`` and ``merge`` across different variables to combine many objects into one. -:py:func:`~xarray.combine_nested`` requires specifying the order in which the +:py:func:`~xarray.combine_nested` requires specifying the order in which the objects should be combined, while :py:func:`~xarray.combine_by_coords` attempts to infer this ordering automatically from the coordinates in the data. @@ -310,4 +310,4 @@ These functions can be used by :py:func:`~xarray.open_mfdataset` to open many files as one dataset. 
The particular function used is specified by setting the argument ``'combine'`` to ``'by_coords'`` or ``'nested'``. This is useful for situations where your data is split across many files in multiple locations, -which have some known relationship between one another. \ No newline at end of file +which have some known relationship between one another. diff --git a/doc/computation.rst b/doc/computation.rst index 663c546be20..1ac30f55ee7 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -95,6 +95,9 @@ for filling missing values via 1D interpolation. Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification of which values to use as the index in the interpolation. +xarray also provides the ``max_gap`` keyword argument to limit the interpolation to +data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` +for more. Aggregation =========== @@ -322,8 +325,8 @@ Broadcasting by dimension name ``DataArray`` objects are automatically align themselves ("broadcasting" in the numpy parlance) by dimension name instead of axis order. With xarray, you do not need to transpose arrays or insert dimensions of length 1 to get array -operations to work, as commonly done in numpy with :py:func:`np.reshape` or -:py:const:`np.newaxis`. +operations to work, as commonly done in numpy with :py:func:`numpy.reshape` or +:py:data:`numpy.newaxis`. This is best illustrated by a few examples. Consider two one-dimensional arrays with different sizes aligned along different dimensions: @@ -563,7 +566,7 @@ to set ``axis=-1``. As an example, here is how we would wrap Because ``apply_ufunc`` follows a standard convention for ufuncs, it plays nicely with tools for building vectorized functions, like -:func:`numpy.broadcast_arrays` and :func:`numpy.vectorize`. For high performance +:py:func:`numpy.broadcast_arrays` and :py:class:`numpy.vectorize`. 
For high performance needs, consider using Numba's :doc:`vectorize and guvectorize `. In addition to wrapping functions, ``apply_ufunc`` can automatically parallelize diff --git a/doc/conf.py b/doc/conf.py index 7c1557a1e66..11abda6bb63 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,10 +15,16 @@ import datetime import os +import pathlib import subprocess import sys from contextlib import suppress +# make sure the source version is preferred (#3567) +root = pathlib.Path(__file__).absolute().parent.parent +os.environ["PYTHONPATH"] = str(root) +sys.path.insert(0, str(root)) + import xarray allowed_failures = set() @@ -76,7 +82,7 @@ "numpydoc", "IPython.sphinxext.ipython_directive", "IPython.sphinxext.ipython_console_highlighting", - "sphinx_gallery.gen_gallery", + "nbsphinx", ] extlinks = { @@ -84,12 +90,16 @@ "pull": ("https://github.com/pydata/xarray/pull/%s", "PR"), } -sphinx_gallery_conf = { - "examples_dirs": "gallery", - "gallery_dirs": "auto_gallery", - "backreferences_dir": False, - "expected_failing_examples": list(allowed_failures), -} +nbsphinx_timeout = 600 +nbsphinx_execute = "always" +nbsphinx_prolog = """ +{% set docname = env.doc2path(env.docname, base=None) %} + +You can run this notebook in a `live session `_ |Binder| or view it `on Github `_. + +.. |Binder| image:: https://mybinder.org/badge.svg + :target: https://mybinder.org/v2/gh/pydata/xarray/master?urlpath=lab/tree/doc/{{ docname }} +""" autosummary_generate = True autodoc_typehints = "none" @@ -137,7 +147,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ["_build"] +exclude_patterns = ["_build", "**.ipynb_checkpoints"] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -340,9 +350,12 @@ # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "iris": ("http://scitools.org.uk/iris/docs/latest/", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "numba": ("https://numba.pydata.org/numba-doc/latest/", None), - "matplotlib": ("https://matplotlib.org/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), + "iris": ("https://scitools.org.uk/iris/docs/latest", None), + "numpy": ("https://docs.scipy.org/doc/numpy", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), + "numba": ("https://numba.pydata.org/numba-doc/latest", None), + "matplotlib": ("https://matplotlib.org", None), + "dask": ("https://docs.dask.org/en/latest", None), + "cftime": ("https://unidata.github.io/cftime", None), } diff --git a/doc/contributing.rst b/doc/contributing.rst index 028ec47e014..3cd0b3e8868 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -151,7 +151,9 @@ We'll now kick off a two-step process: .. code-block:: none # Create and activate the build environment - conda env create -f ci/requirements/py36.yml + # This is for Linux and MacOS. On Windows, use py37-windows.yml instead. + conda env create -f ci/requirements/py37.yml + conda activate xarray-tests # or with older versions of Anaconda: diff --git a/doc/dask.rst b/doc/dask.rst index 11f378aa376..ed99ffaa896 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -285,7 +285,7 @@ automate `embarrassingly parallel `__ "map" type operations where a function written for processing NumPy arrays should be repeatedly applied to xarray objects containing Dask arrays. It works similarly to -:py:func:`dask.array.map_blocks` and :py:func:`dask.array.atop`, but without +:py:func:`dask.array.map_blocks` and :py:func:`dask.array.blockwise`, but without requiring an intermediate layer of abstraction. 
For the best performance when using Dask's multi-threaded scheduler, wrap a diff --git a/doc/data-structures.rst b/doc/data-structures.rst index 93cdc7e9765..504d820a234 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -45,7 +45,7 @@ Creating a DataArray The :py:class:`~xarray.DataArray` constructor takes: - ``data``: a multi-dimensional array of values (e.g., a numpy ndarray, - :py:class:`~pandas.Series`, :py:class:`~pandas.DataFrame` or :py:class:`~pandas.Panel`) + :py:class:`~pandas.Series`, :py:class:`~pandas.DataFrame` or ``pandas.Panel``) - ``coords``: a list or dictionary of coordinates. If a list, it should be a list of tuples where the first element is the dimension name and the second element is the corresponding coordinate array_like object. @@ -125,7 +125,7 @@ As a dictionary with coords across multiple dimensions: If you create a ``DataArray`` by supplying a pandas :py:class:`~pandas.Series`, :py:class:`~pandas.DataFrame` or -:py:class:`~pandas.Panel`, any non-specified arguments in the +``pandas.Panel``, any non-specified arguments in the ``DataArray`` constructor will be filled in from the pandas object: .. ipython:: python @@ -301,7 +301,7 @@ names, and its data is aligned to any existing dimensions. You can also create an dataset from: -- A :py:class:`pandas.DataFrame` or :py:class:`pandas.Panel` along its columns and items +- A :py:class:`pandas.DataFrame` or ``pandas.Panel`` along its columns and items respectively, by passing it into the :py:class:`~xarray.Dataset` directly - A :py:class:`pandas.DataFrame` with :py:meth:`Dataset.from_dataframe `, which will additionally handle MultiIndexes See :ref:`pandas` @@ -485,14 +485,14 @@ in xarray: :py:class:`pandas.Index` internally to store their values. - **non-dimension coordinates** are variables that contain coordinate - data, but are not a dimension coordinate. 
They can be multidimensional - (see :ref:`examples.multidim`), and there is no relationship between the - name of a non-dimension coordinate and the name(s) of its dimension(s). - Non-dimension coordinates can be useful for indexing or plotting; otherwise, - xarray does not make any direct use of the values associated with them. - They are not used for alignment or automatic indexing, nor are they required - to match when doing arithmetic - (see :ref:`coordinates math`). + data, but are not a dimension coordinate. They can be multidimensional (see + :ref:`/examples/multidimensional-coords.ipynb`), and there is no + relationship between the name of a non-dimension coordinate and the + name(s) of its dimension(s). Non-dimension coordinates can be + useful for indexing or plotting; otherwise, xarray does not make any + direct use of the values associated with them. They are not used + for alignment or automatic indexing, nor are they required to match + when doing arithmetic (see :ref:`coordinates math`). .. note:: diff --git a/doc/examples.rst b/doc/examples.rst index 4d726d494e8..ce56102cc9d 100644 --- a/doc/examples.rst +++ b/doc/examples.rst @@ -7,4 +7,6 @@ Examples examples/weather-data examples/monthly-means examples/multidimensional-coords - auto_gallery/index + examples/visualization_gallery + examples/ROMS_ocean_model + examples/ERA5-GRIB-example diff --git a/doc/examples/ERA5-GRIB-example.ipynb b/doc/examples/ERA5-GRIB-example.ipynb new file mode 100644 index 00000000000..b82a07a64e6 --- /dev/null +++ b/doc/examples/ERA5-GRIB-example.ipynb @@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GRIB Data Example " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "GRIB format is commonly used to disseminate atmospheric model data. With Xarray and the cfgrib engine, GRIB data can easily be analyzed and visualized." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To read GRIB data, you can use `xarray.load_dataset`. The only extra code you need is to specify the engine as `cfgrib`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.tutorial.load_dataset('era5-2mt-2019-03-uk.grib', engine='cfgrib')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's create a simple plot of 2-m air temperature in degrees Celsius:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds - 273.15\n", + "ds.t2m[0].plot(cmap=plt.cm.coolwarm)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With CartoPy, we can create a more detailed plot, using built-in shapefiles to help provide geographic context:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cartopy.crs as ccrs\n", + "import cartopy\n", + "fig = plt.figure(figsize=(10,10))\n", + "ax = plt.axes(projection=ccrs.Robinson())\n", + "ax.coastlines(resolution='10m')\n", + "plot = ds.t2m[0].plot(cmap=plt.cm.coolwarm, transform=ccrs.PlateCarree(), cbar_kwargs={'shrink':0.6})\n", + "plt.title('ERA5 - 2m temperature British Isles March 2019')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can also pull out a time series for a given location easily:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds.t2m.sel(longitude=0,latitude=51.5).plot()\n", + "plt.title('ERA5 - London 2m temperature March 2019')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/examples/ROMS_ocean_model.ipynb b/doc/examples/ROMS_ocean_model.ipynb new file mode 100644 index 00000000000..74536bbe28f --- /dev/null +++ b/doc/examples/ROMS_ocean_model.ipynb @@ -0,0 +1,226 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ROMS Ocean Model Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Regional Ocean Modeling System ([ROMS](http://myroms.org)) is an open source hydrodynamic model that is used for simulating currents and water properties in coastal and estuarine regions. ROMS is one of a few standard ocean models, and it has an active user community.\n", + "\n", + "ROMS uses a regular C-Grid in the horizontal, similar to other structured grid ocean and atmospheric models, and a stretched vertical coordinate (see [the ROMS documentation](https://www.myroms.org/wiki/Vertical_S-coordinate) for more details). Both of these require special treatment when using `xarray` to analyze ROMS ocean model output. This example notebook shows how to create a lazily evaluated vertical coordinate, and make some basic plots. The `xgcm` package is required to do analysis that is aware of the horizontal C-Grid." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import cartopy.crs as ccrs\n", + "import cartopy.feature as cfeature\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load a sample ROMS file. 
This is a subset of a full model available at \n", + "\n", + " http://barataria.tamu.edu/thredds/catalog.html?dataset=txla_hindcast_agg\n", + " \n", + "The subsetting was done using the following command on one of the output files:\n", + "\n", + " #open dataset\n", + " ds = xr.open_dataset('/d2/shared/TXLA_ROMS/output_20yr_obc/2001/ocean_his_0015.nc')\n", + " \n", + " # Turn on chunking to activate dask and parallelize read/write.\n", + " ds = ds.chunk({'ocean_time': 1})\n", + " \n", + " # Pick out some of the variables that will be included as coordinates\n", + " ds = ds.set_coords(['Cs_r', 'Cs_w', 'hc', 'h', 'Vtransform'])\n", + " \n", + " # Select a subset of variables. Salt will be visualized, zeta is used to \n", + " # calculate the vertical coordinate\n", + " variables = ['salt', 'zeta']\n", + " ds[variables].isel(ocean_time=slice(47, None, 7*24), \n", + " xi_rho=slice(300, None)).to_netcdf('ROMS_example.nc', mode='w')\n", + "\n", + "So, the `ROMS_example.nc` file contains a subset of the grid, one 3D variable, and two time steps." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load in ROMS dataset as an xarray object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load in the file\n", + "ds = xr.tutorial.open_dataset('ROMS_example.nc', chunks={'ocean_time': 1})\n", + "\n", + "# This is a way to turn on chunking and lazy evaluation. Opening with mfdataset, or \n", + "# setting the chunking in the open_dataset would also achieve this.\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add a lazily calculated vertical coordinate\n", + "\n", + "Write equations to calculate the vertical coordinate. These will only be evaluated when data is requested. 
Information about the ROMS vertical coordinate can be found [here](https://www.myroms.org/wiki/Vertical_S-coordinate).\n", + "\n", + "In short, for `Vtransform==2` as used in this example, \n", + "\n", + "$Z_0 = (h_c \\, S + h \\,C) / (h_c + h)$\n", + "\n", + "$z = Z_0 (\\zeta + h) + \\zeta$\n", + "\n", + "where the variables are defined as in the link above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if ds.Vtransform == 1:\n", + " Zo_rho = ds.hc * (ds.s_rho - ds.Cs_r) + ds.Cs_r * ds.h\n", + " z_rho = Zo_rho + ds.zeta * (1 + Zo_rho/ds.h)\n", + "elif ds.Vtransform == 2:\n", + " Zo_rho = (ds.hc * ds.s_rho + ds.Cs_r * ds.h) / (ds.hc + ds.h)\n", + " z_rho = ds.zeta + (ds.zeta + ds.h) * Zo_rho\n", + "\n", + "ds.coords['z_rho'] = z_rho.transpose() # needing transpose seems to be an xarray bug\n", + "ds.salt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A naive vertical slice\n", + "\n", + "Creating a slice using the s-coordinate as the vertical dimension is typically not very informative." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "ds.salt.isel(xi_rho=50, ocean_time=0).plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can feed coordinate information to the plot method to give a more informative cross-section that uses the depths. Note that we did not need to slice the depth or longitude information separately; this was done automatically as the variable was sliced." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "section = ds.salt.isel(xi_rho=50, eta_rho=slice(0, 167), ocean_time=0)\n", + "section.plot(x='lon_rho', y='z_rho', figsize=(15, 6), clim=(25, 35))\n", + "plt.ylim([-100, 1]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A plan view\n", + "\n", + "Now make a naive plan view, without any projection information, just using lon/lat as x/y. This looks OK, but will appear compressed because lon and lat do not have an aspect constrained by the projection." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds.salt.isel(s_rho=-1, ocean_time=0).plot(x='lon_rho', y='lat_rho')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And let's use a projection to make it nicer, and add a coast." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "proj = ccrs.LambertConformal(central_longitude=-92, central_latitude=29)\n", + "fig = plt.figure(figsize=(15, 5))\n", + "ax = plt.axes(projection=proj)\n", + "ds.salt.isel(s_rho=-1, ocean_time=0).plot(x='lon_rho', y='lat_rho', \n", + " transform=ccrs.PlateCarree())\n", + "\n", + "coast_10m = cfeature.NaturalEarthFeature('physical', 'land', '10m',\n", + " edgecolor='k', facecolor='0.8')\n", + "ax.add_feature(coast_10m)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} 
diff --git a/doc/examples/monthly-means.ipynb b/doc/examples/monthly-means.ipynb new file mode 100644 index 00000000000..fad40e019de --- /dev/null +++ b/doc/examples/monthly-means.ipynb @@ -0,0 +1,325 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculating Seasonal Averages from Timeseries of Monthly Means \n", + "=====\n", + "\n", + "Author: [Joe Hamman](https://github.com/jhamman/)\n", + "\n", + "The data used for this example can be found in the [xarray-data](https://github.com/pydata/xarray-data) repository. You may need to change the path to `rasm.nc` below.\n", + "\n", + "Suppose we have a netCDF or `xarray.Dataset` of monthly mean data and we want to calculate the seasonal average. To do this properly, we need to calculate the weighted average considering that each month has a different number of days." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:51:35.958210Z", + "start_time": "2018-11-28T20:51:35.936966Z" + } + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import numpy as np\n", + "import pandas as pd\n", + "import xarray as xr\n", + "from netCDF4 import num2date\n", + "import matplotlib.pyplot as plt " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Some calendar information so we can support any netCDF calendar. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:51:35.991620Z", + "start_time": "2018-11-28T20:51:35.960336Z" + } + }, + "outputs": [], + "source": [ + "dpm = {'noleap': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", + " '365_day': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", + " 'standard': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", + " 'gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", + " 'proleptic_gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", + " 'all_leap': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", + " '366_day': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", + " '360_day': [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]} " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### A few calendar functions to determine the number of days in each month\n", + "If you were just using the standard calendar, it would be easy to use the `calendar.monthrange` function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:51:36.015151Z", + "start_time": "2018-11-28T20:51:35.994079Z" + } + }, + "outputs": [], + "source": [ + "def leap_year(year, calendar='standard'):\n", + " \"\"\"Determine if year is a leap year\"\"\"\n", + " leap = False\n", + " if ((calendar in ['standard', 'gregorian',\n", + " 'proleptic_gregorian', 'julian']) and\n", + " (year % 4 == 0)):\n", + " leap = True\n", + " if ((calendar == 'proleptic_gregorian') and\n", + " (year % 100 == 0) and\n", + " (year % 400 != 0)):\n", + " leap = False\n", + " elif ((calendar in ['standard', 'gregorian']) and\n", + " (year % 100 == 0) and (year % 400 != 0) and\n", + " (year < 1583)):\n", + " leap = False\n", + " return leap\n", + "\n", + "def get_dpm(time, calendar='standard'):\n", + " \"\"\"\n", + " return a array of days per month corresponding to the months provided in `months`\n", + " \"\"\"\n", + " month_length = np.zeros(len(time), dtype=np.int)\n", + " \n", + " cal_days = dpm[calendar]\n", + " \n", + " for i, (month, year) in enumerate(zip(time.month, time.year)):\n", + " month_length[i] = cal_days[month]\n", + " if leap_year(year, calendar=calendar) and month == 2:\n", + " month_length[i] += 1\n", + " return month_length" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Open the `Dataset`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:51:36.072316Z", + "start_time": "2018-11-28T20:51:36.016594Z" + } + }, + "outputs": [], + "source": [ + "ds = xr.tutorial.open_dataset('rasm').load()\n", + "print(ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Now for the heavy lifting:\n", + "We first have to come up with the weights,\n", + "- calculate the month lengths for each monthly data record\n", + "- calculate weights using `groupby('time.season')`\n", + 
"\n", + "Finally, we just need to multiply our weights by the `Dataset` and sum along the time dimension. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:51:36.132413Z", + "start_time": "2018-11-28T20:51:36.073708Z" + } + }, + "outputs": [], + "source": [ + "# Make a DataArray with the number of days in each month, size = len(time)\n", + "month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar='noleap'),\n", + " coords=[ds.time], name='month_length')\n", + "\n", + "# Calculate the weights by grouping by 'time.season'.\n", + "# Conversion to float type ('astype(float)') only necessary for Python 2.x\n", + "weights = month_length.groupby('time.season') / month_length.astype(float).groupby('time.season').sum()\n", + "\n", + "# Test that the sum of the weights for each season is 1.0\n", + "np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4))\n", + "\n", + "# Calculate the weighted average\n", + "ds_weighted = (ds * weights).groupby('time.season').sum(dim='time')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:51:36.152913Z", + "start_time": "2018-11-28T20:51:36.133997Z" + } + }, + "outputs": [], + "source": [ + "print(ds_weighted)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:51:36.190765Z", + "start_time": "2018-11-28T20:51:36.154416Z" + } + }, + "outputs": [], + "source": [ + "# only used for comparisons\n", + "ds_unweighted = ds.groupby('time.season').mean('time')\n", + "ds_diff = ds_weighted - ds_unweighted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:51:40.264871Z", + "start_time": "2018-11-28T20:51:36.192467Z" + } + }, + "outputs": [], + "source": [ + "# Quick plot to show the results\n", + "notnull = 
pd.notnull(ds_unweighted['Tair'][0])\n", + "\n", + "fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(14,12))\n", + "for i, season in enumerate(('DJF', 'MAM', 'JJA', 'SON')):\n", + " ds_weighted['Tair'].sel(season=season).where(notnull).plot.pcolormesh(\n", + " ax=axes[i, 0], vmin=-30, vmax=30, cmap='Spectral_r', \n", + " add_colorbar=True, extend='both')\n", + " \n", + " ds_unweighted['Tair'].sel(season=season).where(notnull).plot.pcolormesh(\n", + " ax=axes[i, 1], vmin=-30, vmax=30, cmap='Spectral_r', \n", + " add_colorbar=True, extend='both')\n", + "\n", + " ds_diff['Tair'].sel(season=season).where(notnull).plot.pcolormesh(\n", + " ax=axes[i, 2], vmin=-0.1, vmax=.1, cmap='RdBu_r',\n", + " add_colorbar=True, extend='both')\n", + "\n", + " axes[i, 0].set_ylabel(season)\n", + " axes[i, 1].set_ylabel('')\n", + " axes[i, 2].set_ylabel('')\n", + "\n", + "for ax in axes.flat:\n", + " ax.axes.get_xaxis().set_ticklabels([])\n", + " ax.axes.get_yaxis().set_ticklabels([])\n", + " ax.axes.axis('tight')\n", + " ax.set_xlabel('')\n", + " \n", + "axes[0, 0].set_title('Weighted by DPM')\n", + "axes[0, 1].set_title('Equal Weighting')\n", + "axes[0, 2].set_title('Difference')\n", + " \n", + "plt.tight_layout()\n", + "\n", + "fig.suptitle('Seasonal Surface Air Temperature', fontsize=16, y=1.02)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:51:40.284898Z", + "start_time": "2018-11-28T20:51:40.266406Z" + } + }, + "outputs": [], + "source": [ + "# Wrap it into a simple function\n", + "def season_mean(ds, calendar='standard'):\n", + " # Make a DataArray of season/year groups\n", + " year_season = xr.DataArray(ds.time.to_index().to_period(freq='Q-NOV').to_timestamp(how='E'),\n", + " coords=[ds.time], name='year_season')\n", + "\n", + " # Make a DataArray with the number of days in each month, size = len(time)\n", + " month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar=calendar),\n", + " 
coords=[ds.time], name='month_length')\n", + " # Calculate the weights by grouping by 'time.season'\n", + " weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()\n", + "\n", + " # Test that the sum of the weights for each season is 1.0\n", + " np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4))\n", + "\n", + " # Calculate the weighted average\n", + " return (ds * weights).groupby('time.season').sum(dim='time')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/examples/monthly-means.rst b/doc/examples/monthly-means.rst deleted file mode 100644 index 7d620f1bca3..00000000000 --- a/doc/examples/monthly-means.rst +++ /dev/null @@ -1,244 +0,0 @@ -.. _monthly means example: - -Calculating Seasonal Averages from Timeseries of Monthly Means -============================================================== - -Author: `Joe Hamman `__ - -The data used for this example can be found in the -`xarray-data `__ repository. - -Suppose we have a netCDF or ``xarray.Dataset`` of monthly mean data and -we want to calculate the seasonal average. 
To do this properly, we need -to calculate the weighted average considering that each month has a -different number of days. - -.. code:: python - - %matplotlib inline - import numpy as np - import pandas as pd - import xarray as xr - from netCDF4 import num2date - import matplotlib.pyplot as plt - - print("numpy version : ", np.__version__) - print("pandas version : ", pd.__version__) - print("xarray version : ", xr.__version__) - - -.. parsed-literal:: - - numpy version : 1.11.1 - pandas version : 0.18.1 - xarray version : 0.8.2 - - -Some calendar information so we can support any netCDF calendar. -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code:: python - - dpm = {'noleap': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], - '365_day': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], - 'standard': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], - 'gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], - 'proleptic_gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], - 'all_leap': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], - '366_day': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], - '360_day': [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]} - -A few calendar functions to determine the number of days in each month -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If you were just using the standard calendar, it would be easy to use -the ``calendar.month_range`` function. - -.. 
code:: python - - def leap_year(year, calendar='standard'): - """Determine if year is a leap year""" - leap = False - if ((calendar in ['standard', 'gregorian', - 'proleptic_gregorian', 'julian']) and - (year % 4 == 0)): - leap = True - if ((calendar == 'proleptic_gregorian') and - (year % 100 == 0) and - (year % 400 != 0)): - leap = False - elif ((calendar in ['standard', 'gregorian']) and - (year % 100 == 0) and (year % 400 != 0) and - (year < 1583)): - leap = False - return leap - - def get_dpm(time, calendar='standard'): - """ - return a array of days per month corresponding to the months provided in `months` - """ - month_length = np.zeros(len(time), dtype=np.int) - - cal_days = dpm[calendar] - - for i, (month, year) in enumerate(zip(time.month, time.year)): - month_length[i] = cal_days[month] - if leap_year(year, calendar=calendar) and month == 2: - month_length[i] += 1 - return month_length - -Open the ``Dataset`` -^^^^^^^^^^^^^^^^^^^^ - -.. code:: python - - ds = xr.tutorial.load_dataset('rasm') - print(ds) - - -.. parsed-literal:: - - - Dimensions: (time: 36, x: 275, y: 205) - Coordinates: - * time (time) datetime64[ns] 1980-09-16T12:00:00 1980-10-17 ... - * y (y) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ... - * x (x) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ... - Data variables: - Tair (time, y, x) float64 nan nan nan nan nan nan nan nan nan nan ... - yc (y, x) float64 16.53 16.78 17.02 17.27 17.51 17.76 18.0 18.25 ... - xc (y, x) float64 189.2 189.4 189.6 189.7 189.9 190.1 190.2 190.4 ... - Attributes: - title: /workspace/jhamman/processed/R1002RBRxaaa01a/lnd/temp/R1002RBRxaaa01a.vic.ha.1979-09-01.nc - institution: U.W. - source: RACM R1002RBRxaaa01a - output_frequency: daily - output_mode: averaged - convention: CF-1.4 - references: Based on the initial model of Liang et al., 1994, JGR, 99, 14,415- 14,429. - comment: Output from the Variable Infiltration Capacity (VIC) model. 
- nco_openmp_thread_number: 1 - NCO: 4.3.7 - history: history deleted for brevity - - -Now for the heavy lifting: -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We first have to come up with the weights, - calculate the month lengths -for each monthly data record - calculate weights using -``groupby('time.season')`` - -Finally, we just need to multiply our weights by the ``Dataset`` and sum -along the time dimension. - -.. code:: python - - # Make a DataArray with the number of days in each month, size = len(time) - month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar='noleap'), - coords=[ds.time], name='month_length') - - # Calculate the weights by grouping by 'time.season'. - # Conversion to float type ('astype(float)') only necessary for Python 2.x - weights = month_length.groupby('time.season') / month_length.astype(float).groupby('time.season').sum() - - # Test that the sum of the weights for each season is 1.0 - np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4)) - - # Calculate the weighted average - ds_weighted = (ds * weights).groupby('time.season').sum(dim='time') - -.. code:: python - - print(ds_weighted) - - -.. parsed-literal:: - - - Dimensions: (season: 4, x: 275, y: 205) - Coordinates: - * y (y) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ... - * x (x) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ... - * season (season) object 'DJF' 'JJA' 'MAM' 'SON' - Data variables: - Tair (season, y, x) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... - xc (season, y, x) float64 189.2 189.4 189.6 189.7 189.9 190.1 ... - yc (season, y, x) float64 16.53 16.78 17.02 17.27 17.51 17.76 18.0 ... - - -.. code:: python - - # only used for comparisons - ds_unweighted = ds.groupby('time.season').mean('time') - ds_diff = ds_weighted - ds_unweighted - -.. 
code:: python - - # Quick plot to show the results - notnull = pd.notnull(ds_unweighted['Tair'][0]) - - fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(14,12)) - for i, season in enumerate(('DJF', 'MAM', 'JJA', 'SON')): - ds_weighted['Tair'].sel(season=season).where(notnull).plot.pcolormesh( - ax=axes[i, 0], vmin=-30, vmax=30, cmap='Spectral_r', - add_colorbar=True, extend='both') - - ds_unweighted['Tair'].sel(season=season).where(notnull).plot.pcolormesh( - ax=axes[i, 1], vmin=-30, vmax=30, cmap='Spectral_r', - add_colorbar=True, extend='both') - - ds_diff['Tair'].sel(season=season).where(notnull).plot.pcolormesh( - ax=axes[i, 2], vmin=-0.1, vmax=.1, cmap='RdBu_r', - add_colorbar=True, extend='both') - - axes[i, 0].set_ylabel(season) - axes[i, 1].set_ylabel('') - axes[i, 2].set_ylabel('') - - for ax in axes.flat: - ax.axes.get_xaxis().set_ticklabels([]) - ax.axes.get_yaxis().set_ticklabels([]) - ax.axes.axis('tight') - ax.set_xlabel('') - - axes[0, 0].set_title('Weighted by DPM') - axes[0, 1].set_title('Equal Weighting') - axes[0, 2].set_title('Difference') - - plt.tight_layout() - - fig.suptitle('Seasonal Surface Air Temperature', fontsize=16, y=1.02) - - - - -.. parsed-literal:: - - - - - - -.. image:: monthly_means_output.png - - -.. 
code:: python - - # Wrap it into a simple function - def season_mean(ds, calendar='standard'): - # Make a DataArray of season/year groups - year_season = xr.DataArray(ds.time.to_index().to_period(freq='Q-NOV').to_timestamp(how='E'), - coords=[ds.time], name='year_season') - - # Make a DataArray with the number of days in each month, size = len(time) - month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar=calendar), - coords=[ds.time], name='month_length') - # Calculate the weights by grouping by 'time.season' - weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum() - - # Test that the sum of the weights for each season is 1.0 - np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4)) - - # Calculate the weighted average - return (ds * weights).groupby('time.season').sum(dim='time') diff --git a/doc/examples/multidimensional-coords.ipynb b/doc/examples/multidimensional-coords.ipynb new file mode 100644 index 00000000000..6fa9ddff5f5 --- /dev/null +++ b/doc/examples/multidimensional-coords.ipynb @@ -0,0 +1,233 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Working with Multidimensional Coordinates\n", + "\n", + "Author: [Ryan Abernathey](https://github.com/rabernat)\n", + "\n", + "Many datasets have _physical coordinates_ which differ from their _logical coordinates_. Xarray provides several ways to plot and analyze such datasets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:49:56.068395Z", + "start_time": "2018-11-28T20:49:56.035349Z" + } + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import numpy as np\n", + "import pandas as pd\n", + "import xarray as xr\n", + "import cartopy.crs as ccrs\n", + "from matplotlib import pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an example, consider this dataset from the [xarray-data](https://github.com/pydata/xarray-data) repository." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:50:13.629720Z", + "start_time": "2018-11-28T20:50:13.484542Z" + } + }, + "outputs": [], + "source": [ + "ds = xr.tutorial.open_dataset('rasm').load()\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, the _logical coordinates_ are `x` and `y`, while the _physical coordinates_ are `xc` and `yc`, which represent the longitudes and latitudes of the data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:50:15.836061Z", + "start_time": "2018-11-28T20:50:15.768376Z" + } + }, + "outputs": [], + "source": [ + "print(ds.xc.attrs)\n", + "print(ds.yc.attrs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plotting ##\n", + "\n", + "Let's examine these coordinate variables by plotting them."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:50:17.928556Z", + "start_time": "2018-11-28T20:50:17.031211Z" + } + }, + "outputs": [], + "source": [ + "fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(14,4))\n", + "ds.xc.plot(ax=ax1)\n", + "ds.yc.plot(ax=ax2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the variables `xc` (longitude) and `yc` (latitude) are two-dimensional scalar fields.\n", + "\n", + "If we try to plot the data variable `Tair`, by default we get the logical coordinates." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:50:20.567749Z", + "start_time": "2018-11-28T20:50:19.999393Z" + } + }, + "outputs": [], + "source": [ + "ds.Tair[0].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to visualize the data on a conventional latitude-longitude grid, we can take advantage of xarray's ability to apply [cartopy](http://scitools.org.uk/cartopy/index.html) map projections." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:50:31.131708Z", + "start_time": "2018-11-28T20:50:30.444697Z" + } + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(14,6))\n", + "ax = plt.axes(projection=ccrs.PlateCarree())\n", + "ax.set_global()\n", + "ds.Tair[0].plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(), x='xc', y='yc', add_colorbar=False)\n", + "ax.coastlines()\n", + "ax.set_ylim([0,90]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multidimensional Groupby ##\n", + "\n", + "The above example allowed us to visualize the data on a regular latitude-longitude grid. 
But what if we want to do a calculation that involves grouping over one of these physical coordinates (rather than the logical coordinates), for example, calculating the mean temperature at each latitude? This can be achieved using xarray's `groupby` function, which accepts multidimensional variables. By default, `groupby` will use every unique value in the variable, which is probably not what we want. Instead, we can use the `groupby_bins` function to specify the output coordinates of the group. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2018-11-28T20:50:43.670463Z", + "start_time": "2018-11-28T20:50:43.245501Z" + } + }, + "outputs": [], + "source": [ + "# define two-degree wide latitude bins\n", + "lat_bins = np.arange(0,91,2)\n", + "# define a label for each bin corresponding to the central latitude\n", + "lat_center = np.arange(1,90,2)\n", + "# group according to those bins and take the mean\n", + "Tair_lat_mean = ds.Tair.groupby_bins('xc', lat_bins, labels=lat_center).mean(dim=xr.ALL_DIMS)\n", + "# plot the result\n", + "Tair_lat_mean.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting coordinate for the `groupby_bins` operation got the `_bins` suffix appended: `xc_bins`. This helps us distinguish it from the original multidimensional variable `xc`.\n", + "\n", + "**Note**: This group-by-latitude approach does not take into account the finite-size geometry of grid cells. It simply bins each value according to the coordinates at the cell center. Xarray has no understanding of grid cells and their geometry. More precise geographic regridding for Xarray data is available via the [xesmf](https://xesmf.readthedocs.io) package."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/examples/multidimensional-coords.rst b/doc/examples/multidimensional-coords.rst deleted file mode 100644 index 55569b7662a..00000000000 --- a/doc/examples/multidimensional-coords.rst +++ /dev/null @@ -1,118 +0,0 @@ -.. _examples.multidim: - -Working with Multidimensional Coordinates -========================================= - -Author: `Ryan Abernathey `__ - -Many datasets have *physical coordinates* which differ from their -*logical coordinates*. Xarray provides several ways to plot and analyze -such datasets. - - -.. ipython:: python - - import numpy as np - import pandas as pd - import xarray as xr - import netCDF4 - import cartopy.crs as ccrs - import matplotlib.pyplot as plt - -As an example, consider this dataset from the -`xarray-data `__ repository. - - -.. ipython:: python - - ds = xr.tutorial.open_dataset('rasm').load() - ds - -In this example, the *logical coordinates* are ``x`` and ``y``, while -the *physical coordinates* are ``xc`` and ``yc``, which represent the -latitudes and longitude of the data. - - -.. 
ipython:: python - - ds.xc.attrs - ds.yc.attrs - - -Plotting --------- - -Let's examine these coordinate variables by plotting them. - -.. ipython:: python - - fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(9,3)) - ds.xc.plot(ax=ax1); - @savefig xarray_multidimensional_coords_8_2.png width=100% - ds.yc.plot(ax=ax2); - -Note that the variables ``xc`` (longitude) and ``yc`` (latitude) are -two-dimensional scalar fields. - -If we try to plot the data variable ``Tair``, by default we get the -logical coordinates. - -.. ipython:: python - :suppress: - - f = plt.figure(figsize=(6, 4)) - -.. ipython:: python - - @savefig xarray_multidimensional_coords_10_1.png width=5in - ds.Tair[0].plot(); - - -In order to visualize the data on a conventional latitude-longitude -grid, we can take advantage of xarray's ability to apply -`cartopy `__ map projections. - -.. ipython:: python - - plt.figure(figsize=(7,2)); - ax = plt.axes(projection=ccrs.PlateCarree()); - ds.Tair[0].plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(), - x='xc', y='yc', add_colorbar=False); - @savefig xarray_multidimensional_coords_12_0.png width=100% - ax.coastlines(); - -Multidimensional Groupby ------------------------- - -The above example allowed us to visualize the data on a regular -latitude-longitude grid. But what if we want to do a calculation that -involves grouping over one of these physical coordinates (rather than -the logical coordinates), for example, calculating the mean temperature -at each latitude. This can be achieved using xarray's ``groupby`` -function, which accepts multidimensional variables. By default, -``groupby`` will use every unique value in the variable, which is -probably not what we want. Instead, we can use the ``groupby_bins`` -function to specify the output coordinates of the group. - -.. ipython:: python - :suppress: - - f = plt.figure(figsize=(6, 4.5)) - -.. 
ipython:: python - - # define two-degree wide latitude bins - lat_bins = np.arange(0, 91, 2) - # define a label for each bin corresponding to the central latitude - lat_center = np.arange(1, 90, 2) - # group according to those bins and take the mean - Tair_lat_mean = (ds.Tair.groupby_bins('xc', lat_bins, labels=lat_center) - .mean(...)) - # plot the result - @savefig xarray_multidimensional_coords_14_1.png width=5in - Tair_lat_mean.plot(); - - -Note that the resulting coordinate for the ``groupby_bins`` operation -got the ``_bins`` suffix appended: ``xc_bins``. This help us distinguish -it from the original multidimensional variable ``xc``. diff --git a/doc/examples/visualization_gallery.ipynb b/doc/examples/visualization_gallery.ipynb new file mode 100644 index 00000000000..f8d5b1ae458 --- /dev/null +++ b/doc/examples/visualization_gallery.ipynb @@ -0,0 +1,296 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization Gallery\n", + "\n", + "This notebook shows common visualization issues encountered in Xarray." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cartopy.crs as ccrs\n", + "import matplotlib.pyplot as plt\n", + "import xarray as xr\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load example dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.tutorial.load_dataset('air_temperature')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multiple plots and map projections\n", + "\n", + "Control the map projection parameters on multiple axes\n", + "\n", + "This example illustrates how to plot multiple maps and control their extent\n", + "and aspect ratio.\n", + "\n", + "For more details see [this discussion](https://github.com/pydata/xarray/issues/1397#issuecomment-299190567) on github." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "air = ds.air.isel(time=[0, 724]) - 273.15\n", + "\n", + "# This is the map projection we want to plot *onto*\n", + "map_proj = ccrs.LambertConformal(central_longitude=-95, central_latitude=45)\n", + "\n", + "p = air.plot(transform=ccrs.PlateCarree(), # the data's projection\n", + " col='time', col_wrap=1, # multiplot settings\n", + " aspect=ds.dims['lon'] / ds.dims['lat'], # for a sensible figsize\n", + " subplot_kws={'projection': map_proj}) # the plot's projection\n", + "\n", + "# We have to set the map's options on all axes\n", + "for ax in p.axes.flat:\n", + " ax.coastlines()\n", + " ax.set_extent([-160, -30, 5, 75])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Centered colormaps\n", + "\n", + "Xarray's automatic colormaps choice" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "air = ds.air.isel(time=0)\n", + "\n", + "f, ((ax1, ax2), 
(ax3, ax4)) = plt.subplots(2, 2, figsize=(8, 6))\n", + "\n", + "# The first plot (in kelvins) chooses \"viridis\" and uses the data's min/max\n", + "air.plot(ax=ax1, cbar_kwargs={'label': 'K'})\n", + "ax1.set_title('Kelvins: default')\n", + "ax2.set_xlabel('')\n", + "\n", + "# The second plot (in celsius) now chooses \"BuRd\" and centers min/max around 0\n", + "airc = air - 273.15\n", + "airc.plot(ax=ax2, cbar_kwargs={'label': '°C'})\n", + "ax2.set_title('Celsius: default')\n", + "ax2.set_xlabel('')\n", + "ax2.set_ylabel('')\n", + "\n", + "# The center doesn't have to be 0\n", + "air.plot(ax=ax3, center=273.15, cbar_kwargs={'label': 'K'})\n", + "ax3.set_title('Kelvins: center=273.15')\n", + "\n", + "# Or it can be ignored\n", + "airc.plot(ax=ax4, center=False, cbar_kwargs={'label': '°C'})\n", + "ax4.set_title('Celsius: center=False')\n", + "ax4.set_ylabel('')\n", + "\n", + "# Make it nice\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Control the plot's colorbar\n", + "\n", + "Use ``cbar_kwargs`` keyword to specify the number of ticks.\n", + "The ``spacing`` kwarg can be used to draw proportional ticks." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "air2d = ds.air.isel(time=500)\n", + "\n", + "# Prepare the figure\n", + "f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(14, 4))\n", + "\n", + "# Irregular levels to illustrate the use of a proportional colorbar\n", + "levels = [245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 310, 340]\n", + "\n", + "# Plot data\n", + "air2d.plot(ax=ax1, levels=levels)\n", + "air2d.plot(ax=ax2, levels=levels, cbar_kwargs={'ticks': levels})\n", + "air2d.plot(ax=ax3, levels=levels, cbar_kwargs={'ticks': levels,\n", + " 'spacing': 'proportional'})\n", + "\n", + "# Show plots\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multiple lines from a 2d DataArray\n", + "\n", + "Use ``xarray.plot.line`` on a 2d DataArray to plot selections as\n", + "multiple lines.\n", + "\n", + "See ``plotting.multiplelines`` for more details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "air = ds.air - 273.15 # to celsius\n", + "\n", + "# Prepare the figure\n", + "f, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4), sharey=True)\n", + "\n", + "# Selected latitude indices\n", + "isel_lats = [10, 15, 20]\n", + "\n", + "# Temperature vs longitude plot - illustrates the \"hue\" kwarg\n", + "air.isel(time=0, lat=isel_lats).plot.line(ax=ax1, hue='lat')\n", + "ax1.set_ylabel('°C')\n", + "\n", + "# Temperature vs time plot - illustrates the \"x\" and \"add_legend\" kwargs\n", + "air.isel(lon=30, lat=isel_lats).plot.line(ax=ax2, x='time', add_legend=False)\n", + "ax2.set_ylabel('')\n", + "\n", + "# Show\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `imshow()` and rasterio map projections\n", + "\n", + "\n", + "Using rasterio's projection information for more accurate plots.\n", + "\n", + "This example extends 
`recipes.rasterio` and plots the image in the\n", + "original map projection instead of relying on pcolormesh and a map\n", + "transformation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif'\n", + "da = xr.open_rasterio(url)\n", + "\n", + "# The data is in UTM projection. We have to set it manually until\n", + "# https://github.com/SciTools/cartopy/issues/813 is implemented\n", + "crs = ccrs.UTM('18N')\n", + "\n", + "# Plot on a map\n", + "ax = plt.subplot(projection=crs)\n", + "da.plot.imshow(ax=ax, rgb='band', transform=crs)\n", + "ax.coastlines('10m', color='r')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Parsing rasterio geocoordinates\n", + "\n", + "Converting a projection's cartesian coordinates into 2D longitudes and\n", + "latitudes.\n", + "\n", + "These new coordinates might be handy for plotting and indexing, but it should\n", + "be kept in mind that a grid which is regular in projection coordinates will\n", + "likely be irregular in lon/lat. It is often recommended to work in the data's\n", + "original map projection (see `recipes.rasterio_rgb`)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from rasterio.warp import transform\n", + "import numpy as np\n", + "\n", + "url = 'https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif'\n", + "da = xr.open_rasterio(url)\n", + "\n", + "# Compute the lon/lat coordinates with rasterio.warp.transform\n", + "ny, nx = len(da['y']), len(da['x'])\n", + "x, y = np.meshgrid(da['x'], da['y'])\n", + "\n", + "# Rasterio works with 1D arrays\n", + "lon, lat = transform(da.crs, {'init': 'EPSG:4326'},\n", + " x.flatten(), y.flatten())\n", + "lon = np.asarray(lon).reshape((ny, nx))\n", + "lat = np.asarray(lat).reshape((ny, nx))\n", + "da.coords['lon'] = (('y', 'x'), lon)\n", + "da.coords['lat'] = (('y', 'x'), lat)\n", + "\n", + "# Compute a greyscale out of the rgb image\n", + "greyscale = da.mean(dim='band')\n", + "\n", + "# Plot on a map\n", + "ax = plt.subplot(projection=ccrs.PlateCarree())\n", + "greyscale.plot(ax=ax, x='lon', y='lat', transform=ccrs.PlateCarree(),\n", + " cmap='Greys_r', add_colorbar=False)\n", + "ax.coastlines('10m', color='r')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/io.rst b/doc/io.rst index 986c4374e89..2e50e5639da 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -749,8 +749,8 @@ The ``x`` and ``y`` coordinates are generated out of the file's metadata (``bounds``, ``width``, ``height``), and they can be understood as cartesian coordinates defined in the file's projection provided by the ``crs`` attribute. ``crs`` is a PROJ4 string which can be parsed by e.g. `pyproj`_ or rasterio. 
-See :ref:`recipes.rasterio` for an example of how to convert these to -longitudes and latitudes. +See :ref:`/examples/visualization_gallery.ipynb#Parsing-rasterio-geocoordinates` +for an example of how to convert these to longitudes and latitudes. .. warning:: diff --git a/doc/pandas.rst b/doc/pandas.rst index 4f3088b4c34..72abf6609f6 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -112,7 +112,7 @@ automatically stacking them into a ``MultiIndex``. :py:meth:`DataArray.to_pandas() ` is a shortcut that lets you convert a DataArray directly into a pandas object with the same dimensionality (i.e., a 1D array is converted to a :py:class:`~pandas.Series`, -2D to :py:class:`~pandas.DataFrame` and 3D to :py:class:`~pandas.Panel`): +2D to :py:class:`~pandas.DataFrame` and 3D to ``pandas.Panel``): .. ipython:: python diff --git a/doc/plotting.rst b/doc/plotting.rst index e9d30fb63c8..270988b99de 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -782,7 +782,7 @@ coordinates. Multidimensional coordinates ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -See also: :ref:`examples.multidim`. +See also: :ref:`/examples/multidimensional-coords.ipynb`. You can plot irregular grids defined by multidimensional coordinates with xarray, but you'll have to tell the plot function to use these coordinates diff --git a/doc/related-projects.rst b/doc/related-projects.rst index fd77ce56a0a..a8af05f3074 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -25,6 +25,7 @@ Geosciences - `PyGDX `_: Python 3 package for accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom subclass. +- `pyXpcm `_: xarray-based Profile Classification Modelling (PCM), mostly for ocean data. - `Regionmask `_: plotting and creation of masks of spatial regions - `rioxarray `_: geospatial xarray extension powered by rasterio - `salem `_: Adds geolocalised subsetting, masking, and plotting operations to xarray's data structures via accessors. 
diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cb0d965e5dc..219b2184b5c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -13,18 +13,67 @@ What's New import xarray as xr np.random.seed(123456) -.. _whats-new.0.14.1: +.. _whats-new.0.15.0: -v0.14.1 (unreleased) + +v0.15.0 (unreleased) -------------------- Breaking changes ~~~~~~~~~~~~~~~~ -- Broken compatibility with cftime < 1.0.3. + +New Features +~~~~~~~~~~~~ +- :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile`` + now work with dask Variables. By `Deepak Cherian `_. +- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen` + and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`) + By `Deepak Cherian `_ - .. note:: +Bug fixes +~~~~~~~~~ +- Fix plotting with transposed 2D non-dimensional coordinates. (:issue:`3138`, :pull:`3441`) + By `Deepak Cherian `_. + + +Documentation +~~~~~~~~~~~~~ +- Switch doc examples to use nbsphinx and replace sphinx_gallery with + notebook. + (:pull:`3105`, :pull:`3106`, :pull:`3121`) + By `Ryan Abernathey `_ +- Added example notebook demonstrating use of xarray with Regional Ocean + Modeling System (ROMS) ocean hydrodynamic model output. + (:pull:`3116`). + By `Robert Hetland `_ +- Added example notebook demonstrating the visualization of ERA5 GRIB + data. (:pull:`3199`) + By `Zach Bruick `_ and + `Stephan Siemen `_ +- Added examples for ``DataArray.quantile``, ``Dataset.quantile`` and + ``GroupBy.quantile``. (:pull:`3576`) + By `Justus Magin `_. + +Internal Changes +~~~~~~~~~~~~~~~~ + + +- Removed internal method ``Dataset._from_vars_and_coord_names``, + which was dominated by ``Dataset._construct_direct``. (:pull:`3565`) + By `Maximilian Roos `_ + + +v0.14.1 (19 Nov 2019) +--------------------- + +Breaking changes +~~~~~~~~~~~~~~~~ + +- Broken compatibility with ``cftime < 1.0.3``. By `Deepak Cherian `_. + + .. 
warning:: cftime version 1.0.4 is broken (`cftime/126 `_); @@ -34,24 +83,37 @@ Breaking changes module included in versions of netCDF4 prior to 1.4 that eventually became the cftime package, has been removed in favor of relying solely on the standalone ``cftime`` package (:pull:`3450`). + `cftime `_ package, has been removed in favor of relying solely on + the standalone ``cftime`` package (:pull:`3450`). By `Spencer Clark `_. New Features ~~~~~~~~~~~~ -- :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` have been added for dropping labels. +- Added the ``sparse`` option to :py:meth:`~xarray.DataArray.unstack`, + :py:meth:`~xarray.Dataset.unstack`, :py:meth:`~xarray.DataArray.reindex`, + :py:meth:`~xarray.Dataset.reindex` (:issue:`3518`). + By `Keisuke Fujii `_. +- Added the ``fill_value`` option to :py:meth:`DataArray.unstack` and + :py:meth:`Dataset.unstack` (:issue:`3518`, :pull:`3541`). + By `Keisuke Fujii `_. +- Added the ``max_gap`` kwarg to :py:meth:`~xarray.DataArray.interpolate_na` and + :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data + gap that will be filled by interpolation. By `Deepak Cherian `_. +- Added :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` for dropping labels. :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for - dropping variables (including coordinates). The existing ``drop`` methods remain as a backward compatible + dropping variables (including coordinates). The existing :py:meth:`Dataset.drop` & + :py:meth:`DataArray.drop` methods remain as a backward compatible option for dropping either labels or variables, but using the more specific methods is encouraged. 
(:pull:`3475`) By `Maximilian Roos `_ -- :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` have been added for +- Added :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` for mapping / applying a function over each item in the collection, reflecting the widely used and least surprising name for this operation. The existing ``apply`` methods remain for backward compatibility, though using the ``map`` methods is encouraged. (:pull:`3459`) By `Maximilian Roos `_ -- :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (`...`) +- :py:meth:`Dataset.transpose` and :py:meth:`DataArray.transpose` now support an ellipsis (``...``) to represent all 'other' dimensions. For example, to move one dimension to the front, use ``.transpose('x', ...)``. (:pull:`3421`) By `Maximilian Roos `_ @@ -59,56 +121,76 @@ New Features `...` directly. As before, you can use this to instruct a ``groupby`` operation to reduce over all dimensions. While we have no plans to remove ``xr.ALL_DIMS``, we suggest using `...`. (:pull:`3418`) +- Changed ``xr.ALL_DIMS`` to equal python's ``Ellipsis`` (``...``), and changed internal usages to use + ``...`` directly. As before, you can use this to instruct a ``groupby`` operation + to reduce over all dimensions. While we have no plans to remove ``xr.ALL_DIMS``, we suggest + using ``...``. (:pull:`3418`) By `Maximilian Roos `_ - :py:func:`xarray.dot`, and :py:meth:`DataArray.dot` now support the ``dims=...`` option to sum over the union of dimensions of all input arrays (:issue:`3423`) by `Mathias Hauser `_. - Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve - representation of objects in jupyter. By default this feature is turned off - for now. Enable it with :py:meth:`xarray.set_options(display_style="html")`. + representation of objects in Jupyter. By default this feature is turned off + for now. 
Enable it with ``xarray.set_options(display_style="html")``. (:pull:`3425`) by `Benoit Bovy `_ and `Julia Signell `_. - Implement `dask deterministic hashing `_ for xarray objects. Note that xarray objects with a dask.array backend already used deterministic hashing in previous releases; this change implements it when whole - xarray objects are embedded in a dask graph, e.g. when :meth:`DataArray.map` is + xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map` is invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`) By `Deepak Cherian `_ and `Guido Imperiale `_. - xarray now respects the ``DataArray.encoding["coordinates"]`` attribute when writing to disk. See :ref:`io.coordinates` for more. (:issue:`3351`, :pull:`3487`) By `Deepak Cherian `_. +- Add the documented-but-missing :py:meth:`DatasetGroupBy.quantile`. + (:issue:`3525`, :pull:`3527`). By `Justus Magin `_. Bug fixes ~~~~~~~~~ +- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when + calling :py:meth:`Dataset.rename`, :py:meth:`Dataset.rename_dims` and :py:meth:`Dataset.rename_vars`. + By `Mathias Hauser `_. (:issue:`3522`). +- Fix a bug in :py:meth:`DataArray.set_index` in case that an existing dimension becomes a level + variable of MultiIndex. (:pull:`3520`). By `Keisuke Fujii `_. +- Harmonize ``_FillValue``, ``missing_value`` during encoding and decoding steps. (:pull:`3502`) + By `Anderson Banihirwe `_. - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). By `Deepak Cherian `_. -- Use dask names to compare dask objects prior to comparing values after computation. +- Make alignment and concatenation significantly more efficient by using dask names to compare dask + objects prior to comparing values after computation. 
This change makes it more convenient to carry + around large non-dimensional coordinate variables backed by dask arrays. Existing workarounds involving + ``reset_coords(drop=True)`` should now be unnecessary in most cases. (:issue:`3068`, :issue:`3311`, :issue:`3454`, :pull:`3453`). By `Deepak Cherian `_. -- Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4. - By `Anderson Banihirwe `_. +- Add support for cftime>=1.0.4. By `Anderson Banihirwe `_. - Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`). In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated. By `Deepak Cherian `_. -- Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and - :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions. - (:issue:`3402`). By `Deepak Cherian `_ +- Fix :py:meth:`GroupBy.reduce` when reducing over multiple dimensions. + (:issue:`3402`). By `Deepak Cherian `_ - Allow appending datetime and bool data variables to zarr stores. - (:issue:`3480`). By `Akihiro Matsukawa `_. + (:issue:`3480`). By `Akihiro Matsukawa `_. +- Add support for numpy >=1.18 (); bugfix mean() on datetime64 arrays on dask backend + (:issue:`3409`, :pull:`3537`). By `Guido Imperiale `_. +- Add support for pandas >=0.26 (:issue:`3440`). + By `Deepak Cherian `_. +- Add support for pseudonetcdf >=3.1 (:pull:`3485`). + By `Barron Henderson `_. Documentation ~~~~~~~~~~~~~ -- Fix leap year condition in example (http://xarray.pydata.org/en/stable/examples/monthly-means.html) - by `Mickaël Lalande `_. +- Fix leap year condition in `monthly means example `_. + By `Mickaël Lalande `_. - Fix the documentation of :py:meth:`DataArray.resample` and - :py:meth:`Dataset.resample` and explicitly state that a + :py:meth:`Dataset.resample`, explicitly stating that a datetime-like dimension is required. (:pull:`3400`) By `Justus Magin `_. -- Update the terminology page to address multidimensional coordinates. 
(:pull:`3410`) +- Update the :ref:`terminology` page to address multidimensional coordinates. (:pull:`3410`) By `Jon Thielen `_. - Fix the documentation of :py:meth:`Dataset.integrate` and :py:meth:`DataArray.integrate` and add an example to @@ -117,9 +199,9 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ - - Added integration tests against `pint `_. - (:pull:`3238`, :pull:`3447`, :pull:`3508`) by `Justus Magin `_. + (:pull:`3238`, :pull:`3447`, :pull:`3493`, :pull:`3508`) + by `Justus Magin `_. .. note:: @@ -138,6 +220,9 @@ Internal Changes - Enable type checking on default sentinel values (:pull:`3472`) By `Maximilian Roos `_ +- Add :py:meth:`Variable._replace` for simpler replacing of a subset of attributes (:pull:`3472`) + By `Maximilian Roos `_ + .. _whats-new.0.14.0: v0.14.0 (14 Oct 2019) @@ -168,15 +253,15 @@ Breaking changes (:issue:`3222`, :issue:`3293`, :issue:`3340`, :issue:`3346`, :issue:`3358`). By `Guido Imperiale `_. -- Dropped the `drop=False` optional parameter from :meth:`Variable.isel`. +- Dropped the ``drop=False`` optional parameter from :py:meth:`Variable.isel`. It was unused and doesn't make sense for a Variable. (:pull:`3375`). By `Guido Imperiale `_. -- Remove internal usage of `collections.OrderedDict`. After dropping support for - Python <=3.5, most uses of `OrderedDict` in Xarray were no longer necessary. We - have removed the internal use of the `OrderedDict` in favor of Python's builtin - `dict` object which is now ordered itself. This change will be most obvious when - interacting with the `attrs` property on the Dataset and DataArray objects. +- Remove internal usage of :py:class:`collections.OrderedDict`. After dropping support for + Python <=3.5, most uses of ``OrderedDict`` in Xarray were no longer necessary. We + have removed the internal use of the ``OrderedDict`` in favor of Python's builtin + ``dict`` object which is now ordered itself. 
This change will be most obvious when + interacting with the ``attrs`` property on Dataset and DataArray objects. (:issue:`3380`, :pull:`3389`). By `Joe Hamman `_. New functions/methods @@ -202,48 +287,48 @@ Enhancements - Added a ``GroupBy.dims`` property that mirrors the dimensions of each group (:issue:`3344`). -- Speed up :meth:`Dataset.isel` up to 33% and :meth:`DataArray.isel` up to 25% for small +- Speed up :py:meth:`Dataset.isel` up to 33% and :py:meth:`DataArray.isel` up to 25% for small arrays (:issue:`2799`, :pull:`3375`). By `Guido Imperiale `_. Bug fixes ~~~~~~~~~ - Reintroduce support for :mod:`weakref` (broken in v0.13.0). Support has been - reinstated for :class:`DataArray` and :class:`Dataset` objects only. Internal xarray - objects remain unaddressable by weakref in order to save memory + reinstated for :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects only. + Internal xarray objects remain unaddressable by weakref in order to save memory (:issue:`3317`). By `Guido Imperiale `_. - Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord now plot the correct data for 2D DataArrays - (:issue:`3334`). By `Tom Nicholas `_. + (:issue:`3334`). By `Tom Nicholas `_. - Make :py:func:`~xarray.concat` more robust when merging variables present in some datasets but - not others (:issue:`508`). By `Deepak Cherian `_. + not others (:issue:`508`). By `Deepak Cherian `_. - The default behaviour of reducing across all dimensions for :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects has now been properly removed as was done for :py:class:`~xarray.core.groupby.DatasetGroupBy` in 0.13.0 (:issue:`3337`). - Use `xarray.ALL_DIMS` if you need to replicate previous behaviour. + Use ``xarray.ALL_DIMS`` if you need to replicate previous behaviour. Also raise nicer error message when no groups are created (:issue:`1764`). By `Deepak Cherian `_. - Fix error in concatenating unlabeled dimensions (:pull:`3362`). 
- By `Deepak Cherian `_. + By `Deepak Cherian `_. - Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created. - (:pull:`3362`). By `Deepak Cherian `_. + (:pull:`3362`). By `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ - Created a glossary of important xarray terms (:issue:`2410`, :pull:`3352`). - By `Gregory Gundersen `_. + By `Gregory Gundersen `_. - Created a "How do I..." section (:ref:`howdoi`) for solutions to common questions. (:pull:`3357`). - By `Deepak Cherian `_. + By `Deepak Cherian `_. - Add examples for :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` (:pull:`3331`). By `Justus Magin `_. - Add examples for :py:meth:`align`, :py:meth:`merge`, :py:meth:`combine_by_coords`, :py:meth:`full_like`, :py:meth:`zeros_like`, :py:meth:`ones_like`, :py:meth:`Dataset.pipe`, - :py:meth:`Dataset.assign`, :py:meth:`Dataset.reindex`, :py:meth:`Dataset.fillna` (pull:`3328`). + :py:meth:`Dataset.assign`, :py:meth:`Dataset.reindex`, :py:meth:`Dataset.fillna` (:pull:`3328`). By `Anderson Banihirwe `_. - Fixed documentation to clean up an unwanted file created in ``ipython`` example - (:pull:`3353`). By `Gregory Gundersen `_. + (:pull:`3353`). By `Gregory Gundersen `_. .. 
_whats-new.0.13.0: @@ -297,7 +382,7 @@ Breaking changes - :py:meth:`DataArray.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous positional arguments were deprecated) - Reindexing with variables of a different dimension now raise an error (previously deprecated) -- :py:func:`~xarray.broadcast_array` is removed (previously deprecated in favor of +- ``xarray.broadcast_array`` is removed (previously deprecated in favor of :py:func:`~xarray.broadcast`) - :py:meth:`Variable.expand_dims` is removed (previously deprecated in favor of :py:meth:`Variable.set_dims`) @@ -307,7 +392,7 @@ New functions/methods - xarray can now wrap around any `NEP18 `_ compliant - numpy-like library (important: read notes about NUMPY_EXPERIMENTAL_ARRAY_FUNCTION in + numpy-like library (important: read notes about ``NUMPY_EXPERIMENTAL_ARRAY_FUNCTION`` in the above link). Added explicit test coverage for `sparse `_. (:issue:`3117`, :issue:`3202`). This requires `sparse>=0.8.0`. By `Nezar Abdennur `_ @@ -333,7 +418,7 @@ New functions/methods - Added :py:meth:`DataArray.broadcast_like` and :py:meth:`Dataset.broadcast_like`. By `Deepak Cherian `_ and `David Mertz - `_. + `_. - Dataset plotting API for visualizing dependencies between two DataArrays! Currently only :py:meth:`Dataset.plot.scatter` is implemented. @@ -379,21 +464,21 @@ Enhancements By `Gerardo Rivera `_. - :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used - with ``engine="h5netcdf"``. It is passed to :py:func:`h5netcdf.File`. + with ``engine="h5netcdf"``. It is passed to ``h5netcdf.File``. By `Ulrich Herter `_. -- :py:meth:`~xarray.Dataset.drop` now supports keyword arguments; dropping index +- ``xarray.Dataset.drop`` now supports keyword arguments; dropping index labels by using both ``dim`` and ``labels`` or using a :py:class:`~xarray.core.coordinates.DataArrayCoordinates` object are deprecated (:issue:`2910`). - By `Gregory Gundersen `_. + By `Gregory Gundersen `_. 
- Added examples of :py:meth:`Dataset.set_index` and :py:meth:`DataArray.set_index`, as well as more specific error messages when the user passes invalid arguments (:issue:`3176`). By `Gregory Gundersen `_. -- :py:func:`filter_by_attrs` now filters the coordinates as well as the variables. +- :py:meth:`Dataset.filter_by_attrs` now filters the coordinates as well as the variables. By `Spencer Jones `_. Bug fixes @@ -420,7 +505,7 @@ Bug fixes By `Hasan Ahmad `_. - Fixed bug in ``combine_by_coords()`` causing a `ValueError` if the input had an unused dimension with coordinates which were not monotonic (:issue:`3150`). - By `Tom Nicholas `_. + By `Tom Nicholas `_. - Fixed crash when applying ``distributed.Client.compute()`` to a DataArray (:issue:`3171`). By `Guido Imperiale `_. - Better error message when using groupby on an empty DataArray (:issue:`3037`). @@ -444,7 +529,7 @@ Documentation - Fixed documentation to clean up unwanted files created in ``ipython`` examples (:issue:`3227`). - By `Gregory Gundersen `_. + By `Gregory Gundersen `_. .. _whats-new.0.12.3: @@ -514,7 +599,7 @@ New functions/methods To avoid FutureWarnings switch to using ``combine_nested`` or ``combine_by_coords``, (or set the ``combine`` argument in ``open_mfdataset``). (:issue:`2159`) - By `Tom Nicholas `_. + By `Tom Nicholas `_. - :py:meth:`~xarray.DataArray.rolling_exp` and :py:meth:`~xarray.Dataset.rolling_exp` added, similar to pandas' @@ -560,12 +645,12 @@ Enhancements to existing functionality :py:meth:`DataArray.groupby_bins`, and :py:meth:`DataArray.resample` now accept a keyword argument ``restore_coord_dims`` which keeps the order of the dimensions of multi-dimensional coordinates intact (:issue:`1856`). - By `Peter Hausamann `_. + By `Peter Hausamann `_. - Clean up Python 2 compatibility in code (:issue:`2950`) By `Guido Imperiale `_. - Better warning message when supplying invalid objects to ``xr.merge`` (:issue:`2948`). By `Mathias Hauser `_. 
-- Add ``errors`` keyword argument to :py:meth:`Dataset.drop` and :py:meth:`Dataset.drop_dims` +- Add ``errors`` keyword argument to ``Dataset.drop`` and :py:meth:`Dataset.drop_dims` that allows ignoring errors if a passed label or dimension is not in the dataset (:issue:`2994`). By `Andrew Ross `_. @@ -761,7 +846,7 @@ Bug fixes `Spencer Clark `_. - Line plots with the ``x`` argument set to a non-dimensional coord now plot the correct data for 1D DataArrays. - (:issue:`2725`). By `Tom Nicholas `_. + (:issue:`2725`). By `Tom Nicholas `_. - Subtracting a scalar ``cftime.datetime`` object from a :py:class:`CFTimeIndex` now results in a :py:class:`pandas.TimedeltaIndex` instead of raising a ``TypeError`` (:issue:`2671`). By `Spencer Clark @@ -777,14 +862,14 @@ Bug fixes By `Yohai Bar-Sinai `_. - Fixed error when trying to reduce a DataArray using a function which does not require an axis argument. (:issue:`2768`) - By `Tom Nicholas `_. + By `Tom Nicholas `_. - Concatenating a sequence of :py:class:`~xarray.DataArray` with varying names sets the name of the output array to ``None``, instead of the name of the first input array. If the names are the same it sets the name to that, instead to the name of the first DataArray in the list as it did before. - (:issue:`2775`). By `Tom Nicholas `_. + (:issue:`2775`). By `Tom Nicholas `_. -- Per `CF conventions +- Per the `CF conventions section on calendars `_, specifying ``'standard'`` as the calendar type in :py:meth:`~xarray.cftime_range` now correctly refers to the ``'gregorian'`` @@ -802,7 +887,7 @@ Bug fixes (e.g. '2000-01-01T00:00:00-05:00') no longer raises an error (:issue:`2649`). By `Spencer Clark `_. - Fixed performance regression with ``open_mfdataset`` (:issue:`2662`). - By `Tom Nicholas `_. + By `Tom Nicholas `_. - Fixed supplying an explicit dimension in the ``concat_dim`` argument to ``open_mfdataset`` (:issue:`2647`). By `Ben Root `_. @@ -867,13 +952,13 @@ Enhancements but were not explicitly closed. 
This is mostly useful for debugging; we recommend enabling it in your test suites if you use xarray for IO. By `Stephan Hoyer `_ -- Support Dask ``HighLevelGraphs`` by `Matthew Rocklin `_. +- Support Dask ``HighLevelGraphs`` by `Matthew Rocklin `_. - :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now supports the ``loffset`` kwarg just like Pandas. By `Deepak Cherian `_ - Datasets are now guaranteed to have a ``'source'`` encoding, so the source file name is always stored (:issue:`2550`). - By `Tom Nicholas `_. + By `Tom Nicholas `_. - The ``apply`` methods for ``DatasetGroupBy``, ``DataArrayGroupBy``, ``DatasetResample`` and ``DataArrayResample`` now support passing positional arguments to the applied function as a tuple to the ``args`` argument. @@ -995,7 +1080,7 @@ Enhancements dataset and dataarray attrs upon operations. The option is set with ``xarray.set_options(keep_attrs=True)``, and the default is to use the old behaviour. - By `Tom Nicholas `_. + By `Tom Nicholas `_. - Added a new backend for the GRIB file format based on ECMWF *cfgrib* python driver and *ecCodes* C-library. (:issue:`2475`) By `Alessandro Amici `_, @@ -1051,7 +1136,7 @@ Bug fixes CFTimeIndex is now allowed (:issue:`2484`). By `Spencer Clark `_. - Avoid use of Dask's deprecated ``get=`` parameter in tests - by `Matthew Rocklin `_. + by `Matthew Rocklin `_. - An ``OverflowError`` is now accurately raised and caught during the encoding process if a reference date is used that is so distant that the dates must be encoded using cftime rather than NumPy (:issue:`2272`). @@ -1097,7 +1182,7 @@ Enhancements (:issue:`2230`) By `Keisuke Fujii `_. -- :py:meth:`plot()` now accepts the kwargs +- :py:func:`~plot.plot()` now accepts the kwargs ``xscale, yscale, xlim, ylim, xticks, yticks`` just like Pandas. Also ``xincrease=False, yincrease=False`` now use matplotlib's axis inverting methods instead of setting limits. By `Deepak Cherian `_. 
(:issue:`2224`) @@ -1164,7 +1249,7 @@ Bug fixes - Follow up the renamings in dask; from dask.ghost to dask.overlap By `Keisuke Fujii `_. -- Now :py:func:`xr.apply_ufunc` raises a ValueError when the size of +- Now :py:func:`~xarray.apply_ufunc` raises a ValueError when the size of ``input_core_dims`` is inconsistent with the number of arguments. (:issue:`2341`) By `Keisuke Fujii `_. @@ -1247,7 +1332,7 @@ Enhancements - :py:meth:`~xarray.DataArray.interp` and :py:meth:`~xarray.Dataset.interp` methods are newly added. - See :ref:`interpolating values with interp` for the detail. + See :ref:`interp` for the detail. (:issue:`2079`) By `Keisuke Fujii `_. @@ -1364,7 +1449,7 @@ non-standard calendars used in climate modeling. Documentation ~~~~~~~~~~~~~ -- New FAQ entry, :ref:`faq.other_projects`. +- New FAQ entry, :ref:`related-projects`. By `Deepak Cherian `_. - :ref:`assigning_values` now includes examples on how to select and assign values to a :py:class:`~xarray.DataArray` with ``.loc``. @@ -1420,7 +1505,7 @@ Bug fixes - ``ValueError`` is raised when coordinates with the wrong size are assigned to a :py:class:`DataArray`. (:issue:`2112`) By `Keisuke Fujii `_. -- Fixed a bug in :py:meth:`~xarary.DatasArray.rolling` with bottleneck. Also, +- Fixed a bug in :py:meth:`~xarray.DataArray.rolling` with bottleneck. Also, fixed a bug in rolling an integer dask array. (:issue:`2113`) By `Keisuke Fujii `_. - Fixed a bug where `keep_attrs=True` flag was neglected if @@ -1457,7 +1542,7 @@ Enhancements supplied list, returning a bool array. See :ref:`selecting values with isin` for full details. Similar to the ``np.isin`` function. By `Maximilian Roos `_. -- Some speed improvement to construct :py:class:`~xarray.DataArrayRolling` +- Some speed improvement to construct :py:class:`~xarray.core.rolling.DataArrayRolling` object (:issue:`1993`) By `Keisuke Fujii `_. - Handle variables with different values for ``missing_value`` and @@ -1537,8 +1622,8 @@ Enhancements NumPy. 
By `Stephan Hoyer `_. - Improve :py:func:`~xarray.DataArray.rolling` logic. - :py:func:`~xarray.DataArrayRolling` object now supports - :py:func:`~xarray.DataArrayRolling.construct` method that returns a view + :py:func:`~xarray.core.rolling.DataArrayRolling` object now supports + :py:func:`~xarray.core.rolling.DataArrayRolling.construct` method that returns a view of the DataArray / Dataset object with the rolling-window dimension added to the last axis. This enables more flexible operation, such as strided rolling, windowed rolling, ND-rolling, short-time FFT and convolution. @@ -1609,7 +1694,7 @@ Enhancements 1D coordinate (e.g. time) and a 2D coordinate (e.g. depth as a function of time) (:issue:`1737`). By `Deepak Cherian `_. -- :py:func:`~plot()` rotates x-axis ticks if x-axis is time. +- :py:func:`~plot.plot()` rotates x-axis ticks if x-axis is time. By `Deepak Cherian `_. - :py:func:`~plot.line()` can draw multiple lines if provided with a 2D variable. @@ -1884,7 +1969,7 @@ Enhancements concatenated array/dataset (:issue:`1521`). By `Guido Imperiale `_. -- Speed-up (x 100) of :py:func:`~xarray.conventions.decode_cf_datetime`. +- Speed-up (x 100) of ``xarray.conventions.decode_cf_datetime``. By `Christian Chwala `_. **IO related improvements** @@ -2530,7 +2615,7 @@ Enhancements raising an error (:issue:`1082`). By `Stephan Hoyer `_. - Options for axes sharing between subplots are exposed to - :py:class:`FacetGrid` and :py:func:`~xarray.plot.plot`, so axes + :py:class:`~xarray.plot.FacetGrid` and :py:func:`~xarray.plot.plot`, so axes sharing can be disabled for polar plots. By `Bas Hoonhout `_. - New utility functions :py:func:`~xarray.testing.assert_equal`, @@ -2546,8 +2631,8 @@ Enhancements similar to what the command line utility ``ncdump -h`` produces (:issue:`1150`). By `Joe Hamman `_. 
- Added the ability to write unlimited netCDF dimensions with the ``scipy`` and - ``netcdf4`` backends via the new :py:attr:`~xray.Dataset.encoding` attribute - or via the ``unlimited_dims`` argument to :py:meth:`~xray.Dataset.to_netcdf`. + ``netcdf4`` backends via the new ``xray.Dataset.encoding`` attribute + or via the ``unlimited_dims`` argument to ``xray.Dataset.to_netcdf``. By `Joe Hamman `_. - New :py:meth:`~DataArray.quantile` method to calculate quantiles from DataArray objects (:issue:`1187`). @@ -2626,10 +2711,9 @@ Bug fixes Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- :py:meth:`~xarray.Dataset.isel_points` and - :py:meth:`~xarray.Dataset.sel_points` now use vectorised indexing in numpy - and dask (:issue:`1161`), which can result in several orders of magnitude - speedup. +- ``xarray.Dataset.isel_points`` and ``xarray.Dataset.sel_points`` now + use vectorised indexing in numpy and dask (:issue:`1161`), which can + result in several orders of magnitude speedup. By `Jonathan Chambers `_. .. _whats-new.0.8.2: @@ -2727,7 +2811,7 @@ Enhancements - Groupby operations now support grouping over multidimensional variables. A new method called :py:meth:`~xarray.Dataset.groupby_bins` has also been added to allow users to specify bins for grouping. The new features are described in - :ref:`groupby.multidim` and :ref:`examples.multidim`. + :ref:`groupby.multidim` and :ref:`/examples/multidimensional-coords.ipynb`. By `Ryan Abernathey `_. - DataArray and Dataset method :py:meth:`where` now supports a ``drop=True`` @@ -2738,16 +2822,17 @@ Enhancements any number of ``Dataset`` and/or ``DataArray`` variables. See :ref:`merge` for more details. By `Stephan Hoyer `_. -- DataArray and Dataset method :py:meth:`resample` now supports the +- :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now support the ``keep_attrs=False`` option that determines whether variable and dataset attributes are retained in the resampled object. By `Jeremy McGibbon `_. 
-- Better multi-index support in DataArray and Dataset :py:meth:`sel` and - :py:meth:`loc` methods, which now behave more closely to pandas and which - also accept dictionaries for indexing based on given level names and labels - (see :ref:`multi-level indexing`). By - `Benoit Bovy `_. +- Better multi-index support in :py:meth:`DataArray.sel`, + :py:meth:`DataArray.loc`, :py:meth:`Dataset.sel` and + :py:meth:`Dataset.loc`, which now behave more closely to pandas and + which also accept dictionaries for indexing based on given level names + and labels (see :ref:`multi-level indexing`). + By `Benoit Bovy `_. - New (experimental) decorators :py:func:`~xarray.register_dataset_accessor` and :py:func:`~xarray.register_dataarray_accessor` for registering custom xarray @@ -2763,7 +2848,7 @@ Enhancements allowing more control on the colorbar (:issue:`872`). By `Fabien Maussion `_. -- New Dataset method :py:meth:`filter_by_attrs`, akin to +- New Dataset method :py:meth:`Dataset.filter_by_attrs`, akin to ``netCDF4.Dataset.get_variables_by_attributes``, to easily filter data variables using its attributes. `Filipe Fernandes `_. @@ -2890,7 +2975,7 @@ Enhancements - Numerical operations now return empty objects on no overlapping labels rather than raising ``ValueError`` (:issue:`739`). -- :py:class:`~pd.Series` is now supported as valid input to the ``Dataset`` +- :py:class:`~pandas.Series` is now supported as valid input to the ``Dataset`` constructor (:issue:`740`). Bug fixes @@ -2909,7 +2994,7 @@ Bug fixes reindexing leads to NaN values (:issue:`738`). - ``Dataset.rename`` and ``DataArray.rename`` support the old and new names being the same (:issue:`724`). -- Fix :py:meth:`~xarray.Dataset.from_dataset` for DataFrames with Categorical +- Fix :py:meth:`~xarray.Dataset.from_dataframe` for DataFrames with Categorical column and a MultiIndex index (:issue:`737`). - Fixes to ensure xarray works properly after the upcoming pandas v0.18 and NumPy v1.11 releases. 
@@ -2960,7 +3045,7 @@ recommend switching your import statements to ``import xarray as xr``. Breaking changes ~~~~~~~~~~~~~~~~ -- The internal data model used by :py:class:`~xray.DataArray` has been +- The internal data model used by ``xray.DataArray`` has been rewritten to fix several outstanding issues (:issue:`367`, :issue:`634`, `this stackoverflow report`_). Internally, ``DataArray`` is now implemented in terms of ``._variable`` and ``._coords`` attributes instead of holding @@ -2998,7 +3083,7 @@ Breaking changes * x (x) int64 0 1 2 - It is no longer possible to convert a DataArray to a Dataset with - :py:meth:`xray.DataArray.to_dataset` if it is unnamed. This will now + ``xray.DataArray.to_dataset`` if it is unnamed. This will now raise ``ValueError``. If the array is unnamed, you need to supply the ``name`` argument. @@ -3067,7 +3152,7 @@ Enhancements - Plotting: more control on colormap parameters (:issue:`642`). ``vmin`` and ``vmax`` will not be silently ignored anymore. Setting ``center=False`` prevents automatic selection of a divergent colormap. -- New :py:meth:`~xray.Dataset.shift` and :py:meth:`~xray.Dataset.roll` methods +- New ``xray.Dataset.shift`` and ``xray.Dataset.roll`` methods for shifting/rotating datasets or arrays along a dimension: .. ipython:: python @@ -3081,9 +3166,9 @@ Enhancements moves both data and coordinates. - Assigning a ``pandas`` object directly as a ``Dataset`` variable is now permitted. Its index names correspond to the ``dims`` of the ``Dataset``, and its data is aligned. -- Passing a :py:class:`pandas.DataFrame` or :py:class:`pandas.Panel` to a Dataset constructor +- Passing a :py:class:`pandas.DataFrame` or ``pandas.Panel`` to a Dataset constructor is now permitted. -- New function :py:func:`~xray.broadcast` for explicitly broadcasting +- New function ``xray.broadcast`` for explicitly broadcasting ``DataArray`` and ``Dataset`` objects against each other. For example: .. 
ipython:: python @@ -3141,7 +3226,7 @@ API Changes ~~~~~~~~~~~ - The handling of colormaps and discrete color lists for 2D plots in - :py:meth:`~xray.DataArray.plot` was changed to provide more compatibility + ``xray.DataArray.plot`` was changed to provide more compatibility with matplotlib's ``contour`` and ``contourf`` functions (:issue:`538`). Now discrete lists of colors should be specified using ``colors`` keyword, rather than ``cmap``. @@ -3149,10 +3234,10 @@ API Changes Enhancements ~~~~~~~~~~~~ -- Faceted plotting through :py:class:`~xray.plot.FacetGrid` and the - :py:meth:`~xray.plot.plot` method. See :ref:`plotting.faceting` for more details +- Faceted plotting through ``xray.plot.FacetGrid`` and the + ``xray.plot.plot`` method. See :ref:`plotting.faceting` for more details and examples. -- :py:meth:`~xray.Dataset.sel` and :py:meth:`~xray.Dataset.reindex` now support +- ``xray.Dataset.sel`` and ``xray.Dataset.reindex`` now support the ``tolerance`` argument for controlling nearest-neighbor selection (:issue:`629`): @@ -3169,12 +3254,12 @@ Enhancements * x (x) float64 0.9 1.5 This feature requires pandas v0.17 or newer. -- New ``encoding`` argument in :py:meth:`~xray.Dataset.to_netcdf` for writing +- New ``encoding`` argument in ``xray.Dataset.to_netcdf`` for writing netCDF files with compression, as described in the new documentation section on :ref:`io.netcdf.writing_encoded`. -- Add :py:attr:`~xray.Dataset.real` and :py:attr:`~xray.Dataset.imag` +- Add ``xray.Dataset.real`` and ``xray.Dataset.imag`` attributes to Dataset and DataArray (:issue:`553`). -- More informative error message with :py:meth:`~xray.Dataset.from_dataframe` +- More informative error message with ``xray.Dataset.from_dataframe`` if the frame has duplicate columns. - xray now uses deterministic names for dask arrays it creates or opens from disk. 
This allows xray users to take advantage of dask's nascent support for @@ -3189,9 +3274,9 @@ Bug fixes - Aggregation functions now correctly skip ``NaN`` for data for ``complex128`` dtype (:issue:`554`). - Fixed indexing 0d arrays with unicode dtype (:issue:`568`). -- :py:meth:`~xray.DataArray.name` and Dataset keys must be a string or None to +- ``xray.DataArray.name`` and Dataset keys must be a string or None to be written to netCDF (:issue:`533`). -- :py:meth:`~xray.DataArray.where` now uses dask instead of numpy if either the +- ``xray.DataArray.where`` now uses dask instead of numpy if either the array or ``other`` is a dask array. Previously, if ``other`` was a numpy array the method was evaluated eagerly. - Global attributes are now handled more consistently when loading remote @@ -3218,24 +3303,24 @@ v0.6.0 (21 August 2015) This release includes numerous bug fixes and enhancements. Highlights include the introduction of a plotting module and the new Dataset and DataArray -methods :py:meth:`~xray.Dataset.isel_points`, :py:meth:`~xray.Dataset.sel_points`, -:py:meth:`~xray.Dataset.where` and :py:meth:`~xray.Dataset.diff`. There are no +methods ``xray.Dataset.isel_points``, ``xray.Dataset.sel_points``, +``xray.Dataset.where`` and ``xray.Dataset.diff``. There are no breaking changes from v0.5.2. Enhancements ~~~~~~~~~~~~ - Plotting methods have been implemented on DataArray objects - :py:meth:`~xray.DataArray.plot` through integration with matplotlib + ``xray.DataArray.plot`` through integration with matplotlib (:issue:`185`). For an introduction, see :ref:`plotting`. - Variables in netCDF files with multiple missing values are now decoded as NaN after issuing a warning if open_dataset is called with mask_and_scale=True. - We clarified our rules for when the result from an xray operation is a copy - vs. a view (see :ref:`copies vs views` for more details). + vs. a view (see :ref:`copies_vs_views` for more details). 
- Dataset variables are now written to netCDF files in order of appearance when using the netcdf4 backend (:issue:`479`). -- Added :py:meth:`~xray.Dataset.isel_points` and :py:meth:`~xray.Dataset.sel_points` +- Added ``xray.Dataset.isel_points`` and ``xray.Dataset.sel_points`` to support pointwise indexing of Datasets and DataArrays (:issue:`475`). .. ipython:: @@ -3280,7 +3365,7 @@ Enhancements x (points) |S1 'a' 'b' 'g' * points (points) int64 0 1 2 -- New :py:meth:`~xray.Dataset.where` method for masking xray objects according +- New ``xray.Dataset.where`` method for masking xray objects according to some criteria. This works particularly well with multi-dimensional data: .. ipython:: python @@ -3291,11 +3376,10 @@ Enhancements @savefig where_example.png width=4in height=4in ds.distance.where(ds.distance < 100).plot() -- Added new methods :py:meth:`DataArray.diff ` - and :py:meth:`Dataset.diff ` for finite - difference calculations along a given axis. +- Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` + for finite difference calculations along a given axis. -- New :py:meth:`~xray.DataArray.to_masked_array` convenience method for +- New ``xray.DataArray.to_masked_array`` convenience method for returning a numpy.ma.MaskedArray. .. ipython:: python @@ -3304,7 +3388,7 @@ Enhancements da.where(da < 0.5) da.where(da < 0.5).to_masked_array(copy=True) -- Added new flag "drop_variables" to :py:meth:`~xray.open_dataset` for +- Added new flag "drop_variables" to ``xray.open_dataset`` for excluding variables from being parsed. This may be useful to drop variables with problems or inconsistent values. @@ -3333,7 +3417,7 @@ options for ``xray.concat``. Backwards incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- The optional arguments ``concat_over`` and ``mode`` in :py:func:`~xray.concat` have +- The optional arguments ``concat_over`` and ``mode`` in ``xray.concat`` have been removed and replaced by ``data_vars`` and ``coords``. 
The new arguments are both more easily understood and more robustly implemented, and allowed us to fix a bug where ``concat`` accidentally loaded data into memory. If you set values for @@ -3343,16 +3427,16 @@ Backwards incompatible changes Enhancements ~~~~~~~~~~~~ -- :py:func:`~xray.open_mfdataset` now supports a ``preprocess`` argument for +- ``xray.open_mfdataset`` now supports a ``preprocess`` argument for preprocessing datasets prior to concatenaton. This is useful if datasets cannot be otherwise merged automatically, e.g., if the original datasets have conflicting index coordinates (:issue:`443`). -- :py:func:`~xray.open_dataset` and :py:func:`~xray.open_mfdataset` now use a +- ``xray.open_dataset`` and ``xray.open_mfdataset`` now use a global thread lock by default for reading from netCDF files with dask. This avoids possible segmentation faults for reading from netCDF4 files when HDF5 is not configured properly for concurrent access (:issue:`444`). - Added support for serializing arrays of complex numbers with `engine='h5netcdf'`. -- The new :py:func:`~xray.save_mfdataset` function allows for saving multiple +- The new ``xray.save_mfdataset`` function allows for saving multiple datasets to disk simultaneously. This is useful when processing large datasets with dask.array. For example, to save a dataset too big to fit into memory to one file per year, we could write: @@ -3371,7 +3455,7 @@ Bug fixes - Fixed ``min``, ``max``, ``argmin`` and ``argmax`` for arrays with string or unicode types (:issue:`453`). -- :py:func:`~xray.open_dataset` and :py:func:`~xray.open_mfdataset` support +- ``xray.open_dataset`` and ``xray.open_mfdataset`` support supplying chunks as a single integer. - Fixed a bug in serializing scalar datetime variable to netCDF. - Fixed a bug that could occur in serialization of 0-dimensional integer arrays. @@ -3388,9 +3472,9 @@ adds the ``pipe`` method, copied from pandas. 
Enhancements ~~~~~~~~~~~~ -- Added :py:meth:`~xray.Dataset.pipe`, replicating the `new pandas method`_ in version +- Added ``xray.Dataset.pipe``, replicating the `new pandas method`_ in version 0.16.2. See :ref:`transforming datasets` for more details. -- :py:meth:`~xray.Dataset.assign` and :py:meth:`~xray.Dataset.assign_coords` +- ``xray.Dataset.assign`` and ``xray.Dataset.assign_coords`` now assign new variables in sorted (alphabetical) order, mirroring the behavior in pandas. Previously, the order was arbitrary. @@ -3412,7 +3496,7 @@ Highlights The headline feature in this release is experimental support for out-of-core computing (data that doesn't fit into memory) with dask_. This includes a new -top-level function :py:func:`~xray.open_mfdataset` that makes it easy to open +top-level function ``xray.open_mfdataset`` that makes it easy to open a collection of netCDF (using dask) as a single ``xray.Dataset`` object. For more on dask, read the `blog post introducing xray + dask`_ and the new documentation section :doc:`dask`. @@ -3427,7 +3511,7 @@ Backwards incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The logic used for choosing which variables are concatenated with - :py:func:`~xray.concat` has changed. Previously, by default any variables + ``xray.concat`` has changed. Previously, by default any variables which were equal across a dimension were not concatenated. This lead to some surprising behavior, where the behavior of groupby and concat operations could depend on runtime values (:issue:`268`). For example: @@ -3462,8 +3546,8 @@ Backwards incompatible changes Enhancements ~~~~~~~~~~~~ -- New :py:meth:`~xray.Dataset.to_array` and enhanced - :py:meth:`~xray.DataArray.to_dataset` methods make it easy to switch back +- New ``xray.Dataset.to_array`` and enhanced + ``xray.DataArray.to_dataset`` methods make it easy to switch back and forth between arrays and datasets: .. 
ipython:: python @@ -3473,7 +3557,7 @@ Enhancements ds.to_array() ds.to_array().to_dataset(dim='variable') -- New :py:meth:`~xray.Dataset.fillna` method to fill missing values, modeled +- New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: .. ipython:: python @@ -3485,7 +3569,7 @@ Enhancements index based alignment and broadcasting like standard binary operations. It also can be applied by group, as illustrated in :ref:`fill with climatology`. -- New :py:meth:`~xray.Dataset.assign` and :py:meth:`~xray.Dataset.assign_coords` +- New ``xray.Dataset.assign`` and ``xray.Dataset.assign_coords`` methods patterned off the new :py:meth:`DataFrame.assign ` method in pandas: @@ -3497,8 +3581,8 @@ Enhancements These methods return a new Dataset (or DataArray) with updated data or coordinate variables. -- :py:meth:`~xray.Dataset.sel` now supports the ``method`` parameter, which works - like the paramter of the same name on :py:meth:`~xray.Dataset.reindex`. It +- ``xray.Dataset.sel`` now supports the ``method`` parameter, which works + like the parameter of the same name on ``xray.Dataset.reindex``. It provides a simple interface for doing nearest-neighbor interpolation: .. use verbatim because I can't seem to install pandas 0.16.1 on RTD :( @@ -3535,7 +3619,7 @@ Enhancements - Accessing data from remote datasets now has retrying logic (with exponential backoff) that should make it robust to occasional bad responses from DAP servers. -- You can control the width of the Dataset repr with :py:class:`xray.set_options`. +- You can control the width of the Dataset repr with ``xray.set_options``. It can be used either as a context manager, in which case the default is restored outside the context: @@ -3561,7 +3645,7 @@ Deprecations ~~~~~~~~~~~~ - The method ``load_data()`` has been renamed to the more succinct - :py:meth:`~xray.Dataset.load`. + ``xray.Dataset.load``.
v0.4.1 (18 March 2015) ---------------------- @@ -3574,7 +3658,7 @@ Enhancements - New documentation sections on :ref:`time-series` and :ref:`combining multiple files`. -- :py:meth:`~xray.Dataset.resample` lets you resample a dataset or data array to +- ``xray.Dataset.resample`` lets you resample a dataset or data array to a new temporal resolution. The syntax is the `same as pandas`_, except you need to supply the time dimension explicitly: @@ -3617,7 +3701,7 @@ Enhancements array.resample('1D', dim='time', how='first') -- :py:meth:`~xray.Dataset.swap_dims` allows for easily swapping one dimension +- ``xray.Dataset.swap_dims`` allows for easily swapping one dimension out for another: .. ipython:: python @@ -3627,7 +3711,7 @@ Enhancements ds.swap_dims({'x': 'y'}) This was possible in earlier versions of xray, but required some contortions. -- :py:func:`~xray.open_dataset` and :py:meth:`~xray.Dataset.to_netcdf` now +- ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now accept an ``engine`` argument to explicitly select which underlying library (netcdf4 or scipy) is used for reading/writing a netCDF file. @@ -3662,7 +3746,7 @@ Breaking changes - We now automatically align index labels in arithmetic, dataset construction, merging and updating. This means the need for manually invoking methods like - :py:func:`~xray.align` and :py:meth:`~xray.Dataset.reindex_like` should be + ``xray.align`` and ``xray.Dataset.reindex_like`` should be vastly reduced. :ref:`For arithmetic`, we align @@ -3714,7 +3798,7 @@ Breaking changes (a + b).coords This functionality can be controlled through the ``compat`` option, which - has also been added to the :py:class:`~xray.Dataset` constructor. + has also been added to the ``xray.Dataset`` constructor. - Datetime shortcuts such as ``'time.month'`` now return a ``DataArray`` with the name ``'month'``, not ``'time.month'`` (:issue:`345`). 
This makes it easier to index the resulting arrays when they are used with ``groupby``: @@ -3752,7 +3836,7 @@ Breaking changes Enhancements ~~~~~~~~~~~~ -- Support for :py:meth:`~xray.Dataset.reindex` with a fill method. This +- Support for ``xray.Dataset.reindex`` with a fill method. This provides a useful shortcut for upsampling: .. ipython:: python @@ -3766,16 +3850,15 @@ Enhancements - Use functions that return generic ndarrays with DataArray.groupby.apply and Dataset.apply (:issue:`327` and :issue:`329`). Thanks Jeff Gerard! - Consolidated the functionality of ``dumps`` (writing a dataset to a netCDF3 - bytestring) into :py:meth:`~xray.Dataset.to_netcdf` (:issue:`333`). -- :py:meth:`~xray.Dataset.to_netcdf` now supports writing to groups in netCDF4 + bytestring) into ``xray.Dataset.to_netcdf`` (:issue:`333`). +- ``xray.Dataset.to_netcdf`` now supports writing to groups in netCDF4 files (:issue:`333`). It also finally has a full docstring -- you should read it! -- :py:func:`~xray.open_dataset` and :py:meth:`~xray.Dataset.to_netcdf` now +- ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now work on netCDF3 files when netcdf4-python is not installed as long as scipy is available (:issue:`333`). -- The new :py:meth:`Dataset.drop ` and - :py:meth:`DataArray.drop ` methods makes it easy to drop - explicitly listed variables or index labels: +- The new ``xray.Dataset.drop`` and ``xray.DataArray.drop`` methods + make it easy to drop explicitly listed variables or index labels: .. ipython:: python :okwarning: @@ -3788,12 +3871,12 @@ Enhancements arr = xray.DataArray([1, 2, 3], coords=[('x', list('abc'))]) arr.drop(['a', 'c'], dim='x') -- :py:meth:`~xray.Dataset.broadcast_equals` has been added to correspond to +- ``xray.Dataset.broadcast_equals`` has been added to correspond to the new ``compat`` option. - Long attributes are now truncated at 500 characters when printing a dataset (:issue:`338`).
This should make things more convenient for working with datasets interactively. -- Added a new documentation example, :ref:`monthly means example`. Thanks Joe +- Added a new documentation example, :ref:`/examples/monthly-means.ipynb`. Thanks Joe Hamman! Bug fixes @@ -3814,8 +3897,8 @@ Deprecations ~~~~~~~~~~~~ - ``dump`` and ``dumps`` have been deprecated in favor of - :py:meth:`~xray.Dataset.to_netcdf`. -- ``drop_vars`` has been deprecated in favor of :py:meth:`~xray.Dataset.drop`. + ``xray.Dataset.to_netcdf``. +- ``drop_vars`` has been deprecated in favor of ``xray.Dataset.drop``. Future plans ~~~~~~~~~~~~ @@ -3945,10 +4028,10 @@ backwards incompatible changes. New features ~~~~~~~~~~~~ -- Added :py:meth:`~xray.Dataset.count` and :py:meth:`~xray.Dataset.dropna` +- Added ``xray.Dataset.count`` and ``xray.Dataset.dropna`` methods, copied from pandas, for working with missing values (:issue:`247`, :issue:`58`). -- Added :py:meth:`DataArray.to_pandas ` for +- Added ``xray.DataArray.to_pandas`` for converting a data array into the pandas object with the same dimensionality (1D to Series, 2D to DataFrame, etc.) (:issue:`255`). - Support for reading gzipped netCDF3 files (:issue:`239`). @@ -3981,7 +4064,7 @@ New features of arrays of metadata that describe the grid on which the points in "variable" arrays lie. They are preserved (when unambiguous) even though mathematical operations. -- **Dataset math** :py:class:`~xray.Dataset` objects now support all arithmetic +- **Dataset math** ``xray.Dataset`` objects now support all arithmetic operations directly. Dataset-array operations map across all dataset variables; dataset-dataset operations act on each pair of variables with the same name. @@ -3997,7 +4080,7 @@ Backwards incompatible changes - ``Dataset.__eq__`` and ``Dataset.__ne__`` are now element-wise operations instead of comparing all values to obtain a single boolean. Use the method - :py:meth:`~xray.Dataset.equals` instead. + ``xray.Dataset.equals`` instead. 
Deprecations ~~~~~~~~~~~~ @@ -4006,7 +4089,7 @@ Deprecations - ``Dataset.select_vars`` deprecated: index a ``Dataset`` with a list of variable names instead. - ``DataArray.select_vars`` and ``DataArray.drop_vars`` deprecated: use - :py:meth:`~xray.DataArray.reset_coords` instead. + ``xray.DataArray.reset_coords`` instead. v0.2 (14 August 2014) --------------------- @@ -4016,16 +4099,16 @@ fixes. Here are the highlights: - There is now a direct constructor for ``DataArray`` objects, which makes it possible to create a DataArray without using a Dataset. This is highlighted - in the refreshed :doc:`tutorial`. + in the refreshed ``tutorial``. - You can perform aggregation operations like ``mean`` directly on - :py:class:`~xray.Dataset` objects, thanks to Joe Hamman. These aggregation + ``xray.Dataset`` objects, thanks to Joe Hamman. These aggregation methods also worked on grouped datasets. - xray now works on Python 2.6, thanks to Anna Kuznetsova. - A number of methods and attributes were given more sensible (usually shorter) names: ``labeled`` -> ``sel``, ``indexed`` -> ``isel``, ``select`` -> ``select_vars``, ``unselect`` -> ``drop_vars``, ``dimensions`` -> ``dims``, ``coordinates`` -> ``coords``, ``attributes`` -> ``attrs``. -- New :py:meth:`~xray.Dataset.load_data` and :py:meth:`~xray.Dataset.close` +- New ``xray.Dataset.load_data`` and ``xray.Dataset.close`` methods for datasets facilitate lower level of control of data loaded from disk. diff --git a/examples/xarray_multidimensional_coords.ipynb b/examples/xarray_multidimensional_coords.ipynb deleted file mode 100644 index 508d6f388d5..00000000000 --- a/examples/xarray_multidimensional_coords.ipynb +++ /dev/null @@ -1,370 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "toc": true - }, - "source": [ - "

Table of Contents

\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Working with Multidimensional Coordinates\n", - "\n", - "Author: [Ryan Abernathey](https://github.com/rabernat)\n", - "\n", - "Many datasets have _physical coordinates_ which differ from their _logical coordinates_. Xarray provides several ways to plot and analyze such datasets." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:49:56.068395Z", - "start_time": "2018-11-28T20:49:56.035349Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "numpy version : 1.17.2\n", - "pandas version : 0.25.1\n", - "xarray version : 0.13.0+45.g4296096b.dirty\n" - ] - } - ], - "source": [ - "%matplotlib inline\n", - "import numpy as np\n", - "import pandas as pd\n", - "import xarray as xr\n", - "import cartopy.crs as ccrs\n", - "from matplotlib import pyplot as plt\n", - "\n", - "print(\"numpy version : \", np.__version__)\n", - "print(\"pandas version : \", pd.__version__)\n", - "print(\"xarray version : \", xr.__version__)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As an example, consider this dataset from the [xarray-data](https://github.com/pydata/xarray-data) repository." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:50:13.629720Z", - "start_time": "2018-11-28T20:50:13.484542Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "Dimensions: (time: 36, x: 275, y: 205)\n", - "Coordinates:\n", - " * time (time) object 1980-09-16 12:00:00 ... 1983-08-17 00:00:00\n", - " xc (y, x) float64 189.2 189.4 189.6 189.7 ... 17.65 17.4 17.15 16.91\n", - " yc (y, x) float64 16.53 16.78 17.02 17.27 ... 28.26 28.01 27.76 27.51\n", - "Dimensions without coordinates: x, y\n", - "Data variables:\n", - " Tair (time, y, x) float64 nan nan nan nan nan ... 
29.8 28.66 28.19 28.21\n", - "Attributes:\n", - " title: /workspace/jhamman/processed/R1002RBRxaaa01a/l...\n", - " institution: U.W.\n", - " source: RACM R1002RBRxaaa01a\n", - " output_frequency: daily\n", - " output_mode: averaged\n", - " convention: CF-1.4\n", - " references: Based on the initial model of Liang et al., 19...\n", - " comment: Output from the Variable Infiltration Capacity...\n", - " nco_openmp_thread_number: 1\n", - " NCO: \"4.6.0\"\n", - " history: Tue Dec 27 14:15:22 2016: ncatted -a dimension..." - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = xr.tutorial.open_dataset('rasm').load()\n", - "ds" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this example, the _logical coordinates_ are `x` and `y`, while the _physical coordinates_ are `xc` and `yc`, which represent the latitudes and longitude of the data." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:50:15.836061Z", - "start_time": "2018-11-28T20:50:15.768376Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'long_name': 'longitude of grid cell center', 'units': 'degrees_east', 'bounds': 'xv'}\n", - "{'long_name': 'latitude of grid cell center', 'units': 'degrees_north', 'bounds': 'yv'}\n" - ] - } - ], - "source": [ - "print(ds.xc.attrs)\n", - "print(ds.yc.attrs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Plotting ##\n", - "\n", - "Let's examine these coordinate variables by plotting them." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:50:17.928556Z", - "start_time": "2018-11-28T20:50:17.031211Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(14,4))\n", - "ds.xc.plot(ax=ax1)\n", - "ds.yc.plot(ax=ax2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that the variables `xc` (longitude) and `yc` (latitude) are two-dimensional scalar fields.\n", - "\n", - "If we try to plot the data variable `Tair`, by default we get the logical coordinates." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:50:20.567749Z", - "start_time": "2018-11-28T20:50:19.999393Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "ds.Tair[0].plot()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to visualize the data on a conventional latitude-longitude grid, we can take advantage of xarray's ability to apply [cartopy](http://scitools.org.uk/cartopy/index.html) map projections." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:50:31.131708Z", - "start_time": "2018-11-28T20:50:30.444697Z" - } - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.figure(figsize=(14,6))\n", - "ax = plt.axes(projection=ccrs.PlateCarree())\n", - "ax.set_global()\n", - "ds.Tair[0].plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(), x='xc', y='yc', add_colorbar=False)\n", - "ax.coastlines()\n", - "ax.set_ylim([0,90]);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Multidimensional Groupby ##\n", - "\n", - "The above example allowed us to visualize the data on a regular latitude-longitude grid. But what if we want to do a calculation that involves grouping over one of these physical coordinates (rather than the logical coordinates), for example, calculating the mean temperature at each latitude. This can be achieved using xarray's `groupby` function, which accepts multidimensional variables. By default, `groupby` will use every unique value in the variable, which is probably not what we want. Instead, we can use the `groupby_bins` function to specify the output coordinates of the group. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:50:43.670463Z", - "start_time": "2018-11-28T20:50:43.245501Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEJCAYAAACdePCvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dd3hUZd7/8fd30hMSQkgIIQWQEnoNqIgdEQURXez6WB+3+PzWuuqu6+7jru6zrm7Rta3dXV0bKnbFhihYCL13MIQWSgIkQNr9+yPjSktIIDNnMvN5XVeuyZycM+ebcyXzmXOf+9y3OecQEZHI4/O6ABER8YYCQEQkQikAREQilAJARCRCKQBERCKUAkBEJEIFPADM7Gkz22Rm8/dadp+ZLTazuWb2hpmlBroOERHZlwX6PgAzOwHYCfzTOdfHv2wk8KlzrtrM7gVwzt12qNdKT093nTp1CmS5IiJhZ8aMGZudcxn7L48O9I6dc1PMrNN+yybt9fRrYHxjXqtTp04UFhY2X3EiIhHAzNYcbHkoXAO4Cnjf6yJERCKNpwFgZncA1cALDaxzrZkVmllhSUlJ8IoTEQlzngWAmV0OjAEucQ1ciHDOPe6cK3DOFWRkHNCEJSIihyng1wAOxsxGAbcBJzrnKryoQUQk0gWjG+iLwFdAvpmtNbOrgYeAZOAjM5ttZo8Fug4REdlXMHoBXXSQxU8Fer8iItKwUOgFJCIiHojoANhWXsnHCzcy67ttXpciIhJ0nlwE9oJzjjVbKpi+eisz1myjcM02lm/aCUBibBSf3XISmSnxHlcpIhI8EREAD3+2nGemrmLzzkoAUuKjKeiUxjkDs+mSkcTPX5rNH99fzF8vGOBxpSIiwRMRAdAmMZYTumVQ0CmNgk5t6JrRCp/P/vPza4/fzkOfLeeSo/Mo6JTmYaUiIsET8MHgmlNBQYELxFhAFZXVnHL/56Qnx/LmdcOJ2iscRERaOjOb4Zwr2H95RF8E/l5ibDS/Gt2T+cXbeaWwyOtyRESCQgHgd1a/LIZ2SuO+D5dQVlHldTkiIgGnAPAzM347thelFZX89eOlXpcjIhJwCoC99O7QmouPzuNfX69hyYYdXpcjIhJQCoD93HxaPq3iornr7QW0pAvkIiJNpQDYT5ukWG4Z2Z1pK7bwwfwNXpcjIhIwCoCDuPjojvTMSuHudxexq7KmSds657jimW95duqqAFUnItI8FAAHEeUz/vesXhSX7uIfU1Y0adtZRaVMXlLCY5+vpKZWTUgiEroUAPU4+qi2nN47k2enrWZ3VePPAl6fuRaADdt38+XyzYEqT0TkiCkAGnDZMZ0orajiwwWNuxawp7qGt+es54w+7UlNjOFV3VQmIiFMAdCAYV3akpuWwIvffteo9T9dtImyXVVcODSPcQOymbRwo24qE5GQpQBogM9nXDgkj69XbmXV5vJDrv/azGLaJcdxXJe2jB+cQ2V1LW/NKQ5CpSIiTReMOYGfNrNNZjZ/r2VpZvaRmS3zP7YJdB2H67zBOUT5jJemN3wWsGXnHiYv2cS4gdlER/no3SGFHu2TeXXG2iBVKiLSNME4A3gWGLXfstuBT5xz3
YBP/M9DUruUeEb0bMeEwrVUVtfWu97bc9ZRXes4d1A2UDe0xHkFucxdW6a7ikUkJAU8AJxzU4Ct+y0+G3jO//1zwLhA13EkLhyax5bySj5etLHedV6bWez/1J/yn2XjBnQg2mdMmKGLwSISery6BpDpnFsP4H9s51EdjXJCtwyyU+u/GLx04w7mFZdx7qCcfZa3bRXHqT3b8casYqpq6j97EBHxQshfBDaza82s0MwKS0pKPKkhymecX5DLF8s2U7S14oCfvz6zmCifMbZ/hwN+dt7gXDbvrGTyEm9qFxGpj1cBsNHMsgD8j5vqW9E597hzrsA5V5CRkRG0Avd3/pAcfAYvT9+3Oaem1jFxVjEnds8gIznugO1Oys8gvVWc7gkQkZDjVQC8BVzu//5y4E2P6mi0rNYJnJTfjlcKi6jeqznnqxVb2LB9Nz/ar/nne9FRPs4dlM2nizexeeeeYJUrInJIwegG+iLwFZBvZmvN7Grgj8BpZrYMOM3/PORdNDSPTTv28OniH05YXpu5luT4aE7tWf9ljPMG51DtP1MQEQkVwegFdJFzLss5F+Ocy3HOPeWc2+KcO9U5183/uH8voZB0cn4GmSlx/7kYvHNPNR/M38CYfh2Ij4mqd7tumcn0z01lwoy1mmNAREJGyF8EDiXRUT7OG5zL50tLWFe6iw/mb2BXVQ0/8vf9b8j4wTks3rCDBeu2B6FSEZFDUwA00QVDcnHAK4VFvD5zLR3bJjK446FvZB7brwOx0T5dDBaRkKEAaKLctESGd03nX1+t4auVWzh3YA5mdsjtWifGcHrv9rw5Zx17qps2yYyISCAoAA7DRf47g52DcwYeuvnne+cNzqG0oor352mqSRHxngLgMIzomUl6qziGdk4jr21io7c7rms6Pdonc+fE+Sze0LRrAVvLK3XmICLNylpSr5SCggJXWFjodRkArCjZSVJsNO1bxzdpu3WluzjnkakYxhvXDSOrdcIht3nyi5Xc/e4iYqN89OyQwsDcVPrntqZ/Tiqd2ibh8x26CUpEIpeZzXDOFRywXAEQfIvWb+e8x74iOzWBV396LCnxMQddzznHfR8u4ZHJKxjRM5MuGUnMLiplXnEZFf7J6lPiozn6qLbcN74fqYmxwfw1RKSFqC8Aor0oJtL1zErhH5cN5vKnv+XH/5zBc1cNJTZ639a4mlrHryfO48Vvi7hoaB53j+tDlP+Tfk2tY/mmncwu2sbMNaW8XFjE6zOLuWp4Zy9+HRFpoXQNwCPHdU3nT+P78dXKLdw6YQ61tT+cie2uquG6F2by4rdF/M/JXfnDOT+8+UPd4HT57ZO5YEge947vR35mMpMW6sKyiDSNAsBD5w7K4Ren5zNx9jrum7QEqLu7+Kpnp/PBgg38Zkwvbjk9/5DdTEf2zuTbVVvZVl4ZjLJFJEyoCchjPzupC8Wlu3h08gpaxUXzwfwNLFq/nb9e0J9zBh58gLn9jezVnr9/upxPFm9i/ODGbSMiogDwmJnxu7G92Vi2m/s+XEJ8jI8n/quAk3s0fo6cPtkpZLWOZ9KCDQoAEWk0BUAIiI7y8feLB3L/h0sZ3S+rUUNL7M3MGNkrk5cLi9hVWUNCbP0D04mIfE/XAEJEYmw0vzmrV5Pf/L83snd7dlfV8sUyzTwmIo2jAAgTQzunkRIfzYcL6p+4XkRkbwqAMBET5ePUnpl8snjjPjOWiYjURwEQRkb2yqS0oorpq7d5XYqItAAKgDByQvcMYqN9uilMRBrF0wAwsxvNbIGZzTezF82saSOryT6S4qI5vms6kxZs1NSTInJIngWAmWUDPwcKnHN9gCjgQq/qCRcje2dSXLqLhes19aSINMzrJqBoIMHMooFEYJ3H9bR4p/bMxGcwSb2BROQQPAsA51wxcD/wHbAeKHPOTfKqnnCR3iqOgo5pTFqoABCRhnnZBNQGOBvoDHQAkszs0oOsd62ZFZpZYUmJbnJqjJG9M1m0fjtFWyu8L
kVEQpiXTUAjgFXOuRLnXBXwOjBs/5Wcc4875wqccwUZGRlBL7IlOq1XJoDOAkSkQV4GwHfAMWaWaHXjHZ8KLPKwnrDRsW0SPdonM2mBuoOKSP28vAbwDTABmAnM89fyuFf1hJuRvTKZvnorWzVHgIjUw9NeQM653zrnejjn+jjnLnPO7fGynnAysnd7ah18skjNQCJycF53A5UA6d0hhQ6t4zU4nIjUS/MBhCkzY2Tv9rz47Xfc9PJsdlfXsKeqdp/H1IRY/u/cvuSmJXpdroh4QGcAYWz84BwykuP4dvVWlm7cyYbtu9ldVUtcjI92yfHMKy5j/GPTWLpxh9eliogHrCWNGVNQUOAKCwu9LiNsLNmwg8ue+obKmlqevXIoA3JTvS5JRALAzGY45wr2X64zgAiW3z6ZCT8ZRkp8DJc88TXTlm/2uiQRCSIFQITLa5vIhJ8cS06bRK54Zjof6t4BkYihABDapcTz8o+PoVeHFH72wkxem7HW65JEJAgUAAJAamIsL1xzNMce1ZabX53Ds1NXeV2SiASYAkD+IykumqeuKGBEz0zuemchG7fv9rokEQkgBYDsIy46itvP6IFz8P689V6XIyIBpACQA3Rt14oe7ZN5Z64CQCScKQDkoMb0y6JwzTbWl+3yuhQRCRAFgBzUmX2zAHhvnrqFioQrBYAc1FEZreiVlcK7cxs/TXPZrirWleqMQaSlUABIvUb3y2Lmd6UUN+JN3TnHNc9NZ+Rfp7B8084gVCciR0oBIPUa7W8GakxvoG9XbWX66m1UVFZz7b8K2bG7KtDlicgRUgBIvTqlJ9EnO6VRvYEenryC9FaxPHXFENZsqeCmV+ZQW9tyBhoUiUQKAGnQ6L4dmF1UStHWinrXmbe2jClLS7h6+FGcnN+OO87syUcLN/LQZ8uDWKmINJWnAWBmqWY2wcwWm9kiMzvWy3rkQP9pBppf/1nAI5OXkxwfzaXH5AFw5XGdOHdgNn/9eKmmpBQJYV6fATwAfOCc6wH0BxZ5XI/sJ69tIv1yWvNuPc1Ayzft4IMFG7hiWCeS42OAutnI/nBuX3plpXDDS7NZWaKLwiKhyLMAMLMU4ATgKQDnXKVzrtSreqR+o/tmMWdt2UGbgR6dvJL46CiuPK7zPsvjY6L4x2WDiYn2ce2/ZrBzT3WwyhWRRvLyDOAooAR4xsxmmdmTZpbkYT1Sj+9vCnt3v95ARVsrmDi7mIuG5pGWFHvAdjltEnno4oGs2lzOza/M1kVhkRDjZQBEA4OAR51zA4Fy4Pb9VzKza82s0MwKS0pKgl2jALlpifTPTT2gGeiJL1biM/jvEzrXsyUM65LOL8/owYcLNvKPKSsDXaqINIGXAbAWWOuc+8b/fAJ1gbAP59zjzrkC51xBRkZGUAuUH4zpm8W84jLWbCkHYNOO3bw0vYgfDcohq3VCg9tePbwzp/RoxxNfrKS6pjYY5YpII3gWAM65DUCRmeX7F50KLPSqHmnYGX3bAz80Az315Sqqa2r5yYldDrmtmXF+QS5byyv5auWWgNYpIo3ndS+g/we8YGZzgQHAHzyuR+qR0yaRgXl1zUBlFVU8/9UaxvTrQKf0xl22OSk/g6TYqHp7E4lI8HkaAM652f7mnX7OuXHOuW1e1iMNG903iwXrtnPX2wsor6zhpycd+tP/9+JjojitVyYfLNhAlZqBREKC12cA0oJ83xvo9VnFjOjZjp5ZKU3afnS/DpRWVDFthZqBREKBAkAarUNqAoM7tgHgZyd3bfL2x3dLJzkumnfmNH6IaREJHAWANMlNp3XnxhHdGZTXpsnbft8M9OGCDVRWqxlIxGsKAGmS47qmc/2Iboe9/Zj+WWzfXc3U5ZubsSoRORwKAAmq4V0zSI6P1oTzIiFAASBBFRvt4/Te7Zm0cAN7qmu8LkckoikAJOhG98tix+5qvliqZiARLykAJOiGd02ndULMAYPLiUhwKQAk6GKifIzq3Z6PFm5kd5Wag
US8ogAQT4zul8XOPdV8vlQjvIp4RQEgnji2S1vaJMZobCARDzUYAFYnK1jFSOSIifIxqk8WHy9SM5CIVxoMAOecA94JUi0SYcb0y6KisobJSzZ5XYpIRGpME9C3ZnbARC0iR+rozmm0TYrVTWEiHmlMAAynLgSWmNlM//y9MwNdmIS/6Cgfo/q055NFm6io1KTxIsEW3Yh1xgW8ColYo/tl8cI33/H+vA2M6Z9FbJQPM/O6LJGIUG8AmFmSc64cUD89CZijO7clIzmOm1+dw82vziHaZyTGRpEUF133FRvFgNxUzh+SS+8Orb0uVySsNHQGMAE4A1gAOGDvj2UOyAtgXRIhonzGU5cXMGPNNioqayjfU73PY9muKl6cXsRzX62hT3YKFxTkMnZANq0TYrwuXaTFs7qOPi1DQUGBKyws9LoMCbLSikomzirm5cK1LFq/nbjoumsHFxTkcmyXtmoyEjkEM5vhnCs4YHljAsDMWgNdgPjvlznnpjVTYVFAIVDsnBvT0LoKgMjmnGN+8XZeKSxi4uxiduyu5rqTu/CL03t4XZpISKsvAA7ZC8jMrgamAZ8C9/of/9CMtV0PLGrG15MwZWb0zWnN78f1YfodIxjbvwOPT1nJypKdXpcm0iI1phvoDUABsNo5dzwwGGiWjttmlgOMBp5sjteTyBEfE8Wvx/QkLjqKu9/V5weRw9GYANjtnNsFYGaxzrkFQHOdc/8NuBWod4JYM7vWzArNrLCkRB2S5AftkuP5+ald+XTxJj7T3cQiTVZvAJjZ9z2E1ptZKvA28KGZvQZsPNIdm9kYYJNzbkZD6znnHnfOFTjnCjIyMo50txJmrhjWmaPSk/j92wuPaKL5sooqHv5sOec/9pUGqJOI0dAZwLcAzrmxzrlS59ydwN3AC8DZzbDv44CxZrYaeAk4xcyeb4bXlQgSG+3jzjG9WLm5nOemrW7y9mu3VfC7txdy7B8/4b4Pl7BmaznX/Xsmt02Yq7uTJew1dB/AAX3rnHOfNNeOnXO/BH4JYGYnAbc45y5trteXyHFyj3aclJ/Bg58sY9zAbDKS4w65zcJ123l8ygrenrseA8b278B/n3AUXdu14q8fLeXRz1cwfc1WHrxwIH2ydQOahKd6u4Ga2VrgL/Vt6Jyr92dNLuKHAFA3UDksK0p2cvpfp/CjQTncO75fg+vd9fZCpiwtISk2iouG5nHV8M50SE3YZ71pyzdz4yuz2VZexa2j8rnquM74fLrfQFqm+rqBNnQGEAW04iBnAs3NOTcZmBzo/Uj46pLRiiuP68STX67i0mM60jdn30/tVTW1PD5lJQ98soyEmChuHZXPJUd3rPeO4mFd03n/+hO4dcJc7n53EV8s28z95/Vv1NmFSEvR0BnATOdcSA0DrTMAacj23VWccv9kOrZNYsJPjv3PHcLzi8u4dcJcFq7fzui+Wfzv2N6NfiN3zvH812u4+91FJMdHc9fYPpzZt73uPpYW5XBuBNNfuLQoKfEx3Hp6D2as2cabs9exu6qGez9YzNkPT6Vk5x4eu3QwD18yqEmf4s2My47txFv/M5z2reO57t8zuea5QopLdwXwNxEJjobOANKcc1uDXE+DdAYgh1Jb6xj3yFQ2lO2mVVw0KzeXc35BDnec2YvWiUc2gFx1TS3PTlvNnyctxQxuGZnP5cM6EaVrAxLimnwGEGpv/iKN4fMZvz2rN5t27KGyppbnrz6aP43vf8Rv/lA3gc01xx/FpBtPYGjnNH73zkLOeWQqC9aVNUPlIsGn0UAlLC3duIOcNgkkxjZmzqOmc87x9tz1/O7tBWyrqOKa4ztz44juxMdEBWR/IkfisAeDE2mJumcmB+zNH+quDYzt34GPbzqR8YNy+MfnKzn7oaksWr89YPsUaW4KAJEjkJoYy73j+/HMFUPYUl7J2Q9N5YkpK6mtbTln1hK5FAAizeDkHu348IbjOSk/g3veW8QlT37DOvUUkhCnABBpJm1bx
fGPywZz74/6MmdtKaP+NoW35qzzuiyReikARJqRmXHBkDzev/54urRrxc9fnMUNL81iV2WN16WJHEABIBIAHdsm8eqPj+XGEd15c846Lnriazbv3ON1WSL7UACIBEh0lI/rR3Tj0UsGs3jDds55ZCrLN2n6SgkdCgCRABvVpz0vXXssuypr+NGj0/h65RavSxIBFAAiQTEgN5U3fnYc6a1iueypb3hj1lqvSxJRAIgES25aIq//9DgGd2zDjS/P4cFPltGS7sSX8KMAEAmi1okx/POqozl3YDZ/+Wgpt782TzeNiWcCd6+8iBxUbLSPP5/fnw6pCTz02XLSWsVy26geXpclEUgBIOIBM+Pmkd3ZWlHJo5NXkNsmkYuPzvO6LIkwnjUBmVmumX1mZovMbIGZXe9VLSJeMDN+N7Y3J+dncOeb8/lsySavS5II4+U1gGrgZudcT+AY4Doz6+VhPSJBFx3l46GLB9GjfTLXvTCT+cWaW0CCx7MAcM6td87N9H+/A1gEZHtVj4hXkuKiefqKIaQmxHDVs9M1iJwETUj0AjKzTsBA4BtvKxHxRmZKPM9cOZRdlTVc+cx0tu+u8rokiQCeB4CZtQJeA25wzh0wm4aZXWtmhWZWWFJSEvwCRYIkv30yj146mBUlO/nZ8zOpqqn1uiQJc54GgJnFUPfm/4Jz7vWDreOce9w5V+CcK8jIyAhugSJBNrxbOv93bl++XL6Z37w53+tyJMx52QvIgKeARc65v3hVh0ioOa8glx+feBQvflvEF8t01iuB4+UZwHHAZcApZjbb/3Wmh/WIhIwbR3Snc3oSd06cz+4qzSUggeFlL6AvnXPmnOvnnBvg/3rPq3pEQkl8TBS/P7sPq7dU8Mhny70uR8KU5xeBReTghndL5+wBHXj08xWaR0ACQgEgEsJ+PboXCTFR/HriPI0cKs1OASASwjKS47jtjB58vXIrr88s9rocCTMKAJEQd9GQPAblpXLPe4vYVl7pdTkSRhQAIiHO5zPuOacvZbuq+OP7i70uR8KIAkCkBeiZlcI1wzvzcmER367a6nU5EiYUACItxPUjupGdmsAdb8yjslrDRMiRUwCItBCJsdH87uzeLNu0kye+WOl1ORIGFAAiLcipPTMZ3TeLv328lDlFpV6XIy2cAkCkhbnnnD60S47nun/PpGyXho2Ww6cAEGlhUhNj+fvFA9lQtpvbJszVDWJy2BQAIi3QoLw23Doqnw8WbOCfX63xuhxpoRQAIi3UNcOP4pQe7bjn3UWaS1gOiwJApIXy+Yz7z+tPWlIs1/17Jjs0jaQ0kQJApAVLS6q7HrB22y5uf10DxknTKABEWrghndK46bTuvDt3Pf/+9juvy5EWRAEgEgZ+emIXTuiewV1vL2Thuu1elyMthAJAJAz4fMZfzu9PakIM1780S9NISqN4GgBmNsrMlpjZcjO73ctaRFq69FZx3Du+H8s27eSBT5Z5XY60AJ4FgJlFAQ8DZwC9gIvMrJdX9YiEg5Pz23FBQS7/+HwFs77b5nU5EuK8PAMYCix3zq10zlUCLwFne1iPSFi4Y0xP2qfEc8urc9QUJA3yMgCygaK9nq/1LxORI5ASH8O94/uxoqScv3y01OtyJIR5GQB2kGUHdGI2s2vNrNDMCktKSoJQlkjLd3y3DC4amscTX6xkxhpNICMH52UArAVy93qeA6zbfyXn3OPOuQLnXEFGRkbQihNp6e4Y3ZMOrRO45dW57Ko8/KagNVvKWVe6q1luMnPOUbJjD8s27jji15IjF+3hvqcD3cysM1AMXAhc7GE9ImGlVVw0fxrfj0ue/Ib7Jy3hzjFN72Mxp6iUcx+dRk2tIzE2ii4ZreiSkVT32K4VndomkRgbRZTP8PmMKDN8Pogyo8Y5VpaUs2zjDpZu3MnSjTtYtmknW/0T2z931VBO7K4PdV7yLACcc9Vm9j/Ah0AU8LRzboFX9YiEo+O6pnPZMR15euoqRvVpz5BOa
Y3etrK6llsnzCWjVRzXndyFFSXlrCjZyfTV25g4+4CT9QYlx0XTvX0yp/fOpFu7ZJ6Ztoo/fbCY47um4/MdrDVYgsHLMwCcc+8B73lZg0i4u/2MHkxeuolfvDqH964/nsTYxv3bPzJ5OUs27uDpKwo4pUfmPj+rqKxmZUk5q7eUs6eqllrnqHWOmlqocY7aWocZdGybRPfMVrRPicfshzf6tKRYbnh5Nu/OW89Z/Ts06+8rjWctafCogoICV1hY6HUZIi3OVyu2cNETX3PuoGzuH9//kJ+6l2zYwZi/f1E3/eSFA5u9ntpax5kPfsHuqho+uulEYqI0KEEgmdkM51zB/st11EUiwLFd2nLDiG68PrOYu99d1OAF3eqaWm6dMIeU+Bh+c1bvgNTj8xm3jspn9ZYKXiksOvQGEhAKAJEIcf2p3bjyuE48PXUVf/u4/qEinp66ijlry/jfsb1JS4oNWD0n57djSKc2PPDxsiPqpSSHTwEgEiHMjDtH9+K8wTk88Mkynvxi5QHrrNpczp8nLeW0XpmM6ZcV8HpuHdWDTTv28Oy01QHdlxycAkAkgvh8xh9/1I8z+7bn7ncX8fL0H+YPqK113PbaXGKjfdw9rs8+F20DZUinNE7t0Y5HJy+nrEIzmgWbAkAkwkT5jL9dMJATu2dw++vzeGduXZfOF779jm9XbeXO0b3ITIkPWj23nJ7Pjj3VPDZlRdD22Rjz1paxeEN4z62gABCJQLHRPh67dDBDOqZx48uz+fc33/HH9xZxfLd0zivICWotPbNSOLt/B56ZuoqN23cHdd/1ef7rNYx7ZCpnPvAFd729gJ17qr0uKSAUACIRKiE2iievKCC/fTK/emMeDvjDOX2D0vSzv5tOy6e6xvGgx/MY1NQ6fv/OQn49cT4ndEvnwqF5PDN1Naf95XMmLdjgaW2BoAAQiWAp8TH886qjOaF7Bvec04fctERP6shrm8jFR+fx0vQiVm0ub9K2VTW1lFVUUVF5ZJ/Sy/dU8+N/FfLUl6u4YlgnnvivAv5wTl9e++kwWifEcO2/ZnDtPwtZV7rriPYTSnQjmIiEhE07dnPinyZzXNe2nF+Qy9bySrZWVLJ1p/+xvJJS/xt9+Z6ausfKGiqrawHwGfTNSWVYl7YM69KWgo5pJMRGNWrf68t2cfWzhSzesJ3fntWby4d12ufnVTW1PPXlKv728VKizLhpZD4ndk9n7bZdrCvdTXFpBcX+79dv30WvrBQuHJLHCd0ziAqBoS7quxFMASAiIePPk5bw90+X77MsISaKtKRY2iTF0CYxlqTYaBLjov7z2Co2msS4aEorKvlqxRZmF5VSXeuIiTIG5rXh2KPaMjAvlZw2CWS1TiApbt+hMOatLePq56ZTUVnD3y8eyMn57eqtr2hrBb+eOJ/Pl+47NH2Uz2ifEk92mwQyWsXx9cotbCmvJKt1POcV5HJ+QQ45bbw5uwIFgIi0AJXVtRSu2UpyXBz9h6UAAAilSURBVAxprWJJS4xt9Kf475XvqaZwzTamrdjMVyu2MK+4jL3f5lonxJDVOp7s1AQykuN4c/Y60pJiefqKIeS3Tz7k6zvnmLJsM9vKK8luk0B2agLtkuOI3ms4i8rqWj5ZtJGXphcxZVldWAzvms5FQ/MY2Stzn3WDQQEgIhGprKKKpZt2sK60rolmXeku1pftotj/fc+sZB68aCDtkgPT9bW4dBevFhbxyvQi1pXtZlBeKg9eNDCoZwQKABERD9XUOt6aU8xvJi7ADP40vh+j+gT2buvvaTA4EREPRfmMcwbm8O7Pj6dzehI/eX4mv544j91V3o2DpAAQEQmivLaJvPqTYVx7wlE8//V3jHt4Kss3eTNFpgJARCTIYqN9/OrMnjxz5RA27djDWX+fyiuFRc0y73JTKABERDxycn473r/+eAbkpnLrhLm8O299UPfvSQCY2X1mttjM5prZG2aW6kUdIiJey0yJ5/lrjiY7NYFXCtcGd
d9enQF8BPRxzvUDlgK/9KgOERHPRfmMcQM78OWyEkp27Anafj0JAOfcJOfc9wN3fA0Ed/hBEZEQM25ANrUO3p6zLmj7DIVrAFcB73tdhIiIl7plJtO7Qwpvzi4O2j4DFgBm9rGZzT/I19l7rXMHUA280MDrXGtmhWZWWFJSUt9qIiIt3rgB2cxZW8bKkp1B2V/AAsA5N8I51+cgX28CmNnlwBjgEtdA3yfn3OPOuQLnXEFGRkagyhUR8dxZ/TtgBhNnB6cZyKteQKOA24CxzrkKL2oQEQk17VvHM6xLW96cXRyUewK8ugbwEJAMfGRms83sMY/qEBEJKWcPyGbNlgpmFZUGfF9e9QLq6pzLdc4N8H/9xIs6RERCzag+7YmL9vHmrMBfDA6FXkAiIuKXEh/DiJ6ZvD13PVU1tQHdlwJARCTEjBuYzdbySr5ctjmg+1EAiIiEmBO7Z5CaGMMbAW4GUgCIiISY2GgfZ/bN4qOFGynfU33oDQ6TAkBEJASdMzCbXVU1TFq4IWD7UACIiISgwXltyE5N4I1ZgbspTAEgIhKCfEEYIVQBICISogI9QqgCQEQkRAV6hFAFgIhICAvkCKEKABGREDZ2QAd6ZaWwpbyy2V87utlfUUREmk1mSjzvXX98QF5bZwAiIhFKASAiEqEUACIiEUoBICISoRQAIiIRSgEgIhKhFAAiIhFKASAiEqHMOed1DY1mZiXAmiZskg4Edk61lkfH5EA6JgfSMTlQSz4mHZ1zGfsvbFEB0FRmVuicK/C6jlCiY3IgHZMD6ZgcKByPiZqAREQilAJARCRChXsAPO51ASFIx+RAOiYH0jE5UNgdk7C+BiAiIvUL9zMAERGpR1gGgJmNMrMlZrbczG73uh4vmFmumX1mZovMbIGZXe9fnmZmH5nZMv9jG69rDTYzizKzWWb2jv+5jolZqplNMLPF/r+ZYyP9uJjZjf7/nflm9qKZxYfbMQm7ADCzKOBh4AygF3CRmfXytipPVAM3O+d6AscA1/mPw+3AJ865bsAn/ueR5npg0V7PdUzgAeAD51wPoD91xydij4uZZQM/Bwqcc32AKOBCwuyYhF0AAEOB5c65lc65SuAl4GyPawo659x659xM//c7qPuHzqbuWDznX+05YJw3FXrDzHKA0cCTey2O9GOSApwAPAXgnKt0zpUS4ceFuhkTE8wsGkgE1hFmxyQcAyAbKNrr+Vr/sohlZp2AgcA3QKZzbj3UhQTQzrvKPPE34Fagdq9lkX5MjgJKgGf8TWNPmlkSEXxcnHPFwP3Ad8B6oMw5N4kwOybhGAB2kGUR29XJzFoBrwE3OOe2e12Pl8xsDLDJOTfD61pCTDQwCHjUOTcQKKeFN20cKX/b/tlAZ6ADkGRml3pbVfMLxwBYC+Tu9TyHulO3iGNmMdS9+b/gnHvdv3ijmWX5f54FbPKqPg8cB4w1s9XUNQ2eYmbPE9nHBOr+Z9Y6577xP59AXSBE8nEZAaxyzpU456qA14FhhNkxCccAmA50M7POZhZL3YWbtzyuKejMzKhr013knPvLXj96C7jc//3lwJvBrs0rzrlfOudynHOdqPu7+NQ5dykRfEwAnHMbgCIzy/cvOhVYSGQfl++AY8ws0f+/dCp119HC6piE5Y1gZnYmdW29UcDTzrl7PC4p6MxsOPAFMI8f2rt/Rd11gFeAPOr+yM9zzm31pEgPmdlJwC3OuTFm1pYIPyZmNoC6C+OxwErgSuo+IEbscTGzu4ALqOtRNwu4BmhFGB2TsAwAERE5tHBsAhIRkUZQAIiIRCgFgIhIhFIAiIhEKAWAiEiEUgCIiEQoBYDIETCzyWZ2wEThZjY2Uocil5Yj2usCRMKRc+4tIvAOdGlZdAYgEc3MhpjZXP9kH0n+CUD6mNmtZjbPzOaY2R8P8TKXmtk0/8QhQ/2ve4WZPeT//lkze9C/zkozG+9fnmVmU8xstn/b4wP864rsQ2cAEtGcc9PN7C3gbiABe
J66wQTHAUc75yrMLO0QL5PknBtmZicATwN9DrJOFjAc6EHdmcEE4GLgQ+fcPf6JjBKb5ZcSaSQFgAj8jrpBBHdTNwvUn4BnnHMVAI0Y6+VF/3pTzCzFzFIPss5E51wtsNDMMv3LpgNP+0dtneicm90Mv4tIo6kJSATSqBvkKxmIp25OiaYMkrX/ugfbds9e3xvUBQZ1M3EVA/8ys/9qwj5FjpgCQAQeB+4EXgDuBSYBV5lZItRNGn+I7S/wrzecupmjyhqzUzPrSN0ENU9QN3T3oMMrX+TwqAlIIpr/U3e1c+7f/nb4adRN/vEWUGhmlcB71A2lXZ9tZjYNSAGuasLuTwJ+YWZVwE5AZwASVBoOWkQkQqkJSEQkQqkJSKQRzOxh6uYU3tsDzrlnvKhHpDmoCUhEJEKpCUhEJEIpAEREIpQCQEQkQikAREQilAJARCRC/X/SMuzxcYvG9QAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# define two-degree wide latitude bins\n", - "lat_bins = np.arange(0,91,2)\n", - "# define a label for each bin corresponding to the central latitude\n", - "lat_center = np.arange(1,90,2)\n", - "# group according to those bins and take the mean\n", - "Tair_lat_mean = ds.Tair.groupby_bins('xc', lat_bins, labels=lat_center).mean(dim=xr.ALL_DIMS)\n", - "# plot the result\n", - "Tair_lat_mean.plot()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that the resulting coordinate for the `groupby_bins` operation got the `_bins` suffix appended: `xc_bins`. This help us distinguish it from the original multidimensional variable `xc`." - ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": true, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/examples/xarray_seasonal_means.ipynb b/examples/xarray_seasonal_means.ipynb deleted file mode 100644 index d6a354ecdf5..00000000000 --- a/examples/xarray_seasonal_means.ipynb +++ /dev/null @@ -1,427 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "toc": true - }, - "source": [ - "

Table of Contents

\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Calculating Seasonal Averages from Timeseries of Monthly Means \n", - "=====\n", - "\n", - "Author: [Joe Hamman](https://github.com/jhamman/)\n", - "\n", - "The data used for this example can be found in the [xray-data](https://github.com/xray/xray-data) repository. You may need to change the path to `rasm.nc` below.\n", - "\n", - "Suppose we have a netCDF or xray Dataset of monthly mean data and we want to calculate the seasonal average. To do this properly, we need to calculate the weighted average considering that each month has a different number of days.\n", - "\n", - "Suppose we have a netCDF or `xarray.Dataset` of monthly mean data and we want to calculate the seasonal average. To do this properly, we need to calculate the weighted average considering that each month has a different number of days." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:35.958210Z", - "start_time": "2018-11-28T20:51:35.936966Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "numpy version : 1.17.2\n", - "pandas version : 0.25.1\n", - "xarray version : 0.13.0+45.g4296096b.dirty\n" - ] - } - ], - "source": [ - "%matplotlib inline\n", - "import numpy as np\n", - "import pandas as pd\n", - "import xarray as xr\n", - "from netCDF4 import num2date\n", - "import matplotlib.pyplot as plt \n", - "\n", - "print(\"numpy version : \", np.__version__)\n", - "print(\"pandas version : \", pd.__version__)\n", - "print(\"xarray version : \", xr.__version__)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Some calendar information so we can support any netCDF calendar. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:35.991620Z", - "start_time": "2018-11-28T20:51:35.960336Z" - } - }, - "outputs": [], - "source": [ - "dpm = {'noleap': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " '365_day': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'standard': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'proleptic_gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " 'all_leap': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " '366_day': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", - " '360_day': [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]} " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### A few calendar functions to determine the number of days in each month\n", - "If you were just using the standard calendar, it would be easy to use the `calendar.month_range` function." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:36.015151Z", - "start_time": "2018-11-28T20:51:35.994079Z" - } - }, - "outputs": [], - "source": [ - "def leap_year(year, calendar='standard'):\n", - " \"\"\"Determine if year is a leap year\"\"\"\n", - " leap = False\n", - " if ((calendar in ['standard', 'gregorian',\n", - " 'proleptic_gregorian', 'julian']) and\n", - " (year % 4 == 0)):\n", - " leap = True\n", - " if ((calendar == 'proleptic_gregorian') and\n", - " (year % 100 == 0) and\n", - " (year % 400 != 0)):\n", - " leap = False\n", - " elif ((calendar in ['standard', 'gregorian']) and\n", - " (year % 100 == 0) and (year % 400 != 0) and\n", - " (year < 1583)):\n", - " leap = False\n", - " return leap\n", - "\n", - "def get_dpm(time, calendar='standard'):\n", - " \"\"\"\n", - " return a array of days per month corresponding to the months provided in `months`\n", - " \"\"\"\n", - " month_length = np.zeros(len(time), dtype=np.int)\n", - " \n", - " cal_days = dpm[calendar]\n", - " \n", - " for i, (month, year) in enumerate(zip(time.month, time.year)):\n", - " month_length[i] = cal_days[month]\n", - " if leap_year(year, calendar=calendar):\n", - " month_length[i] += 1\n", - " return month_length" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Open the `Dataset`" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:36.072316Z", - "start_time": "2018-11-28T20:51:36.016594Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Dimensions: (time: 36, x: 275, y: 205)\n", - "Coordinates:\n", - " * time (time) object 1980-09-16 12:00:00 ... 1983-08-17 00:00:00\n", - " xc (y, x) float64 189.2 189.4 189.6 189.7 ... 17.65 17.4 17.15 16.91\n", - " yc (y, x) float64 16.53 16.78 17.02 17.27 ... 
28.26 28.01 27.76 27.51\n", - "Dimensions without coordinates: x, y\n", - "Data variables:\n", - " Tair (time, y, x) float64 nan nan nan nan nan ... 29.8 28.66 28.19 28.21\n", - "Attributes:\n", - " title: /workspace/jhamman/processed/R1002RBRxaaa01a/l...\n", - " institution: U.W.\n", - " source: RACM R1002RBRxaaa01a\n", - " output_frequency: daily\n", - " output_mode: averaged\n", - " convention: CF-1.4\n", - " references: Based on the initial model of Liang et al., 19...\n", - " comment: Output from the Variable Infiltration Capacity...\n", - " nco_openmp_thread_number: 1\n", - " NCO: \"4.6.0\"\n", - " history: Tue Dec 27 14:15:22 2016: ncatted -a dimension...\n" - ] - } - ], - "source": [ - "ds = xr.tutorial.open_dataset('rasm').load()\n", - "print(ds)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Now for the heavy lifting:\n", - "We first have to come up with the weights,\n", - "- calculate the month lengths for each monthly data record\n", - "- calculate weights using `groupby('time.season')`\n", - "\n", - "Finally, we just need to multiply our weights by the `Dataset` and sum allong the time dimension. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:36.132413Z", - "start_time": "2018-11-28T20:51:36.073708Z" - } - }, - "outputs": [], - "source": [ - "# Make a DataArray with the number of days in each month, size = len(time)\n", - "month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar='noleap'),\n", - " coords=[ds.time], name='month_length')\n", - "\n", - "# Calculate the weights by grouping by 'time.season'.\n", - "# Conversion to float type ('astype(float)') only necessary for Python 2.x\n", - "weights = month_length.groupby('time.season') / month_length.astype(float).groupby('time.season').sum()\n", - "\n", - "# Test that the sum of the weights for each season is 1.0\n", - "np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4))\n", - "\n", - "# Calculate the weighted average\n", - "ds_weighted = (ds * weights).groupby('time.season').sum(dim='time')" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:36.152913Z", - "start_time": "2018-11-28T20:51:36.133997Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Dimensions: (season: 4, x: 275, y: 205)\n", - "Coordinates:\n", - " xc (y, x) float64 189.2 189.4 189.6 189.7 ... 17.65 17.4 17.15 16.91\n", - " yc (y, x) float64 16.53 16.78 17.02 17.27 ... 28.26 28.01 27.76 27.51\n", - " * season (season) object 'DJF' 'JJA' 'MAM' 'SON'\n", - "Dimensions without coordinates: x, y\n", - "Data variables:\n", - " Tair (season, y, x) float64 0.0 0.0 0.0 0.0 ... 
23.15 22.08 21.73 21.96\n" - ] - } - ], - "source": [ - "print(ds_weighted)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:36.190765Z", - "start_time": "2018-11-28T20:51:36.154416Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "xarray/core/nanops.py:140: RuntimeWarning: Mean of empty slice\n", - " return np.nanmean(a, axis=axis, dtype=dtype)\n" - ] - } - ], - "source": [ - "# only used for comparisons\n", - "ds_unweighted = ds.groupby('time.season').mean('time')\n", - "ds_diff = ds_weighted - ds_unweighted" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:40.264871Z", - "start_time": "2018-11-28T20:51:36.192467Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0.5, 1.02, 'Seasonal Surface Air Temperature')" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# Quick plot to show the results\n", - "notnull = pd.notnull(ds_unweighted['Tair'][0])\n", - "\n", - "fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(14,12))\n", - "for i, season in enumerate(('DJF', 'MAM', 'JJA', 'SON')):\n", - " ds_weighted['Tair'].sel(season=season).where(notnull).plot.pcolormesh(\n", - " ax=axes[i, 0], vmin=-30, vmax=30, cmap='Spectral_r', \n", - " add_colorbar=True, extend='both')\n", - " \n", - " ds_unweighted['Tair'].sel(season=season).where(notnull).plot.pcolormesh(\n", - " ax=axes[i, 1], vmin=-30, vmax=30, cmap='Spectral_r', \n", - " add_colorbar=True, extend='both')\n", - "\n", - " ds_diff['Tair'].sel(season=season).where(notnull).plot.pcolormesh(\n", - " ax=axes[i, 2], vmin=-0.1, vmax=.1, cmap='RdBu_r',\n", - " add_colorbar=True, extend='both')\n", - "\n", - " axes[i, 0].set_ylabel(season)\n", - " axes[i, 1].set_ylabel('')\n", - " axes[i, 2].set_ylabel('')\n", - "\n", - "for ax in axes.flat:\n", - " ax.axes.get_xaxis().set_ticklabels([])\n", - " ax.axes.get_yaxis().set_ticklabels([])\n", - " ax.axes.axis('tight')\n", - " ax.set_xlabel('')\n", - " \n", - "axes[0, 0].set_title('Weighted by DPM')\n", - "axes[0, 1].set_title('Equal Weighting')\n", - "axes[0, 2].set_title('Difference')\n", - " \n", - "plt.tight_layout()\n", - "\n", - "fig.suptitle('Seasonal Surface Air Temperature', fontsize=16, y=1.02)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2018-11-28T20:51:40.284898Z", - "start_time": "2018-11-28T20:51:40.266406Z" - } - }, - "outputs": [], - "source": [ - "# Wrap it into a simple function\n", - "def season_mean(ds, calendar='standard'):\n", - " # Make a DataArray of season/year groups\n", - " year_season = xr.DataArray(ds.time.to_index().to_period(freq='Q-NOV').to_timestamp(how='E'),\n", - " coords=[ds.time], name='year_season')\n", - "\n", - " # 
Make a DataArray with the number of days in each month, size = len(time)\n", - " month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar=calendar),\n", - " coords=[ds.time], name='month_length')\n", - " # Calculate the weights by grouping by 'time.season'\n", - " weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()\n", - "\n", - " # Test that the sum of the weights for each season is 1.0\n", - " np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4))\n", - "\n", - " # Calculate the weighted average\n", - " return (ds * weights).groupby('time.season').sum(dim='time')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": true, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/readthedocs.yml b/readthedocs.yml index 6429780e7d7..c64fa1b7b02 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -4,5 +4,5 @@ conda: file: ci/requirements/doc.yml python: version: 3.7 - setup_py_install: true + setup_py_install: false formats: [] diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 945b3937c43..23d09ba5e33 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -729,13 +729,13 @@ def open_mfdataset( ``combine_by_coords`` and 
``combine_nested``. By default the old (now deprecated) ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or ``combine='nested'`` in future. Requires dask to be installed. See documentation for - details on dask [1]. Attributes from the first dataset file are used for the + details on dask [1]_. Attributes from the first dataset file are used for the combined dataset. Parameters ---------- paths : str or sequence - Either a string glob in the form "path/to/my/files/*.nc" or an explicit list of + Either a string glob in the form ``"path/to/my/files/*.nc"`` or an explicit list of files to open. Paths can be given as strings or as pathlib Paths. If concatenation along more than one dimension is desired, then ``paths`` must be a nested list-of-lists (see ``manual_combine`` for details). (A string glob will @@ -745,7 +745,7 @@ def open_mfdataset( In general, these should divide the dimensions of each dataset. If int, chunk each dimension by ``chunks``. By default, chunks will be chosen to load entire input files into memory at once. This has a major impact on performance: please - see the full documentation for more details [2]. + see the full documentation for more details [2]_. concat_dim : str, or list of str, DataArray, Index or None, optional Dimensions to concatenate files along. You only need to provide this argument if any of the dimensions along which you want to concatenate is not a dimension @@ -761,6 +761,7 @@ def open_mfdataset( 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts when merging: + * 'broadcast_equals': all values must be equal when variables are broadcast against each other to ensure common dimensions. * 'equals': all values and dimensions must be the same. @@ -770,6 +771,7 @@ def open_mfdataset( must be equal. The returned dataset then contains the combination of all non-null values. 
* 'override': skip comparing and pick variable from first dataset + preprocess : callable, optional If provided, call this function on each dataset prior to concatenation. You can find the file-name from which each dataset was loaded in diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 559c5e16287..4005d4fbf6d 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -506,7 +506,7 @@ def strftime(self, date_format): Returns ------- - Index + pandas.Index Index of formatted strings Examples diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 5f9c8932b6b..2b5f87ab0cd 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -8,7 +8,6 @@ from ..core import dtypes, duck_array_ops, indexing from ..core.pycompat import dask_array_type -from ..core.utils import equivalent from ..core.variable import Variable @@ -152,18 +151,25 @@ def encode(self, variable, name=None): fv = encoding.get("_FillValue") mv = encoding.get("missing_value") - if fv is not None and mv is not None and not equivalent(fv, mv): + if ( + fv is not None + and mv is not None + and not duck_array_ops.allclose_or_equiv(fv, mv) + ): raise ValueError( - "Variable {!r} has multiple fill values {}. " - "Cannot encode data. ".format(name, [fv, mv]) + f"Variable {name!r} has conflicting _FillValue ({fv}) and missing_value ({mv}). Cannot encode data." 
) if fv is not None: + # Ensure _FillValue is cast to same dtype as data's + encoding["_FillValue"] = data.dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if not pd.isnull(fill_value): data = duck_array_ops.fillna(data, fill_value) if mv is not None: + # Ensure missing_value is cast to same dtype as data's + encoding["missing_value"] = data.dtype.type(mv) fill_value = pop_to(encoding, attrs, "missing_value", name=name) if not pd.isnull(fill_value) and fv is None: data = duck_array_ops.fillna(data, fill_value) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 41ff5a3b32d..908119f7995 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -108,7 +108,7 @@ def align( Returns ------- - aligned : same as *objects + aligned : same as `*objects` Tuple of objects with aligned coordinates. Raises @@ -466,6 +466,7 @@ def reindex_variables( tolerance: Any = None, copy: bool = True, fill_value: Optional[Any] = dtypes.NA, + sparse: bool = False, ) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]: """Conform a dictionary of aligned variables onto a new set of variables, filling in missing values with NaN. @@ -503,6 +504,8 @@ def reindex_variables( the input. In either case, new xarray objects are always returned. fill_value : scalar, optional Value to use for newly missing values + sparse: bool, optional + Use a sparse array Returns ------- @@ -571,6 +574,8 @@ def reindex_variables( for name, var in variables.items(): if name not in indexers: + if sparse: + var = var._as_sparse(fill_value=fill_value) key = tuple( slice(None) if d in unchanged_dims else int_indexers.get(d, slice(None)) for d in var.dims diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 3308dcef285..b9db30a9f92 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -531,6 +531,7 @@ def combine_by_coords( * 'all': All data variables will be concatenated.
* list of str: The listed data variables will be concatenated, in addition to the 'minimal' data variables. + If objects are DataArrays, `data_vars` must be 'all'. coords : {'minimal', 'different', 'all' or list of str}, optional As per the 'data_vars' kwarg, but for coordinate variables. @@ -747,6 +748,7 @@ def auto_combine( 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts: + - 'broadcast_equals': all values must be equal when variables are broadcast against each other to ensure common dimensions. - 'equals': all values and dimensions must be the same. diff --git a/xarray/core/common.py b/xarray/core/common.py index 2afe4b4c3a7..a74318b2f90 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -91,15 +91,23 @@ def wrapped_func(self, dim=None, **kwargs): # type: ignore return wrapped_func - _reduce_extra_args_docstring = """dim : str or sequence of str, optional + _reduce_extra_args_docstring = dedent( + """ + dim : str or sequence of str, optional Dimension(s) over which to apply `{name}`. By default `{name}` is - applied over all dimensions.""" + applied over all dimensions. + """ + ).strip() - _cum_extra_args_docstring = """dim : str or sequence of str, optional + _cum_extra_args_docstring = dedent( + """ + dim : str or sequence of str, optional Dimension over which to apply `{name}`. axis : int or sequence of int, optional Axis over which to apply `{name}`. Only one of the 'dim' - and 'axis' arguments can be supplied.""" + and 'axis' arguments can be supplied. + """ + ).strip() class AbstractArray(ImplementsArrayReduce): @@ -454,7 +462,7 @@ def assign_coords(self, coords=None, **coords_kwargs): def assign_attrs(self, *args, **kwargs): """Assign new attrs to this object. - Returns a new object equivalent to self.attrs.update(*args, **kwargs). + Returns a new object equivalent to ``self.attrs.update(*args, **kwargs)``. 
Parameters ---------- @@ -481,7 +489,7 @@ def pipe( **kwargs, ) -> T: """ - Apply func(self, *args, **kwargs) + Apply ``func(self, *args, **kwargs)`` This method replicates the pandas method of the same name. @@ -810,6 +818,7 @@ def rolling_exp( ---------- window : A single mapping from a dimension name to window value, optional + dim : str Name of the dimension to create the rolling exponential window along (e.g., `time`). @@ -848,6 +857,7 @@ def coarsen( ---------- dim: dict, optional Mapping from the dimension name to the window size. + dim : str Name of the dimension to create the rolling iterator along (e.g., `time`). @@ -858,7 +868,7 @@ def coarsen( multiple of the window size. If 'trim', the excess entries are dropped. If 'pad', NA will be padded. side : 'left' or 'right' or mapping from dimension to 'left' or 'right' - coord_func: function (name) that is applied to the coordintes, + coord_func : function (name) that is applied to the coordinates, or a mapping from coordinate name to function (name). Returns @@ -921,7 +931,7 @@ def resample( Parameters ---------- indexer : {dim: freq}, optional - Mapping from the dimension name to resample frequency. The + Mapping from the dimension name to resample frequency [1]_. The dimension must be datetime-like. skipna : bool, optional Whether to skip missing values when aggregating in downsampling. diff --git a/xarray/core/computation.py b/xarray/core/computation.py index bb5ab07d8dd..643c1137d6c 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -342,7 +342,7 @@ def _fast_dataset( variables.update(coord_variables) coord_names = set(coord_variables) - return Dataset._from_vars_and_coord_names(variables, coord_names) + return Dataset._construct_direct(variables, coord_names) def apply_dataset_vfunc( @@ -947,7 +947,7 @@ def earth_mover_distance(first_samples, appropriately for use in `apply`. You may find helper functions such as numpy.broadcast_arrays helpful in writing your function.
`apply_ufunc` also works well with numba's vectorize and guvectorize. Further explanation with - examples are provided in the xarray documentation [3]. + examples are provided in the xarray documentation [3]_. See also -------- diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 5b4fc078236..5ccbfa3f2b4 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -45,6 +45,7 @@ def concat( * 'all': All data variables will be concatenated. * list of str: The listed data variables will be concatenated, in addition to the 'minimal' data variables. + If objects are DataArrays, data_vars must be 'all'. coords : {'minimal', 'different', 'all' or list of str}, optional These coordinate variables will be concatenated together: diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index eb2ceb1be07..3d51c9b4271 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -309,7 +309,7 @@ def to_dataset(self) -> "Dataset": from .dataset import Dataset coords = {k: v.copy(deep=False) for k, v in self._data._coords.items()} - return Dataset._from_vars_and_coord_names(coords, set(coords)) + return Dataset._construct_direct(coords, set(coords)) def __delitem__(self, key: Hashable) -> None: del self._data._coords[key] diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index a192fe08cee..64f21b0eb01 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -48,10 +48,10 @@ assert_coordinate_consistent, remap_label_indexers, ) -from .dataset import Dataset, merge_indexes, split_indexes +from .dataset import Dataset, split_indexes from .formatting import format_item -from .indexes import Indexes, default_indexes -from .merge import PANDAS_TYPES +from .indexes import Indexes, propagate_indexes, default_indexes +from .merge import PANDAS_TYPES, _extract_indexes_from_coords from .options import OPTIONS from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import 
( @@ -239,7 +239,7 @@ class DataArray(AbstractArray, DataWithCoords): ---------- dims : tuple Dimension names associated with this array. - values : np.ndarray + values : numpy.ndarray Access or modify DataArray values as a numpy array. coords : dict-like Dictionary of DataArray objects that label values along each dimension. @@ -249,14 +249,14 @@ class DataArray(AbstractArray, DataWithCoords): Dictionary for holding arbitrary metadata. """ - _accessors: Optional[Dict[str, Any]] # noqa + _cache: Dict[str, Any] _coords: Dict[Any, Variable] _indexes: Optional[Dict[Hashable, pd.Index]] _name: Optional[Hashable] _variable: Variable __slots__ = ( - "_accessors", + "_cache", "_coords", "_file_obj", "_indexes", @@ -367,13 +367,15 @@ def __init__( data = as_compatible_data(data) coords, dims = _infer_coords_and_dims(data.shape, coords, dims) variable = Variable(dims, data, attrs, encoding, fastpath=True) + indexes = dict( + _extract_indexes_from_coords(coords) + ) # needed for to_dataset # These fully describe a DataArray self._variable = variable assert isinstance(coords, dict) self._coords = coords self._name = name - self._accessors = None # TODO(shoyer): document this argument, once it becomes part of the # public interface. @@ -401,6 +403,7 @@ def _replace_maybe_drop_dims( ) -> "DataArray": if variable.dims == self.dims and variable.shape == self.shape: coords = self._coords.copy() + indexes = self._indexes elif variable.dims == self.dims: # Shape has changed (e.g. 
from reduce(..., keepdims=True) new_sizes = dict(zip(self.dims, variable.shape)) @@ -409,12 +412,19 @@ def _replace_maybe_drop_dims( for k, v in self._coords.items() if v.shape == tuple(new_sizes[d] for d in v.dims) } + changed_dims = [ + k for k in variable.dims if variable.sizes[k] != self.sizes[k] + ] + indexes = propagate_indexes(self._indexes, exclude=changed_dims) else: allowed_dims = set(variable.dims) coords = { k: v for k, v in self._coords.items() if set(v.dims) <= allowed_dims } - return self._replace(variable, coords, name) + indexes = propagate_indexes( + self._indexes, exclude=(set(self.dims) - allowed_dims) + ) + return self._replace(variable, coords, name, indexes=indexes) def _overwrite_indexes(self, indexes: Mapping[Hashable, Any]) -> "DataArray": if not len(indexes): @@ -445,19 +455,21 @@ def _from_temp_dataset( return self._replace(variable, coords, name, indexes=indexes) def _to_dataset_split(self, dim: Hashable) -> Dataset: + """ splits dataarray along dimension 'dim' """ + def subset(dim, label): array = self.loc[{dim: label}] - if dim in array.coords: - del array.coords[dim] array.attrs = {} - return array + return as_variable(array) variables = {label: subset(dim, label) for label in self.get_index(dim)} - - coords = self.coords.to_dataset() - if dim in coords: - del coords[dim] - return Dataset(variables, coords, self.attrs) + variables.update({k: v for k, v in self._coords.items() if k != dim}) + indexes = propagate_indexes(self._indexes, exclude=dim) + coord_names = set(self._coords) - set([dim]) + dataset = Dataset._construct_direct( + variables, coord_names, indexes=indexes, attrs=self.attrs + ) + return dataset def _to_dataset_whole( self, name: Hashable = None, shallow_copy: bool = True @@ -481,8 +493,10 @@ def _to_dataset_whole( if shallow_copy: for k in variables: variables[k] = variables[k].copy(deep=False) + indexes = self._indexes + coord_names = set(self._coords) - dataset = Dataset._from_vars_and_coord_names(variables, 
coord_names) + dataset = Dataset._construct_direct(variables, coord_names, indexes=indexes) return dataset def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset: @@ -928,7 +942,8 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": """ variable = self.variable.copy(deep=deep, data=data) coords = {k: v.copy(deep=deep) for k, v in self._coords.items()} - return self._replace(variable, coords) + indexes = self._indexes + return self._replace(variable, coords, indexes=indexes) def __copy__(self) -> "DataArray": return self.copy(deep=False) @@ -1316,7 +1331,7 @@ def interp( values. kwargs: dictionary Additional keyword passed to scipy's interpolator. - **coords_kwarg : {dim: coordinate, ...}, optional + ``**coords_kwarg`` : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. @@ -1601,10 +1616,10 @@ def set_index( -------- DataArray.reset_index """ - _check_inplace(inplace) - indexes = either_dict_or_kwargs(indexes, indexes_kwargs, "set_index") - coords, _ = merge_indexes(indexes, self._coords, set(), append=append) - return self._replace(coords=coords) + ds = self._to_temp_dataset().set_index( + indexes, append=append, inplace=inplace, **indexes_kwargs + ) + return self._from_temp_dataset(ds) def reset_index( self, @@ -1727,7 +1742,10 @@ def stack( return self._from_temp_dataset(ds) def unstack( - self, dim: Union[Hashable, Sequence[Hashable], None] = None + self, + dim: Union[Hashable, Sequence[Hashable], None] = None, + fill_value: Any = dtypes.NA, + sparse: bool = False, ) -> "DataArray": """ Unstack existing dimensions corresponding to MultiIndexes into @@ -1740,6 +1758,8 @@ def unstack( dim : hashable or sequence of hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. 
By default, np.nan + sparse: use sparse-array if True Returns ------- @@ -1771,7 +1791,7 @@ def unstack( -------- DataArray.stack """ - ds = self._to_temp_dataset().unstack(dim) + ds = self._to_temp_dataset().unstack(dim, fill_value, sparse) return self._from_temp_dataset(ds) def to_unstacked_dataset(self, dim, level=0): @@ -2018,44 +2038,71 @@ def fillna(self, value: Any) -> "DataArray": def interpolate_na( self, - dim=None, + dim: Hashable = None, method: str = "linear", limit: int = None, use_coordinate: Union[bool, str] = True, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "DataArray": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline', 'akima'}, optional + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline', and `akima`: use their - respective``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. 
+ + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + eqaully-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variariable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. 
+ For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + kwargs : dict, optional + parameters passed verbatim to the underlying interpolation function Returns ------- - DataArray + interpolated: DataArray + Filled in DataArray. See also -------- @@ -2070,6 +2117,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + max_gap=max_gap, **kwargs, ) @@ -2918,11 +2966,48 @@ def quantile( is a scalar. If multiple percentiles are given, first axis of the result corresponds to the quantile and a quantile dimension is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. + dimensions that remain after the reduction of the array. See Also -------- numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile + + Examples + -------- + + >>> da = xr.DataArray( + ... data=[[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]], + ... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]}, + ... dims=("x", "y"), + ... ) + + Single quantile + >>> da.quantile(0) # or da.quantile(0, dim=...) + + array(0.7) + Coordinates: + quantile float64 0.0 + >>> da.quantile(0, dim="x") + + array([0.7, 4.2, 2.6, 1.5]) + Coordinates: + * y (y) float64 1.0 1.5 2.0 2.5 + quantile float64 0.0 + + Multiple quantiles + >>> da.quantile([0, 0.5, 1]) + + array([0.7, 3.4, 9.4]) + Coordinates: + * quantile (quantile) float64 0.0 0.5 1.0 + >>> da.quantile([0, 0.5, 1], dim="x") + + array([[0.7 , 4.2 , 2.6 , 1.5 ], + [3.6 , 5.75, 6. , 1.7 ], + [6.5 , 7.3 , 9.4 , 1.9 ]]) + Coordinates: + * y (y) float64 1.0 1.5 2.0 2.5 + * quantile (quantile) float64 0.0 0.5 1.0 """ ds = self._to_temp_dataset().quantile( @@ -3043,8 +3128,8 @@ def integrate( Coordinate(s) used for the integration. datetime_unit: str, optional Can be used to specify the unit if datetime coordinate is used. 
- One of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', - 'ps', 'fs', 'as'} + One of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', + 'fs', 'as'} Returns ------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 15a7209ab24..61dde6a393b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -59,7 +59,13 @@ remap_label_indexers, ) from .duck_array_ops import datetime_to_numeric -from .indexes import Indexes, default_indexes, isel_variable_and_index, roll_index +from .indexes import ( + Indexes, + default_indexes, + isel_variable_and_index, + propagate_indexes, + roll_index, +) from .merge import ( dataset_merge_method, dataset_update_method, @@ -204,6 +210,7 @@ def merge_indexes( """ vars_to_replace: Dict[Hashable, Variable] = {} vars_to_remove: List[Hashable] = [] + dims_to_replace: Dict[Hashable, Hashable] = {} error_msg = "{} is not the name of an existing variable." for dim, var_names in indexes.items(): @@ -244,7 +251,7 @@ def merge_indexes( if not len(names) and len(var_names) == 1: idx = pd.Index(variables[var_names[0]].values) - else: + else: # MultiIndex for n in var_names: try: var = variables[n] @@ -256,15 +263,22 @@ def merge_indexes( levels.append(cat.categories) idx = pd.MultiIndex(levels, codes, names=names) + for n in names: + dims_to_replace[n] = dim vars_to_replace[dim] = IndexVariable(dim, idx) vars_to_remove.extend(var_names) new_variables = {k: v for k, v in variables.items() if k not in vars_to_remove} new_variables.update(vars_to_replace) + + # update dimensions if necessary GH: 3512 + for k, v in new_variables.items(): + if any(d in dims_to_replace for d in v.dims): + new_dims = [dims_to_replace.get(d, d) for d in v.dims] + new_variables[k] = v._replace(dims=new_dims) new_coord_names = coord_names | set(vars_to_replace) new_coord_names -= set(vars_to_remove) - return new_variables, new_coord_names @@ -411,8 +425,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): coordinates 
used for label based indexing. """ - _accessors: Optional[Dict[str, Any]] _attrs: Optional[Dict[Hashable, Any]] + _cache: Dict[str, Any] _coord_names: Set[Hashable] _dims: Dict[Hashable, int] _encoding: Optional[Dict[Hashable, Any]] @@ -420,8 +434,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): _variables: Dict[Hashable, Variable] __slots__ = ( - "_accessors", "_attrs", + "_cache", "_coord_names", "_dims", "_encoding", @@ -527,7 +541,6 @@ def __init__( data_vars, coords, compat=compat ) - self._accessors = None self._attrs = dict(attrs) if attrs is not None else None self._file_obj = None self._encoding = None @@ -862,13 +875,8 @@ def _construct_direct( obj._attrs = attrs obj._file_obj = file_obj obj._encoding = encoding - obj._accessors = None return obj - @classmethod - def _from_vars_and_coord_names(cls, variables, coord_names, attrs=None): - return cls._construct_direct(variables, coord_names, attrs=attrs) - def _replace( self, variables: Dict[Hashable, Variable] = None, @@ -1503,7 +1511,7 @@ def to_netcdf( Nested dictionary with variable names as keys and dictionaries of variable specific encodings as values, e.g., ``{'my_variable': {'dtype': 'int16', 'scale_factor': 0.1, - 'zlib': True}, ...}`` + 'zlib': True}, ...}`` The `h5netcdf` engine supports both the NetCDF4-style compression encoding parameters ``{'zlib': True, 'complevel': 9}`` and the h5py @@ -2112,7 +2120,7 @@ def thin( indexers: Union[Mapping[Hashable, int], int] = None, **indexers_kwargs: Any, ) -> "Dataset": - """Returns a new dataset with each array indexed along every `n`th + """Returns a new dataset with each array indexed along every `n`-th value for the specified dimension(s) Parameters @@ -2121,7 +2129,7 @@ def thin( A dict with keys matching dimensions and integer values `n` or a single integer `n` applied over all dimensions. One of indexers or indexers_kwargs must be provided. 
- **indexers_kwargs : {dim: n, ...}, optional + ``**indexers_kwargs`` : {dim: n, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -2280,6 +2288,7 @@ def reindex( the input. In either case, a new xarray object is always returned. fill_value : scalar, optional Value to use for newly missing values + sparse: use sparse-array. By default, False **indexers_kwarg : {dim: indexer, ...}, optional Keyword arguments in the same form as ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -2422,6 +2431,29 @@ def reindex( the original and desired indexes. If you do want to fill in the `NaN` values present in the original dataset, use the :py:meth:`~Dataset.fillna()` method. + """ + return self._reindex( + indexers, + method, + tolerance, + copy, + fill_value, + sparse=False, + **indexers_kwargs, + ) + + def _reindex( + self, + indexers: Mapping[Hashable, Any] = None, + method: str = None, + tolerance: Number = None, + copy: bool = True, + fill_value: Any = dtypes.NA, + sparse: bool = False, + **indexers_kwargs: Any, + ) -> "Dataset": + """ + same to _reindex but support sparse option """ indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex") @@ -2438,6 +2470,7 @@ def reindex( tolerance, copy=copy, fill_value=fill_value, + sparse=sparse, ) coord_names = set(self._coord_names) coord_names.update(indexers) @@ -2651,15 +2684,9 @@ def _rename_indexes(self, name_dict, dims_set): continue if isinstance(v, pd.MultiIndex): new_names = [name_dict.get(k, k) for k in v.names] - index = pd.MultiIndex( - v.levels, - v.labels, - v.sortorder, - names=new_names, - verify_integrity=False, - ) + index = v.rename(names=new_names) else: - index = pd.Index(v, name=new_name) + index = v.rename(new_name) indexes[new_name] = index return indexes @@ -3327,7 +3354,7 @@ def ensure_stackable(val): return data_array - def _unstack_once(self, dim: Hashable) -> "Dataset": + def _unstack_once(self, dim: 
Hashable, fill_value, sparse) -> "Dataset": index = self.get_index(dim) index = index.remove_unused_levels() full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -3336,7 +3363,9 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": if index.equals(full_idx): obj = self else: - obj = self.reindex({dim: full_idx}, copy=False) + obj = self._reindex( + {dim: full_idx}, copy=False, fill_value=fill_value, sparse=sparse + ) new_dim_names = index.names new_dim_sizes = [lev.size for lev in index.levels] @@ -3362,7 +3391,12 @@ def _unstack_once(self, dim: Hashable) -> "Dataset": variables, coord_names=coord_names, indexes=indexes ) - def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": + def unstack( + self, + dim: Union[Hashable, Iterable[Hashable]] = None, + fill_value: Any = dtypes.NA, + sparse: bool = False, + ) -> "Dataset": """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. @@ -3374,6 +3408,8 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": dim : Hashable or iterable of Hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. + fill_value: value to be filled. By default, np.nan + sparse: use sparse-array if True Returns ------- @@ -3411,7 +3447,7 @@ def unstack(self, dim: Union[Hashable, Iterable[Hashable]] = None) -> "Dataset": result = self.copy(deep=False) for dim in dims: - result = result._unstack_once(dim) + result = result._unstack_once(dim, fill_value, sparse) return result def update(self, other: "CoercibleMapping", inplace: bool = None) -> "Dataset": @@ -3471,6 +3507,7 @@ def merge( 'no_conflicts'}, optional String indicating how to compare variables of the same name for potential conflicts: + - 'broadcast_equals': all values must be equal when variables are broadcast against each other to ensure common dimensions. - 'equals': all values and dimensions must be the same. 
@@ -3479,6 +3516,7 @@ def merge( - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. + join : {'outer', 'inner', 'left', 'right', 'exact'}, optional Method for joining ``self`` and ``other`` along shared dimensions: @@ -3619,7 +3657,7 @@ def drop_sel(self, labels=None, *, errors="raise", **labels_kwargs): in the dataset. If 'ignore', any given labels that are in the dataset are dropped and no error is raised. **labels_kwargs : {dim: label, ...}, optional - The keyword arguments form of ``dim`` and ``labels` + The keyword arguments form of ``dim`` and ``labels`` Returns ------- @@ -3900,42 +3938,68 @@ def interpolate_na( method: str = "linear", limit: int = None, use_coordinate: Union[bool, Hashable] = True, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, **kwargs: Any, ) -> "Dataset": - """Interpolate values according to different methods. + """Fill in NaNs by interpolating according to different methods. Parameters ---------- - dim : Hashable + dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline'}, optional + + method : str, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). Additional keyword - arguments are passed to ``numpy.interp`` - - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', - 'polynomial': are passed to ``scipy.interpolate.interp1d``. If - method=='polynomial', the ``order`` keyword argument must also be + arguments are passed to :py:func:`numpy.interp` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': + are passed to :py:func:`scipy.interpolate.interp1d`. If + ``method='polynomial'``, the ``order`` keyword argument must also be provided. 
- - 'barycentric', 'krog', 'pchip', 'spline': use their respective - ``scipy.interpolate`` classes. - use_coordinate : boolean or str, default True + - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their + respective :py:class:`scipy.interpolate` classes. + + use_coordinate : bool, str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if - eqaully-spaced along `dim`. If True, the IndexVariable `dim` is - used. If use_coordinate is a string, it specifies the name of a + eqaully-spaced along ``dim``. If True, the IndexVariable `dim` is + used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variariable to use as the index. limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0 - or None for no limit. - kwargs : any - parameters passed verbatim to the underlying interplation function + or None for no limit. This filling is done regardless of the size of + the gap in the data. To only interpolate over gaps less than a given length, + see ``max_gap``. + max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None. + Maximum size of gap, a continuous sequence of NaNs, that will be filled. + Use None for no limit. When interpolating along a datetime64 dimension + and ``use_coordinate=True``, ``max_gap`` can be one of the following: + + - a string that is valid input for pandas.to_timedelta + - a :py:class:`numpy.timedelta64` object + - a :py:class:`pandas.Timedelta` object + + Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled + dimensions has not been implemented yet. Gap length is defined as the difference + between coordinate values at the first data point after a gap and the last value + before a gap. For gaps at the beginning (end), gap length is defined as the difference + between coordinate values at the first (last) valid data point and the first (last) NaN. 
+ For example, consider:: + + + array([nan, nan, nan, 1., nan, nan, 4., nan, nan]) + Coordinates: + * x (x) int64 0 1 2 3 4 5 6 7 8 + + The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + kwargs : dict, optional + parameters passed verbatim to the underlying interpolation function Returns ------- - Dataset + interpolated: Dataset + Filled in Dataset. See also -------- @@ -3951,6 +4015,7 @@ def interpolate_na( method=method, limit=limit, use_coordinate=use_coordinate, + max_gap=max_gap, **kwargs, ) return new @@ -4312,10 +4377,13 @@ def to_array(self, dim="variable", name=None): coords = dict(self.coords) coords[dim] = list(self.data_vars) + indexes = propagate_indexes(self._indexes) dims = (dim,) + broadcast_vars[0].dims - return DataArray(data, coords, dims, attrs=self.attrs, name=name) + return DataArray( + data, coords, dims, attrs=self.attrs, name=name, indexes=indexes + ) def _to_dataframe(self, ordered_dims): columns = [k for k in self.variables if k not in self.dims] @@ -5048,6 +5116,48 @@ def quantile( See Also -------- numpy.nanpercentile, pandas.Series.quantile, DataArray.quantile + + Examples + -------- + + >>> ds = xr.Dataset( + ... {"a": (("x", "y"), [[0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]])}, + ... coords={"x": [7, 9], "y": [1, 1.5, 2, 2.5]}, + ... ) + + Single quantile + >>> ds.quantile(0) # or ds.quantile(0, dim=...) 
+ + Dimensions: () + Coordinates: + quantile float64 0.0 + Data variables: + a float64 0.7 + >>> ds.quantile(0, dim="x") + + Dimensions: (y: 4) + Coordinates: + * y (y) float64 1.0 1.5 2.0 2.5 + quantile float64 0.0 + Data variables: + a (y) float64 0.7 4.2 2.6 1.5 + + Multiple quantiles + >>> ds.quantile([0, 0.5, 1]) + + Dimensions: (quantile: 3) + Coordinates: + * quantile (quantile) float64 0.0 0.5 1.0 + Data variables: + a (quantile) float64 0.7 3.4 9.4 + >>> ds.quantile([0, 0.5, 1], dim="x") + + Dimensions: (quantile: 3, y: 4) + Coordinates: + * y (y) float64 1.0 1.5 2.0 2.5 + * quantile (quantile) float64 0.0 0.5 1.0 + Data variables: + a (quantile, y) float64 0.7 4.2 2.6 1.5 3.6 ... 1.7 6.5 7.3 9.4 1.9 """ if isinstance(dim, str): @@ -5098,11 +5208,7 @@ def quantile( new = self._replace_with_new_dims( variables, coord_names=coord_names, attrs=attrs, indexes=indexes ) - if "quantile" in new.dims: - new.coords["quantile"] = Variable("quantile", q) - else: - new.coords["quantile"] = q - return new + return new.assign_coords(quantile=q) def rank(self, dim, pct=False, keep_attrs=None): """Ranks the data. @@ -5222,7 +5328,7 @@ def integrate(self, coord, datetime_unit=None): datetime_unit Can be specify the unit if datetime coordinate is used. 
One of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', - 'as'} + 'as'} Returns ------- @@ -5287,7 +5393,9 @@ def _integrate_one(self, coord, datetime_unit=None): datetime_unit, _ = np.datetime_data(coord_var.dtype) elif datetime_unit is None: datetime_unit = "s" # Default to seconds for cftime objects - coord_var = datetime_to_numeric(coord_var, datetime_unit=datetime_unit) + coord_var = coord_var._replace( + data=datetime_to_numeric(coord_var.data, datetime_unit=datetime_unit) + ) variables = {} coord_names = set() diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 71e79335c3d..cf616acb485 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -351,6 +351,26 @@ def f(values, axis=None, skipna=None, **kwargs): _mean = _create_nan_agg_method("mean") +def _datetime_nanmin(array): + """nanmin() function for datetime64. + + Caveats that this function deals with: + + - In numpy < 1.18, min() on datetime64 incorrectly ignores NaT + - numpy nanmin() don't work on datetime64 (all versions at the moment of writing) + - dask min() does not work on datetime64 (all versions at the moment of writing) + """ + assert array.dtype.kind in "mM" + dtype = array.dtype + # (NaT).astype(float) does not produce NaN... + array = where(pandas_isnull(array), np.nan, array.astype(float)) + array = min(array, skipna=True) + if isinstance(array, float): + array = np.array(array) + # ...but (NaN).astype("M8") does produce NaT + return array.astype(dtype) + + def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): """Convert an array containing datetime-like data to an array of floats. @@ -370,7 +390,10 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): """ # TODO: make this function dask-compatible? 
if offset is None: - offset = array.min() + if array.dtype.kind in "Mm": + offset = _datetime_nanmin(array) + else: + offset = min(array) array = array - offset if not hasattr(array, "dtype"): # scalar is converted to 0d-array @@ -401,7 +424,8 @@ def mean(array, axis=None, skipna=None, **kwargs): array = asarray(array) if array.dtype.kind in "Mm": - offset = min(array) + offset = _datetime_nanmin(array) + # xarray always uses np.datetime64[ns] for np.datetime64 data dtype = "timedelta64[ns]" return ( diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index f473eaa497d..79abbccea39 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -20,10 +20,15 @@ def __get__(self, obj, cls): # we're accessing the attribute of the class, i.e., Dataset.geo return self._accessor + # Use the same dict as @pandas.util.cache_readonly. + # It must be explicitly declared in obj.__slots__. try: - return obj._accessors[self._name] - except TypeError: - obj._accessors = {} + cache = obj._cache + except AttributeError: + cache = obj._cache = {} + + try: + return cache[self._name] except KeyError: pass @@ -35,7 +40,7 @@ def __get__(self, obj, cls): # something else (GH933): raise RuntimeError("error initializing %r accessor." 
% self._name) - obj._accessors[self._name] = accessor_obj + cache[self._name] = accessor_obj return accessor_obj diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index c73ee3cf7c5..cb8f6538820 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -10,6 +10,7 @@ from .common import ImplementsArrayReduce, ImplementsDatasetReduce from .concat import concat from .formatting import format_array_flat +from .indexes import propagate_indexes from .options import _get_keep_attrs from .pycompat import integer_types from .utils import ( @@ -529,7 +530,7 @@ def _maybe_unstack(self, obj): for dim in self._inserted_dims: if dim in obj.coords: del obj.coords[dim] - del obj.indexes[dim] + obj._indexes = propagate_indexes(obj._indexes, exclude=self._inserted_dims) return obj def fillna(self, value): @@ -557,6 +558,113 @@ def fillna(self, value): out = ops.fillna(self, value) return out + def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + """Compute the qth quantile over each array in the groups and + concatenate them together into a new array. + + Parameters + ---------- + q : float in range of [0,1] (or sequence of floats) + Quantile to compute, which must be between 0 and 1 + inclusive. + dim : `...`, str or sequence of str, optional + Dimension(s) over which to apply quantile. + Defaults to the grouped dimension. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + + * linear: ``i + (j - i) * fraction``, where ``fraction`` is + the fractional part of the index surrounded by ``i`` and + ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + + Returns + ------- + quantiles : Variable + If `q` is a single quantile, then the result is a + scalar. 
If multiple percentiles are given, first axis of + the result corresponds to the quantile. In either case a + quantile dimension is added to the return array. The other + dimensions are the dimensions that remain after the + reduction of the array. + + See Also + -------- + numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, + DataArray.quantile + + Examples + -------- + + >>> da = xr.DataArray( + ... [[1.3, 8.4, 0.7, 6.9], [0.7, 4.2, 9.4, 1.5], [6.5, 7.3, 2.6, 1.9]], + ... coords={"x": [0, 0, 1], "y": [1, 1, 2, 2]}, + ... dims=("y", "y"), + ... ) + >>> ds = xr.Dataset({"a": da}) + + Single quantile + >>> da.groupby("x").quantile(0) + + array([[0.7, 4.2, 0.7, 1.5], + [6.5, 7.3, 2.6, 1.9]]) + Coordinates: + quantile float64 0.0 + * y (y) int64 1 1 2 2 + * x (x) int64 0 1 + >>> ds.groupby("y").quantile(0, dim=...) + + Dimensions: (y: 2) + Coordinates: + quantile float64 0.0 + * y (y) int64 1 2 + Data variables: + a (y) float64 0.7 0.7 + + Multiple quantiles + >>> da.groupby("x").quantile([0, 0.5, 1]) + + array([[[0.7 , 1. , 1.3 ], + [4.2 , 6.3 , 8.4 ], + [0.7 , 5.05, 9.4 ], + [1.5 , 4.2 , 6.9 ]], + + [[6.5 , 6.5 , 6.5 ], + [7.3 , 7.3 , 7.3 ], + [2.6 , 2.6 , 2.6 ], + [1.9 , 1.9 , 1.9 ]]]) + Coordinates: + * y (y) int64 1 1 2 2 + * quantile (quantile) float64 0.0 0.5 1.0 + * x (x) int64 0 1 + >>> ds.groupby("y").quantile([0, 0.5, 1], dim=...) + + Dimensions: (quantile: 3, y: 2) + Coordinates: + * quantile (quantile) float64 0.0 0.5 1.0 + * y (y) int64 1 2 + Data variables: + a (y, quantile) float64 0.7 5.35 8.4 0.7 2.25 9.4 + """ + if dim is None: + dim = self._group_dim + + out = self.map( + self._obj.__class__.quantile, + shortcut=False, + q=q, + dim=dim, + interpolation=interpolation, + keep_attrs=keep_attrs, + ) + + return out + def where(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. @@ -675,17 +783,19 @@ def map(self, func, shortcut=False, args=(), **kwargs): Callable to apply to each array. 
shortcut : bool, optional Whether or not to shortcut evaluation under the assumptions that: + (1) The action of `func` does not depend on any of the array metadata (attributes or coordinates) but only on the data and dimensions. (2) The action of `func` creates arrays with homogeneous metadata, that is, with the same dimensions and attributes. + If these conditions are satisfied `shortcut` provides significant speedup. This should be the case for many common groupby operations (e.g., applying numpy ufuncs). - args : tuple, optional + ``*args`` : tuple, optional Positional arguments passed to `func`. - **kwargs + ``**kwargs`` Used to call `func(ar, **kwargs)` for each array `ar`. Returns @@ -730,67 +840,14 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False): combined = self._restore_dim_order(combined) if coord is not None: if shortcut: - combined._coords[coord.name] = as_variable(coord) + coord_var = as_variable(coord) + combined._coords[coord.name] = coord_var else: combined.coords[coord.name] = coord combined = self._maybe_restore_empty_groups(combined) combined = self._maybe_unstack(combined) return combined - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): - """Compute the qth quantile over each array in the groups and - concatenate them together into a new array. - - Parameters - ---------- - q : float in range of [0,1] (or sequence of floats) - Quantile to compute, which must be between 0 and 1 - inclusive. - dim : `...`, str or sequence of str, optional - Dimension(s) over which to apply quantile. - Defaults to the grouped dimension. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - * linear: ``i + (j - i) * fraction``, where ``fraction`` is - the fractional part of the index surrounded by ``i`` and - ``j``. - * lower: ``i``. - * higher: ``j``. 
- * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. - - Returns - ------- - quantiles : Variable - If `q` is a single quantile, then the result - is a scalar. If multiple percentiles are given, first axis of - the result corresponds to the quantile and a quantile dimension - is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. - - See Also - -------- - numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, - DataArray.quantile - """ - if dim is None: - dim = self._group_dim - - out = self.map( - self._obj.__class__.quantile, - shortcut=False, - q=q, - dim=dim, - interpolation=interpolation, - keep_attrs=keep_attrs, - ) - - if np.asarray(q, dtype=np.float64).ndim == 0: - out = out.drop_vars("quantile") - return out - def reduce( self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs ): diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 1574f4f18df..8337a0f082a 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -5,6 +5,7 @@ import pandas as pd from . import formatting +from .utils import is_scalar from .variable import Variable @@ -35,9 +36,6 @@ def __contains__(self, key): def __getitem__(self, key): return self._indexes[key] - def __delitem__(self, key): - del self._indexes[key] - def __repr__(self): return formatting.indexes_repr(self) @@ -100,3 +98,22 @@ def roll_index(index: pd.Index, count: int, axis: int = 0) -> pd.Index: return index[-count:].append(index[:-count]) else: return index[:] + + +def propagate_indexes( + indexes: Optional[Dict[Hashable, pd.Index]], exclude: Optional[Any] = None +) -> Optional[Dict[Hashable, pd.Index]]: + """ Creates new indexes dict from existing dict optionally excluding some dimensions. 
+ """ + if exclude is None: + exclude = () + + if is_scalar(exclude): + exclude = (exclude,) + + if indexes is not None: + new_indexes = {k: v for k, v in indexes.items() if k not in exclude} + else: + new_indexes = None # type: ignore + + return new_indexes diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 77dde66484e..117fcaf8f81 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,18 +1,46 @@ import warnings from functools import partial -from typing import Any, Callable, Dict, Sequence +from numbers import Number +from typing import Any, Callable, Dict, Hashable, Sequence, Union import numpy as np import pandas as pd from . import utils -from .common import _contains_datetime_like_objects +from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import dask_array_type from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables +def _get_nan_block_lengths(obj, dim: Hashable, index: Variable): + """ + Return an object where each NaN element in 'obj' is replaced by the + length of the gap the element is in. 
+ """ + + # make variable so that we get broadcasting for free + index = Variable([dim], index) + + # algorithm from https://github.com/pydata/xarray/pull/3302#discussion_r324707072 + arange = ones_like(obj) * index + valid = obj.notnull() + valid_arange = arange.where(valid) + cumulative_nans = valid_arange.ffill(dim=dim).fillna(index[0]) + + nan_block_lengths = ( + cumulative_nans.diff(dim=dim, label="upper") + .reindex({dim: obj[dim]}) + .where(valid) + .bfill(dim=dim) + .where(~valid, 0) + .fillna(index[-1] - valid_arange.max()) + ) + + return nan_block_lengths + + class BaseInterpolator: """Generic interpolator class for normalizing interpolation methods """ @@ -178,7 +206,7 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): return ds -def get_clean_interp_index(arr, dim, use_coordinate=True): +def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True): """get index to use for x values in interpolation. If use_coordinate is True, the coordinate that shares the name of the @@ -195,23 +223,33 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): index = arr.coords[use_coordinate] if index.ndim != 1: raise ValueError( - "Coordinates used for interpolation must be 1D, " - "%s is %dD." % (use_coordinate, index.ndim) + f"Coordinates used for interpolation must be 1D, " + f"{use_coordinate} is {index.ndim}D." ) + index = index.to_index() + + # TODO: index.name is None for multiindexes + # set name for nice error messages below + if isinstance(index, pd.MultiIndex): + index.name = dim + + if not index.is_monotonic: + raise ValueError(f"Index {index.name!r} must be monotonically increasing") + + if not index.is_unique: + raise ValueError(f"Index {index.name!r} has duplicate values") # raise if index cannot be cast to a float (e.g. 
MultiIndex) try: index = index.values.astype(np.float64) except (TypeError, ValueError): # pandas raises a TypeError - # xarray/nuppy raise a ValueError + # xarray/numpy raise a ValueError raise TypeError( - "Index must be castable to float64 to support" - "interpolation, got: %s" % type(index) + f"Index {index.name!r} must be castable to float64 to support " + f"interpolation, got {type(index).__name__}." ) - # check index sorting now so we can skip it later - if not (np.diff(index) > 0).all(): - raise ValueError("Index must be monotonicly increasing") + else: axis = arr.get_axis_num(dim) index = np.arange(arr.shape[axis], dtype=np.float64) @@ -220,7 +258,13 @@ def get_clean_interp_index(arr, dim, use_coordinate=True): def interp_na( - self, dim=None, use_coordinate=True, method="linear", limit=None, **kwargs + self, + dim: Hashable = None, + use_coordinate: Union[bool, str] = True, + method: str = "linear", + limit: int = None, + max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None, + **kwargs, ): """Interpolate values according to different methods. """ @@ -230,6 +274,40 @@ def interp_na( if limit is not None: valids = _get_valid_fill_mask(self, dim, limit) + if max_gap is not None: + max_type = type(max_gap).__name__ + if not is_scalar(max_gap): + raise ValueError("max_gap must be a scalar.") + + if ( + dim in self.indexes + and isinstance(self.indexes[dim], pd.DatetimeIndex) + and use_coordinate + ): + if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)): + raise TypeError( + f"Underlying index is DatetimeIndex. 
Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}" + ) + + if isinstance(max_gap, str): + try: + max_gap = pd.to_timedelta(max_gap) + except ValueError: + raise ValueError( + f"Could not convert {max_gap!r} to timedelta64 using pandas.to_timedelta" + ) + + if isinstance(max_gap, pd.Timedelta): + max_gap = np.timedelta64(max_gap.value, "ns") + + max_gap = np.timedelta64(max_gap, "ns").astype(np.float64) + + if not use_coordinate: + if not isinstance(max_gap, (Number, np.number)): + raise TypeError( + f"Expected integer or floating point max_gap since use_coordinate=False. Received {max_type}." + ) + # method index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate) interp_class, kwargs = _get_interpolator(method, **kwargs) @@ -253,6 +331,14 @@ def interp_na( if limit is not None: arr = arr.where(valids) + if max_gap is not None: + if dim not in self.coords: + raise NotImplementedError( + "max_gap not implemented for unlabeled coordinates yet." + ) + nan_block_lengths = _get_nan_block_lengths(self, dim, index) + arr = arr.where(nan_block_lengths <= max_gap) + return arr diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 17240faf007..f70e96217e8 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -25,7 +25,7 @@ def _maybe_null_out(result, axis, mask, min_count=1): """ if hasattr(axis, "__len__"): # if tuple or list raise ValueError( - "min_count is not available for reduction " "with more than one dimensions." + "min_count is not available for reduction with more than one dimensions." 
) if axis is not None and getattr(result, "ndim", False): diff --git a/xarray/core/ops.py b/xarray/core/ops.py index 78c4466faed..b789f93b4f1 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -347,13 +347,3 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True): inject_reduce_methods(cls) inject_cum_methods(cls) - - -def inject_coarsen_methods(cls): - # standard numpy reduce methods - methods = [(name, getattr(duck_array_ops, name)) for name in NAN_REDUCE_METHODS] - for name, f in methods: - func = cls._reduce_method(f) - func.__name__ = name - func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(name=func.__name__) - setattr(cls, name, func) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index a1864332f4d..ea6d72b2e03 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,12 +1,12 @@ import functools import warnings -from typing import Callable +from typing import Any, Callable, Dict import numpy as np from . import dtypes, duck_array_ops, utils from .dask_array_ops import dask_rolling_wrapper -from .ops import inject_coarsen_methods +from .ops import inject_reduce_methods from .pycompat import dask_array_type try: @@ -542,6 +542,11 @@ def __init__(self, obj, windows, boundary, side, coord_func): self.side = side self.boundary = boundary + absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims] + if absent_dims: + raise ValueError( + f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}." + ) if not utils.is_dict_like(coord_func): coord_func = {d: coord_func for d in self.obj.dims} for c in self.obj.coords: @@ -565,18 +570,23 @@ def __repr__(self): class DataArrayCoarsen(Coarsen): __slots__ = () + _reduce_extra_args_docstring = """""" + @classmethod - def _reduce_method(cls, func): + def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): """ - Return a wrapped function for injecting numpy methods. 
- see ops.inject_coarsen_methods + Return a wrapped function for injecting reduction methods. + see ops.inject_reduce_methods """ + kwargs: Dict[str, Any] = {} + if include_skipna: + kwargs["skipna"] = None def wrapped_func(self, **kwargs): from .dataarray import DataArray reduced = self.obj.variable.coarsen( - self.windows, func, self.boundary, self.side + self.windows, func, self.boundary, self.side, **kwargs ) coords = {} for c, v in self.obj.coords.items(): @@ -585,7 +595,11 @@ def wrapped_func(self, **kwargs): else: if any(d in self.windows for d in v.dims): coords[c] = v.variable.coarsen( - self.windows, self.coord_func[c], self.boundary, self.side + self.windows, + self.coord_func[c], + self.boundary, + self.side, + **kwargs, ) else: coords[c] = v @@ -597,12 +611,17 @@ def wrapped_func(self, **kwargs): class DatasetCoarsen(Coarsen): __slots__ = () + _reduce_extra_args_docstring = """""" + @classmethod - def _reduce_method(cls, func): + def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): """ - Return a wrapped function for injecting numpy methods. - see ops.inject_coarsen_methods + Return a wrapped function for injecting reduction methods. 
+ see ops.inject_reduce_methods """ + kwargs: Dict[str, Any] = {} + if include_skipna: + kwargs["skipna"] = None def wrapped_func(self, **kwargs): from .dataset import Dataset @@ -610,14 +629,18 @@ def wrapped_func(self, **kwargs): reduced = {} for key, da in self.obj.data_vars.items(): reduced[key] = da.variable.coarsen( - self.windows, func, self.boundary, self.side + self.windows, func, self.boundary, self.side, **kwargs ) coords = {} for c, v in self.obj.coords.items(): if any(d in self.windows for d in v.dims): coords[c] = v.variable.coarsen( - self.windows, self.coord_func[c], self.boundary, self.side + self.windows, + self.coord_func[c], + self.boundary, + self.side, + **kwargs, ) else: coords[c] = v.variable @@ -626,5 +649,5 @@ def wrapped_func(self, **kwargs): return wrapped_func -inject_coarsen_methods(DataArrayCoarsen) -inject_coarsen_methods(DatasetCoarsen) +inject_reduce_methods(DataArrayCoarsen) +inject_reduce_methods(DatasetCoarsen) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index cf97c997017..773dcef0aa1 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1,3 +1,4 @@ +import copy import functools import itertools import warnings @@ -24,10 +25,11 @@ from .pycompat import dask_array_type, integer_types from .utils import ( OrderedSet, + _default, decode_numpy_dict_values, either_dict_or_kwargs, - infix_dims, ensure_us_time_resolution, + infix_dims, ) try: @@ -887,7 +889,20 @@ def copy(self, deep=True, data=None): # note: # dims is already an immutable tuple # attributes and encoding will be copied when the new Array is created - return type(self)(self.dims, data, self._attrs, self._encoding, fastpath=True) + return self._replace(data=data) + + def _replace( + self, dims=_default, data=_default, attrs=_default, encoding=_default + ) -> "Variable": + if dims is _default: + dims = copy.copy(self._dims) + if data is _default: + data = copy.copy(self.data) + if attrs is _default: + attrs = copy.copy(self._attrs) + if 
encoding is _default: + encoding = copy.copy(self._encoding) + return type(self)(dims, data, attrs, encoding, fastpath=True) def __copy__(self): return self.copy(deep=False) @@ -978,6 +993,36 @@ def chunk(self, chunks=None, name=None, lock=False): return type(self)(self.dims, data, self._attrs, self._encoding, fastpath=True) + def _as_sparse(self, sparse_format=_default, fill_value=dtypes.NA): + """ + use sparse-array as backend. + """ + import sparse + + # TODO what to do if dask-backended? + if fill_value is dtypes.NA: + dtype, fill_value = dtypes.maybe_promote(self.dtype) + else: + dtype = dtypes.result_type(self.dtype, fill_value) + + if sparse_format is _default: + sparse_format = "coo" + try: + as_sparse = getattr(sparse, "as_{}".format(sparse_format.lower())) + except AttributeError: + raise ValueError("{} is not a valid sparse format".format(sparse_format)) + + data = as_sparse(self.data.astype(dtype), fill_value=fill_value) + return self._replace(data=data) + + def _to_dense(self): + """ + Change backend from sparse to np.array + """ + if hasattr(self._data, "todense"): + return self._replace(data=self._data.todense()) + return self.copy(deep=False) + def isel( self: VariableType, indexers: Mapping[Hashable, Any] = None, @@ -1671,40 +1716,45 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile """ - if isinstance(self.data, dask_array_type): - raise TypeError( - "quantile does not work for arrays stored as dask " - "arrays. Load the data via .compute() or .load() " - "prior to calling this method." 
- ) - - q = np.asarray(q, dtype=np.float64) - - new_dims = list(self.dims) - if dim is not None: - axis = self.get_axis_num(dim) - if utils.is_scalar(dim): - new_dims.remove(dim) - else: - for d in dim: - new_dims.remove(d) - else: - axis = None - new_dims = [] - # Only add the quantile dimension if q is array-like - if q.ndim != 0: - new_dims = ["quantile"] + new_dims - - qs = np.nanpercentile( - self.data, q * 100.0, axis=axis, interpolation=interpolation - ) + from .computation import apply_ufunc if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - attrs = self._attrs if keep_attrs else None - return Variable(new_dims, qs, attrs) + scalar = utils.is_scalar(q) + q = np.atleast_1d(np.asarray(q, dtype=np.float64)) + + if dim is None: + dim = self.dims + + if utils.is_scalar(dim): + dim = [dim] + + def _wrapper(npa, **kwargs): + # move quantile axis to end. required for apply_ufunc + return np.moveaxis(np.nanpercentile(npa, **kwargs), 0, -1) + + axis = np.arange(-1, -1 * len(dim) - 1, -1) + result = apply_ufunc( + _wrapper, + self, + input_core_dims=[dim], + exclude_dims=set(dim), + output_core_dims=[["quantile"]], + output_dtypes=[np.float64], + output_sizes={"quantile": len(q)}, + dask="parallelized", + kwargs={"q": q * 100, "axis": axis, "interpolation": interpolation}, + ) + + # for backward compatibility + result = result.transpose("quantile", ...) + if scalar: + result = result.squeeze("quantile") + if keep_attrs: + result.attrs = self._attrs + return result def rank(self, dim, pct=False): """Ranks the data. @@ -1814,9 +1864,9 @@ def rolling_window( ), ) - def coarsen(self, windows, func, boundary="exact", side="left"): + def coarsen(self, windows, func, boundary="exact", side="left", **kwargs): """ - Apply + Apply reduction function. 
""" windows = {k: v for k, v in windows.items() if k in self.dims} if not windows: @@ -1828,11 +1878,11 @@ def coarsen(self, windows, func, boundary="exact", side="left"): func = getattr(duck_array_ops, name, None) if func is None: raise NameError(f"{name} is not a valid method.") - return type(self)(self.dims, func(reshaped, axis=axes), self._attrs) + return self._replace(data=func(reshaped, axis=axes, **kwargs)) def _coarsen_reshape(self, windows, boundary, side): """ - Construct a reshaped-array for corsen + Construct a reshaped-array for coarsen """ if not utils.is_dict_like(boundary): boundary = {d: boundary for d in windows.keys()} @@ -2006,6 +2056,14 @@ def chunk(self, chunks=None, name=None, lock=False): # Dummy - do not chunk. This method is invoked e.g. by Dataset.chunk() return self.copy(deep=False) + def _as_sparse(self, sparse_format=_default, fill_value=_default): + # Dummy + return self.copy(deep=False) + + def _to_dense(self): + # Dummy + return self.copy(deep=False) + def _finalize_indexing_result(self, dims, data): if getattr(data, "ndim", 0) != 1: # returns Variable rather than IndexVariable if multi-dimensional diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 5c754c3f49b..d38c9765352 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -269,7 +269,7 @@ def line( if None, use the default for the matplotlib function. add_legend : boolean, optional Add legend with y axis coordinates (2D inputs only). 
- *args, **kwargs : optional + ``*args``, ``**kwargs`` : optional Additional arguments to matplotlib.pyplot.plot """ # Handle facetgrids first @@ -672,10 +672,22 @@ def newplotfunc( # check if we need to broadcast one dimension if xval.ndim < yval.ndim: - xval = np.broadcast_to(xval, yval.shape) + dims = darray[ylab].dims + if xval.shape[0] == yval.shape[0]: + xval = np.broadcast_to(xval[:, np.newaxis], yval.shape) + else: + xval = np.broadcast_to(xval[np.newaxis, :], yval.shape) - if yval.ndim < xval.ndim: - yval = np.broadcast_to(yval, xval.shape) + elif yval.ndim < xval.ndim: + dims = darray[xlab].dims + if yval.shape[0] == xval.shape[0]: + yval = np.broadcast_to(yval[:, np.newaxis], xval.shape) + else: + yval = np.broadcast_to(yval[np.newaxis, :], xval.shape) + elif xval.ndim == 2: + dims = darray[xlab].dims + else: + dims = (darray[ylab].dims[0], darray[xlab].dims[0]) # May need to transpose for correct x, y labels # xlab may be the name of a coord, we have to check for dim names @@ -685,10 +697,9 @@ def newplotfunc( # we transpose to (y, x, color) to make this work. yx_dims = (ylab, xlab) dims = yx_dims + tuple(d for d in darray.dims if d not in yx_dims) - if dims != darray.dims: - darray = darray.transpose(*dims, transpose_coords=True) - elif darray[xlab].dims[-1] == darray.dims[0]: - darray = darray.transpose(transpose_coords=True) + + if dims != darray.dims: + darray = darray.transpose(*dims, transpose_coords=True) # Pass the data as a masked ndarray too zval = darray.to_masked_array(copy=False) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 20ef58c3c0a..a23527bd49a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3984,6 +3984,7 @@ def test_rasterio_environment(self): with xr.open_rasterio(tmp_file) as actual: assert_allclose(actual, expected) + @pytest.mark.xfail(reason="rasterio 1.1.1 is broken. 
GH3573") def test_rasterio_vrt(self): import rasterio diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 6cd584daa96..3e0474e7b60 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -20,6 +20,23 @@ def test_CFMaskCoder_decode(): assert_identical(expected, encoded) +def test_CFMaskCoder_encode_missing_fill_values_conflict(): + original = xr.Variable( + ("x",), + [0.0, -1.0, 1.0], + encoding={"_FillValue": np.float32(1e20), "missing_value": np.float64(1e20)}, + ) + coder = variables.CFMaskCoder() + encoded = coder.encode(original) + + assert encoded.dtype == encoded.attrs["missing_value"].dtype + assert encoded.dtype == encoded.attrs["_FillValue"].dtype + + with pytest.warns(variables.SerializationWarning): + roundtripped = coder.decode(coder.encode(original)) + assert_identical(roundtripped, original) + + def test_CFMaskCoder_missing_value(): expected = xr.DataArray( np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]), diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 7c6dc1825a1..a1e34abd0d5 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -14,6 +14,9 @@ from xarray.convert import from_cdms2 from xarray.core import dtypes from xarray.core.common import full_like +from xarray.core.indexes import propagate_indexes +from xarray.core.utils import is_scalar + from xarray.tests import ( LooseVersion, ReturnItem, @@ -1182,6 +1185,16 @@ def test_selection_multiindex_remove_unused(self): expected = expected.set_index(xy=["x", "y"]).unstack() assert_identical(expected, actual) + def test_selection_multiindex_from_level(self): + # GH: 3512 + da = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"}) + db = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"}) + data = xr.concat([da, db], dim="x").set_index(xy=["x", "y"]) + assert data.dims == ("xy",) + actual = data.sel(y="a") + expected = data.isel(xy=[0, 
1]).unstack("xy").squeeze("y").drop_vars("y") + assert_equal(actual, expected) + def test_virtual_default_coords(self): array = DataArray(np.zeros((5,)), dims="x") expected = DataArray(range(5), dims="x", name="x") @@ -1229,6 +1242,7 @@ def test_coords(self): assert expected == actual del da.coords["x"] + da._indexes = propagate_indexes(da._indexes, exclude="x") expected = DataArray(da.values, {"y": [0, 1, 2]}, dims=["x", "y"], name="foo") assert_identical(da, expected) @@ -2318,17 +2332,20 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) - def test_quantile(self): - for q in [0.25, [0.50], [0.25, 0.75]]: - for axis, dim in zip( - [None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]] - ): - actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) - expected = np.nanpercentile( - self.dv.values, np.array(q) * 100, axis=axis - ) - np.testing.assert_allclose(actual.values, expected) - assert actual.attrs == self.attrs + @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) + @pytest.mark.parametrize( + "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) + ) + def test_quantile(self, q, axis, dim): + actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) + expected = np.nanpercentile(self.dv.values, np.array(q) * 100, axis=axis) + np.testing.assert_allclose(actual.values, expected) + if is_scalar(q): + assert "quantile" not in actual.dims + else: + assert "quantile" in actual.dims + + assert actual.attrs == self.attrs def test_reduce_keep_attrs(self): # Test dropped attrs diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 67d3b3198dc..7db1911621b 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pytest +from pandas.core.indexes.datetimes import DatetimeIndex import xarray as xr from xarray import ( @@ -22,10 +23,12 @@ open_dataset, set_options, ) +from 
xarray.coding.cftimeindex import CFTimeIndex from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like from xarray.core.npcompat import IS_NEP18_ACTIVE from xarray.core.pycompat import integer_types +from xarray.core.utils import is_scalar from . import ( InaccessibleArray, @@ -2458,6 +2461,53 @@ def test_rename_vars(self): with pytest.raises(ValueError): original.rename_vars(names_dict_bad) + @requires_cftime + def test_rename_does_not_change_CFTimeIndex_type(self): + # make sure CFTimeIndex is not converted to DatetimeIndex #3522 + + time = xr.cftime_range(start="2000", periods=6, freq="2MS", calendar="noleap") + orig = Dataset(coords={"time": time}) + + renamed = orig.rename(time="time_new") + assert "time_new" in renamed.indexes + assert isinstance(renamed.indexes["time_new"], CFTimeIndex) + assert renamed.indexes["time_new"].name == "time_new" + + # check original has not changed + assert "time" in orig.indexes + assert isinstance(orig.indexes["time"], CFTimeIndex) + assert orig.indexes["time"].name == "time" + + # note: rename_dims(time="time_new") drops "ds.indexes" + renamed = orig.rename_dims() + assert isinstance(renamed.indexes["time"], CFTimeIndex) + + renamed = orig.rename_vars() + assert isinstance(renamed.indexes["time"], CFTimeIndex) + + def test_rename_does_not_change_DatetimeIndex_type(self): + # make sure DatetimeIndex is conserved on rename + + time = pd.date_range(start="2000", periods=6, freq="2MS") + orig = Dataset(coords={"time": time}) + + renamed = orig.rename(time="time_new") + assert "time_new" in renamed.indexes + assert isinstance(renamed.indexes["time_new"], DatetimeIndex) + assert renamed.indexes["time_new"].name == "time_new" + + # check original has not changed + assert "time" in orig.indexes + assert isinstance(orig.indexes["time"], DatetimeIndex) + assert orig.indexes["time"].name == "time" + + # note: rename_dims(time="time_new") drops "ds.indexes" + renamed = orig.rename_dims() + 
assert isinstance(renamed.indexes["time"], DatetimeIndex) + + renamed = orig.rename_vars() + assert isinstance(renamed.indexes["time"], DatetimeIndex) + def test_swap_dims(self): original = Dataset({"x": [1, 2, 3], "y": ("x", list("abc")), "z": 42}) expected = Dataset({"z": 42}, {"x": ("y", [1, 2, 3]), "y": list("abc")}) @@ -2745,6 +2795,42 @@ def test_unstack_errors(self): with raises_regex(ValueError, "do not have a MultiIndex"): ds.unstack("x") + def test_unstack_fill_value(self): + ds = xr.Dataset( + {"var": (("x",), np.arange(6))}, + coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)}, + ) + # make ds incomplete + ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"]) + # test fill_value + actual = ds.unstack("index", fill_value=-1) + expected = ds.unstack("index").fillna(-1).astype(np.int) + assert actual["var"].dtype == np.int + assert_equal(actual, expected) + + actual = ds["var"].unstack("index", fill_value=-1) + expected = ds["var"].unstack("index").fillna(-1).astype(np.int) + assert actual.equals(expected) + + @requires_sparse + def test_unstack_sparse(self): + ds = xr.Dataset( + {"var": (("x",), np.arange(6))}, + coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)}, + ) + # make ds incomplete + ds = ds.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"]) + # test fill_value + actual = ds.unstack("index", sparse=True) + expected = ds.unstack("index") + assert actual["var"].variable._to_dense().equals(expected["var"].variable) + assert actual["var"].data.density < 1.0 + + actual = ds["var"].unstack("index", sparse=True) + expected = ds["var"].unstack("index") + assert actual.variable._to_dense().equals(expected.variable) + assert actual.data.density < 1.0 + def test_stack_unstack_fast(self): ds = Dataset( { @@ -4490,21 +4576,24 @@ def test_reduce_keepdims(self): ) assert_identical(expected, actual) - def test_quantile(self): - + @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) + def test_quantile(self, q): ds = 
create_test_data(seed=123) - for q in [0.25, [0.50], [0.25, 0.75]]: - for dim in [None, "dim1", ["dim1"]]: - ds_quantile = ds.quantile(q, dim=dim) - assert "quantile" in ds_quantile - for var, dar in ds.data_vars.items(): - assert var in ds_quantile - assert_identical(ds_quantile[var], dar.quantile(q, dim=dim)) - dim = ["dim1", "dim2"] + for dim in [None, "dim1", ["dim1"]]: ds_quantile = ds.quantile(q, dim=dim) - assert "dim3" in ds_quantile.dims - assert all(d not in ds_quantile.dims for d in dim) + if is_scalar(q): + assert "quantile" not in ds_quantile.dims + else: + assert "quantile" in ds_quantile.dims + + for var, dar in ds.data_vars.items(): + assert var in ds_quantile + assert_identical(ds_quantile[var], dar.quantile(q, dim=dim)) + dim = ["dim1", "dim2"] + ds_quantile = ds.quantile(q, dim=dim) + assert "dim3" in ds_quantile.dims + assert all(d not in ds_quantile.dims for d in dim) @requires_bottleneck def test_rank(self): @@ -5408,6 +5497,11 @@ def ds(request): ) +def test_coarsen_absent_dims_error(ds): + with raises_regex(ValueError, "not found in Dataset."): + ds.coarsen(foo=2) + + @pytest.mark.parametrize("dask", [True, False]) @pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")]) def test_coarsen(ds, dask, boundary, side): @@ -5416,12 +5510,11 @@ def test_coarsen(ds, dask, boundary, side): actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max() assert_equal( - actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary=boundary, side=side).max() + actual["z1"], ds["z1"].coarsen(x=3, boundary=boundary, side=side).max() ) # coordinate should be mean by default assert_equal( - actual["time"], - ds["time"].coarsen(time=2, x=3, boundary=boundary, side=side).mean(), + actual["time"], ds["time"].coarsen(time=2, boundary=boundary, side=side).mean() ) @@ -5432,8 +5525,8 @@ def test_coarsen_coords(ds, dask): # check if coord_func works actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max() - 
assert_equal(actual["z1"], ds["z1"].coarsen(time=2, x=3, boundary="trim").max()) - assert_equal(actual["time"], ds["time"].coarsen(time=2, x=3, boundary="trim").max()) + assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max()) + assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max()) # raise if exact with pytest.raises(ValueError): @@ -5808,7 +5901,9 @@ def test_trapz_datetime(dask, which_datetime): actual = da.integrate("time", datetime_unit="D") expected_data = np.trapz( - da, duck_array_ops.datetime_to_numeric(da["time"], datetime_unit="D"), axis=0 + da.data, + duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"), + axis=0, ) expected = xr.DataArray( expected_data, diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index f678af2fec5..aee7bbd6b11 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -274,23 +274,39 @@ def assert_dask_array(da, dask): @arm_xfail -@pytest.mark.parametrize("dask", [False, True]) -def test_datetime_reduce(dask): - time = np.array(pd.date_range("15/12/1999", periods=11)) - time[8:11] = np.nan - da = DataArray(np.linspace(0, 365, num=11), dims="time", coords={"time": time}) - - if dask and has_dask: - chunks = {"time": 5} - da = da.chunk(chunks) - - actual = da["time"].mean() - assert not pd.isnull(actual) - actual = da["time"].mean(skipna=False) - assert pd.isnull(actual) - - # test for a 0d array - assert da["time"][0].mean() == da["time"][:1].mean() +@pytest.mark.parametrize("dask", [False, True] if has_dask else [False]) +def test_datetime_mean(dask): + # Note: only testing numpy, as dask is broken upstream + da = DataArray( + np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8"), + dims=["time"], + ) + if dask: + # Trigger use case where a chunk is full of NaT + da = da.chunk({"time": 3}) + + expect = DataArray(np.array("2010-01-02", dtype="M8")) + expect_nat = 
DataArray(np.array("NaT", dtype="M8")) + + actual = da.mean() + if dask: + assert actual.chunks is not None + assert_equal(actual, expect) + + actual = da.mean(skipna=False) + if dask: + assert actual.chunks is not None + assert_equal(actual, expect_nat) + + # tests for 1d array full of NaT + assert_equal(da[[1]].mean(), expect_nat) + assert_equal(da[[1]].mean(skipna=False), expect_nat) + + # tests for a 0d array + assert_equal(da[0].mean(), da[0]) + assert_equal(da[0].mean(skipna=False), da[0]) + assert_equal(da[1].mean(), expect_nat) + assert_equal(da[1].mean(skipna=False), expect_nat) @requires_cftime diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 581affa3471..97bd31ae050 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -137,42 +137,58 @@ def test_da_groupby_empty(): def test_da_groupby_quantile(): - array = xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]) + array = xr.DataArray( + data=[1, 2, 3, 4, 5, 6], coords={"x": [1, 1, 1, 2, 2, 2]}, dims="x" + ) # Scalar quantile - expected = xr.DataArray([2, 5], [("x", [1, 2])]) + expected = xr.DataArray( + data=[2, 5], coords={"x": [1, 2], "quantile": 0.5}, dims="x" + ) actual = array.groupby("x").quantile(0.5) assert_identical(expected, actual) # Vector quantile - expected = xr.DataArray([[1, 3], [4, 6]], [("x", [1, 2]), ("quantile", [0, 1])]) + expected = xr.DataArray( + data=[[1, 3], [4, 6]], + coords={"x": [1, 2], "quantile": [0, 1]}, + dims=("x", "quantile"), + ) actual = array.groupby("x").quantile([0, 1]) assert_identical(expected, actual) # Multiple dimensions array = xr.DataArray( - [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], + data=[[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + dims=("x", "y"), ) actual_x = array.groupby("x").quantile(0, dim=...) 
- expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) + expected_x = xr.DataArray( + data=[1, 4], coords={"x": [1, 2], "quantile": 0}, dims="x" + ) assert_identical(expected_x, actual_x) actual_y = array.groupby("y").quantile(0, dim=...) - expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) + expected_y = xr.DataArray( + data=[1, 22], coords={"y": [0, 1], "quantile": 0}, dims="y" + ) assert_identical(expected_y, actual_y) actual_xx = array.groupby("x").quantile(0) expected_xx = xr.DataArray( - [[1, 11, 22], [4, 15, 24]], [("x", [1, 2]), ("y", [0, 0, 1])] + data=[[1, 11, 22], [4, 15, 24]], + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_xx, actual_xx) actual_yy = array.groupby("y").quantile(0) expected_yy = xr.DataArray( - [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], - [("x", [1, 1, 1, 2, 2]), ("y", [0, 1])], + data=[[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + dims=("x", "y"), ) assert_identical(expected_yy, actual_yy) @@ -180,14 +196,14 @@ def test_da_groupby_quantile(): x = [0, 1] foo = xr.DataArray( np.reshape(np.arange(365 * 2), (365, 2)), - coords=dict(time=times, x=x), + coords={"time": times, "x": x}, dims=("time", "x"), ) g = foo.groupby(foo.time.dt.month) actual = g.quantile(0, dim=...) 
expected = xr.DataArray( - [ + data=[ 0.0, 62.0, 120.0, @@ -201,12 +217,111 @@ def test_da_groupby_quantile(): 610.0, 670.0, ], - [("month", np.arange(1, 13))], + coords={"month": np.arange(1, 13), "quantile": 0}, + dims="month", ) assert_identical(expected, actual) actual = g.quantile(0, dim="time")[:2] - expected = xr.DataArray([[0.0, 1], [62.0, 63]], [("month", [1, 2]), ("x", [0, 1])]) + expected = xr.DataArray( + data=[[0.0, 1], [62.0, 63]], + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + dims=("month", "x"), + ) + assert_identical(expected, actual) + + +def test_ds_groupby_quantile(): + ds = xr.Dataset( + data_vars={"a": ("x", [1, 2, 3, 4, 5, 6])}, coords={"x": [1, 1, 1, 2, 2, 2]} + ) + + # Scalar quantile + expected = xr.Dataset( + data_vars={"a": ("x", [2, 5])}, coords={"quantile": 0.5, "x": [1, 2]} + ) + actual = ds.groupby("x").quantile(0.5) + assert_identical(expected, actual) + + # Vector quantile + expected = xr.Dataset( + data_vars={"a": (("x", "quantile"), [[1, 3], [4, 6]])}, + coords={"x": [1, 2], "quantile": [0, 1]}, + ) + actual = ds.groupby("x").quantile([0, 1]) + assert_identical(expected, actual) + + # Multiple dimensions + ds = xr.Dataset( + data_vars={ + "a": ( + ("x", "y"), + [[1, 11, 26], [2, 12, 22], [3, 13, 23], [4, 16, 24], [5, 15, 25]], + ) + }, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, + ) + + actual_x = ds.groupby("x").quantile(0, dim=...) + expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2], "quantile": 0}) + assert_identical(expected_x, actual_x) + + actual_y = ds.groupby("y").quantile(0, dim=...) 
+ expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1], "quantile": 0}) + assert_identical(expected_y, actual_y) + + actual_xx = ds.groupby("x").quantile(0) + expected_xx = xr.Dataset( + {"a": (("x", "y"), [[1, 11, 22], [4, 15, 24]])}, + coords={"x": [1, 2], "y": [0, 0, 1], "quantile": 0}, + ) + assert_identical(expected_xx, actual_xx) + + actual_yy = ds.groupby("y").quantile(0) + expected_yy = xr.Dataset( + {"a": (("x", "y"), [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]])}, + coords={"x": [1, 1, 1, 2, 2], "y": [0, 1], "quantile": 0}, + ).transpose() + assert_identical(expected_yy, actual_yy) + + times = pd.date_range("2000-01-01", periods=365) + x = [0, 1] + foo = xr.Dataset( + {"a": (("time", "x"), np.reshape(np.arange(365 * 2), (365, 2)))}, + coords=dict(time=times, x=x), + ) + g = foo.groupby(foo.time.dt.month) + + actual = g.quantile(0, dim=...) + expected = xr.Dataset( + { + "a": ( + "month", + [ + 0.0, + 62.0, + 120.0, + 182.0, + 242.0, + 304.0, + 364.0, + 426.0, + 488.0, + 548.0, + 610.0, + 670.0, + ], + ) + }, + coords={"month": np.arange(1, 13), "quantile": 0}, + ) + assert_identical(expected, actual) + + actual = g.quantile(0, dim="time").isel(month=slice(None, 2)) + expected = xr.Dataset( + data_vars={"a": (("month", "x"), [[0.0, 1], [62.0, 63]])}, + coords={"month": [1, 2], "x": [0, 1], "quantile": 0}, + ) assert_identical(expected, actual) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index cfce5d6f645..0b410383a34 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -5,7 +5,13 @@ import pytest import xarray as xr -from xarray.core.missing import NumpyInterpolator, ScipyInterpolator, SplineInterpolator +from xarray.core.missing import ( + NumpyInterpolator, + ScipyInterpolator, + SplineInterpolator, + get_clean_interp_index, + _get_nan_block_lengths, +) from xarray.core.pycompat import dask_array_type from xarray.tests import ( assert_array_equal, @@ -153,7 +159,7 @@ def 
test_interpolate_pd_compat_polynomial(): def test_interpolate_unsorted_index_raises(): vals = np.array([1, 2, 3], dtype=np.float64) expected = xr.DataArray(vals, dims="x", coords={"x": [2, 1, 3]}) - with raises_regex(ValueError, "Index must be monotonicly increasing"): + with raises_regex(ValueError, "Index 'x' must be monotonically increasing"): expected.interpolate_na(dim="x", method="index") @@ -169,12 +175,19 @@ def test_interpolate_invalid_interpolator_raises(): da.interpolate_na(dim="x", method="foo") +def test_interpolate_duplicate_values_raises(): + data = np.random.randn(2, 3) + da = xr.DataArray(data, coords=[("x", ["a", "a"]), ("y", [0, 1, 2])]) + with raises_regex(ValueError, "Index 'x' has duplicate values"): + da.interpolate_na(dim="x", method="foo") + + def test_interpolate_multiindex_raises(): data = np.random.randn(2, 3) data[1, 1] = np.nan da = xr.DataArray(data, coords=[("x", ["a", "b"]), ("y", [0, 1, 2])]) das = da.stack(z=("x", "y")) - with raises_regex(TypeError, "Index must be castable to float64"): + with raises_regex(TypeError, "Index 'z' must be castable to float64"): das.interpolate_na(dim="z") @@ -439,3 +452,114 @@ def test_ffill_dataset(ds): @requires_bottleneck def test_bfill_dataset(ds): ds.ffill(dim="time") + + +@requires_bottleneck +@pytest.mark.parametrize( + "y, lengths", + [ + [np.arange(9), [[3, 3, 3, 0, 3, 3, 0, 2, 2]]], + [np.arange(9) * 3, [[9, 9, 9, 0, 9, 9, 0, 6, 6]]], + [[0, 2, 5, 6, 7, 8, 10, 12, 14], [[6, 6, 6, 0, 4, 4, 0, 4, 4]]], + ], +) +def test_interpolate_na_nan_block_lengths(y, lengths): + arr = [[np.nan, np.nan, np.nan, 1, np.nan, np.nan, 4, np.nan, np.nan]] + da = xr.DataArray(arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": y}) + index = get_clean_interp_index(da, dim="y", use_coordinate=True) + actual = _get_nan_block_lengths(da, dim="y", index=index) + expected = da.copy(data=lengths * 2) + assert_equal(actual, expected) + + +@pytest.fixture +def da_time(): + return xr.DataArray( + [np.nan, 1, 2, np.nan, 
np.nan, 5, np.nan, np.nan, np.nan, np.nan, 10], + dims=["t"], + ) + + +def test_interpolate_na_max_gap_errors(da_time): + with raises_regex( + NotImplementedError, "max_gap not implemented for unlabeled coordinates" + ): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(ValueError, "max_gap must be a scalar."): + da_time.interpolate_na("t", max_gap=(1,)) + + da_time["t"] = pd.date_range("2001-01-01", freq="H", periods=11) + with raises_regex(TypeError, "Underlying index is"): + da_time.interpolate_na("t", max_gap=1) + + with raises_regex(TypeError, "Expected integer or floating point"): + da_time.interpolate_na("t", max_gap="1H", use_coordinate=False) + + with raises_regex(ValueError, "Could not convert 'huh' to timedelta64"): + da_time.interpolate_na("t", max_gap="huh") + + +@requires_bottleneck +@pytest.mark.parametrize( + "time_range_func", + [pd.date_range, pytest.param(xr.cftime_range, marks=pytest.mark.xfail)], +) +@pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")]) +@pytest.mark.parametrize( + "max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")] +) +def test_interpolate_na_max_gap_time_specifier( + da_time, max_gap, transform, time_range_func +): + da_time["t"] = time_range_func("2001-01-01", freq="H", periods=11) + expected = transform( + da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10]) + ) + actual = transform(da_time).interpolate_na("t", max_gap=max_gap) + assert_equal(actual, expected) + + +@requires_bottleneck +@pytest.mark.parametrize( + "coords", + [ + pytest.param(None, marks=pytest.mark.xfail()), + {"x": np.arange(4), "y": np.arange(11)}, + ], +) +def test_interpolate_na_2d(coords): + da = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + 
dims=["x", "y"], + coords=coords, + ) + + actual = da.interpolate_na("y", max_gap=2) + expected_y = da.copy( + data=[ + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + ] + ) + assert_equal(actual, expected_y) + + actual = da.interpolate_na("x", max_gap=3) + expected_x = xr.DataArray( + [ + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + ], + dims=["x", "y"], + coords=coords, + ) + assert_equal(actual, expected_x) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 6e283ea01da..a10f0d9a67e 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -265,6 +265,7 @@ def test2d_1d_2d_coordinates_contourf(self): ) a.plot.contourf(x="time", y="depth") + a.plot.contourf(x="depth", y="time") def test3d(self): self.darray.plot() @@ -2149,3 +2150,31 @@ def test_yticks_kwarg(self, da): da.plot(yticks=np.arange(5)) expected = np.arange(5) assert np.all(plt.gca().get_yticks() == expected) + + +@requires_matplotlib +@pytest.mark.parametrize("plotfunc", ["pcolormesh", "contourf", "contour"]) +def test_plot_transposed_nondim_coord(plotfunc): + x = np.linspace(0, 10, 101) + h = np.linspace(3, 7, 101) + s = np.linspace(0, 1, 51) + z = s[:, np.newaxis] * h[np.newaxis, :] + da = xr.DataArray( + np.sin(x) * np.cos(z), + dims=["s", "x"], + coords={"x": x, "s": s, "z": (("s", "x"), z), "zt": (("x", "s"), z.T)}, + ) + getattr(da.plot, plotfunc)(x="x", y="zt") + getattr(da.plot, plotfunc)(x="zt", y="x") + + +@requires_matplotlib +@pytest.mark.parametrize("plotfunc", ["pcolormesh", "imshow"]) +def test_plot_transposes_properly(plotfunc): + # test that we aren't mistakenly transposing when the 2 
dimensions have equal sizes. + da = xr.DataArray([np.sin(2 * np.pi / 10 * np.arange(10))] * 10, dims=("y", "x")) + hdl = getattr(da.plot, plotfunc)(x="x", y="y") + # get_array doesn't work for contour, contourf. It returns the colormap intervals. + # pcolormesh returns 1D array but imshow returns a 2D array so it is necessary + # to ravel() on the LHS + assert np.all(hdl.get_array().ravel() == da.to_masked_array().ravel()) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index fd9e9b039ac..0be6f8af464 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -222,7 +222,9 @@ def convert_units(obj, to): if name != obj.name } - new_obj = xr.DataArray(name=name, data=data, coords=coords, attrs=obj.attrs) + new_obj = xr.DataArray( + name=name, data=data, coords=coords, attrs=obj.attrs, dims=obj.dims + ) elif isinstance(obj, unit_registry.Quantity): units = to.get(None) new_obj = obj.to(units) if units is not None else obj @@ -307,19 +309,689 @@ def __repr__(self): class function: - def __init__(self, name): - self.name = name - self.func = getattr(np, name) + def __init__(self, name_or_function, *args, **kwargs): + if callable(name_or_function): + self.name = name_or_function.__name__ + self.func = name_or_function + else: + self.name = name_or_function + self.func = getattr(np, name_or_function) + if self.func is None: + raise AttributeError( + f"module 'numpy' has no attribute named '{self.name}'" + ) + + self.args = args + self.kwargs = kwargs def __call__(self, *args, **kwargs): - return self.func(*args, **kwargs) + all_args = list(self.args) + list(args) + all_kwargs = {**self.kwargs, **kwargs} + + return self.func(*all_args, **all_kwargs) def __repr__(self): return f"function_{self.name}" +def test_apply_ufunc_dataarray(dtype): + func = function( + xr.apply_ufunc, np.mean, input_core_dims=[["x"]], kwargs={"axis": -1} + ) + + array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.m + x = np.arange(20) * unit_registry.s + 
data_array = xr.DataArray(data=array, dims="x", coords={"x": x}) + + expected = attach_units(func(strip_units(data_array)), extract_units(data_array)) + result = func(data_array) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail( + reason="pint does not implement `np.result_type` and align strips units" +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +def test_align_dataarray(fill_value, variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit + array2 = np.linspace(0, 8, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + x = np.arange(2) * original_unit + x_a1 = np.array([10, 5]) * original_unit + x_a2 = np.array([10, 5]) * coord_unit + + y1 = np.arange(5) * original_unit + y2 = np.arange(2, 7) * dim_unit + + data_array1 = xr.DataArray( + data=array1, coords={"x": x, "x_a": ("x", x_a1), "y": y1}, dims=("x", "y") + ) + data_array2 = xr.DataArray( + data=array2, coords={"x": x, "x_a": ("x", x_a2), "y": y2}, dims=("x", "y") + ) + + fill_value = fill_value * data_unit + func = function(xr.align, join="outer", fill_value=fill_value) + if error is not 
None: + with pytest.raises(error): + func(data_array1, data_array2) + + return + + stripped_kwargs = { + key: strip_units( + convert_units(value, {None: original_unit}) + if isinstance(value, unit_registry.Quantity) + else value + ) + for key, value in func.kwargs.items() + } + units = extract_units(data_array1) + # FIXME: should the expected_b have the same units as data_array1 + # or data_array2? + expected_a, expected_b = tuple( + attach_units(elem, units) + for elem in func( + strip_units(data_array1), + strip_units(convert_units(data_array2, units)), + **stripped_kwargs, + ) + ) + result_a, result_b = func(data_array1, data_array2) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +@pytest.mark.xfail( + reason="pint does not implement `np.result_type` and align strips units" +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +@pytest.mark.parametrize("fill_value", (np.float64(10), np.float64(np.nan))) +def test_align_dataset(fill_value, unit, variant, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * original_unit + array2 = np.linspace(0, 10, 2 * 5).reshape(2, 5).astype(dtype) * data_unit + + x = np.arange(2) * 
original_unit + x_a1 = np.array([10, 5]) * original_unit + x_a2 = np.array([10, 5]) * coord_unit + + y1 = np.arange(5) * original_unit + y2 = np.arange(2, 7) * dim_unit + + ds1 = xr.Dataset( + data_vars={"a": (("x", "y"), array1)}, + coords={"x": x, "x_a": ("x", x_a1), "y": y1}, + ) + ds2 = xr.Dataset( + data_vars={"a": (("x", "y"), array2)}, + coords={"x": x, "x_a": ("x", x_a2), "y": y2}, + ) + + fill_value = fill_value * data_unit + func = function(xr.align, join="outer", fill_value=fill_value) + if error is not None: + with pytest.raises(error): + func(ds1, ds2) + + return + + stripped_kwargs = { + key: strip_units( + convert_units(value, {None: original_unit}) + if isinstance(value, unit_registry.Quantity) + else value + ) + for key, value in func.kwargs.items() + } + units = extract_units(ds1) + # FIXME: should the expected_b have the same units as ds1 or ds2? + expected_a, expected_b = tuple( + attach_units(elem, units) + for elem in func( + strip_units(ds1), strip_units(convert_units(ds2, units)), **stripped_kwargs + ) + ) + result_a, result_b = func(ds1, ds2) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +def test_broadcast_dataarray(dtype): + array1 = np.linspace(0, 10, 2) * unit_registry.Pa + array2 = np.linspace(0, 10, 3) * unit_registry.Pa + + a = xr.DataArray(data=array1, dims="x") + b = xr.DataArray(data=array2, dims="y") + + expected_a, expected_b = tuple( + attach_units(elem, extract_units(a)) + for elem in xr.broadcast(strip_units(a), strip_units(b)) + ) + result_a, result_b = xr.broadcast(a, b) + + assert_equal_with_units(expected_a, result_a) + assert_equal_with_units(expected_b, result_b) + + +def test_broadcast_dataset(dtype): + array1 = np.linspace(0, 10, 2) * unit_registry.Pa + array2 = np.linspace(0, 10, 3) * unit_registry.Pa + + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("y", array2)}) + + (expected,) = tuple( + attach_units(elem, extract_units(ds)) for elem in 
xr.broadcast(strip_units(ds)) + ) + (result,) = xr.broadcast(ds) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`combine_by_coords` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_combine_by_coords(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + x = np.arange(1, 4) * 10 * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + other_array1 = np.ones_like(array1) * data_unit + other_array2 = np.ones_like(array2) * data_unit + other_x = np.arange(1, 4) * 10 * dim_unit + other_y = np.arange(2, 4) * dim_unit + other_z = np.arange(3, 6) * coord_unit + + ds = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + other = xr.Dataset( + data_vars={"a": (("y", "x"), other_array1), "b": (("y", "x"), other_array2)}, + coords={"x": other_x, "y": other_y, "z": ("x", other_z)}, + ) + + if error is not None: + with pytest.raises(error): + xr.combine_by_coords([ds, other]) + + return + + units = extract_units(ds) + expected = attach_units( + 
xr.combine_by_coords( + [strip_units(ds), strip_units(convert_units(other, units))] + ), + units, + ) + result = xr.combine_by_coords([ds, other]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_combine_nested(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + + x = np.arange(1, 4) * 10 * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + ds1 = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + ds2 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.ones_like(array1) * data_unit), + "b": (("y", "x"), np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(3) * dim_unit, + "y": np.arange(2, 4) * dim_unit, + "z": ("x", np.arange(-3, 0) * coord_unit), + }, + ) + ds3 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), + "b": (("y", "x"), np.zeros_like(array2) * np.nan * data_unit), + }, + coords={ + "x": np.arange(3, 6) * 
dim_unit, + "y": np.arange(4, 6) * dim_unit, + "z": ("x", np.arange(3, 6) * coord_unit), + }, + ) + ds4 = xr.Dataset( + data_vars={ + "a": (("y", "x"), -1 * np.ones_like(array1) * data_unit), + "b": (("y", "x"), -1 * np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(6, 9) * dim_unit, + "y": np.arange(6, 8) * dim_unit, + "z": ("x", np.arange(6, 9) * coord_unit), + }, + ) + + func = function(xr.combine_nested, concat_dim=["x", "y"]) + if error is not None: + with pytest.raises(error): + func([[ds1, ds2], [ds3, ds4]]) + + return + + units = extract_units(ds1) + convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) + expected = attach_units( + func( + [ + [strip_units(ds1), convert_and_strip(ds2)], + [convert_and_strip(ds3), convert_and_strip(ds4)], + ] + ), + units, + ) + result = func([[ds1, ds2], [ds3, ds4]]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`concat` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + ), +) +def test_concat_dataarray(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} + data_unit, dims_unit = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit + x1 = np.arange(5, 15) * original_unit + x2 = np.arange(5) * dims_unit + + arr1 = xr.DataArray(data=array1, coords={"x": x1}, dims="x") + arr2 = 
xr.DataArray(data=array2, coords={"x": x2}, dims="x") + + if error is not None: + with pytest.raises(error): + xr.concat([arr1, arr2], dim="x") + + return + + expected = attach_units( + xr.concat([strip_units(arr1), strip_units(arr2)], dim="x"), extract_units(arr1) + ) + result = xr.concat([arr1, arr2], dim="x") + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`concat` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + ), +) +def test_concat_dataset(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = {"data": (unit, original_unit), "dims": (original_unit, unit)} + data_unit, dims_unit = variants.get(variant) + + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 5).astype(dtype) * data_unit + x1 = np.arange(5, 15) * original_unit + x2 = np.arange(5) * dims_unit + + ds1 = xr.Dataset(data_vars={"a": ("x", array1)}, coords={"x": x1}) + ds2 = xr.Dataset(data_vars={"a": ("x", array2)}, coords={"x": x2}) + + if error is not None: + with pytest.raises(error): + xr.concat([ds1, ds2], dim="x") + + return + + expected = attach_units( + xr.concat([strip_units(ds1), strip_units(ds2)], dim="x"), extract_units(ds1) + ) + result = xr.concat([ds1, ds2], dim="x") + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + 
unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_merge_dataarray(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * original_unit + array2 = np.linspace(1, 2, 2 * 4).reshape(2, 4).astype(dtype) * data_unit + array3 = np.linspace(0, 2, 3 * 4).reshape(3, 4).astype(dtype) * data_unit + + x = np.arange(2) * original_unit + y = np.arange(3) * original_unit + z = np.arange(4) * original_unit + u = np.linspace(10, 20, 2) * original_unit + v = np.linspace(10, 20, 3) * original_unit + w = np.linspace(10, 20, 4) * original_unit + + arr1 = xr.DataArray( + name="a", + data=array1, + coords={"x": x, "y": y, "u": ("x", u), "v": ("y", v)}, + dims=("x", "y"), + ) + arr2 = xr.DataArray( + name="b", + data=array2, + coords={ + "x": np.arange(2, 4) * dim_unit, + "z": z, + "u": ("x", np.linspace(20, 30, 2) * coord_unit), + "w": ("z", w), + }, + dims=("x", "z"), + ) + arr3 = xr.DataArray( + name="c", + data=array3, + coords={ + "y": np.arange(3, 6) * dim_unit, + "z": np.arange(4, 8) * dim_unit, + "v": ("y", np.linspace(10, 20, 3) * coord_unit), + "w": ("z", np.linspace(10, 20, 4) * coord_unit), + }, + dims=("y", "z"), + ) + + func = function(xr.merge) + if error is not None: + with pytest.raises(error): + func([arr1, arr2, arr3]) + + return + + units = {name: original_unit for name in 
list("abcuvwxyz")} + convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) + expected = attach_units( + func([strip_units(arr1), convert_and_strip(arr2), convert_and_strip(arr3)]), + units, + ) + result = func([arr1, arr2, arr3]) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="blocked by `where`") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), + "coords", + ), +) +def test_merge_dataset(variant, unit, error, dtype): + original_unit = unit_registry.m + + variants = { + "data": (unit, original_unit, original_unit), + "dims": (original_unit, unit, original_unit), + "coords": (original_unit, original_unit, unit), + } + data_unit, dim_unit, coord_unit = variants.get(variant) + + array1 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + array2 = np.zeros(shape=(2, 3), dtype=dtype) * original_unit + + x = np.arange(11, 14) * original_unit + y = np.arange(2) * original_unit + z = np.arange(3) * original_unit + + ds1 = xr.Dataset( + data_vars={"a": (("y", "x"), array1), "b": (("y", "x"), array2)}, + coords={"x": x, "y": y, "z": ("x", z)}, + ) + ds2 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.ones_like(array1) * data_unit), + "b": (("y", "x"), np.ones_like(array2) * data_unit), + }, + coords={ + "x": np.arange(3) * dim_unit, + "y": np.arange(2, 4) * dim_unit, + "z": ("x", np.arange(-3, 0) * coord_unit), + }, + ) + ds3 = xr.Dataset( + data_vars={ + "a": (("y", "x"), np.zeros_like(array1) * np.nan * data_unit), + "b": (("y", 
"x"), np.zeros_like(array2) * np.nan * data_unit), + }, + coords={ + "x": np.arange(3, 6) * dim_unit, + "y": np.arange(4, 6) * dim_unit, + "z": ("x", np.arange(3, 6) * coord_unit), + }, + ) + + func = function(xr.merge) + if error is not None: + with pytest.raises(error): + func([ds1, ds2, ds3]) + + return + + units = extract_units(ds1) + convert_and_strip = lambda ds: strip_units(convert_units(ds, units)) + expected = attach_units( + func([strip_units(ds1), convert_and_strip(ds2), convert_and_strip(ds3)]), units + ) + result = func([ds1, ds2, ds3]) + + assert_equal_with_units(expected, result) + + @pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) -def test_replication(func, dtype): +def test_replication_dataarray(func, dtype): array = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s data_array = xr.DataArray(data=array, dims="x") @@ -330,8 +1002,33 @@ def test_replication(func, dtype): assert_equal_with_units(expected, result) +@pytest.mark.parametrize("func", (xr.zeros_like, xr.ones_like)) +def test_replication_dataset(func, dtype): + array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s + array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa + x = np.arange(20).astype(dtype) * unit_registry.m + y = np.arange(10).astype(dtype) * unit_registry.m + z = y.to(unit_registry.mm) + + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, + coords={"x": x, "y": y, "z": ("y", z)}, + ) + + numpy_func = getattr(np, func.__name__) + expected = ds.copy( + data={name: numpy_func(array.data) for name, array in ds.data_vars.items()} + ) + result = func(ds) + + assert_equal_with_units(expected, result) + + @pytest.mark.xfail( - reason="np.full_like on Variable strips the unit and pint does not allow mixed args" + reason=( + "pint is undecided on how `full_like` should work, so incorrect errors " + "may be expected: hgrecco/pint#882" + ) ) @pytest.mark.parametrize( "unit,error", @@ -344,8 +1041,9 @@ def 
test_replication(func, dtype): pytest.param(unit_registry.ms, None, id="compatible_unit"), pytest.param(unit_registry.s, None, id="identical_unit"), ), + ids=repr, ) -def test_replication_full_like(unit, error, dtype): +def test_replication_full_like_dataarray(unit, error, dtype): array = np.linspace(0, 5, 10) * unit_registry.s data_array = xr.DataArray(data=array, dims="x") @@ -360,6 +1058,163 @@ def test_replication_full_like(unit, error, dtype): assert_equal_with_units(expected, result) +@pytest.mark.xfail( + reason=( + "pint is undecided on how `full_like` should work, so incorrect errors " + "may be expected: hgrecco/pint#882" + ) +) +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.m, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.ms, None, id="compatible_unit"), + pytest.param(unit_registry.s, None, id="identical_unit"), + ), + ids=repr, +) +def test_replication_full_like_dataset(unit, error, dtype): + array1 = np.linspace(0, 10, 20).astype(dtype) * unit_registry.s + array2 = np.linspace(5, 10, 10).astype(dtype) * unit_registry.Pa + x = np.arange(20).astype(dtype) * unit_registry.m + y = np.arange(10).astype(dtype) * unit_registry.m + z = y.to(unit_registry.mm) + + ds = xr.Dataset( + data_vars={"a": ("x", array1), "b": ("y", array2)}, + coords={"x": x, "y": y, "z": ("y", z)}, + ) + + fill_value = -1 * unit + if error is not None: + with pytest.raises(error): + xr.full_like(ds, fill_value=fill_value) + + return + + expected = ds.copy( + data={ + name: np.full_like(array, fill_value=fill_value) + for name, array in ds.data_vars.items() + } + ) + result = xr.full_like(ds, fill_value=fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`where` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, 
DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize("fill_value", (np.nan, 10.2)) +def test_where_dataarray(fill_value, unit, error, dtype): + array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + + x = xr.DataArray(data=array, dims="x") + cond = x < 5 * unit_registry.m + # FIXME: this should work without wrapping in array() + fill_value = np.array(fill_value) * unit + + if error is not None: + with pytest.raises(error): + xr.where(cond, x, fill_value) + + return + + fill_value_ = ( + fill_value.to(unit_registry.m) + if isinstance(fill_value, unit_registry.Quantity) + and fill_value.check(unit_registry.m) + else fill_value + ) + expected = attach_units( + xr.where(cond, strip_units(x), strip_units(fill_value_)), extract_units(x) + ) + result = xr.where(cond, x, fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="`where` strips units") +@pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ids=repr, +) +@pytest.mark.parametrize("fill_value", (np.nan, 10.2)) +def test_where_dataset(fill_value, unit, error, dtype): + array1 = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m + array2 = np.linspace(-5, 0, 10).astype(dtype) * unit_registry.m + x = np.arange(10) * unit_registry.s + + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": 
x}) + cond = ds.x < 5 * unit_registry.s + # FIXME: this should work without wrapping in array() + fill_value = np.array(fill_value) * unit + + if error is not None: + with pytest.raises(error): + xr.where(cond, ds, fill_value) + + return + + fill_value_ = ( + fill_value.to(unit_registry.m) + if isinstance(fill_value, unit_registry.Quantity) + and fill_value.check(unit_registry.m) + else fill_value + ) + expected = attach_units( + xr.where(cond, strip_units(ds), strip_units(fill_value_)), extract_units(ds) + ) + result = xr.where(cond, ds, fill_value) + + assert_equal_with_units(expected, result) + + +@pytest.mark.xfail(reason="pint does not implement `np.einsum`") +def test_dot_dataarray(dtype): + array1 = ( + np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) + * unit_registry.m + / unit_registry.s + ) + array2 = ( + np.linspace(10, 20, 10 * 20).reshape(10, 20).astype(dtype) * unit_registry.s + ) + + arr1 = xr.DataArray(data=array1, dims=("x", "y")) + arr2 = xr.DataArray(data=array2, dims=("y", "z")) + + expected = array1.dot(array2) + result = xr.dot(arr1, arr2) + + assert_equal_with_units(expected, result) + + class TestDataArray: @pytest.mark.filterwarnings("error:::pint[.*]") @pytest.mark.parametrize( @@ -1114,7 +1969,7 @@ def test_broadcast_equals(self, unit, dtype): dim={"z": np.linspace(10, 20, 12) * unit_registry.s}, axis=1, ), - method("drop", labels="x"), + method("drop_sel", labels="x"), method("reset_coords", names="x2"), method("copy"), pytest.param( @@ -3190,7 +4045,7 @@ def test_reindex_like(self, unit, error, dtype): marks=pytest.mark.xfail(reason="strips units"), ), pytest.param( - method("apply", np.fabs), + method("map", np.fabs), marks=pytest.mark.xfail(reason="fabs strips units"), ), ), @@ -3365,7 +4220,7 @@ def test_grouped_operations(self, func, dtype): method("rename_dims", x="offset_x"), method("swap_dims", {"x": "x2"}), method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1), - method("drop", labels="x"), + 
method("drop_sel", labels="x"), method("drop_dims", "z"), method("set_coords", names="c"), method("reset_coords", names="x2"), diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index d394919dbdd..5b5aa1a523f 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -22,6 +22,7 @@ PandasIndexAdapter, VectorizedIndexer, ) +from xarray.core.pycompat import dask_array_type from xarray.core.utils import NDArrayMixin from xarray.core.variable import as_compatible_data, as_variable from xarray.tests import requires_bottleneck @@ -33,6 +34,7 @@ assert_identical, raises_regex, requires_dask, + requires_sparse, source_ndarray, ) @@ -542,6 +544,15 @@ def test_copy_index_with_data_errors(self): with raises_regex(ValueError, "must match shape of object"): orig.copy(data=new_data) + def test_replace(self): + var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"}) + result = var._replace() + assert_identical(result, var) + + new_data = np.arange(4).reshape(2, 2) + result = var._replace(data=new_data) + assert_array_equal(result.data, new_data) + def test_real_and_imag(self): v = self.cls("x", np.arange(3) - 1j * np.arange(3), {"foo": "bar"}) expected_re = self.cls("x", np.arange(3), {"foo": "bar"}) @@ -1482,23 +1493,31 @@ def test_reduce(self): with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): v.mean(dim="x", allow_lazy=False) - def test_quantile(self): + @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) + @pytest.mark.parametrize( + "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) + ) + def test_quantile(self, q, axis, dim): v = Variable(["x", "y"], self.d) - for q in [0.25, [0.50], [0.25, 0.75]]: - for axis, dim in zip( - [None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]] - ): - actual = v.quantile(q, dim=dim) + actual = v.quantile(q, dim=dim) + expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) + np.testing.assert_allclose(actual.values, 
expected) - expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) - np.testing.assert_allclose(actual.values, expected) + @requires_dask + @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) + @pytest.mark.parametrize("axis, dim", [[1, "y"], [[1], ["y"]]]) + def test_quantile_dask(self, q, axis, dim): + v = Variable(["x", "y"], self.d).chunk({"x": 2}) + actual = v.quantile(q, dim=dim) + assert isinstance(actual.data, dask_array_type) + expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) + np.testing.assert_allclose(actual.values, expected) @requires_dask - def test_quantile_dask_raises(self): - # regression for GH1524 - v = Variable(["x", "y"], self.d).chunk(2) + def test_quantile_chunked_dim_error(self): + v = Variable(["x", "y"], self.d).chunk({"x": 2}) - with raises_regex(TypeError, "arrays stored as dask"): + with raises_regex(ValueError, "dimension 'x'"): v.quantile(0.5, dim="x") @requires_dask @@ -1814,6 +1833,26 @@ def test_coarsen_2d(self): expected[1, 1] *= 12 / 11 assert_allclose(actual, expected) + v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4)) + actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact") + expected = self.cls(("x", "y"), 4 * np.ones((2, 2))) + assert_equal(actual, expected) + + v[0, 0] = np.nan + v[-1, -1] = np.nan + expected[0, 0] = 3 + expected[-1, -1] = 3 + actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact") + assert_equal(actual, expected) + + actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=False) + expected = self.cls(("x", "y"), [[np.nan, 18], [42, np.nan]]) + assert_equal(actual, expected) + + actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=True) + expected = self.cls(("x", "y"), [[10, 18], [42, 35]]) + assert_equal(actual, expected) + @requires_dask class TestVariableWithDask(VariableSubclassobjects): @@ -1853,6 +1892,17 @@ def test_getitem_with_mask_nd_indexer(self): ) +@requires_sparse +class 
TestVariableWithSparse: + # TODO inherit VariableSubclassobjects to cover more tests + + def test_as_sparse(self): + data = np.arange(12).reshape(3, 4) + var = Variable(("x", "y"), data)._as_sparse(fill_value=-1) + actual = var._to_dense() + assert_identical(var, actual) + + class TestIndexVariable(VariableSubclassobjects): cls = staticmethod(IndexVariable) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index e99c0632fe8..d662f2fcaaf 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -42,8 +42,9 @@ def open_dataset( Parameters ---------- name : str - Name of the netcdf file containing the dataset - ie. 'air_temperature' + Name of the file containing the dataset. If no suffix is given, assumed + to be netCDF ('.nc' is appended) + e.g. 'air_temperature' cache_dir : string, optional The directory in which to search for and write cached data. cache : boolean, optional @@ -60,10 +61,13 @@ def open_dataset( xarray.open_dataset """ + root, ext = _os.path.splitext(name) + if not ext: + ext = ".nc" + fullname = root + ext longdir = _os.path.expanduser(cache_dir) - fullname = name + ".nc" localfile = _os.sep.join((longdir, fullname)) - md5name = name + ".md5" + md5name = fullname + ".md5" md5file = _os.sep.join((longdir, md5name)) if not _os.path.exists(localfile): diff --git a/xarray/ufuncs.py b/xarray/ufuncs.py index 0f6fc3b1334..ae2c5c574b6 100644 --- a/xarray/ufuncs.py +++ b/xarray/ufuncs.py @@ -13,6 +13,7 @@ Once NumPy 1.10 comes out with support for overriding ufuncs, this module will hopefully no longer be necessary. 
""" +import textwrap import warnings as _warnings import numpy as _np @@ -78,10 +79,49 @@ def __call__(self, *args, **kwargs): return res +def _skip_signature(doc, name): + if not isinstance(doc, str): + return doc + + if doc.startswith(name): + signature_end = doc.find("\n\n") + doc = doc[signature_end + 2 :] + + return doc + + +def _remove_unused_reference_labels(doc): + if not isinstance(doc, str): + return doc + + max_references = 5 + for num in range(max_references): + label = f".. [{num}]" + reference = f"[{num}]_" + index = f"{num}. " + + if label not in doc or reference in doc: + continue + + doc = doc.replace(label, index) + + return doc + + +def _dedent(doc): + if not isinstance(doc, str): + return doc + + return textwrap.dedent(doc) + + def _create_op(name): func = _UFuncDispatcher(name) func.__name__ = name doc = getattr(_np, name).__doc__ + + doc = _remove_unused_reference_labels(_skip_signature(_dedent(doc), name)) + func.__doc__ = ( "xarray specific variant of numpy.%s. Handles " "xarray.Dataset, xarray.DataArray, xarray.Variable, "