From 23d76b44cb879337f2e6c33c3f32bdd12f0efbe9 Mon Sep 17 00:00:00 2001 From: Gerardo Rivera Date: Thu, 12 Dec 2019 13:48:50 -0500 Subject: [PATCH 01/10] Minor doc fixes (#3615) --- xarray/core/dataarray.py | 10 +++++----- xarray/core/dataset.py | 12 ++++++------ xarray/core/variable.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b649df6dd56..31cd3c713f6 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1115,7 +1115,7 @@ def thin( **indexers_kwargs: Any, ) -> "DataArray": """Return a new DataArray whose data is given by each `n` value - along the specified dimension(s). Default `n` = 5 + along the specified dimension(s). See Also -------- @@ -1289,7 +1289,7 @@ def reindex( satisfy the equation ``abs(index[indexer] - target) <= tolerance``. fill_value : scalar, optional Value to use for newly missing values - **indexers_kwarg : {dim: indexer, ...}, optional + **indexers_kwargs : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -1338,7 +1338,7 @@ def interp( values. kwargs: dictionary Additional keyword passed to scipy's interpolator. - ``**coords_kwarg`` : {dim: coordinate, ...}, optional + ``**coords_kwargs`` : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. @@ -2740,7 +2740,7 @@ def shift( Value to use for newly missing values **shifts_kwargs: The keyword arguments form of ``shifts``. - One of shifts or shifts_kwarg must be provided. + One of shifts or shifts_kwargs must be provided. Returns ------- @@ -2791,7 +2791,7 @@ def roll( deprecated and will change to False in a future version. Explicitly pass roll_coords to silence the warning. **shifts_kwargs : The keyword arguments form of ``shifts``. - One of shifts or shifts_kwarg must be provided. + One of shifts or shifts_kwargs must be provided. Returns ------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index cc821becd6f..6be06fed117 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1887,7 +1887,7 @@ def isel( drop : bool, optional If ``drop=True``, drop coordinates variables indexed by integers instead of making them scalar. - **indexers_kwarg : {dim: indexer, ...}, optional + **indexers_kwargs : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -2033,7 +2033,7 @@ def sel( drop : bool, optional If ``drop=True``, drop coordinates variables in `indexers` instead of making them scalar. - **indexers_kwarg : {dim: indexer, ...}, optional + **indexers_kwargs : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -2168,7 +2168,7 @@ def thin( Parameters ---------- - indexers : dict or int, default: 5 + indexers : dict or int A dict with keys matching dimensions and integer values `n` or a single integer `n` applied over all dimensions. One of indexers or indexers_kwargs must be provided. @@ -2332,7 +2332,7 @@ def reindex( fill_value : scalar, optional Value to use for newly missing values sparse: use sparse-array. By default, False - **indexers_kwarg : {dim: indexer, ...}, optional + **indexers_kwargs : {dim: indexer, ...}, optional Keyword arguments in the same form as ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -2547,7 +2547,7 @@ def interp( values. 
kwargs: dictionary, optional Additional keyword passed to scipy's interpolator. - **coords_kwarg : {dim: coordinate, ...}, optional + **coords_kwargs : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. @@ -4938,7 +4938,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): Value to use for newly missing values **shifts_kwargs: The keyword arguments form of ``shifts``. - One of shifts or shifts_kwarg must be provided. + One of shifts or shifts_kwargs must be provided. Returns ------- diff --git a/xarray/core/variable.py b/xarray/core/variable.py index aa04cffb5ea..ac4b367f66d 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1137,7 +1137,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): Value to use for newly missing values **shifts_kwargs: The keyword arguments form of ``shifts``. - One of shifts or shifts_kwarg must be provided. + One of shifts or shifts_kwargs must be provided. Returns ------- @@ -1245,7 +1245,7 @@ def roll(self, shifts=None, **shifts_kwargs): left. **shifts_kwargs: The keyword arguments form of ``shifts``. - One of shifts or shifts_kwarg must be provided. + One of shifts or shifts_kwargs must be provided. Returns ------- From f2b2f9f62ea0f1020262a7ff563bfe74258ffaa1 Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Fri, 13 Dec 2019 11:05:23 -0500 Subject: [PATCH 02/10] Provide shape info in shape mismatch error. (#3619) * Provide shape info in shape mismatch error. * Reword error message. Co-Authored-By: Deepak Cherian --- xarray/core/variable.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index ac4b367f66d..17ecdf62730 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -346,7 +346,10 @@ def data(self): def data(self, data): data = as_compatible_data(data) if data.shape != self.shape: - raise ValueError("replacement data must match the Variable's shape") + raise ValueError( + f"replacement data must match the Variable's shape. " + f"replacement data has shape {data.shape}; Variable has shape {self.shape}" + ) self._data = data def load(self, **kwargs): From 6295bc6bca1559680544ea86051f35fa2d367fe1 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 17 Dec 2019 14:50:05 +0100 Subject: [PATCH 03/10] Fix/quantile wrong errmsg (#3635) * test correct error is thrown * quantile: throw out of bounds error * whats-new --- doc/whats-new.rst | 2 ++ xarray/core/variable.py | 6 ++++++ xarray/tests/test_variable.py | 8 ++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1f60d457432..44bff9e7202 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,8 @@ Bug fixes By `Deepak Cherian `_. - Fix issue with Dask-backed datasets raising a ``KeyError`` on some computations involving ``map_blocks`` (:pull:`3598`) By `Tom Augspurger `_. +- Ensure :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` issue the correct error + when ``q`` is out of bounds (:issue:`3634`) by `Mathias Hauser `_. 
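For illustration only (this sketch is not part of the patch series, and the array values are invented): with patches 02 and 03 applied, the two improved error messages can be exercised as follows::

    import numpy as np
    import xarray as xr

    var = xr.Variable(("x", "y"), np.zeros((2, 3)))
    try:
        var.data = np.zeros((2, 4))  # deliberately the wrong shape
    except ValueError as err:
        print(err)  # patch 02: the message now reports both shapes

    try:
        xr.DataArray([0.0, 1.0, 2.0]).quantile(1.1)  # q outside [0, 1]
    except ValueError as err:
        print(err)  # patch 03: "Quantiles must be in the range [0, 1]"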
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 17ecdf62730..4474d973f59 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1731,6 +1731,10 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): scalar = utils.is_scalar(q) q = np.atleast_1d(np.asarray(q, dtype=np.float64)) + # TODO: remove once numpy >= 1.15.0 is the minimum requirement + if np.count_nonzero(q < 0.0) or np.count_nonzero(q > 1.0): + raise ValueError("Quantiles must be in the range [0, 1]") + if dim is None: dim = self.dims @@ -1739,6 +1743,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): def _wrapper(npa, **kwargs): # move quantile axis to end. required for apply_ufunc + + # TODO: use np.nanquantile once numpy >= 1.15.0 is the minimum requirement return np.moveaxis(np.nanpercentile(npa, **kwargs), 0, -1) axis = np.arange(-1, -1 * len(dim) - 1, -1) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 1d83e16a5bd..49a6906d5be 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1542,6 +1542,14 @@ def test_quantile_chunked_dim_error(self): with raises_regex(ValueError, "dimension 'x'"): v.quantile(0.5, dim="x") + @pytest.mark.parametrize("q", [-0.1, 1.1, [2], [0.25, 2]]) + def test_quantile_out_of_bounds(self, q): + v = Variable(["x", "y"], self.d) + + # escape special characters + with raises_regex(ValueError, r"Quantiles must be in the range \[0, 1\]"): + v.quantile(q, dim="x") + @requires_dask @requires_bottleneck def test_rank_dask_raises(self): From 6ad59b93f814b48053b1a9eea61d7c43517105cb Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 17 Dec 2019 17:25:26 +0100 Subject: [PATCH 04/10] silence sphinx warnings round 3 (#3602) * fix the deprecated section and update links to drop * link to interp_like instead of interpolate_like * update links in the manually written parts of the documentation * add missing methods for DatasetGroupBy, DataArrayGroupBy and Variable * update references in whats-new.rst * fix a few mistakes in the reference targets * add missing methods for Data*Rolling, Data*GroupBy and Data*Resample * add all CFTimeIndex methods * fix a few more broken links in whats-new.rst * remove documentation links for some non-public methods / functions * add missing methods to Data*Coarsen * move the coarsen objects into their own section in api.rst * use currentmodule instead of prefixing with ~xarray * add a new tutorial section * add show_versions and set_options * add FacetGrid to api.rst and update links * use plot.line instead of plot.plot * add the CFTimeIndex properties to api-hidden.rst * add the backend objects' methods to api-hidden.rst * add missing dict methods to api.rst * add the coordinates objects to api.rst * add the data store properties to api-hidden.rst * add IndexVariable methods and properties to api-hidden.rst * add properties for *Coarsen, *GroupBy, *Resample and *Rolling to api-hidden.rst * add IndexVariable.get_level_variable to api-hidden.rst * add the accessor methods / properties to api-hidden.rst * add the RollingExp method to api-hidden.rst * fix the docstring of StringAccessor.replace * mention load_store instead of from_store and generate a page for dump_to_store * also add load_store --- doc/api-hidden.rst | 653 +++++++++++++++++++++++++++++++++- doc/api.rst | 37 +- doc/groupby.rst | 6 +- doc/howdoi.rst | 4 +- doc/indexing.rst | 2 +- doc/interpolation.rst | 4 +- doc/io.rst | 86 ++--- doc/pandas.rst | 23 +- 
doc/plotting.rst | 19 +- doc/terminology.rst | 1 + doc/whats-new.rst | 50 +-- xarray/core/accessor_str.py | 4 +- xarray/core/resample.py | 4 + xarray/core/variable.py | 2 + xarray/util/print_versions.py | 7 + 15 files changed, 801 insertions(+), 101 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 027c732697f..c117b0f4fc7 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -27,6 +27,38 @@ Dataset.std Dataset.var + core.coordinates.DatasetCoordinates.get + core.coordinates.DatasetCoordinates.items + core.coordinates.DatasetCoordinates.keys + core.coordinates.DatasetCoordinates.merge + core.coordinates.DatasetCoordinates.to_dataset + core.coordinates.DatasetCoordinates.to_index + core.coordinates.DatasetCoordinates.update + core.coordinates.DatasetCoordinates.values + core.coordinates.DatasetCoordinates.dims + core.coordinates.DatasetCoordinates.indexes + core.coordinates.DatasetCoordinates.variables + + core.rolling.DatasetCoarsen.all + core.rolling.DatasetCoarsen.any + core.rolling.DatasetCoarsen.argmax + core.rolling.DatasetCoarsen.argmin + core.rolling.DatasetCoarsen.count + core.rolling.DatasetCoarsen.max + core.rolling.DatasetCoarsen.mean + core.rolling.DatasetCoarsen.median + core.rolling.DatasetCoarsen.min + core.rolling.DatasetCoarsen.prod + core.rolling.DatasetCoarsen.std + core.rolling.DatasetCoarsen.sum + core.rolling.DatasetCoarsen.var + core.rolling.DatasetCoarsen.boundary + core.rolling.DatasetCoarsen.coord_func + core.rolling.DatasetCoarsen.obj + core.rolling.DatasetCoarsen.side + core.rolling.DatasetCoarsen.trim_excess + core.rolling.DatasetCoarsen.windows + core.groupby.DatasetGroupBy.assign core.groupby.DatasetGroupBy.assign_coords core.groupby.DatasetGroupBy.first @@ -34,6 +66,69 @@ core.groupby.DatasetGroupBy.fillna core.groupby.DatasetGroupBy.quantile core.groupby.DatasetGroupBy.where + core.groupby.DatasetGroupBy.all + core.groupby.DatasetGroupBy.any + core.groupby.DatasetGroupBy.argmax + core.groupby.DatasetGroupBy.argmin + core.groupby.DatasetGroupBy.count + core.groupby.DatasetGroupBy.max + core.groupby.DatasetGroupBy.mean + core.groupby.DatasetGroupBy.median + core.groupby.DatasetGroupBy.min + core.groupby.DatasetGroupBy.prod + core.groupby.DatasetGroupBy.std + core.groupby.DatasetGroupBy.sum + core.groupby.DatasetGroupBy.var + core.groupby.DatasetGroupBy.dims + core.groupby.DatasetGroupBy.groups + + core.resample.DatasetResample.all + core.resample.DatasetResample.any + core.resample.DatasetResample.apply + core.resample.DatasetResample.argmax + core.resample.DatasetResample.argmin + core.resample.DatasetResample.assign + core.resample.DatasetResample.assign_coords + core.resample.DatasetResample.bfill + core.resample.DatasetResample.count + core.resample.DatasetResample.ffill + core.resample.DatasetResample.fillna + core.resample.DatasetResample.first + core.resample.DatasetResample.last + core.resample.DatasetResample.map + core.resample.DatasetResample.max + core.resample.DatasetResample.mean + core.resample.DatasetResample.median + core.resample.DatasetResample.min + core.resample.DatasetResample.prod + core.resample.DatasetResample.quantile + core.resample.DatasetResample.reduce + core.resample.DatasetResample.std + core.resample.DatasetResample.sum + core.resample.DatasetResample.var + core.resample.DatasetResample.where + core.resample.DatasetResample.dims + core.resample.DatasetResample.groups + + core.rolling.DatasetRolling.argmax + core.rolling.DatasetRolling.argmin + core.rolling.DatasetRolling.count + 
core.rolling.DatasetRolling.max + core.rolling.DatasetRolling.mean + core.rolling.DatasetRolling.median + core.rolling.DatasetRolling.min + core.rolling.DatasetRolling.prod + core.rolling.DatasetRolling.std + core.rolling.DatasetRolling.sum + core.rolling.DatasetRolling.var + core.rolling.DatasetRolling.center + core.rolling.DatasetRolling.dim + core.rolling.DatasetRolling.min_periods + core.rolling.DatasetRolling.obj + core.rolling.DatasetRolling.rollings + core.rolling.DatasetRolling.window + + core.rolling_exp.RollingExp.mean Dataset.argsort Dataset.astype @@ -47,6 +142,9 @@ Dataset.cumprod Dataset.rank + Dataset.load_store + Dataset.dump_to_store + DataArray.ndim DataArray.nbytes DataArray.shape @@ -71,12 +169,104 @@ DataArray.std DataArray.var + core.coordinates.DataArrayCoordinates.get + core.coordinates.DataArrayCoordinates.items + core.coordinates.DataArrayCoordinates.keys + core.coordinates.DataArrayCoordinates.merge + core.coordinates.DataArrayCoordinates.to_dataset + core.coordinates.DataArrayCoordinates.to_index + core.coordinates.DataArrayCoordinates.update + core.coordinates.DataArrayCoordinates.values + core.coordinates.DataArrayCoordinates.dims + core.coordinates.DataArrayCoordinates.indexes + core.coordinates.DataArrayCoordinates.variables + + core.rolling.DataArrayCoarsen.all + core.rolling.DataArrayCoarsen.any + core.rolling.DataArrayCoarsen.argmax + core.rolling.DataArrayCoarsen.argmin + core.rolling.DataArrayCoarsen.count + core.rolling.DataArrayCoarsen.max + core.rolling.DataArrayCoarsen.mean + core.rolling.DataArrayCoarsen.median + core.rolling.DataArrayCoarsen.min + core.rolling.DataArrayCoarsen.prod + core.rolling.DataArrayCoarsen.std + core.rolling.DataArrayCoarsen.sum + core.rolling.DataArrayCoarsen.var + core.rolling.DataArrayCoarsen.boundary + core.rolling.DataArrayCoarsen.coord_func + core.rolling.DataArrayCoarsen.obj + core.rolling.DataArrayCoarsen.side + core.rolling.DataArrayCoarsen.trim_excess + core.rolling.DataArrayCoarsen.windows + core.groupby.DataArrayGroupBy.assign_coords core.groupby.DataArrayGroupBy.first core.groupby.DataArrayGroupBy.last core.groupby.DataArrayGroupBy.fillna core.groupby.DataArrayGroupBy.quantile core.groupby.DataArrayGroupBy.where + core.groupby.DataArrayGroupBy.all + core.groupby.DataArrayGroupBy.any + core.groupby.DataArrayGroupBy.argmax + core.groupby.DataArrayGroupBy.argmin + core.groupby.DataArrayGroupBy.count + core.groupby.DataArrayGroupBy.max + core.groupby.DataArrayGroupBy.mean + core.groupby.DataArrayGroupBy.median + core.groupby.DataArrayGroupBy.min + core.groupby.DataArrayGroupBy.prod + core.groupby.DataArrayGroupBy.std + core.groupby.DataArrayGroupBy.sum + core.groupby.DataArrayGroupBy.var + core.groupby.DataArrayGroupBy.dims + core.groupby.DataArrayGroupBy.groups + + core.resample.DataArrayResample.all + core.resample.DataArrayResample.any + core.resample.DataArrayResample.apply + core.resample.DataArrayResample.argmax + core.resample.DataArrayResample.argmin + core.resample.DataArrayResample.assign_coords + core.resample.DataArrayResample.bfill + core.resample.DataArrayResample.count + core.resample.DataArrayResample.ffill + core.resample.DataArrayResample.fillna + core.resample.DataArrayResample.first + core.resample.DataArrayResample.last + core.resample.DataArrayResample.map + core.resample.DataArrayResample.max + core.resample.DataArrayResample.mean + core.resample.DataArrayResample.median + core.resample.DataArrayResample.min + core.resample.DataArrayResample.prod + core.resample.DataArrayResample.quantile + 
core.resample.DataArrayResample.reduce + core.resample.DataArrayResample.std + core.resample.DataArrayResample.sum + core.resample.DataArrayResample.var + core.resample.DataArrayResample.where + core.resample.DataArrayResample.dims + core.resample.DataArrayResample.groups + + core.rolling.DataArrayRolling.argmax + core.rolling.DataArrayRolling.argmin + core.rolling.DataArrayRolling.count + core.rolling.DataArrayRolling.max + core.rolling.DataArrayRolling.mean + core.rolling.DataArrayRolling.median + core.rolling.DataArrayRolling.min + core.rolling.DataArrayRolling.prod + core.rolling.DataArrayRolling.std + core.rolling.DataArrayRolling.sum + core.rolling.DataArrayRolling.var + core.rolling.DataArrayRolling.center + core.rolling.DataArrayRolling.dim + core.rolling.DataArrayRolling.min_periods + core.rolling.DataArrayRolling.obj + core.rolling.DataArrayRolling.window + core.rolling.DataArrayRolling.window_labels DataArray.argsort DataArray.clip @@ -91,6 +281,221 @@ DataArray.cumprod DataArray.rank + core.accessor_dt.DatetimeAccessor.ceil + core.accessor_dt.DatetimeAccessor.floor + core.accessor_dt.DatetimeAccessor.round + core.accessor_dt.DatetimeAccessor.strftime + core.accessor_dt.DatetimeAccessor.day + core.accessor_dt.DatetimeAccessor.dayofweek + core.accessor_dt.DatetimeAccessor.dayofyear + core.accessor_dt.DatetimeAccessor.days_in_month + core.accessor_dt.DatetimeAccessor.daysinmonth + core.accessor_dt.DatetimeAccessor.hour + core.accessor_dt.DatetimeAccessor.microsecond + core.accessor_dt.DatetimeAccessor.minute + core.accessor_dt.DatetimeAccessor.month + core.accessor_dt.DatetimeAccessor.nanosecond + core.accessor_dt.DatetimeAccessor.quarter + core.accessor_dt.DatetimeAccessor.season + core.accessor_dt.DatetimeAccessor.second + core.accessor_dt.DatetimeAccessor.time + core.accessor_dt.DatetimeAccessor.week + core.accessor_dt.DatetimeAccessor.weekday + core.accessor_dt.DatetimeAccessor.weekday_name + core.accessor_dt.DatetimeAccessor.weekofyear + core.accessor_dt.DatetimeAccessor.year + + core.accessor_str.StringAccessor.capitalize + core.accessor_str.StringAccessor.center + core.accessor_str.StringAccessor.contains + core.accessor_str.StringAccessor.count + core.accessor_str.StringAccessor.decode + core.accessor_str.StringAccessor.encode + core.accessor_str.StringAccessor.endswith + core.accessor_str.StringAccessor.find + core.accessor_str.StringAccessor.get + core.accessor_str.StringAccessor.index + core.accessor_str.StringAccessor.isalnum + core.accessor_str.StringAccessor.isalpha + core.accessor_str.StringAccessor.isdecimal + core.accessor_str.StringAccessor.isdigit + core.accessor_str.StringAccessor.islower + core.accessor_str.StringAccessor.isnumeric + core.accessor_str.StringAccessor.isspace + core.accessor_str.StringAccessor.istitle + core.accessor_str.StringAccessor.isupper + core.accessor_str.StringAccessor.len + core.accessor_str.StringAccessor.ljust + core.accessor_str.StringAccessor.lower + core.accessor_str.StringAccessor.lstrip + core.accessor_str.StringAccessor.match + core.accessor_str.StringAccessor.pad + core.accessor_str.StringAccessor.repeat + core.accessor_str.StringAccessor.replace + core.accessor_str.StringAccessor.rfind + core.accessor_str.StringAccessor.rindex + core.accessor_str.StringAccessor.rjust + core.accessor_str.StringAccessor.rstrip + core.accessor_str.StringAccessor.slice + core.accessor_str.StringAccessor.slice_replace + core.accessor_str.StringAccessor.startswith + core.accessor_str.StringAccessor.strip + core.accessor_str.StringAccessor.swapcase 
+ core.accessor_str.StringAccessor.title + core.accessor_str.StringAccessor.translate + core.accessor_str.StringAccessor.upper + core.accessor_str.StringAccessor.wrap + core.accessor_str.StringAccessor.zfill + + Variable.all + Variable.any + Variable.argmax + Variable.argmin + Variable.argsort + Variable.astype + Variable.broadcast_equals + Variable.chunk + Variable.clip + Variable.coarsen + Variable.compute + Variable.concat + Variable.conj + Variable.conjugate + Variable.copy + Variable.count + Variable.cumprod + Variable.cumsum + Variable.equals + Variable.fillna + Variable.get_axis_num + Variable.identical + Variable.isel + Variable.isnull + Variable.item + Variable.load + Variable.max + Variable.mean + Variable.median + Variable.min + Variable.no_conflicts + Variable.notnull + Variable.pad_with_fill_value + Variable.prod + Variable.quantile + Variable.rank + Variable.reduce + Variable.roll + Variable.rolling_window + Variable.round + Variable.searchsorted + Variable.set_dims + Variable.shift + Variable.squeeze + Variable.stack + Variable.std + Variable.sum + Variable.to_base_variable + Variable.to_coord + Variable.to_dict + Variable.to_index + Variable.to_index_variable + Variable.to_variable + Variable.transpose + Variable.unstack + Variable.var + Variable.where + Variable.T + Variable.attrs + Variable.chunks + Variable.data + Variable.dims + Variable.dtype + Variable.encoding + Variable.imag + Variable.nbytes + Variable.ndim + Variable.real + Variable.shape + Variable.size + Variable.sizes + Variable.values + + IndexVariable.all + IndexVariable.any + IndexVariable.argmax + IndexVariable.argmin + IndexVariable.argsort + IndexVariable.astype + IndexVariable.broadcast_equals + IndexVariable.chunk + IndexVariable.clip + IndexVariable.coarsen + IndexVariable.compute + IndexVariable.concat + IndexVariable.conj + IndexVariable.conjugate + IndexVariable.copy + IndexVariable.count + IndexVariable.cumprod + IndexVariable.cumsum + IndexVariable.equals + IndexVariable.fillna + IndexVariable.get_axis_num + IndexVariable.get_level_variable + IndexVariable.identical + IndexVariable.isel + IndexVariable.isnull + IndexVariable.item + IndexVariable.load + IndexVariable.max + IndexVariable.mean + IndexVariable.median + IndexVariable.min + IndexVariable.no_conflicts + IndexVariable.notnull + IndexVariable.pad_with_fill_value + IndexVariable.prod + IndexVariable.quantile + IndexVariable.rank + IndexVariable.reduce + IndexVariable.roll + IndexVariable.rolling_window + IndexVariable.round + IndexVariable.searchsorted + IndexVariable.set_dims + IndexVariable.shift + IndexVariable.squeeze + IndexVariable.stack + IndexVariable.std + IndexVariable.sum + IndexVariable.to_base_variable + IndexVariable.to_coord + IndexVariable.to_dict + IndexVariable.to_index + IndexVariable.to_index_variable + IndexVariable.to_variable + IndexVariable.transpose + IndexVariable.unstack + IndexVariable.var + IndexVariable.where + IndexVariable.T + IndexVariable.attrs + IndexVariable.chunks + IndexVariable.data + IndexVariable.dims + IndexVariable.dtype + IndexVariable.encoding + IndexVariable.imag + IndexVariable.level_names + IndexVariable.name + IndexVariable.nbytes + IndexVariable.ndim + IndexVariable.real + IndexVariable.shape + IndexVariable.size + IndexVariable.sizes + IndexVariable.values + ufuncs.angle ufuncs.arccos ufuncs.arccosh @@ -156,6 +561,252 @@ plot.FacetGrid.set_ticks plot.FacetGrid.map + CFTimeIndex.all + CFTimeIndex.any + CFTimeIndex.append + CFTimeIndex.argmax + CFTimeIndex.argmin + CFTimeIndex.argsort + 
CFTimeIndex.asof + CFTimeIndex.asof_locs + CFTimeIndex.astype + CFTimeIndex.contains + CFTimeIndex.copy + CFTimeIndex.delete + CFTimeIndex.difference + CFTimeIndex.drop + CFTimeIndex.drop_duplicates + CFTimeIndex.droplevel + CFTimeIndex.dropna + CFTimeIndex.duplicated + CFTimeIndex.equals + CFTimeIndex.factorize + CFTimeIndex.fillna + CFTimeIndex.format + CFTimeIndex.get_duplicates + CFTimeIndex.get_indexer + CFTimeIndex.get_indexer_for + CFTimeIndex.get_indexer_non_unique + CFTimeIndex.get_level_values + CFTimeIndex.get_loc + CFTimeIndex.get_slice_bound + CFTimeIndex.get_value + CFTimeIndex.get_values + CFTimeIndex.groupby + CFTimeIndex.holds_integer + CFTimeIndex.identical + CFTimeIndex.insert + CFTimeIndex.intersection + CFTimeIndex.is_ + CFTimeIndex.is_boolean + CFTimeIndex.is_categorical + CFTimeIndex.is_floating + CFTimeIndex.is_integer + CFTimeIndex.is_interval + CFTimeIndex.is_lexsorted_for_tuple + CFTimeIndex.is_mixed + CFTimeIndex.is_numeric + CFTimeIndex.is_object + CFTimeIndex.is_type_compatible + CFTimeIndex.isin + CFTimeIndex.isna + CFTimeIndex.isnull + CFTimeIndex.item + CFTimeIndex.join + CFTimeIndex.map + CFTimeIndex.max + CFTimeIndex.memory_usage + CFTimeIndex.min + CFTimeIndex.notna + CFTimeIndex.notnull + CFTimeIndex.nunique + CFTimeIndex.putmask + CFTimeIndex.ravel + CFTimeIndex.reindex + CFTimeIndex.rename + CFTimeIndex.repeat + CFTimeIndex.searchsorted + CFTimeIndex.set_names + CFTimeIndex.set_value CFTimeIndex.shift - CFTimeIndex.to_datetimeindex + CFTimeIndex.slice_indexer + CFTimeIndex.slice_locs + CFTimeIndex.sort + CFTimeIndex.sort_values + CFTimeIndex.sortlevel CFTimeIndex.strftime + CFTimeIndex.summary + CFTimeIndex.symmetric_difference + CFTimeIndex.take + CFTimeIndex.to_datetimeindex + CFTimeIndex.to_flat_index + CFTimeIndex.to_frame + CFTimeIndex.to_list + CFTimeIndex.to_native_types + CFTimeIndex.to_numpy + CFTimeIndex.to_series + CFTimeIndex.tolist + CFTimeIndex.transpose + CFTimeIndex.union + CFTimeIndex.unique + CFTimeIndex.value_counts + CFTimeIndex.view + CFTimeIndex.where + + CFTimeIndex.T + CFTimeIndex.array + CFTimeIndex.asi8 + CFTimeIndex.base + CFTimeIndex.data + CFTimeIndex.date_type + CFTimeIndex.day + CFTimeIndex.dayofweek + CFTimeIndex.dayofyear + CFTimeIndex.dtype + CFTimeIndex.dtype_str + CFTimeIndex.empty + CFTimeIndex.flags + CFTimeIndex.has_duplicates + CFTimeIndex.hasnans + CFTimeIndex.hour + CFTimeIndex.inferred_type + CFTimeIndex.is_all_dates + CFTimeIndex.is_monotonic + CFTimeIndex.is_monotonic_increasing + CFTimeIndex.is_monotonic_decreasing + CFTimeIndex.is_unique + CFTimeIndex.itemsize + CFTimeIndex.microsecond + CFTimeIndex.minute + CFTimeIndex.month + CFTimeIndex.name + CFTimeIndex.names + CFTimeIndex.nbytes + CFTimeIndex.ndim + CFTimeIndex.nlevels + CFTimeIndex.second + CFTimeIndex.shape + CFTimeIndex.size + CFTimeIndex.strides + CFTimeIndex.values + CFTimeIndex.year + + backends.NetCDF4DataStore.close + backends.NetCDF4DataStore.encode + backends.NetCDF4DataStore.encode_attribute + backends.NetCDF4DataStore.encode_variable + backends.NetCDF4DataStore.get + backends.NetCDF4DataStore.get_attrs + backends.NetCDF4DataStore.get_dimensions + backends.NetCDF4DataStore.get_encoding + backends.NetCDF4DataStore.get_variables + backends.NetCDF4DataStore.items + backends.NetCDF4DataStore.keys + backends.NetCDF4DataStore.load + backends.NetCDF4DataStore.open + backends.NetCDF4DataStore.open_store_variable + backends.NetCDF4DataStore.prepare_variable + backends.NetCDF4DataStore.set_attribute + backends.NetCDF4DataStore.set_attributes + 
backends.NetCDF4DataStore.set_dimension + backends.NetCDF4DataStore.set_dimensions + backends.NetCDF4DataStore.set_variable + backends.NetCDF4DataStore.set_variables + backends.NetCDF4DataStore.store + backends.NetCDF4DataStore.store_dataset + backends.NetCDF4DataStore.sync + backends.NetCDF4DataStore.values + backends.NetCDF4DataStore.attrs + backends.NetCDF4DataStore.autoclose + backends.NetCDF4DataStore.dimensions + backends.NetCDF4DataStore.ds + backends.NetCDF4DataStore.format + backends.NetCDF4DataStore.is_remote + backends.NetCDF4DataStore.lock + backends.NetCDF4DataStore.variables + + backends.H5NetCDFStore.close + backends.H5NetCDFStore.encode + backends.H5NetCDFStore.encode_attribute + backends.H5NetCDFStore.encode_variable + backends.H5NetCDFStore.get + backends.H5NetCDFStore.get_attrs + backends.H5NetCDFStore.get_dimensions + backends.H5NetCDFStore.get_encoding + backends.H5NetCDFStore.get_variables + backends.H5NetCDFStore.items + backends.H5NetCDFStore.keys + backends.H5NetCDFStore.load + backends.H5NetCDFStore.open_store_variable + backends.H5NetCDFStore.prepare_variable + backends.H5NetCDFStore.set_attribute + backends.H5NetCDFStore.set_attributes + backends.H5NetCDFStore.set_dimension + backends.H5NetCDFStore.set_dimensions + backends.H5NetCDFStore.set_variable + backends.H5NetCDFStore.set_variables + backends.H5NetCDFStore.store + backends.H5NetCDFStore.store_dataset + backends.H5NetCDFStore.sync + backends.H5NetCDFStore.values + backends.H5NetCDFStore.attrs + backends.H5NetCDFStore.dimensions + backends.H5NetCDFStore.ds + backends.H5NetCDFStore.variables + + backends.PydapDataStore.close + backends.PydapDataStore.get + backends.PydapDataStore.get_attrs + backends.PydapDataStore.get_dimensions + backends.PydapDataStore.get_encoding + backends.PydapDataStore.get_variables + backends.PydapDataStore.items + backends.PydapDataStore.keys + backends.PydapDataStore.load + backends.PydapDataStore.open + backends.PydapDataStore.open_store_variable + backends.PydapDataStore.values + backends.PydapDataStore.attrs + backends.PydapDataStore.dimensions + backends.PydapDataStore.variables + + backends.ScipyDataStore.close + backends.ScipyDataStore.encode + backends.ScipyDataStore.encode_attribute + backends.ScipyDataStore.encode_variable + backends.ScipyDataStore.get + backends.ScipyDataStore.get_attrs + backends.ScipyDataStore.get_dimensions + backends.ScipyDataStore.get_encoding + backends.ScipyDataStore.get_variables + backends.ScipyDataStore.items + backends.ScipyDataStore.keys + backends.ScipyDataStore.load + backends.ScipyDataStore.open_store_variable + backends.ScipyDataStore.prepare_variable + backends.ScipyDataStore.set_attribute + backends.ScipyDataStore.set_attributes + backends.ScipyDataStore.set_dimension + backends.ScipyDataStore.set_dimensions + backends.ScipyDataStore.set_variable + backends.ScipyDataStore.set_variables + backends.ScipyDataStore.store + backends.ScipyDataStore.store_dataset + backends.ScipyDataStore.sync + backends.ScipyDataStore.values + backends.ScipyDataStore.attrs + backends.ScipyDataStore.dimensions + backends.ScipyDataStore.ds + backends.ScipyDataStore.variables + + backends.FileManager.acquire + backends.FileManager.acquire_context + backends.FileManager.close + + backends.CachingFileManager.acquire + backends.CachingFileManager.acquire_context + backends.CachingFileManager.close + + backends.DummyFileManager.acquire + backends.DummyFileManager.acquire_context + backends.DummyFileManager.close diff --git a/doc/api.rst b/doc/api.rst index 
8b523b7837c..d3491e020fd 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -31,6 +31,8 @@ Top-level functions ones_like dot map_blocks + show_versions + set_options Dataset ======= @@ -74,7 +76,9 @@ and values given by ``DataArray`` objects. Dataset.__setitem__ Dataset.__delitem__ Dataset.update + Dataset.get Dataset.items + Dataset.keys Dataset.values Dataset contents @@ -537,6 +541,15 @@ DataArray methods DataArray.unify_chunks DataArray.map_blocks +Coordinates objects +=================== + +.. autosummary:: + :toctree: generated/ + + core.coordinates.DataArrayCoordinates + core.coordinates.DatasetCoordinates + GroupBy objects =============== @@ -564,6 +577,16 @@ Rolling objects core.rolling.DatasetRolling.reduce core.rolling_exp.RollingExp +Coarsen objects +=============== + +.. autosummary:: + :toctree: generated/ + + core.rolling.DataArrayCoarsen + core.rolling.DatasetCoarsen + + Resample objects ================ @@ -625,6 +648,7 @@ Plotting plot.imshow plot.line plot.pcolormesh + plot.FacetGrid Faceting -------- @@ -644,6 +668,14 @@ Faceting plot.FacetGrid.set_xlabels plot.FacetGrid.set_ylabels +Tutorial +======== + +.. autosummary:: + :toctree: generated/ + + tutorial.open_dataset + tutorial.load_dataset Testing ======= @@ -681,7 +713,7 @@ Advanced API These backends provide a low-level interface for lazily loading data from external file-formats or protocols, and can be manually invoked to create -arguments for the ``from_store`` and ``dump_to_store`` Dataset methods: +arguments for the ``load_store`` and ``dump_to_store`` Dataset methods: .. autosummary:: :toctree: generated/ @@ -697,6 +729,9 @@ arguments for the ``from_store`` and ``dump_to_store`` Dataset methods: Deprecated / Pending Deprecation ================================ +.. autosummary:: + :toctree: generated/ + Dataset.drop DataArray.drop Dataset.apply diff --git a/doc/groupby.rst b/doc/groupby.rst index f5943703765..927e192eb6c 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -94,7 +94,7 @@ Apply ~~~~~ To apply a function to each group, you can use the flexible -:py:meth:`~xarray.DatasetGroupBy.map` method. The resulting objects are automatically +:py:meth:`~xarray.core.groupby.DatasetGroupBy.map` method. The resulting objects are automatically concatenated back together along the group axis: .. ipython:: python @@ -104,8 +104,8 @@ concatenated back together along the group axis: arr.groupby('letters').map(standardize) -GroupBy objects also have a :py:meth:`~xarray.DatasetGroupBy.reduce` method and -methods like :py:meth:`~xarray.DatasetGroupBy.mean` as shortcuts for applying an +GroupBy objects also have a :py:meth:`~xarray.core.groupby.DatasetGroupBy.reduce` method and +methods like :py:meth:`~xarray.core.groupby.DatasetGroupBy.mean` as shortcuts for applying an aggregation function: .. ipython:: python diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 91644ba2718..80266bd3b84 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -22,7 +22,7 @@ How do I ... * - change the order of dimensions - :py:meth:`DataArray.transpose`, :py:meth:`Dataset.transpose` * - remove a variable from my object - - :py:meth:`Dataset.drop`, :py:meth:`DataArray.drop` + - :py:meth:`Dataset.drop_vars`, :py:meth:`DataArray.drop_vars` * - remove dimensions of length 1 or 0 - :py:meth:`DataArray.squeeze`, :py:meth:`Dataset.squeeze` * - remove all variables with a particular dimension @@ -48,7 +48,7 @@ How do I ... 
* - write xarray objects with complex values to a netCDF file - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True`` * - make xarray objects look like other xarray objects - - :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interpolate_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interpolate_like`, :py:meth:`DataArray.broadcast_like` + - :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interp_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interp_like`, :py:meth:`DataArray.broadcast_like` * - replace NaNs with other values - :py:meth:`Dataset.fillna`, :py:meth:`Dataset.ffill`, :py:meth:`Dataset.bfill`, :py:meth:`Dataset.interpolate_na`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.interpolate_na` * - extract the year, month, day or similar from a DataArray of time values
diff --git a/doc/indexing.rst b/doc/indexing.rst index e8482ac66b3..cfbb84a8343 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst
@@ -132,7 +132,7 @@ use them explicitly to slice data. There are two ways to do this: The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, -Python :py:func:`slice` objects or 1-dimensional arrays. +Python :py:class:`slice` objects or 1-dimensional arrays. .. note::
diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 7c750506cf3..63e9a7cd35e 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst
@@ -48,7 +48,7 @@ array-like, which gives the interpolated result as an array. # interpolation da.interp(time=[2.5, 3.5]) -To interpolate data with a :py:func:`numpy.datetime64` coordinate you can pass a string. +To interpolate data with a :py:doc:`numpy.datetime64 ` coordinate you can pass a string. .. ipython:: python
@@ -128,7 +128,7 @@ It is now possible to safely compute the difference ``other - interpolated``. Interpolation methods --------------------- -We use :py:func:`scipy.interpolate.interp1d` for 1-dimensional interpolation and +We use :py:class:`scipy.interpolate.interp1d` for 1-dimensional interpolation and :py:func:`scipy.interpolate.interpn` for multi-dimensional interpolation. The interpolation method can be specified by the optional ``method`` argument.
diff --git a/doc/io.rst b/doc/io.rst index 2e50e5639da..e910943236f 100644 --- a/doc/io.rst +++ b/doc/io.rst
@@ -1,3 +1,4 @@ +.. currentmodule:: xarray .. _io: Reading and writing files =========================
@@ -23,8 +24,8 @@ netCDF The recommended way to store xarray data structures is `netCDF`__, which is a binary file format for self-described datasets that originated in the geosciences. xarray is based on the netCDF data model, so netCDF files -on disk directly correspond to :py:class:`~xarray.Dataset` objects (more accurately, -a group in a netCDF file directly corresponds to a to :py:class:`~xarray.Dataset` object. +on disk directly correspond to :py:class:`Dataset` objects (more accurately, +a group in a netCDF file directly corresponds to a :py:class:`Dataset` object. See :ref:`io.netcdf_groups` for more.)
NetCDF is supported on almost all platforms, and parsers exist @@ -47,7 +48,7 @@ read/write netCDF V4 files and use the compression options described below). __ https://github.com/Unidata/netcdf4-python We can save a Dataset to disk using the -:py:meth:`~Dataset.to_netcdf` method: +:py:meth:`Dataset.to_netcdf` method: .. ipython:: python @@ -65,13 +66,13 @@ the ``format`` and ``engine`` arguments. .. tip:: Using the `h5netcdf `_ package - by passing ``engine='h5netcdf'`` to :py:meth:`~xarray.open_dataset` can + by passing ``engine='h5netcdf'`` to :py:meth:`open_dataset` can sometimes be quicker than the default ``engine='netcdf4'`` that uses the `netCDF4 `_ package. We can load netCDF files to create a new Dataset using -:py:func:`~xarray.open_dataset`: +:py:func:`open_dataset`: .. ipython:: python @@ -79,9 +80,9 @@ We can load netCDF files to create a new Dataset using ds_disk Similarly, a DataArray can be saved to disk using the -:py:attr:`DataArray.to_netcdf ` method, and loaded -from disk using the :py:func:`~xarray.open_dataarray` function. As netCDF files -correspond to :py:class:`~xarray.Dataset` objects, these functions internally +:py:meth:`DataArray.to_netcdf` method, and loaded +from disk using the :py:func:`open_dataarray` function. As netCDF files +correspond to :py:class:`Dataset` objects, these functions internally convert the ``DataArray`` to a ``Dataset`` before saving, and then convert back when loading, ensuring that the ``DataArray`` that is loaded is always exactly the same as the one that was saved. @@ -108,9 +109,9 @@ is modified: the original file on disk is never touched. xarray's lazy loading of remote or on-disk datasets is often but not always desirable. Before performing computationally intense operations, it is often a good idea to load a Dataset (or DataArray) entirely into memory by - invoking the :py:meth:`~xarray.Dataset.load` method. + invoking the :py:meth:`Dataset.load` method. -Datasets have a :py:meth:`~xarray.Dataset.close` method to close the associated +Datasets have a :py:meth:`Dataset.close` method to close the associated netCDF file. However, it's often cleaner to use a ``with`` statement: .. ipython:: python @@ -135,17 +136,17 @@ to the original netCDF file, regardless if they exist in the original dataset. Groups ~~~~~~ -NetCDF groups are not supported as part of the :py:class:`~xarray.Dataset` data model. +NetCDF groups are not supported as part of the :py:class:`Dataset` data model. Instead, groups can be loaded individually as Dataset objects. To do so, pass a ``group`` keyword argument to the -:py:func:`~xarray.open_dataset` function. The group can be specified as a path-like +:py:func:`open_dataset` function. The group can be specified as a path-like string, e.g., to access subgroup ``'bar'`` within group ``'foo'`` pass ``'/foo/bar'`` as the ``group`` argument. In a similar way, the ``group`` keyword argument can be given to the -:py:meth:`~xarray.Dataset.to_netcdf` method to write to a group +:py:meth:`Dataset.to_netcdf` method to write to a group in a netCDF file. When writing multiple groups in one file, pass ``mode='a'`` to -:py:meth:`~xarray.Dataset.to_netcdf` to ensure that each call does not delete the file. +:py:meth:`Dataset.to_netcdf` to ensure that each call does not delete the file. .. 
_io.encoding: Reading encoded data -------------------- NetCDF files follow some conventions for encoding datetime arrays (as numbers with a "units" attribute) and for packing and unpacking data (as described by the "scale_factor" and "add_offset" attributes). If the argument -``decode_cf=True`` (default) is given to :py:func:`~xarray.open_dataset`, xarray will attempt +``decode_cf=True`` (default) is given to :py:func:`open_dataset`, xarray will attempt to automatically decode the values in the netCDF objects according to `CF conventions`_. Sometimes this will fail, for example, if a variable has an invalid "units" or "calendar" attribute. For these cases, you can turn this decoding off manually. .. _CF conventions: http://cfconventions.org/ You can view this encoding information (among others) in the -:py:attr:`DataArray.encoding ` and -:py:attr:`DataArray.encoding ` attributes: +:py:attr:`DataArray.encoding` and +:py:attr:`Dataset.encoding` attributes: .. ipython:: :verbatim:
@@ -206,13 +207,13 @@ Reading multi-file datasets NetCDF files are often encountered in collections, e.g., with different files corresponding to different model runs or one file per timestamp. xarray can straightforwardly combine such files into a single Dataset by making use of -:py:func:`~xarray.concat`, :py:func:`~xarray.merge`, :py:func:`~xarray.combine_nested` and -:py:func:`~xarray.combine_by_coords`. For details on the difference between these +:py:func:`concat`, :py:func:`merge`, :py:func:`combine_nested` and +:py:func:`combine_by_coords`. For details on the difference between these functions see :ref:`combining data`. Xarray includes support for manipulating datasets that don't fit into memory with dask_. If you have dask installed, you can open multiple files -simultaneously in parallel using :py:func:`~xarray.open_mfdataset`:: +simultaneously in parallel using :py:func:`open_mfdataset`:: xr.open_mfdataset('my/files/*.nc', parallel=True)
@@ -221,7 +222,7 @@ single xarray dataset. It is the recommended way to open multiple files with xarray. For more details on parallel reading, see :ref:`combining.multi`, :ref:`dask.io` and a `blog post`_ by Stephan Hoyer. -:py:func:`~xarray.open_mfdataset` takes many kwargs that allow you to +:py:func:`open_mfdataset` takes many kwargs that allow you to control its behaviour (e.g. ``parallel``, ``combine``, ``compat``, ``join``, ``concat_dim``). See its docstring for more details.
@@ -246,14 +247,14 @@ See its docstring for more details. .. _dask: http://dask.pydata.org .. _blog post: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ -Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`~xarray.open_mfdataset`. +Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`open_mfdataset`. One can use the ``preprocess`` argument to provide a function that takes a dataset and returns a modified Dataset. -:py:func:`~xarray.open_mfdataset` will call ``preprocess`` on every dataset +:py:func:`open_mfdataset` will call ``preprocess`` on every dataset (corresponding to each file) prior to combining them. -If :py:func:`~xarray.open_mfdataset` does not meet your needs, other approaches are possible. +If :py:func:`open_mfdataset` does not meet your needs, other approaches are possible.
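To make the ``preprocess`` hook described above concrete before turning to those other approaches, here is a minimal, hypothetical sketch (the file pattern and the ``tas`` variable name are invented for illustration)::

    import xarray as xr

    def keep_tas(ds):
        # keep only the "tas" variable from each per-file dataset
        return ds[["tas"]]

    # ``preprocess`` runs on each file's dataset before they are combined
    ds = xr.open_mfdataset(
        "my/files/*.nc", preprocess=keep_tas, combine="by_coords", parallel=True
    )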
The general pattern for parallel reading of multiple files using dask, modifying those datasets and then combining into a single ``Dataset`` is::
@@ -459,9 +460,9 @@ Invalid netCDF files The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't allowed in netCDF4 (see -`h5netcdf documentation `_. -This feature is availabe through :py:func:`DataArray.to_netcdf` and -:py:func:`Dataset.to_netcdf` when used with ``engine="h5netcdf"`` +`h5netcdf documentation `_). +This feature is available through :py:meth:`DataArray.to_netcdf` and +:py:meth:`Dataset.to_netcdf` when used with ``engine="h5netcdf"`` and currently raises a warning unless ``invalid_netcdf=True`` is set: .. ipython:: python
@@ -494,7 +495,7 @@ The Iris_ tool allows easy reading of common meteorological and climate model fo (including GRIB and UK MetOffice PP files) into ``Cube`` objects which are in many ways very similar to ``DataArray`` objects, while enforcing a CF-compliant data model. If iris is installed xarray can convert a ``DataArray`` into a ``Cube`` using -:py:meth:`~xarray.DataArray.to_iris`: +:py:meth:`DataArray.to_iris`: .. ipython:: python
@@ -506,7 +507,7 @@ installed xarray can convert a ``DataArray`` into a ``Cube`` using cube Conversely, we can create a new ``DataArray`` object from a ``Cube`` using -:py:meth:`~xarray.DataArray.from_iris`: +:py:meth:`DataArray.from_iris`: .. ipython:: python
@@ -608,7 +609,7 @@ over the network until we look at particular values: .. image:: _static/opendap-prism-tmax.png Some servers require authentication before we can access the data. For this -purpose we can explicitly create a :py:class:`~xarray.backends.PydapDataStore` +purpose we can explicitly create a :py:class:`backends.PydapDataStore` and pass in a `Requests`__ session object. For example for HTTP Basic authentication::
@@ -671,8 +672,8 @@ this version of xarray will work in future versions. When pickling an object opened from a NetCDF file, the pickle file will contain a reference to the file on disk. If you want to store the actual - array values, load it into memory first with :py:meth:`~xarray.Dataset.load` - or :py:meth:`~xarray.Dataset.compute`. + array values, load it into memory first with :py:meth:`Dataset.load` + or :py:meth:`Dataset.compute`. .. _dictionary io: Dictionary ---------- We can convert a ``Dataset`` (or a ``DataArray``) to a dict using -:py:meth:`~xarray.Dataset.to_dict`: +:py:meth:`Dataset.to_dict`: .. ipython:: python
@@ -688,7 +689,7 @@ We can convert a ``Dataset`` (or a ``DataArray``) to a dict using d We can create a new xarray object from a dict using -:py:meth:`~xarray.Dataset.from_dict`: +:py:meth:`Dataset.from_dict`: .. ipython:: python
@@ -723,7 +724,7 @@ Rasterio GeoTIFFs and other gridded raster datasets can be opened using `rasterio`_, if rasterio is installed. Here is an example of how to use -:py:func:`~xarray.open_rasterio` to read one of rasterio's `test files`_: +:py:func:`open_rasterio` to read one of rasterio's `test files`_: .. ipython:: :verbatim:
@@ -782,8 +783,7 @@ Xarray's Zarr backend allows xarray to leverage these capabilities. Xarray can't open just any zarr dataset, because xarray requires special metadata (attributes) describing the dataset dimensions and coordinates. At this time, xarray can only open zarr datasets that have been written by -xarray. To write a dataset with zarr, we use the -:py:attr:`Dataset.to_zarr ` method. +xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` method.
To write to a local directory, we pass a path to a directory .. ipython:: python @@ -830,7 +830,7 @@ can be omitted as it will internally be set to ``'a'``. To store variable length strings use ``dtype=object``. To read back a zarr dataset that has been created this way, we use the -:py:func:`~xarray.open_zarr` method: +:py:func:`open_zarr` method: .. ipython:: python @@ -899,12 +899,12 @@ opening the store. (For more information on this feature, consult the If you have zarr version 2.3 or greater, xarray can write and read stores with consolidated metadata. To write consolidated metadata, pass the ``consolidated=True`` option to the -:py:attr:`Dataset.to_zarr ` method:: +:py:attr:`Dataset.to_zarr` method:: ds.to_zarr('foo.zarr', consolidated=True) To read a consolidated store, pass the ``consolidated=True`` option to -:py:func:`~xarray.open_zarr`:: +:py:func:`open_zarr`:: ds = xr.open_zarr('foo.zarr', consolidated=True) @@ -926,7 +926,7 @@ GRIB format via cfgrib xarray supports reading GRIB files via ECMWF cfgrib_ python driver and ecCodes_ C-library, if they are installed. To open a GRIB file supply ``engine='cfgrib'`` -to :py:func:`~xarray.open_dataset`: +to :py:func:`open_dataset`: .. ipython:: :verbatim: @@ -948,7 +948,7 @@ Formats supported by PyNIO xarray can also read GRIB, HDF4 and other file formats supported by PyNIO_, if PyNIO is installed. To use PyNIO to read such files, supply -``engine='pynio'`` to :py:func:`~xarray.open_dataset`. +``engine='pynio'`` to :py:func:`open_dataset`. We recommend installing PyNIO via conda:: @@ -970,7 +970,7 @@ identify readers heuristically, or format can be specified via a key in `backend_kwargs`. To use PseudoNetCDF to read such files, supply -``engine='pseudonetcdf'`` to :py:func:`~xarray.open_dataset`. +``engine='pseudonetcdf'`` to :py:func:`open_dataset`. Add ``backend_kwargs={'format': ''}`` where `` options are listed on the PseudoNetCDF page. diff --git a/doc/pandas.rst b/doc/pandas.rst index 72abf6609f6..a84c89ab938 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -1,3 +1,4 @@ +.. currentmodule:: xarray .. _pandas: =================== @@ -32,9 +33,9 @@ Tabular data is easiest to work with when it meets the criteria for __ http://www.jstatsoft.org/v59/i10/ -In this "tidy data" format, we can represent any :py:class:`~xarray.Dataset` and -:py:class:`~xarray.DataArray` in terms of :py:class:`pandas.DataFrame` and -:py:class:`pandas.Series`, respectively (and vice-versa). The representation +In this "tidy data" format, we can represent any :py:class:`Dataset` and +:py:class:`DataArray` in terms of :py:class:`~pandas.DataFrame` and +:py:class:`~pandas.Series`, respectively (and vice-versa). The representation works by flattening non-coordinates to 1D, and turning the tensor product of coordinate indexes into a :py:class:`pandas.MultiIndex`. @@ -42,7 +43,7 @@ Dataset and DataFrame --------------------- To convert any dataset to a ``DataFrame`` in tidy form, use the -:py:meth:`Dataset.to_dataframe() ` method: +:py:meth:`Dataset.to_dataframe()` method: .. ipython:: python @@ -61,11 +62,11 @@ use ``DataFrame`` methods like :py:meth:`~pandas.DataFrame.reset_index`, :py:meth:`~pandas.DataFrame.stack` and :py:meth:`~pandas.DataFrame.unstack`. For datasets containing dask arrays where the data should be lazily loaded, see the -:py:meth:`Dataset.to_dask_dataframe() ` method. +:py:meth:`Dataset.to_dask_dataframe()` method. 
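A hedged sketch of the conversions described here and just below, using a tiny made-up dataset (not part of this patch)::

    import xarray as xr

    ds = xr.Dataset(
        {"foo": (("x", "y"), [[0.0, 1.0], [2.0, 3.0]])},
        coords={"x": [10, 20], "y": ["a", "b"]},
    )

    df = ds.to_dataframe()                     # tidy frame with a (x, y) MultiIndex
    roundtrip = xr.Dataset.from_dataframe(df)  # rebuilds dimensions from the index
    assert ds.equals(roundtrip)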
To create a ``Dataset`` from a ``DataFrame``, use the -:py:meth:`~xarray.Dataset.from_dataframe` class method or the equivalent -:py:meth:`pandas.DataFrame.to_xarray ` method: +:py:meth:`Dataset.from_dataframe` class method or the equivalent +:py:meth:`pandas.DataFrame.to_xarray` method: .. ipython:: python
@@ -83,7 +84,7 @@ DataArray and Series -------------------- ``DataArray`` objects have a complementary representation in terms of a -:py:class:`pandas.Series`. Using a Series preserves the ``Dataset`` to +:py:class:`~pandas.Series`. Using a Series preserves the ``Dataset`` to ``DataArray`` relationship, because ``DataFrames`` are dict-like containers of ``Series``. The methods are very similar to those for working with DataFrames:
@@ -109,7 +110,7 @@ Multi-dimensional data Tidy data is great, but sometimes you want to preserve dimensions instead of automatically stacking them into a ``MultiIndex``. -:py:meth:`DataArray.to_pandas() ` is a shortcut that +:py:meth:`DataArray.to_pandas()` is a shortcut that lets you convert a DataArray directly into a pandas object with the same dimensionality (i.e., a 1D array is converted to a :py:class:`~pandas.Series`, 2D to :py:class:`~pandas.DataFrame` and 3D to ``pandas.Panel``):
@@ -122,7 +123,7 @@ dimensionality (i.e., a 1D array is converted to a :py:class:`~pandas.Series`, df To perform the inverse operation of converting any pandas objects into a data -array with the same shape, simply use the :py:class:`~xarray.DataArray` +array with the same shape, simply use the :py:class:`DataArray` constructor: .. ipython:: python
@@ -143,7 +144,7 @@ preserve all use of multi-indexes: However, you will need to set dimension names explicitly, either with the ``dims`` argument in the ``DataArray`` constructor or by calling -:py:class:`~xarray.Dataset.rename` on the new object. +:py:class:`~Dataset.rename` on the new object. .. _panel transition:
diff --git a/doc/plotting.rst b/doc/plotting.rst index d77a170ce85..ea9816780a7 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst
@@ -1,3 +1,4 @@ +.. currentmodule:: xarray .. _plotting: Plotting ========
@@ -10,8 +11,8 @@ Labeled data enables expressive computations. These same labels can also be used to easily create informative plots. xarray's plotting capabilities are centered around -:py:class:`xarray.DataArray` objects. -To plot :py:class:`xarray.Dataset` objects +:py:class:`DataArray` objects. +To plot :py:class:`Dataset` objects simply access the relevant DataArrays, ie ``dset['var1']``. Dataset specific plotting routines are also available (see :ref:`plot-dataset`). Here we focus mostly on arrays 2d or larger. If your data fits
@@ -94,7 +95,7 @@ One Dimension Simple Example ================ -The simplest way to make a plot is to call the :py:func:`xarray.DataArray.plot()` method. +The simplest way to make a plot is to call the :py:func:`DataArray.plot()` method. .. ipython:: python
@@ -227,7 +228,7 @@ It is required to explicitly specify either Thus, we could have made the previous plot by specifying ``hue='lat'`` instead of ``x='time'``. If required, the automatic legend can be turned off using ``add_legend=False``. Alternatively, -``hue`` can be passed directly to :py:func:`xarray.plot` as `air.isel(lon=10, lat=[19,21,22]).plot(hue='lat')`. +``hue`` can be passed directly to :py:func:`xarray.plot.line` as `air.isel(lon=10, lat=[19,21,22]).plot.line(hue='lat')`. ========================
@@ -256,7 +257,7 @@ made using 1D data.
The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy -when plotting data grouped with :py:func:`xarray.Dataset.groupby_bins`. +when plotting data grouped with :py:meth:`Dataset.groupby_bins`. .. ipython:: python @@ -295,7 +296,7 @@ Two Dimensions Simple Example ================ -The default method :py:meth:`xarray.DataArray.plot` calls :py:func:`xarray.plot.pcolormesh` by default when the data is two-dimensional. +The default method :py:meth:`DataArray.plot` calls :py:func:`xarray.plot.pcolormesh` by default when the data is two-dimensional. .. ipython:: python @@ -573,7 +574,7 @@ Faceted plotting supports other arguments common to xarray 2d plots. FacetGrid Objects =================== -The object returned, ``g`` in the above examples, is a :py:class:`~xarray.plot.FacetGrid`` object +The object returned, ``g`` in the above examples, is a :py:class:`~xarray.plot.FacetGrid` object that links a :py:class:`DataArray` to a matplotlib figure with a particular structure. This object can be used to control the behavior of the multiple plots. It borrows an API and code from `Seaborn's FacetGrid @@ -612,11 +613,11 @@ they have been plotted. plt.draw() -:py:class:`~xarray.FacetGrid` objects have methods that let you customize the automatically generated +:py:class:`~xarray.plot.FacetGrid` objects have methods that let you customize the automatically generated axis labels, axis ticks and plot titles. See :py:meth:`~xarray.plot.FacetGrid.set_titles`, :py:meth:`~xarray.plot.FacetGrid.set_xlabels`, :py:meth:`~xarray.plot.FacetGrid.set_ylabels` and :py:meth:`~xarray.plot.FacetGrid.set_ticks` for more information. -Plotting functions can be applied to each subset of the data by calling :py:meth:`~xarray.plot.FacetGrid.map_dataarray` or to each subplot by calling :py:meth:`FacetGrid.map`. +Plotting functions can be applied to each subset of the data by calling :py:meth:`~xarray.plot.FacetGrid.map_dataarray` or to each subplot by calling :py:meth:`~xarray.plot.FacetGrid.map`. TODO: add an example of using the ``map`` method to plot dataset variables (e.g., with ``plt.quiver``). diff --git a/doc/terminology.rst b/doc/terminology.rst index d1265e4da9d..ab6d856920a 100644 --- a/doc/terminology.rst +++ b/doc/terminology.rst @@ -1,3 +1,4 @@ +.. currentmodule:: xarray .. _terminology: Terminology diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 44bff9e7202..d03cfb948fa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,8 +28,8 @@ New Features - :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile`` now work with dask Variables. By `Deepak Cherian `_. -- Added the :py:meth:`count` reduction method to both :py:class:`DatasetCoarsen` - and :py:class:`DataArrayCoarsen` objects. (:pull:`3500`) +- Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen` + and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`) By `Deepak Cherian `_ Bug fixes @@ -74,7 +74,7 @@ Internal Changes :py:meth:`DataArray.isel`, and :py:meth:`DataArray.__getitem__` when indexing by int, slice, list of int, scalar ndarray, or 1-dimensional ndarray. (:pull:`3533`) by `Guido Imperiale `_. -- Removed internal method ``Dataset._from_vars_and_coord_names``, +- Removed internal method ``Dataset._from_vars_and_coord_names``, which was dominated by ``Dataset._construct_direct``. 
(:pull:`3565`)
  By `Maximilian Roos `_

@@ -101,8 +101,8 @@ Breaking changes

 New Features
 ~~~~~~~~~~~~
-- Added the ``sparse`` option to :py:meth:`~xarray.DataArray.unstack`,
-  :py:meth:`~xarray.Dataset.unstack`, :py:meth:`~xarray.DataArray.reindex`,
+- Added the ``sparse`` option to :py:meth:`~xarray.DataArray.unstack`,
+  :py:meth:`~xarray.Dataset.unstack`, :py:meth:`~xarray.DataArray.reindex`,
   :py:meth:`~xarray.Dataset.reindex` (:issue:`3518`).
   By `Keisuke Fujii `_.
 - Added the ``fill_value`` option to :py:meth:`DataArray.unstack` and
@@ -112,13 +112,13 @@ New Features
   :py:meth:`~xarray.Dataset.interpolate_na`. This controls the maximum size of the data
   gap that will be filled by interpolation. By `Deepak Cherian `_.
 - Added :py:meth:`Dataset.drop_sel` & :py:meth:`DataArray.drop_sel` for dropping labels.
-  :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for
+  :py:meth:`Dataset.drop_vars` & :py:meth:`DataArray.drop_vars` have been added for
   dropping variables (including coordinates). The existing :py:meth:`Dataset.drop` &
   :py:meth:`DataArray.drop` methods remain as a backward compatible option for
   dropping either labels or variables, but using the more specific methods is encouraged.
   (:pull:`3475`)
   By `Maximilian Roos `_
-- Added :py:meth:`Dataset.map` & :py:meth:`GroupBy.map` & :py:meth:`Resample.map` for
+- Added :py:meth:`Dataset.map` & ``GroupBy.map`` & ``Resample.map`` for
   mapping / applying a function over each item in the collection, reflecting the widely used
   and least surprising name for this operation. The existing ``apply`` methods remain for
   backward compatibility, though using the ``map``
@@ -137,7 +137,7 @@ New Features
 - :py:func:`xarray.dot`, and :py:meth:`DataArray.dot` now support the
   ``dims=...`` option to sum over the union of dimensions of all input arrays
   (:issue:`3423`) by `Mathias Hauser `_.
-- Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve
+- Added new ``Dataset._repr_html_`` and ``DataArray._repr_html_`` to improve
   representation of objects in Jupyter. By default this feature is turned off
   for now. Enable it with ``xarray.set_options(display_style="html")``.
   (:pull:`3425`) by `Benoit Bovy `_ and
@@ -146,25 +146,25 @@ New Features
   `_ for xarray objects. Note that xarray objects
   with a dask.array backend already used deterministic hashing
   in previous releases; this change implements it when whole
-  xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map` is
+  xarray objects are embedded in a dask graph, e.g. when :py:meth:`DataArray.map_blocks` is
   invoked. (:issue:`3378`, :pull:`3446`, :pull:`3515`)
   By `Deepak Cherian `_ and
   `Guido Imperiale `_.
 - xarray now respects the ``DataArray.encoding["coordinates"]`` attribute when writing
   to disk. See :ref:`io.coordinates` for more. (:issue:`3351`, :pull:`3487`)
   By `Deepak Cherian `_.
-- Add the documented-but-missing :py:meth:`DatasetGroupBy.quantile`.
+- Add the documented-but-missing :py:meth:`~core.groupby.DatasetGroupBy.quantile`.
   (:issue:`3525`, :pull:`3527`). By `Justus Magin `_.

 Bug fixes
 ~~~~~~~~~
-- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when
+- Ensure an index of type ``CFTimeIndex`` is not converted to a ``DatetimeIndex`` when
   calling :py:meth:`Dataset.rename`, :py:meth:`Dataset.rename_dims` and :py:meth:`Dataset.rename_vars`.
   By `Mathias Hauser `_. (:issue:`3522`).
- Fix a bug in :py:meth:`DataArray.set_index` in case that an existing dimension becomes a level variable of MultiIndex. (:pull:`3520`). By `Keisuke Fujii `_. - Harmonize ``_FillValue``, ``missing_value`` during encoding and decoding steps. (:pull:`3502`) - By `Anderson Banihirwe `_. + By `Anderson Banihirwe `_. - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed but cloudpickle isn't (:issue:`3401`) by `Rhys Doyle `_ - Fix grouping over variables with NaNs. (:issue:`2383`, :pull:`3406`). @@ -179,7 +180,7 @@ Bug fixes - Rolling reduction operations no longer compute dask arrays by default. (:issue:`3161`). In addition, the ``allow_lazy`` kwarg to ``reduce`` is deprecated. By `Deepak Cherian `_. -- Fix :py:meth:`GroupBy.reduce` when reducing over multiple dimensions. +- Fix ``GroupBy.reduce`` when reducing over multiple dimensions. (:issue:`3402`). By `Deepak Cherian `_ - Allow appending datetime and bool data variables to zarr stores. (:issue:`3480`). By `Akihiro Matsukawa `_. @@ -229,7 +230,7 @@ Internal Changes - Enable type checking on default sentinel values (:pull:`3472`) By `Maximilian Roos `_ -- Add :py:meth:`Variable._replace` for simpler replacing of a subset of attributes (:pull:`3472`) +- Add ``Variable._replace`` for simpler replacing of a subset of attributes (:pull:`3472`) By `Maximilian Roos `_ .. _whats-new.0.14.0: @@ -285,7 +286,7 @@ New functions/methods Enhancements ~~~~~~~~~~~~ -- :py:class:`~xarray.core.GroupBy` enhancements. By `Deepak Cherian `_. +- ``core.groupby.GroupBy`` enhancements. By `Deepak Cherian `_. - Added a repr (:pull:`3344`). Example:: @@ -320,7 +321,7 @@ Bug fixes - Fix error in concatenating unlabeled dimensions (:pull:`3362`). By `Deepak Cherian `_. - Warn if the ``dim`` kwarg is passed to rolling operations. This is redundant since a dimension is - specified when the :py:class:`DatasetRolling` or :py:class:`DataArrayRolling` object is created. + specified when the :py:class:`~core.rolling.DatasetRolling` or :py:class:`~core.rolling.DataArrayRolling` object is created. (:pull:`3362`). By `Deepak Cherian `_. Documentation @@ -393,7 +394,7 @@ Breaking changes - Reindexing with variables of a different dimension now raise an error (previously deprecated) - ``xarray.broadcast_array`` is removed (previously deprecated in favor of :py:func:`~xarray.broadcast`) -- :py:meth:`Variable.expand_dims` is removed (previously deprecated in favor of +- ``Variable.expand_dims`` is removed (previously deprecated in favor of :py:meth:`Variable.set_dims`) New functions/methods @@ -478,8 +479,7 @@ Enhancements - ``xarray.Dataset.drop`` now supports keyword arguments; dropping index labels by using both ``dim`` and ``labels`` or using a - :py:class:`~xarray.core.coordinates.DataArrayCoordinates` object are - deprecated (:issue:`2910`). + :py:class:`~core.coordinates.DataArrayCoordinates` object are deprecated (:issue:`2910`). By `Gregory Gundersen `_. - Added examples of :py:meth:`Dataset.set_index` and @@ -627,7 +627,7 @@ New functions/methods By `Alan Brammer `_ and `Ryan May `_. -- :py:meth:`~xarray.core.GroupBy.quantile` is now a method of ``GroupBy`` +- ``GroupBy.quantile`` is now a method of ``GroupBy`` objects (:issue:`3018`). By `David Huard `_. @@ -1169,7 +1169,7 @@ Announcements of note: for more details. - We have a new :doc:`roadmap` that outlines our future development plans. -- `Dataset.apply` now properly documents the way `func` is called. +- ``Dataset.apply`` now properly documents the way `func` is called. 
By `Matti Eskelinen `_.

 Enhancements
@@ -1601,7 +1601,7 @@ Backwards incompatible changes
 Enhancements
 ~~~~~~~~~~~~

-- Added :py:func:`~xarray.dot`, equivalent to :py:func:`np.einsum`.
+- Added :py:func:`~xarray.dot`, equivalent to :py:func:`numpy.einsum`.
   Also, :py:func:`~xarray.DataArray.dot` now supports ``dims`` option,
   which specifies the dimensions to sum over.
   (:issue:`1951`)
@@ -1786,7 +1786,7 @@ Bug fixes
   coordinates of target, destination and keys. If there are any conflict among
   these coordinates, ``IndexError`` will be raised.
   By `Keisuke Fujii `_.
-- Properly point :py:meth:`DataArray.__dask_scheduler__` to
+- Properly point ``DataArray.__dask_scheduler__`` to
   ``dask.threaded.get``. By `Matthew Rocklin `_.
 - Bug fixes in :py:meth:`DataArray.plot.imshow`: all-NaN arrays and arrays
   with size one in some dimension can now be plotted, which is good for
@@ -1998,7 +1998,7 @@ Enhancements

 - Support for :py:class:`pathlib.Path` objects added to
   :py:func:`~xarray.open_dataset`, :py:func:`~xarray.open_mfdataset`,
-  :py:func:`~xarray.to_netcdf`, and :py:func:`~xarray.save_mfdataset`
+  ``xarray.to_netcdf``, and :py:func:`~xarray.save_mfdataset`
   (:issue:`799`):

 .. ipython::

@@ -2406,7 +2406,7 @@ Enhancements
   By `Stephan Hoyer `_ and
   `Phillip J. Wolfram `_.

-- New aggregation on rolling objects :py:meth:`DataArray.rolling(...).count()`
+- New aggregation on rolling objects :py:meth:`~core.rolling.DataArrayRolling.count`
   which provides a rolling count of valid values (:issue:`1138`).

 Bug fixes
diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py
index 8838e71e6ca..6a975b948eb 100644
--- a/xarray/core/accessor_str.py
+++ b/xarray/core/accessor_str.py
@@ -854,12 +854,10 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
         ----------
         pat : string or compiled regex
             String can be a character sequence or regular expression.
-
         repl : string or callable
             Replacement string or a callable. The callable is passed the regex
             match object and must return a replacement string to be used.
             See :func:`re.sub`.
-
         n : int, default -1 (all)
             Number of replacements to make from start
         case : boolean, default None
@@ -873,7 +871,7 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
           - If True, assumes the passed-in pattern is a regular expression.
           - If False, treats the pattern as a literal string
           - Cannot be set to False if `pat` is a compiled regex or `repl` is
-             a callable.
+            a callable.

         Returns
         -------
diff --git a/xarray/core/resample.py b/xarray/core/resample.py
index fb388490d06..2b3b7da6217 100644
--- a/xarray/core/resample.py
+++ b/xarray/core/resample.py
@@ -184,6 +184,7 @@ def map(self, func, shortcut=False, args=(), **kwargs):
         Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how
         to stack together the array. The rule is:
+
         1. If the dimension along which the group coordinate is defined is
            still in the first grouped array after applying `func`, then stack
            over this dimension.
@@ -196,11 +197,13 @@ def map(self, func, shortcut=False, args=(), **kwargs):
             Callable to apply to each array.
         shortcut : bool, optional
             Whether or not to shortcut evaluation under the assumptions that:
+
             (1) The action of `func` does not depend on any of the array
                 metadata (attributes or coordinates) but only on the data and
                 dimensions.
             (2) The action of `func` creates arrays with homogeneous metadata,
                 that is, with the same dimensions and attributes.
+
             If these conditions are satisfied `shortcut` provides significant
             speedup.
This should be the case for many common groupby operations (e.g., applying numpy ufuncs). @@ -275,6 +278,7 @@ def map(self, func, args=(), shortcut=None, **kwargs): Apply uses heuristics (like `pandas.GroupBy.apply`) to figure out how to stack together the datasets. The rule is: + 1. If the dimension along which the group coordinate is defined is still in the first grouped item after applying `func`, then stack over this dimension. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 4474d973f59..cb00c9dcfe0 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1696,6 +1696,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): This optional parameter specifies the interpolation method to use when the desired quantile lies between two data points ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` is the fractional part of the index surrounded by ``i`` and ``j``. @@ -1703,6 +1704,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. + keep_attrs : bool, optional If True, the variable's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 0d6d147f0bb..6a0e62cc9dc 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -78,6 +78,13 @@ def netcdf_and_hdf5_versions(): def show_versions(file=sys.stdout): + """ print the versions of xarray and its dependencies + + Parameters + ---------- + file : file-like, optional + print to the given file-like object. Defaults to sys.stdout. + """ sys_info = get_sys_info() try: From 471a5d6ce3eb10a521c8eebd3b97f72461405f19 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 18 Dec 2019 00:22:16 +0100 Subject: [PATCH 05/10] update readthedocs.yml (#3639) --- readthedocs.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/readthedocs.yml b/readthedocs.yml index c64fa1b7b02..9ed8d28eaf2 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,8 +1,13 @@ +version: 2 + build: image: latest + conda: - file: ci/requirements/doc.yml + environment: ci/requirements/doc.yml + python: version: 3.7 - setup_py_install: false + install: [] + formats: [] From 3cbc459caa010f9b5042d3fa312b66c9b2b6c403 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 18 Dec 2019 08:27:30 -0700 Subject: [PATCH 06/10] Extend DatetimeAccessor properties and support `.dt` accessor for Timedelta (#3612) * Support `.dt` accessor for Timedelta * Rename accessors * Use `is_np_timedelta_like` for consistency * Use `pd.timedelta_range` * Move shared method to Properties * Parametrize field access test * move `strftime()` to `DatetimeAccessor` * Update the documentation * Update `whats-new.rst` * Add PR reference * Parametrize tests * Extend DatetimeAccessor properties * Cleanup * Fix docstring --- doc/api.rst | 1 + doc/whats-new.rst | 4 + xarray/core/accessor_dt.py | 283 +++++++++++++++++++++++-------- xarray/core/common.py | 6 + xarray/core/dataarray.py | 4 +- xarray/tests/test_accessor_dt.py | 259 +++++++++++++++++++++------- 6 files changed, 418 insertions(+), 139 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index d3491e020fd..9735eb0c708 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -616,6 +616,7 @@ Accessors :toctree: generated/ core.accessor_dt.DatetimeAccessor + core.accessor_dt.TimedeltaAccessor 
core.accessor_str.StringAccessor

 Custom Indexes
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index d03cfb948fa..fe05a4d2c21 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -31,6 +31,10 @@ New Features
 - Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen`
   and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`)
   By `Deepak Cherian `_
+- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties
+  and support the ``.dt`` accessor for timedelta data
+  via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`)
+  By `Anderson Banihirwe `_.

 Bug fixes
 ~~~~~~~~~
diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py
index aff6fbc6691..c407371f9f0 100644
--- a/xarray/core/accessor_dt.py
+++ b/xarray/core/accessor_dt.py
@@ -1,7 +1,11 @@
 import numpy as np
 import pandas as pd

-from .common import _contains_datetime_like_objects, is_np_datetime_like
+from .common import (
+    _contains_datetime_like_objects,
+    is_np_datetime_like,
+    is_np_timedelta_like,
+)
 from .pycompat import dask_array_type


@@ -145,37 +149,8 @@ def _strftime(values, date_format):
     return access_method(values, date_format)


-class DatetimeAccessor:
-    """Access datetime fields for DataArrays with datetime-like dtypes.
-
-    Similar to pandas, fields can be accessed through the `.dt` attribute
-    for applicable DataArrays:
-
-    >>> ds = xarray.Dataset({'time': pd.date_range(start='2000/01/01',
-    ...                                            freq='D', periods=100)})
-    >>> ds.time.dt
-    
-    >>> ds.time.dt.dayofyear[:5]
-    
-    array([1, 2, 3, 4, 5], dtype=int32)
-    Coordinates:
-      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...
-
-    All of the pandas fields are accessible here. Note that these fields are
-    not calendar-aware; if your datetimes are encoded with a non-Gregorian
-    calendar (e.g. a 360-day calendar) using cftime, then some fields like
-    `dayofyear` may not be accurate.
-
-    """
-
+class Properties:
     def __init__(self, obj):
-        if not _contains_datetime_like_objects(obj):
-            raise TypeError(
-                "'dt' accessor only available for "
-                "DataArray with datetime64 timedelta64 dtype or "
-                "for arrays containing cftime datetime "
-                "objects."
- ) self._obj = obj def _tslib_field_accessor( # type: ignore @@ -194,48 +169,6 @@ def f(self, dtype=dtype): f.__doc__ = docstring return property(f) - year = _tslib_field_accessor("year", "The year of the datetime", np.int64) - month = _tslib_field_accessor( - "month", "The month as January=1, December=12", np.int64 - ) - day = _tslib_field_accessor("day", "The days of the datetime", np.int64) - hour = _tslib_field_accessor("hour", "The hours of the datetime", np.int64) - minute = _tslib_field_accessor("minute", "The minutes of the datetime", np.int64) - second = _tslib_field_accessor("second", "The seconds of the datetime", np.int64) - microsecond = _tslib_field_accessor( - "microsecond", "The microseconds of the datetime", np.int64 - ) - nanosecond = _tslib_field_accessor( - "nanosecond", "The nanoseconds of the datetime", np.int64 - ) - weekofyear = _tslib_field_accessor( - "weekofyear", "The week ordinal of the year", np.int64 - ) - week = weekofyear - dayofweek = _tslib_field_accessor( - "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64 - ) - weekday = dayofweek - - weekday_name = _tslib_field_accessor( - "weekday_name", "The name of day in a week (ex: Friday)", object - ) - - dayofyear = _tslib_field_accessor( - "dayofyear", "The ordinal day of the year", np.int64 - ) - quarter = _tslib_field_accessor("quarter", "The quarter of the date") - days_in_month = _tslib_field_accessor( - "days_in_month", "The number of days in the month", np.int64 - ) - daysinmonth = days_in_month - - season = _tslib_field_accessor("season", "Season of the year (ex: DJF)", object) - - time = _tslib_field_accessor( - "time", "Timestamps corresponding to datetimes", object - ) - def _tslib_round_accessor(self, name, freq): obj_type = type(self._obj) result = _round_field(self._obj.data, name, freq) @@ -290,6 +223,50 @@ def round(self, freq): """ return self._tslib_round_accessor("round", freq) + +class DatetimeAccessor(Properties): + """Access datetime fields for DataArrays with datetime-like dtypes. + + Fields can be accessed through the `.dt` attribute + for applicable DataArrays. + + Notes + ------ + Note that these fields are not calendar-aware; if your datetimes are encoded + with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime, + then some fields like `dayofyear` may not be accurate. + + Examples + --------- + >>> import xarray as xr + >>> import pandas as pd + >>> dates = pd.date_range(start='2000/01/01', freq='D', periods=10) + >>> ts = xr.DataArray(dates, dims=('time')) + >>> ts + + array(['2000-01-01T00:00:00.000000000', '2000-01-02T00:00:00.000000000', + '2000-01-03T00:00:00.000000000', '2000-01-04T00:00:00.000000000', + '2000-01-05T00:00:00.000000000', '2000-01-06T00:00:00.000000000', + '2000-01-07T00:00:00.000000000', '2000-01-08T00:00:00.000000000', + '2000-01-09T00:00:00.000000000', '2000-01-10T00:00:00.000000000'], + dtype='datetime64[ns]') + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10 + >>> ts.dt + + >>> ts.dt.dayofyear + + array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 2000-01-10 + >>> ts.dt.quarter + + array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) + Coordinates: + * time (time) datetime64[ns] 2000-01-01 2000-01-02 ... 
2000-01-10 + + """ + def strftime(self, date_format): ''' Return an array of formatted strings specified by date_format, which @@ -323,3 +300,163 @@ def strftime(self, date_format): return obj_type( result, name="strftime", coords=self._obj.coords, dims=self._obj.dims ) + + year = Properties._tslib_field_accessor( + "year", "The year of the datetime", np.int64 + ) + month = Properties._tslib_field_accessor( + "month", "The month as January=1, December=12", np.int64 + ) + day = Properties._tslib_field_accessor("day", "The days of the datetime", np.int64) + hour = Properties._tslib_field_accessor( + "hour", "The hours of the datetime", np.int64 + ) + minute = Properties._tslib_field_accessor( + "minute", "The minutes of the datetime", np.int64 + ) + second = Properties._tslib_field_accessor( + "second", "The seconds of the datetime", np.int64 + ) + microsecond = Properties._tslib_field_accessor( + "microsecond", "The microseconds of the datetime", np.int64 + ) + nanosecond = Properties._tslib_field_accessor( + "nanosecond", "The nanoseconds of the datetime", np.int64 + ) + weekofyear = Properties._tslib_field_accessor( + "weekofyear", "The week ordinal of the year", np.int64 + ) + week = weekofyear + dayofweek = Properties._tslib_field_accessor( + "dayofweek", "The day of the week with Monday=0, Sunday=6", np.int64 + ) + weekday = dayofweek + + weekday_name = Properties._tslib_field_accessor( + "weekday_name", "The name of day in a week", object + ) + + dayofyear = Properties._tslib_field_accessor( + "dayofyear", "The ordinal day of the year", np.int64 + ) + quarter = Properties._tslib_field_accessor("quarter", "The quarter of the date") + days_in_month = Properties._tslib_field_accessor( + "days_in_month", "The number of days in the month", np.int64 + ) + daysinmonth = days_in_month + + season = Properties._tslib_field_accessor("season", "Season of the year", object) + + time = Properties._tslib_field_accessor( + "time", "Timestamps corresponding to datetimes", object + ) + + is_month_start = Properties._tslib_field_accessor( + "is_month_start", + "Indicates whether the date is the first day of the month.", + bool, + ) + is_month_end = Properties._tslib_field_accessor( + "is_month_end", "Indicates whether the date is the last day of the month.", bool + ) + is_quarter_start = Properties._tslib_field_accessor( + "is_quarter_start", + "Indicator for whether the date is the first day of a quarter.", + bool, + ) + is_quarter_end = Properties._tslib_field_accessor( + "is_quarter_end", + "Indicator for whether the date is the last day of a quarter.", + bool, + ) + is_year_start = Properties._tslib_field_accessor( + "is_year_start", "Indicate whether the date is the first day of a year.", bool + ) + is_year_end = Properties._tslib_field_accessor( + "is_year_end", "Indicate whether the date is the last day of the year.", bool + ) + is_leap_year = Properties._tslib_field_accessor( + "is_leap_year", "Boolean indicator if the date belongs to a leap year.", bool + ) + + +class TimedeltaAccessor(Properties): + """Access Timedelta fields for DataArrays with Timedelta-like dtypes. + + Fields can be accessed through the `.dt` attribute for applicable DataArrays. 
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> import xarray as xr
+    >>> dates = pd.timedelta_range(start="1 day", freq="6H", periods=20)
+    >>> ts = xr.DataArray(dates, dims=('time'))
+    >>> ts
+    
+    array([ 86400000000000, 108000000000000, 129600000000000, 151200000000000,
+            172800000000000, 194400000000000, 216000000000000, 237600000000000,
+            259200000000000, 280800000000000, 302400000000000, 324000000000000,
+            345600000000000, 367200000000000, 388800000000000, 410400000000000,
+            432000000000000, 453600000000000, 475200000000000, 496800000000000],
+          dtype='timedelta64[ns]')
+    Coordinates:
+      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    >>> ts.dt
+    
+    >>> ts.dt.days
+    
+    array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5])
+    Coordinates:
+      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    >>> ts.dt.microseconds
+    
+    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+    Coordinates:
+      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    >>> ts.dt.seconds
+    
+    array([    0, 21600, 43200, 64800,     0, 21600, 43200, 64800,     0,
+           21600, 43200, 64800,     0, 21600, 43200, 64800,     0, 21600,
+           43200, 64800])
+    Coordinates:
+      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    """
+
+    days = Properties._tslib_field_accessor(
+        "days", "Number of days for each element.", np.int64
+    )
+    seconds = Properties._tslib_field_accessor(
+        "seconds",
+        "Number of seconds (>= 0 and less than 1 day) for each element.",
+        np.int64,
+    )
+    microseconds = Properties._tslib_field_accessor(
+        "microseconds",
+        "Number of microseconds (>= 0 and less than 1 second) for each element.",
+        np.int64,
+    )
+    nanoseconds = Properties._tslib_field_accessor(
+        "nanoseconds",
+        "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.",
+        np.int64,
+    )
+
+
+class CombinedDatetimelikeAccessor(DatetimeAccessor, TimedeltaAccessor):
+    def __new__(cls, obj):
+        # CombinedDatetimelikeAccessor isn't really instantiated. Instead
+        # we need to choose which parent (datetime or timedelta) is
+        # appropriate. Since we're checking the dtypes anyway, we'll just
+        # do all the validation here.
+        if not _contains_datetime_like_objects(obj):
+            raise TypeError(
+                "'.dt' accessor only available for "
+                "DataArray with datetime64 or timedelta64 dtype or "
+                "for arrays containing cftime datetime "
+                "objects."
+            )
+
+        if is_np_timedelta_like(obj.dtype):
+            return TimedeltaAccessor(obj)
+        else:
+            return DatetimeAccessor(obj)
diff --git a/xarray/core/common.py b/xarray/core/common.py
index a74318b2f90..e908c69dd14 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -1447,6 +1447,12 @@ def is_np_datetime_like(dtype: DTypeLike) -> bool:
     return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)


+def is_np_timedelta_like(dtype: DTypeLike) -> bool:
+    """Check whether dtype is of the timedelta64 dtype.
+ """ + return np.issubdtype(dtype, np.timedelta64) + + def _contains_cftime_datetimes(array) -> bool: """Check if an array contains cftime.datetime objects """ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 31cd3c713f6..31aa4da57b2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -33,7 +33,7 @@ rolling, utils, ) -from .accessor_dt import DatetimeAccessor +from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor from .alignment import ( _broadcast_helper, @@ -258,7 +258,7 @@ class DataArray(AbstractArray, DataWithCoords): _coarsen_cls = rolling.DataArrayCoarsen _resample_cls = resample.DataArrayResample - dt = property(DatetimeAccessor) + dt = property(CombinedDatetimelikeAccessor) def __init__( self, diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 5fe5b8c3f59..67ca12532c7 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -12,6 +12,8 @@ requires_dask, ) +from .test_dask import raise_if_dask_computes, assert_chunks_equal + class TestDatetimeAccessor: @pytest.fixture(autouse=True) @@ -37,24 +39,38 @@ def setup(self): name="data", ) - def test_field_access(self): - years = xr.DataArray( - self.times.year, name="year", coords=[self.times], dims=["time"] - ) - months = xr.DataArray( - self.times.month, name="month", coords=[self.times], dims=["time"] - ) - days = xr.DataArray( - self.times.day, name="day", coords=[self.times], dims=["time"] - ) - hours = xr.DataArray( - self.times.hour, name="hour", coords=[self.times], dims=["time"] + @pytest.mark.parametrize( + "field", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "week", + "weekofyear", + "dayofweek", + "weekday", + "dayofyear", + "quarter", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + ], + ) + def test_field_access(self, field): + expected = xr.DataArray( + getattr(self.times, field), name=field, coords=[self.times], dims=["time"] ) - - assert_equal(years, self.data.time.dt.year) - assert_equal(months, self.data.time.dt.month) - assert_equal(days, self.data.time.dt.day) - assert_equal(hours, self.data.time.dt.hour) + actual = getattr(self.data.time.dt, field) + assert_equal(expected, actual) def test_strftime(self): assert ( @@ -69,55 +85,74 @@ def test_not_datetime_type(self): nontime_data.time.dt @requires_dask - def test_dask_field_access(self): + @pytest.mark.parametrize( + "field", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + "week", + "weekofyear", + "dayofweek", + "weekday", + "dayofyear", + "quarter", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + "is_leap_year", + ], + ) + def test_dask_field_access(self, field): import dask.array as da - years = self.times_data.dt.year - months = self.times_data.dt.month - hours = self.times_data.dt.hour - days = self.times_data.dt.day - floor = self.times_data.dt.floor("D") - ceil = self.times_data.dt.ceil("D") - round = self.times_data.dt.round("D") - strftime = self.times_data.dt.strftime("%Y-%m-%d %H:%M:%S") + expected = getattr(self.times_data.dt, field) + + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with 
raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, field) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) + + @requires_dask + @pytest.mark.parametrize( + "method, parameters", + [ + ("floor", "D"), + ("ceil", "D"), + ("round", "D"), + ("strftime", "%Y-%m-%d %H:%M:%S"), + ], + ) + def test_dask_accessor_method(self, method, parameters): + import dask.array as da + expected = getattr(self.times_data.dt, method)(parameters) dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) dask_times_2d = xr.DataArray( dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" ) - dask_year = dask_times_2d.dt.year - dask_month = dask_times_2d.dt.month - dask_day = dask_times_2d.dt.day - dask_hour = dask_times_2d.dt.hour - dask_floor = dask_times_2d.dt.floor("D") - dask_ceil = dask_times_2d.dt.ceil("D") - dask_round = dask_times_2d.dt.round("D") - dask_strftime = dask_times_2d.dt.strftime("%Y-%m-%d %H:%M:%S") - - # Test that the data isn't eagerly evaluated - assert isinstance(dask_year.data, da.Array) - assert isinstance(dask_month.data, da.Array) - assert isinstance(dask_day.data, da.Array) - assert isinstance(dask_hour.data, da.Array) - assert isinstance(dask_strftime.data, da.Array) - - # Double check that outcome chunksize is unchanged - dask_chunks = dask_times_2d.chunks - assert dask_year.data.chunks == dask_chunks - assert dask_month.data.chunks == dask_chunks - assert dask_day.data.chunks == dask_chunks - assert dask_hour.data.chunks == dask_chunks - assert dask_strftime.data.chunks == dask_chunks - - # Check the actual output from the accessors - assert_equal(years, dask_year.compute()) - assert_equal(months, dask_month.compute()) - assert_equal(days, dask_day.compute()) - assert_equal(hours, dask_hour.compute()) - assert_equal(floor, dask_floor.compute()) - assert_equal(ceil, dask_ceil.compute()) - assert_equal(round, dask_round.compute()) - assert_equal(strftime, dask_strftime.compute()) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, method)(parameters) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) def test_seasons(self): dates = pd.date_range(start="2000/01/01", freq="M", periods=12) @@ -140,12 +175,108 @@ def test_seasons(self): assert_array_equal(seasons.values, dates.dt.season.values) - def test_rounders(self): + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_accessor_method(self, method, parameters): dates = pd.date_range("2014-01-01", "2014-05-01", freq="H") - xdates = xr.DataArray(np.arange(len(dates)), dims=["time"], coords=[dates]) - assert_array_equal(dates.floor("D").values, xdates.time.dt.floor("D").values) - assert_array_equal(dates.ceil("D").values, xdates.time.dt.ceil("D").values) - assert_array_equal(dates.round("D").values, xdates.time.dt.round("D").values) + xdates = xr.DataArray(dates, dims=["time"]) + expected = getattr(dates, method)(parameters) + actual = getattr(xdates.dt, method)(parameters) + assert_array_equal(expected, actual) + + +class TestTimedeltaAccessor: + @pytest.fixture(autouse=True) + def setup(self): + nt = 100 + data = np.random.rand(10, 10, nt) + lons = np.linspace(0, 11, 10) + lats = np.linspace(0, 20, 10) + self.times = pd.timedelta_range(start="1 day", freq="6H", periods=nt) + + self.data = xr.DataArray( + data, + coords=[lons, lats, 
self.times], + dims=["lon", "lat", "time"], + name="data", + ) + + self.times_arr = np.random.choice(self.times, size=(10, 10, nt)) + self.times_data = xr.DataArray( + self.times_arr, + coords=[lons, lats, self.times], + dims=["lon", "lat", "time"], + name="data", + ) + + def test_not_datetime_type(self): + nontime_data = self.data.copy() + int_data = np.arange(len(self.data.time)).astype("int8") + nontime_data["time"].values = int_data + with raises_regex(TypeError, "dt"): + nontime_data.time.dt + + @pytest.mark.parametrize( + "field", ["days", "seconds", "microseconds", "nanoseconds"] + ) + def test_field_access(self, field): + expected = xr.DataArray( + getattr(self.times, field), name=field, coords=[self.times], dims=["time"] + ) + actual = getattr(self.data.time.dt, field) + assert_equal(expected, actual) + + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_accessor_methods(self, method, parameters): + dates = pd.timedelta_range(start="1 day", end="30 days", freq="6H") + xdates = xr.DataArray(dates, dims=["time"]) + expected = getattr(dates, method)(parameters) + actual = getattr(xdates.dt, method)(parameters) + assert_array_equal(expected, actual) + + @requires_dask + @pytest.mark.parametrize( + "field", ["days", "seconds", "microseconds", "nanoseconds"] + ) + def test_dask_field_access(self, field): + import dask.array as da + + expected = getattr(self.times_data.dt, field) + + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, field) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual, expected) + + @requires_dask + @pytest.mark.parametrize( + "method, parameters", [("floor", "D"), ("ceil", "D"), ("round", "D")] + ) + def test_dask_accessor_method(self, method, parameters): + import dask.array as da + + expected = getattr(self.times_data.dt, method)(parameters) + dask_times_arr = da.from_array(self.times_arr, chunks=(5, 5, 50)) + dask_times_2d = xr.DataArray( + dask_times_arr, coords=self.data.coords, dims=self.data.dims, name="data" + ) + + with raise_if_dask_computes(): + actual = getattr(dask_times_2d.dt, method)(parameters) + + assert isinstance(actual.data, da.Array) + assert_chunks_equal(actual, dask_times_2d) + assert_equal(actual.compute(), expected.compute()) _CFTIME_CALENDARS = [ From aaf37381b38310cb11311cac26515b1ecbcf09c1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 24 Dec 2019 13:37:03 +0000 Subject: [PATCH 07/10] concat keeps attrs from first variable. (#3637) * concat keep attrs from first variable. Fixes #2060 Fixes #2575 --- doc/whats-new.rst | 3 +++ xarray/core/concat.py | 6 ++++-- xarray/core/variable.py | 5 +++-- xarray/tests/test_concat.py | 13 +++++++++++++ xarray/tests/test_variable.py | 8 ++------ 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fe05a4d2c21..69f9fd7f02c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ Breaking changes New Features ~~~~~~~~~~~~ +- :py:func:`xarray.concat` now preserves attributes from the first Variable. + (:issue:`2575`, :issue:`2060`, :issue:`1614`) + By `Deepak Cherian `_. - :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile` and ``GroupBy.quantile`` now work with dask Variables. By `Deepak Cherian `_. 
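The effect of this change is easiest to see with a small example. The sketch below is illustrative only and is not part of the patch; the arrays and attribute values are made up, and it mirrors the ``test_concat_attrs_first_variable`` test added further down::

    import xarray as xr

    a = xr.DataArray([[1], [2]], dims=["x", "y"], attrs={"units": "K"})
    b = xr.DataArray([[3], [4]], dims=["x", "y"], attrs={"units": "degC"})

    # After this patch, the concatenated result keeps the attrs of the
    # first object; previously, conflicting attrs were silently dropped.
    combined = xr.concat([a, b], dim="y")
    assert combined.attrs == {"units": "K"}
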
diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 5ccbfa3f2b4..302f7afcec6 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -93,12 +93,14 @@ def concat( those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - indexers, mode, concat_over : deprecated - Returns ------- concatenated : type of objs + Notes + ----- + Each concatenated Variable preserves corresponding ``attrs`` from the first element of ``objs``. + See also -------- merge diff --git a/xarray/core/variable.py b/xarray/core/variable.py index cb00c9dcfe0..0a9d0767b77 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1625,8 +1625,9 @@ def concat(cls, variables, dim="concat_dim", positions=None, shortcut=False): if not shortcut: for var in variables: if var.dims != first_var.dims: - raise ValueError("inconsistent dimensions") - utils.remove_incompatible_items(attrs, var.attrs) + raise ValueError( + f"Variable has dimensions {list(var.dims)} but first Variable has dimensions {list(first_var.dims)}" + ) return cls(dims, data, attrs, encoding) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 0661ebb7a38..def5abc942f 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -462,3 +462,16 @@ def test_concat_join_kwarg(self): for join in expected: actual = concat([ds1, ds2], join=join, dim="x") assert_equal(actual, expected[join].to_array()) + + +@pytest.mark.parametrize("attr1", ({"a": {"meta": [10, 20, 30]}}, {"a": [1, 2, 3]}, {})) +@pytest.mark.parametrize("attr2", ({"a": [1, 2, 3]}, {})) +def test_concat_attrs_first_variable(attr1, attr2): + + arrs = [ + DataArray([[1], [2]], dims=["x", "y"], attrs=attr1), + DataArray([[3], [4]], dims=["x", "y"], attrs=attr2), + ] + + concat_attrs = concat(arrs, "y").attrs + assert concat_attrs == attr1 diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 49a6906d5be..62fde920b1e 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -432,7 +432,7 @@ def test_concat(self): assert_identical( Variable(["b", "a"], np.array([x, y])), Variable.concat((v, w), "b") ) - with raises_regex(ValueError, "inconsistent dimensions"): + with raises_regex(ValueError, "Variable has dimensions"): Variable.concat([v, Variable(["c"], y)], "b") # test indexers actual = Variable.concat( @@ -451,16 +451,12 @@ def test_concat(self): Variable.concat([v[:, 0], v[:, 1:]], "x") def test_concat_attrs(self): - # different or conflicting attributes should be removed + # always keep attrs from first variable v = self.cls("a", np.arange(5), {"foo": "bar"}) w = self.cls("a", np.ones(5)) expected = self.cls( "a", np.concatenate([np.arange(5), np.ones(5)]) ).to_base_variable() - assert_identical(expected, Variable.concat([v, w], "a")) - w.attrs["foo"] = 2 - assert_identical(expected, Variable.concat([v, w], "a")) - w.attrs["foo"] = "bar" expected.attrs["foo"] = "bar" assert_identical(expected, Variable.concat([v, w], "a")) From 651f27fd5176674da315501026dc18a03b575a76 Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Tue, 24 Dec 2019 05:46:28 -0800 Subject: [PATCH 08/10] Allow incomplete hypercubes in combine_by_coords (#3649) * allow incomplete hypercubes in combine_by_coords * If fill_value=None then still requires complete hypercube * Closes #3648 --- doc/whats-new.rst | 3 +++ xarray/core/combine.py | 36 +++++++++++++++++++++++++++++------- xarray/tests/test_combine.py | 16 ++++++++++++++++ 3 files changed, 48 
insertions(+), 7 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 69f9fd7f02c..f08fad12bba 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,6 +41,9 @@ New Features Bug fixes ~~~~~~~~~ +- Fix :py:meth:`xarray.combine_by_coords` to allow for combining incomplete + hypercubes of Datasets (:issue:`3648`). By `Ian Bolliger + `_. - Fix :py:meth:`xarray.combine_by_coords` when combining cftime coordinates which span long time intervals (:issue:`3535`). By `Spencer Clark `_. diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 65087b05cc0..3f6e0e79351 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -115,11 +115,12 @@ def _infer_concat_order_from_coords(datasets): return combined_ids, concat_dims -def _check_shape_tile_ids(combined_tile_ids): +def _check_dimension_depth_tile_ids(combined_tile_ids): + """ + Check all tuples are the same length, i.e. check that all lists are + nested to the same depth. + """ tile_ids = combined_tile_ids.keys() - - # Check all tuples are the same length - # i.e. check that all lists are nested to the same depth nesting_depths = [len(tile_id) for tile_id in tile_ids] if not nesting_depths: nesting_depths = [0] @@ -128,8 +129,13 @@ def _check_shape_tile_ids(combined_tile_ids): "The supplied objects do not form a hypercube because" " sub-lists do not have consistent depths" ) + # return these just to be reused in _check_shape_tile_ids + return tile_ids, nesting_depths - # Check all lists along one dimension are same length + +def _check_shape_tile_ids(combined_tile_ids): + """Check all lists along one dimension are same length.""" + tile_ids, nesting_depths = _check_dimension_depth_tile_ids(combined_tile_ids) for dim in range(nesting_depths[0]): indices_along_dim = [tile_id[dim] for tile_id in tile_ids] occurrences = Counter(indices_along_dim) @@ -536,7 +542,8 @@ def combine_by_coords( coords : {'minimal', 'different', 'all' or list of str}, optional As per the 'data_vars' kwarg, but for coordinate variables. fill_value : scalar, optional - Value to use for newly missing values + Value to use for newly missing values. If None, raises a ValueError if + the passed Datasets do not create a complete hypercube. join : {'outer', 'inner', 'left', 'right', 'exact'}, optional String indicating how to combine differing indexes (excluding concat_dim) in objects @@ -653,6 +660,15 @@ def combine_by_coords( temperature (y, x) float64 1.654 10.63 7.015 2.543 ... 12.46 2.22 15.96 precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953 + >>> xr.combine_by_coords([x1, x2, x3]) + + Dimensions: (x: 6, y: 4) + Coordinates: + * x (x) int64 10 20 30 40 50 60 + * y (y) int64 0 1 2 3 + Data variables: + temperature (y, x) float64 1.654 10.63 7.015 nan ... 12.46 2.22 15.96 + precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 
0.6125 0.4654 0.5953 """ # Group by data vars @@ -667,7 +683,13 @@ def combine_by_coords( list(datasets_with_same_vars) ) - _check_shape_tile_ids(combined_ids) + if fill_value is None: + # check that datasets form complete hypercube + _check_shape_tile_ids(combined_ids) + else: + # check only that all datasets have same dimension depth for these + # vars + _check_dimension_depth_tile_ids(combined_ids) # Concatenate along all of concat_dims one by one to create single ds concatenated = _combine_nd( diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index a29fe0190cf..d907e1c5e46 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -711,6 +711,22 @@ def test_check_for_impossible_ordering(self): ): combine_by_coords([ds1, ds0]) + def test_combine_by_coords_incomplete_hypercube(self): + # test that this succeeds with default fill_value + x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]}) + x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]}) + x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]}) + actual = combine_by_coords([x1, x2, x3]) + expected = Dataset( + {"a": (("y", "x"), [[1, 1], [1, np.nan]])}, + coords={"y": [0, 1], "x": [0, 1]}, + ) + assert_identical(expected, actual) + + # test that this fails if fill_value is None + with pytest.raises(ValueError): + combine_by_coords([x1, x2, x3], fill_value=None) + @pytest.mark.filterwarnings( "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated" From cc22f41b383cfd3d6df8b95a61893869ff89e855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20BRIOL?= Date: Sun, 29 Dec 2019 15:52:45 +0100 Subject: [PATCH 09/10] added pyinterp to related projects (#3655) added pyinterp to related projects --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index a8af05f3074..3188751366f 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -25,6 +25,7 @@ Geosciences - `PyGDX `_: Python 3 package for accessing data stored in GAMS Data eXchange (GDX) files. Also uses a custom subclass. +- `pyinterp `_: Python 3 package for interpolating geo-referenced data used in the field of geosciences. - `pyXpcm `_: xarray-based Profile Classification Modelling (PCM), mostly for ocean data. - `Regionmask `_: plotting and creation of masks of spatial regions - `rioxarray `_: geospatial xarray extension powered by rasterio From b3d3b4480b7fb63402eb6c02103bb8d6c7dbf93a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 30 Dec 2019 17:46:43 +0000 Subject: [PATCH 10/10] Add nanmedian for dask arrays (#3604) * Add nanmedian for dask arrays Close #2999 * Fix tests. * fix import * Make sure that we don't rechunk the entire variable to one chunk by reducing over all dimensions. Dask raises an error when axis=None but not when axis=range(a.ndim). * fix tests. * Update whats-new.rst --- doc/whats-new.rst | 3 ++ xarray/core/dask_array_compat.py | 83 +++++++++++++++++++++++++++++++- xarray/core/duck_array_ops.py | 8 +-- xarray/core/nanops.py | 12 ++++- xarray/tests/test_dask.py | 4 +- 5 files changed, 102 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f08fad12bba..00d1c50780e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,9 @@ Breaking changes New Features ~~~~~~~~~~~~ +- Implement :py:func:`median` and :py:func:`nanmedian` for dask arrays. 
This works by rechunking + to a single chunk along all reduction axes. (:issue:`2999`). + By `Deepak Cherian `_. - :py:func:`xarray.concat` now preserves attributes from the first Variable. (:issue:`2575`, :issue:`2060`, :issue:`1614`) By `Deepak Cherian `_. diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index c3dbdd27098..de55de89f0c 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -1,8 +1,14 @@ from distutils.version import LooseVersion +from typing import Iterable -import dask.array as da import numpy as np -from dask import __version__ as dask_version + +try: + import dask.array as da + from dask import __version__ as dask_version +except ImportError: + dask_version = "0.0.0" + da = None if LooseVersion(dask_version) >= LooseVersion("2.0.0"): meta_from_array = da.utils.meta_from_array @@ -89,3 +95,76 @@ def meta_from_array(x, ndim=None, dtype=None): meta = meta.astype(dtype) return meta + + +if LooseVersion(dask_version) >= LooseVersion("2.8.1"): + median = da.median +else: + # Copied from dask v2.8.1 + # Used under the terms of Dask's license, see licenses/DASK_LICENSE. + def median(a, axis=None, keepdims=False): + """ + This works by automatically chunking the reduced axes to a single chunk + and then calling ``numpy.median`` function across the remaining dimensions + """ + + if axis is None: + raise NotImplementedError( + "The da.median function only works along an axis. " + "The full algorithm is difficult to do in parallel" + ) + + if not isinstance(axis, Iterable): + axis = (axis,) + + axis = [ax + a.ndim if ax < 0 else ax for ax in axis] + + a = a.rechunk({ax: -1 if ax in axis else "auto" for ax in range(a.ndim)}) + + result = a.map_blocks( + np.median, + axis=axis, + keepdims=keepdims, + drop_axis=axis if not keepdims else None, + chunks=[1 if ax in axis else c for ax, c in enumerate(a.chunks)] + if keepdims + else None, + ) + + return result + + +if LooseVersion(dask_version) > LooseVersion("2.9.0"): + nanmedian = da.nanmedian +else: + + def nanmedian(a, axis=None, keepdims=False): + """ + This works by automatically chunking the reduced axes to a single chunk + and then calling ``numpy.nanmedian`` function across the remaining dimensions + """ + + if axis is None: + raise NotImplementedError( + "The da.nanmedian function only works along an axis. " + "The full algorithm is difficult to do in parallel" + ) + + if not isinstance(axis, Iterable): + axis = (axis,) + + axis = [ax + a.ndim if ax < 0 else ax for ax in axis] + + a = a.rechunk({ax: -1 if ax in axis else "auto" for ax in range(a.ndim)}) + + result = a.map_blocks( + np.nanmedian, + axis=axis, + keepdims=keepdims, + drop_axis=axis if not keepdims else None, + chunks=[1 if ax in axis else c for ax, c in enumerate(a.chunks)] + if keepdims + else None, + ) + + return result diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index cf616acb485..98b371ab7c3 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from . import dask_array_ops, dtypes, npcompat, nputils +from . import dask_array_ops, dask_array_compat, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast from .pycompat import dask_array_type @@ -284,7 +284,7 @@ def _ignore_warnings_if(condition): yield -def _create_nan_agg_method(name, coerce_strings=False): +def _create_nan_agg_method(name, dask_module=dask_array, coerce_strings=False): from . 
import nanops def f(values, axis=None, skipna=None, **kwargs): @@ -301,7 +301,7 @@ def f(values, axis=None, skipna=None, **kwargs): nanname = "nan" + name func = getattr(nanops, nanname) else: - func = _dask_or_eager_func(name) + func = _dask_or_eager_func(name, dask_module=dask_module) try: return func(values, axis=axis, **kwargs) @@ -337,7 +337,7 @@ def f(values, axis=None, skipna=None, **kwargs): std.numeric_only = True var = _create_nan_agg_method("var") var.numeric_only = True -median = _create_nan_agg_method("median") +median = _create_nan_agg_method("median", dask_module=dask_array_compat) median.numeric_only = True prod = _create_nan_agg_method("prod") prod.numeric_only = True diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index f70e96217e8..f9989c2c8c9 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -6,8 +6,10 @@ try: import dask.array as dask_array + from . import dask_array_compat except ImportError: dask_array = None + dask_array_compat = None # type: ignore def _replace_nan(a, val): @@ -141,7 +143,15 @@ def nanmean(a, axis=None, dtype=None, out=None): def nanmedian(a, axis=None, out=None): - return _dask_or_eager_func("nanmedian", eager_module=nputils)(a, axis=axis) + # The dask algorithm works by rechunking to one chunk along axis + # Make sure we trigger the dask error when passing all dimensions + # so that we don't rechunk the entire array to one chunk and + # possibly blow memory + if axis is not None and len(np.atleast_1d(axis)) == a.ndim: + axis = None + return _dask_or_eager_func( + "nanmedian", dask_module=dask_array_compat, eager_module=nputils + )(a, axis=axis) def _nanvar_object(value, axis=None, ddof=0, keepdims=False, **kwargs): diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 6122e987154..d0e2654eed3 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -216,8 +216,10 @@ def test_reduce(self): self.assertLazyAndAllClose(u.argmin(dim="x"), actual) self.assertLazyAndAllClose((u > 1).any(), (v > 1).any()) self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x")) - with raises_regex(NotImplementedError, "dask"): + with raises_regex(NotImplementedError, "only works along an axis"): v.median() + with raises_regex(NotImplementedError, "only works along an axis"): + v.median(v.dims) with raise_if_dask_computes(): v.reduce(duck_array_ops.mean)
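
As a rough usage sketch of the dask-backed ``median`` introduced above (assuming dask is installed; the array shape, chunk sizes, and variable names here are arbitrary)::

    import numpy as np
    import xarray as xr

    # A dask-backed DataArray, chunked along "y".
    da = xr.DataArray(np.random.rand(4, 100), dims=("x", "y")).chunk({"y": 10})

    # Lazy median along "y": the compat shim rechunks "y" to a single
    # chunk and then maps np.nanmedian over the remaining blocks.
    result = da.median("y")
    print(result.compute())

    # Reducing over *all* dimensions still raises NotImplementedError, so
    # the whole array is never silently rechunked into a single chunk:
    # da.median()  # -> NotImplementedError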