From 6ece6a1cf424c3080e216fad8fc8058d3b70aadc Mon Sep 17 00:00:00 2001
From: Tony Tung
Date: Thu, 26 Sep 2019 22:45:26 -0700
Subject: [PATCH 1/7] Fix DataArray.to_netcdf type annotation (#3325)

It calls Dataset.to_netcdf, which returns Union[bytes, "Delayed", None].
So this method should as well.
---
 xarray/core/dataarray.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index e63b6c9975f..3becce7e432 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -2244,7 +2244,7 @@ def to_masked_array(self, copy: bool = True) -> np.ma.MaskedArray:
         isnull = pd.isnull(values)
         return np.ma.MaskedArray(data=values, mask=isnull, copy=copy)
 
-    def to_netcdf(self, *args, **kwargs) -> Optional["Delayed"]:
+    def to_netcdf(self, *args, **kwargs) -> Union[bytes, "Delayed", None]:
         """Write DataArray contents to a netCDF file.
 
         All parameters are passed directly to `xarray.Dataset.to_netcdf`.

From f3c7da6eba987ec67616cd8cb9aec6ea79f0e92c Mon Sep 17 00:00:00 2001
From: Gregory Gundersen
Date: Sat, 28 Sep 2019 15:57:36 -0400
Subject: [PATCH 2/7] Remove `complex.nc` from built docs (#3353)

* Rolling back to prevent a different issue from leaking into this one.

* Amended what's new.
---
 doc/io.rst        | 5 +++++
 doc/whats-new.rst | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/doc/io.rst b/doc/io.rst
index 775d915188e..0943b598a7f 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -516,6 +516,11 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set:
     # Reading it back
     xr.open_dataarray("complex.nc", engine="h5netcdf")
 
+.. ipython:: python
+    :suppress:
+
+    import os
+    os.remove('complex.nc')
 
 .. warning::
 
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 828a66db6ab..13ea55e1c4b 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -33,6 +33,10 @@ Documentation
 - Add examples for :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims`.
   By `Justus Magin `_.
 
+- Fixed documentation to clean up an unwanted file created in ``ipython`` example
+  (:pull:`3353`).
+  By `Gregory Gundersen `_.
+
 ..
_whats-new.0.13.0: v0.13.0 (17 Sep 2019) From b51683f157ca6421ef58d527422a1a88c9ac67f3 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Sun, 29 Sep 2019 07:50:21 -0600 Subject: [PATCH 3/7] Documentation improvements (#3328) * Add examples for full_like, zeros_like, ones_like * Add examples for xr.align * Add examples for xr.merge * Update xr.where docstring * Update xr.dot docstring * Update xarray/core/common.py Co-Authored-By: Deepak Cherian * Update xarray/core/common.py Co-Authored-By: Deepak Cherian * Update xr.combine_by_coords docstring * Apply black formatting only * More black formatting * Remove unnecessary pandas bits * Fix indentation issues * Update assign and pipe * Update `Dataset.reindex` with examples * Update `Dataset.fillna` with examples * Address styling issues * Update docstring Co-Authored-By: Deepak Cherian --- doc/whats-new.rst | 5 +- xarray/core/alignment.py | 130 +++++++++++++++++++++ xarray/core/combine.py | 109 +++++++++++++++--- xarray/core/common.py | 221 +++++++++++++++++++++++++++++++++++- xarray/core/computation.py | 98 ++++++++++++++-- xarray/core/dataset.py | 227 +++++++++++++++++++++++++++++++++++++ xarray/core/merge.py | 150 ++++++++++++++++++++++-- 7 files changed, 899 insertions(+), 41 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 13ea55e1c4b..760ce66ca04 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,7 +32,10 @@ Documentation ~~~~~~~~~~~~~ - Add examples for :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims`. By `Justus Magin `_. - +- Add examples for :py:meth:`align`, :py:meth:`merge`, :py:meth:`combine_by_coords`, + :py:meth:`full_like`, :py:meth:`zeros_like`, :py:meth:`ones_like`, :py:meth:`Dataset.pipe`, + :py:meth:`Dataset.assign`, :py:meth:`Dataset.reindex`, :py:meth:`Dataset.fillna`. + By `Anderson Banihirwe `_. - Fixed documentation to clean up an unwanted file created in ``ipython`` example (:pull:`3353`). By `Gregory Gundersen `_. diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index d63718500bc..c26b879d839 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -116,6 +116,136 @@ def align( ValueError If any dimensions without labels on the arguments have different sizes, or a different size than the size of the aligned dimension labels. + + Examples + -------- + + >>> import xarray as xr + >>> x = xr.DataArray([[25, 35], [10, 24]], dims=('lat', 'lon'), + ... coords={'lat': [35., 40.], 'lon': [100., 120.]}) + >>> y = xr.DataArray([[20, 5], [7, 13]], dims=('lat', 'lon'), + ... 
coords={'lat': [35., 42.], 'lon': [100., 120.]}) + + >>> x + + array([[25, 35], + [10, 24]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + + >>> y + + array([[20, 5], + [ 7, 13]]) + Coordinates: + * lat (lat) float64 35.0 42.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y) + >>> a + + array([[25, 35]]) + Coordinates: + * lat (lat) float64 35.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[20, 5]]) + Coordinates: + * lat (lat) float64 35.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y, join='outer') + >>> a + + array([[25., 35.], + [10., 24.], + [nan, nan]]) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[20., 5.], + [nan, nan], + [ 7., 13.]]) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y, join='outer', fill_value=-999) + >>> a + + array([[ 25, 35], + [ 10, 24], + [-999, -999]]) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[ 20, 5], + [-999, -999], + [ 7, 13]]) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y, join='left') + >>> a + + array([[25, 35], + [10, 24]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[20., 5.], + [nan, nan]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y, join='right') + >>> a + + array([[25., 35.], + [nan, nan]]) + Coordinates: + * lat (lat) float64 35.0 42.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[20, 5], + [ 7, 13]]) + Coordinates: + * lat (lat) float64 35.0 42.0 + * lon (lon) float64 100.0 120.0 + + >>> a, b = xr.align(x, y, join='exact') + Traceback (most recent call last): + ... + "indexes along dimension {!r} are not equal".format(dim) + ValueError: indexes along dimension 'lat' are not equal + + >>> a, b = xr.align(x, y, join='override') + >>> a + + array([[25, 35], + [10, 24]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + >>> b + + array([[20, 5], + [ 7, 13]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + """ if indexes is None: indexes = {} diff --git a/xarray/core/combine.py b/xarray/core/combine.py index be7fd86555c..38befd5698f 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -393,7 +393,7 @@ def combine_nested( -------- A common task is collecting data from a parallelized simulation in which - each processor wrote out to a separate file. A domain which was decomposed + each process wrote out to a separate file. A domain which was decomposed into 4 parts, 2 each along both the x and y axes, requires organising the datasets into a doubly-nested list, e.g: @@ -505,8 +505,7 @@ def combine_by_coords( ---------- datasets : sequence of xarray.Dataset Dataset objects to combine. - compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts', 'override'}, optional + compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts: @@ -520,9 +519,21 @@ def combine_by_coords( of all non-null values. 
- 'override': skip comparing and pick variable from first dataset data_vars : {'minimal', 'different', 'all' or list of str}, optional - Details are in the documentation of concat + These data variables will be concatenated together: + + * 'minimal': Only data variables in which the dimension already + appears are included. + * 'different': Data variables which are not equal (ignoring + attributes) across all datasets are also concatenated (as well as + all for which dimension already appears). Beware: this option may + load the data payload of data variables into memory if they are not + already loaded. + * 'all': All data variables will be concatenated. + * list of str: The listed data variables will be concatenated, in + addition to the 'minimal' data variables. + If objects are DataArrays, `data_vars` must be 'all'. coords : {'minimal', 'different', 'all' or list of str}, optional - Details are in the documentation of concat + As per the 'data_vars' kwarg, but for coordinate variables. fill_value : scalar, optional Value to use for newly missing values join : {'outer', 'inner', 'left', 'right', 'exact'}, optional @@ -556,29 +567,91 @@ def combine_by_coords( they are concatenated based on the values in their dimension coordinates, not on their position in the list passed to `combine_by_coords`. + >>> import numpy as np + >>> import xarray as xr + + >>> x1 = xr.Dataset( + ... { + ... "temperature": (("y", "x"), 20 * np.random.rand(6).reshape(2, 3)), + ... "precipitation": (("y", "x"), np.random.rand(6).reshape(2, 3)), + ... }, + ... coords={"y": [0, 1], "x": [10, 20, 30]}, + ... ) + >>> x2 = xr.Dataset( + ... { + ... "temperature": (("y", "x"), 20 * np.random.rand(6).reshape(2, 3)), + ... "precipitation": (("y", "x"), np.random.rand(6).reshape(2, 3)), + ... }, + ... coords={"y": [2, 3], "x": [10, 20, 30]}, + ... ) + >>> x3 = xr.Dataset( + ... { + ... "temperature": (("y", "x"), 20 * np.random.rand(6).reshape(2, 3)), + ... "precipitation": (("y", "x"), np.random.rand(6).reshape(2, 3)), + ... }, + ... coords={"y": [2, 3], "x": [40, 50, 60]}, + ... ) + >>> x1 - Dimensions: (x: 3) - Coords: - * position (x) int64 0 1 2 + Dimensions: (x: 3, y: 2) + Coordinates: + * y (y) int64 0 1 + * x (x) int64 10 20 30 Data variables: - temperature (x) float64 11.04 23.57 20.77 ... + temperature (y, x) float64 1.654 10.63 7.015 2.543 13.93 9.436 + precipitation (y, x) float64 0.2136 0.9974 0.7603 0.4679 0.3115 0.945 >>> x2 - Dimensions: (x: 3) - Coords: - * position (x) int64 3 4 5 + Dimensions: (x: 3, y: 2) + Coordinates: + * y (y) int64 2 3 + * x (x) int64 10 20 30 + Data variables: + temperature (y, x) float64 9.341 0.1251 6.269 7.709 8.82 2.316 + precipitation (y, x) float64 0.1728 0.1178 0.03018 0.6509 0.06938 0.3792 + + >>> x3 + + Dimensions: (x: 3, y: 2) + Coordinates: + * y (y) int64 2 3 + * x (x) int64 40 50 60 Data variables: - temperature (x) float64 6.97 8.13 7.42 ... + temperature (y, x) float64 2.789 2.446 6.551 12.46 2.22 15.96 + precipitation (y, x) float64 0.4804 0.1902 0.2457 0.6125 0.4654 0.5953 - >>> combined = xr.combine_by_coords([x2, x1]) + >>> xr.combine_by_coords([x2, x1]) - Dimensions: (x: 6) - Coords: - * position (x) int64 0 1 2 3 4 5 + Dimensions: (x: 3, y: 4) + Coordinates: + * x (x) int64 10 20 30 + * y (y) int64 0 1 2 3 Data variables: - temperature (x) float64 11.04 23.57 20.77 ... + temperature (y, x) float64 1.654 10.63 7.015 2.543 ... 7.709 8.82 2.316 + precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 
0.6509 0.06938 0.3792 + + >>> xr.combine_by_coords([x3, x1]) + + Dimensions: (x: 6, y: 4) + Coordinates: + * x (x) int64 10 20 30 40 50 60 + * y (y) int64 0 1 2 3 + Data variables: + temperature (y, x) float64 1.654 10.63 7.015 nan ... nan 12.46 2.22 15.96 + precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953 + + >>> xr.combine_by_coords([x3, x1], join='override') + + Dimensions: (x: 3, y: 4) + Coordinates: + * x (x) int64 10 20 30 + * y (y) int64 0 1 2 3 + Data variables: + temperature (y, x) float64 1.654 10.63 7.015 2.543 ... 12.46 2.22 15.96 + precipitation (y, x) float64 0.2136 0.9974 0.7603 ... 0.6125 0.4654 0.5953 + """ # Group by data vars diff --git a/xarray/core/common.py b/xarray/core/common.py index ab9e7616ce1..5b166890575 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -542,6 +542,72 @@ def pipe( ... .pipe((f, 'arg2'), arg1=a, arg3=c) ... ) + Examples + -------- + + >>> import numpy as np + >>> import xarray as xr + >>> x = xr.Dataset( + ... { + ... "temperature_c": (("lat", "lon"), 20 * np.random.rand(4).reshape(2, 2)), + ... "precipitation": (("lat", "lon"), np.random.rand(4).reshape(2, 2)), + ... }, + ... coords={"lat": [10, 20], "lon": [150, 160]}, + ... ) + >>> x + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lat (lat) int64 10 20 + * lon (lon) int64 150 160 + Data variables: + temperature_c (lat, lon) float64 14.53 11.85 19.27 16.37 + precipitation (lat, lon) float64 0.7315 0.7189 0.8481 0.4671 + + >>> def adder(data, arg): + ... return data + arg + ... + >>> def div(data, arg): + ... return data / arg + ... + >>> def sub_mult(data, sub_arg, mult_arg): + ... return (data * mult_arg) - sub_arg + ... + >>> x.pipe(adder, 2) + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lon (lon) int64 150 160 + * lat (lat) int64 10 20 + Data variables: + temperature_c (lat, lon) float64 16.53 13.85 21.27 18.37 + precipitation (lat, lon) float64 2.731 2.719 2.848 2.467 + + >>> x.pipe(adder, arg=2) + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lon (lon) int64 150 160 + * lat (lat) int64 10 20 + Data variables: + temperature_c (lat, lon) float64 16.53 13.85 21.27 18.37 + precipitation (lat, lon) float64 2.731 2.719 2.848 2.467 + + >>> ( + ... x + ... .pipe(adder, arg=2) + ... .pipe(div, arg=2) + ... .pipe(sub_mult, sub_arg=2, mult_arg=2) + ... ) + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lon (lon) int64 150 160 + * lat (lat) int64 10 20 + Data variables: + temperature_c (lat, lon) float64 14.53 11.85 19.27 16.37 + precipitation (lat, lon) float64 0.7315 0.7189 0.8481 0.4671 + See Also -------- pandas.DataFrame.pipe @@ -1172,6 +1238,61 @@ def full_like(other, fill_value, dtype: DTypeLike = None): filled with fill_value. Coords will be copied from other. If other is based on dask, the new one will be as well, and will be split in the same chunks. + + Examples + -------- + + >>> import numpy as np + >>> import xarray as xr + >>> x = xr.DataArray(np.arange(6).reshape(2, 3), + ... dims=['lat', 'lon'], + ... 
coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x + + array([[0, 1, 2], + [3, 4, 5]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.full_like(x, 1) + + array([[1, 1, 1], + [1, 1, 1]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.full_like(x, 0.5) + + array([[0, 0, 0], + [0, 0, 0]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.full_like(x, 0.5, dtype=np.double) + + array([[0.5, 0.5, 0.5], + [0.5, 0.5, 0.5]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.full_like(x, np.nan, dtype=np.double) + + array([[nan, nan, nan], + [nan, nan, nan]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + See also + -------- + + zeros_like + ones_like + """ from .dataarray import DataArray from .dataset import Dataset @@ -1217,13 +1338,109 @@ def _full_like_variable(other, fill_value, dtype: DTypeLike = None): def zeros_like(other, dtype: DTypeLike = None): - """Shorthand for full_like(other, 0, dtype) + """Return a new object of zeros with the same shape and + type as a given dataarray or dataset. + + Parameters + ---------- + other : DataArray, Dataset, or Variable + The reference object. The output will have the same dimensions and coordinates as this object. + dtype : dtype, optional + dtype of the new array. If omitted, it defaults to other.dtype. + + Returns + ------- + out : same as object + New object of zeros with the same shape and type as other. + + Examples + -------- + + >>> import numpy as np + >>> import xarray as xr + >>> x = xr.DataArray(np.arange(6).reshape(2, 3), + ... dims=['lat', 'lon'], + ... coords={'lat': [1, 2], 'lon': [0, 1, 2]}) + >>> x + + array([[0, 1, 2], + [3, 4, 5]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.zeros_like(x) + + array([[0, 0, 0], + [0, 0, 0]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + >>> xr.zeros_like(x, dtype=np.float) + + array([[0., 0., 0.], + [0., 0., 0.]]) + Coordinates: + * lat (lat) int64 1 2 + * lon (lon) int64 0 1 2 + + See also + -------- + + ones_like + full_like + """ return full_like(other, 0, dtype) def ones_like(other, dtype: DTypeLike = None): - """Shorthand for full_like(other, 1, dtype) + """Return a new object of ones with the same shape and + type as a given dataarray or dataset. + + Parameters + ---------- + other : DataArray, Dataset, or Variable + The reference object. The output will have the same dimensions and coordinates as this object. + dtype : dtype, optional + dtype of the new array. If omitted, it defaults to other.dtype. + + Returns + ------- + out : same as object + New object of ones with the same shape and type as other. + + Examples + -------- + + >>> import numpy as np + >>> import xarray as xr + >>> x = xr.DataArray(np.arange(6).reshape(2, 3), + ... dims=['lat', 'lon'], + ... 
coords={'lat': [1, 2], 'lon': [0, 1, 2]})
+    >>> x
+    
+    array([[0, 1, 2],
+           [3, 4, 5]])
+    Coordinates:
+      * lat      (lat) int64 1 2
+      * lon      (lon) int64 0 1 2
+
+    >>> xr.ones_like(x)
+    
+    array([[1, 1, 1],
+           [1, 1, 1]])
+    Coordinates:
+      * lat      (lat) int64 1 2
+      * lon      (lon) int64 0 1 2
+
+    See also
+    --------
+
+    zeros_like
+    full_like
+
     """
     return full_like(other, 1, dtype)
 
diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index 424ab5be87a..0d08234c474 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -1084,17 +1084,54 @@ def dot(*arrays, dims=None, **kwargs):
 
     Examples
     --------
 
-    >>> da_a = xr.DataArray(np.arange(3 * 4).reshape(3, 4), dims=['a', 'b'])
-    >>> da_b = xr.DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5),
-    >>>                     dims=['a', 'b', 'c'])
-    >>> da_c = xr.DataArray(np.arange(5 * 6).reshape(5, 6), dims=['c', 'd'])
-    >>>
-    >>> xr.dot(da_a, da_b, dims=['a', 'b']).dims
-    ('c', )
-    >>> xr.dot(da_a, da_b, dims=['a']).dims
-    ('b', 'c')
-    >>> xr.dot(da_a, da_b, da_c, dims=['b', 'c']).dims
-    ('a', 'd')
+    >>> import numpy as np
+    >>> import xarray as xr
+    >>> da_a = xr.DataArray(np.arange(3 * 2).reshape(3, 2), dims=['a', 'b'])
+    >>> da_b = xr.DataArray(np.arange(3 * 2 * 2).reshape(3, 2, 2),
+    ...                     dims=['a', 'b', 'c'])
+    >>> da_c = xr.DataArray(np.arange(2 * 3).reshape(2, 3), dims=['c', 'd'])
+
+    >>> da_a
+    
+    array([[0, 1],
+           [2, 3],
+           [4, 5]])
+    Dimensions without coordinates: a, b
+
+    >>> da_b
+    
+    array([[[ 0,  1],
+            [ 2,  3]],
+           [[ 4,  5],
+            [ 6,  7]],
+           [[ 8,  9],
+            [10, 11]]])
+    Dimensions without coordinates: a, b, c
+
+    >>> da_c
+    
+    array([[0, 1, 2],
+           [3, 4, 5]])
+    Dimensions without coordinates: c, d
+
+    >>> xr.dot(da_a, da_b, dims=['a', 'b'])
+    
+    array([110, 125])
+    Dimensions without coordinates: c
+
+    >>> xr.dot(da_a, da_b, dims=['a'])
+    
+    array([[40, 46],
+           [70, 79]])
+    Dimensions without coordinates: b, c
+
+    >>> xr.dot(da_a, da_b, da_c, dims=['b', 'c'])
+    
+    array([[  9,  14,  19],
+           [ 93, 150, 207],
+           [273, 446, 619]])
+    Dimensions without coordinates: a, d
+
     """
     from .dataarray import DataArray
     from .variable import Variable
@@ -1195,6 +1232,45 @@ def where(cond, x, y):
 
     Examples
     --------
+    >>> import xarray as xr
+    >>> import numpy as np
+    >>> x = xr.DataArray(0.1 * np.arange(10), dims=['lat'],
+    ...                  coords={'lat': np.arange(10)}, name='sst')
+    >>> x
+    
+    array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
+    Coordinates:
+      * lat      (lat) int64 0 1 2 3 4 5 6 7 8 9
+
+    >>> xr.where(x < 0.5, x, 100*x)
+    
+    array([ 0. ,  0.1,  0.2,  0.3,  0.4, 50. , 60. , 70. , 80. , 90. ])
+    Coordinates:
+      * lat      (lat) int64 0 1 2 3 4 5 6 7 8 9
+
+    >>> y = xr.DataArray(
+    ...     0.1 * np.arange(9).reshape(3, 3),
+    ...     dims=["lat", "lon"],
+    ...     coords={"lat": np.arange(3), "lon": 10 + np.arange(3)},
+    ...     name="sst",
+    ... )
+    >>> y
+    
+    array([[0. , 0.1, 0.2],
+           [0.3, 0.4, 0.5],
+           [0.6, 0.7, 0.8]])
+    Coordinates:
+      * lat      (lat) int64 0 1 2
+      * lon      (lon) int64 10 11 12
+
+    >>> xr.where(y.lat < 1, y, -1)
+    
+    array([[ 0. ,  0.1,  0.2],
+           [-1. , -1. , -1. ],
+           [-1. , -1. , -1. ]])
+    Coordinates:
+      * lat      (lat) int64 0 1 2
+      * lon      (lon) int64 10 11 12
 
     >>> cond = xr.DataArray([True, False], dims=['x'])
     >>> x = xr.DataArray([1, 2], dims=['y'])
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index ea087ce3ce1..5fa96216ba0 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -2292,6 +2292,134 @@ def reindex(
         Dataset.reindex_like
         align
         pandas.Index.get_indexer
+
+        Examples
+        --------
+
+        Create a dataset with some fictional data.
+
+        >>> import xarray as xr
+        >>> import pandas as pd
+        >>> import numpy as np
+        >>> x = xr.Dataset(
+        ...     {
+        ...         "temperature": ("station", 20 * np.random.rand(4)),
+        ...         "pressure": ("station", 500 * np.random.rand(4))
+        ...     },
+        ...     coords={"station": ["boston", "nyc", "seattle", "denver"]})
+        >>> x
+        
+        Dimensions:      (station: 4)
+        Coordinates:
+          * station      (station) <U7 'boston' 'nyc' 'seattle' 'denver'
+        Data variables:
+            temperature  (station) float64 18.84 ... 19.22 ...
+            pressure     (station) float64 324.1 ... 122.8 ...
+        >>> x.indexes
+        station: Index(['boston', 'nyc', 'seattle', 'denver'], dtype='object', name='station')
+
+        Create a new index and reindex the dataset. By default values in the new index that
+        do not have corresponding records in the dataset are assigned `NaN`.
+
+        >>> new_index = ['boston', 'austin', 'seattle', 'lincoln']
+        >>> x.reindex({'station': new_index})
+        
+        Dimensions:      (station: 4)
+        Coordinates:
+          * station      (station) object 'boston' 'austin' 'seattle' 'lincoln'
+        Data variables:
+            temperature  (station) float64 18.84 nan 19.22 nan
+            pressure     (station) float64 324.1 nan 122.8 nan
+
+        We can fill in the missing values by passing a value to the keyword `fill_value`.
+
+        >>> x.reindex({'station': new_index}, fill_value=0)
+        
+        Dimensions:      (station: 4)
+        Coordinates:
+          * station      (station) object 'boston' 'austin' 'seattle' 'lincoln'
+        Data variables:
+            temperature  (station) float64 18.84 0.0 19.22 0.0
+            pressure     (station) float64 324.1 0.0 122.8 0.0
+
+        Because the index is not monotonically increasing or decreasing, we cannot pass an
+        argument to the ``method`` keyword to fill the `NaN` values.
+
+        >>> x.reindex({'station': new_index}, method='nearest')
+        Traceback (most recent call last):
+            ...
+            raise ValueError('index must be monotonic increasing or decreasing')
+        ValueError: index must be monotonic increasing or decreasing
+
+        To further illustrate the filling functionality in reindex, we will create a
+        dataset with a monotonically increasing index (for example, a sequence of dates).
+
+        >>> x2 = xr.Dataset(
+        ...     {
+        ...         "temperature": ("time", [15.57, 12.77, np.nan, 0.3081, 16.59, 15.12]),
+        ...         "pressure": ("time", 500 * np.random.rand(6))
+        ...     },
+        ...     coords={"time": pd.date_range('01/01/2019', periods=6, freq='D')})
+        >>> x2
+        
+        Dimensions:      (time: 6)
+        Coordinates:
+          * time         (time) datetime64[ns] 2019-01-01 2019-01-02 ... 2019-01-06
+        Data variables:
+            temperature  (time) float64 15.57 12.77 nan 0.3081 16.59 15.12
+            pressure     (time) float64 103.4 122.7 452.0 444.0 399.2 486.0
+
+        Suppose we decide to expand the dataset to cover a wider date range.
+
+        >>> time_index2 = pd.date_range('12/29/2018', periods=10, freq='D')
+        >>> x2.reindex({'time': time_index2})
+        
+        Dimensions:      (time: 10)
+        Coordinates:
+          * time         (time) datetime64[ns] 2018-12-29 2018-12-30 ... 2019-01-07
+        Data variables:
+            temperature  (time) float64 nan nan nan 15.57 ... 0.3081 16.59 15.12 nan
+            pressure     (time) float64 nan nan nan 103.4 ... 444.0 399.2 486.0 nan
+
+        The index entries that did not have a value in the original dataset (for example, `2018-12-29`)
+        are by default filled with NaN. If desired, we can fill in the missing values using one of several options.
+
+        For example, to fill the `NaN` values by propagating the next valid value backwards,
+        pass `bfill` as an argument to the `method` keyword.
+
+        >>> x3 = x2.reindex({'time': time_index2}, method='bfill')
+        >>> x3
+        
+        Dimensions:      (time: 10)
+        Coordinates:
+          * time         (time) datetime64[ns] 2018-12-29 2018-12-30 ... 2019-01-07
+        Data variables:
+            temperature  (time) float64 15.57 15.57 15.57 15.57 ... 16.59 15.12 nan
+            pressure     (time) float64 103.4 103.4 103.4 103.4 ... 
399.2 486.0 nan
+
+        Please note that the `NaN` value present in the original dataset (at index value `2019-01-03`)
+        will not be filled by any of the value propagation schemes.
+
+        >>> x2.where(x2.temperature.isnull(), drop=True)
+        
+        Dimensions:      (time: 1)
+        Coordinates:
+          * time         (time) datetime64[ns] 2019-01-03
+        Data variables:
+            temperature  (time) float64 nan
+            pressure     (time) float64 452.0
+        >>> x3.where(x3.temperature.isnull(), drop=True)
+        
+        Dimensions:      (time: 2)
+        Coordinates:
+          * time         (time) datetime64[ns] 2019-01-03 2019-01-07
+        Data variables:
+            temperature  (time) float64 nan nan
+            pressure     (time) float64 452.0 nan
+
+        This is because filling while reindexing does not look at dataset values, but only compares
+        the original and desired indexes. If you do want to fill in the `NaN` values present in the
+        original dataset, use the :py:meth:`~Dataset.fillna()` method.
+
         """
         indexers = utils.either_dict_or_kwargs(indexers, indexers_kwargs, "reindex")
@@ -3718,6 +3846,57 @@ def fillna(self, value: Any) -> "Dataset":
         Returns
         -------
         Dataset
+
+        Examples
+        --------
+
+        >>> import numpy as np
+        >>> import xarray as xr
+        >>> ds = xr.Dataset(
+        ...     {
+        ...         "A": ("x", [np.nan, 2, np.nan, 0]),
+        ...         "B": ("x", [3, 4, np.nan, 1]),
+        ...         "C": ("x", [np.nan, np.nan, np.nan, 5]),
+        ...         "D": ("x", [np.nan, 3, np.nan, 4])
+        ...     },
+        ...     coords={"x": [0, 1, 2, 3]})
+        >>> ds
+        
+        Dimensions:  (x: 4)
+        Coordinates:
+          * x        (x) int64 0 1 2 3
+        Data variables:
+            A        (x) float64 nan 2.0 nan 0.0
+            B        (x) float64 3.0 4.0 nan 1.0
+            C        (x) float64 nan nan nan 5.0
+            D        (x) float64 nan 3.0 nan 4.0
+
+        Replace all `NaN` values with 0s.
+
+        >>> ds.fillna(0)
+        
+        Dimensions:  (x: 4)
+        Coordinates:
+          * x        (x) int64 0 1 2 3
+        Data variables:
+            A        (x) float64 0.0 2.0 0.0 0.0
+            B        (x) float64 3.0 4.0 0.0 1.0
+            C        (x) float64 0.0 0.0 0.0 5.0
+            D        (x) float64 0.0 3.0 0.0 4.0
+
+        Replace all `NaN` elements in variables 'A', 'B', 'C', and 'D' with 0, 1, 2, and 3 respectively.
+
+        >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
+        >>> ds.fillna(value=values)
+        
+        Dimensions:  (x: 4)
+        Coordinates:
+          * x        (x) int64 0 1 2 3
+        Data variables:
+            A        (x) float64 0.0 2.0 0.0 0.0
+            B        (x) float64 3.0 4.0 1.0 1.0
+            C        (x) float64 2.0 2.0 2.0 5.0
+            D        (x) float64 3.0 3.0 3.0 4.0
         """
         if utils.is_dict_like(value):
             value_keys = getattr(value, "data_vars", value).keys()
@@ -4043,6 +4222,54 @@ def assign(
         See Also
         --------
         pandas.DataFrame.assign
+
+        Examples
+        --------
+        >>> import numpy as np
+        >>> import xarray as xr
+        >>> x = xr.Dataset(
+        ...     {
+        ...         "temperature_c": (("lat", "lon"), 20 * np.random.rand(4).reshape(2, 2)),
+        ...         "precipitation": (("lat", "lon"), np.random.rand(4).reshape(2, 2)),
+        ...     },
+        ...     coords={"lat": [10, 20], "lon": [150, 160]},
+        ...
) + >>> x + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lat (lat) int64 10 20 + * lon (lon) int64 150 160 + Data variables: + temperature_c (lat, lon) float64 18.04 12.51 17.64 9.313 + precipitation (lat, lon) float64 0.4751 0.6827 0.3697 0.03524 + + Where the value is a callable, evaluated on dataset: + + >>> x.assign(temperature_f = lambda x: x.temperature_c * 9 / 5 + 32) + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lat (lat) int64 10 20 + * lon (lon) int64 150 160 + Data variables: + temperature_c (lat, lon) float64 18.04 12.51 17.64 9.313 + precipitation (lat, lon) float64 0.4751 0.6827 0.3697 0.03524 + temperature_f (lat, lon) float64 64.47 54.51 63.75 48.76 + + Alternatively, the same behavior can be achieved by directly referencing an existing dataarray: + + >>> x.assign(temperature_f=x["temperature_c"] * 9 / 5 + 32) + + Dimensions: (lat: 2, lon: 2) + Coordinates: + * lat (lat) int64 10 20 + * lon (lon) int64 150 160 + Data variables: + temperature_c (lat, lon) float64 18.04 12.51 17.64 9.313 + precipitation (lat, lon) float64 0.4751 0.6827 0.3697 0.03524 + temperature_f (lat, lon) float64 64.47 54.51 63.75 48.76 + """ variables = either_dict_or_kwargs(variables, variables_kwargs, "assign") data = self.copy() diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 6dba659f992..ca753c588d4 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -565,18 +565,150 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA): Examples -------- - >>> arrays = [xr.DataArray(n, name='var%d' % n) for n in range(5)] - >>> xr.merge(arrays) + >>> import xarray as xr + >>> x = xr.DataArray( + ... [[1.0, 2.0], [3.0, 5.0]], + ... dims=("lat", "lon"), + ... coords={"lat": [35.0, 40.0], "lon": [100.0, 120.0]}, + ... name="var1", + ... ) + >>> y = xr.DataArray( + ... [[5.0, 6.0], [7.0, 8.0]], + ... dims=("lat", "lon"), + ... coords={"lat": [35.0, 42.0], "lon": [100.0, 150.0]}, + ... name="var2", + ... ) + >>> z = xr.DataArray( + ... [[0.0, 3.0], [4.0, 9.0]], + ... dims=("time", "lon"), + ... coords={"time": [30.0, 60.0], "lon": [100.0, 150.0]}, + ... name="var3", + ... 
) + + >>> x + + array([[1., 2.], + [3., 5.]]) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + + >>> y + + array([[5., 6.], + [7., 8.]]) + Coordinates: + * lat (lat) float64 35.0 42.0 + * lon (lon) float64 100.0 150.0 + + >>> z + + array([[0., 3.], + [4., 9.]]) + Coordinates: + * time (time) float64 30.0 60.0 + * lon (lon) float64 100.0 150.0 + + >>> xr.merge([x, y, z]) + + Dimensions: (lat: 3, lon: 3, time: 2) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 150.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 2.0 nan 3.0 5.0 nan nan nan nan + var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 + var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 + + >>> xr.merge([x, y, z], compat='identical') + + Dimensions: (lat: 3, lon: 3, time: 2) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 150.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 2.0 nan 3.0 5.0 nan nan nan nan + var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 + var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 + + >>> xr.merge([x, y, z], compat='equals') + + Dimensions: (lat: 3, lon: 3, time: 2) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 150.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 2.0 nan 3.0 5.0 nan nan nan nan + var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 + var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 + + >>> xr.merge([x, y, z], compat='equals', fill_value=-999.) + + Dimensions: (lat: 3, lon: 3, time: 2) + Coordinates: + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 150.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 2.0 -999.0 3.0 ... -999.0 -999.0 -999.0 + var2 (lat, lon) float64 5.0 -999.0 6.0 -999.0 ... 
-999.0 7.0 -999.0 8.0 + var3 (time, lon) float64 0.0 -999.0 3.0 4.0 -999.0 9.0 + + >>> xr.merge([x, y, z], join='override') + + Dimensions: (lat: 2, lon: 2, time: 2) + Coordinates: + * lat (lat) float64 35.0 40.0 + * lon (lon) float64 100.0 120.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 2.0 3.0 5.0 + var2 (lat, lon) float64 5.0 6.0 7.0 8.0 + var3 (time, lon) float64 0.0 3.0 4.0 9.0 + + >>> xr.merge([x, y, z], join='inner') + + Dimensions: (lat: 1, lon: 1, time: 2) + Coordinates: + * lat (lat) float64 35.0 + * lon (lon) float64 100.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 + var2 (lat, lon) float64 5.0 + var3 (time, lon) float64 0.0 4.0 + + >>> xr.merge([x, y, z], compat='identical', join='inner') + + Dimensions: (lat: 1, lon: 1, time: 2) + Coordinates: + * lat (lat) float64 35.0 + * lon (lon) float64 100.0 + * time (time) float64 30.0 60.0 + Data variables: + var1 (lat, lon) float64 1.0 + var2 (lat, lon) float64 5.0 + var3 (time, lon) float64 0.0 4.0 + + >>> xr.merge([x, y, z], compat='broadcast_equals', join='outer') - Dimensions: () + Dimensions: (lat: 3, lon: 3, time: 2) Coordinates: - *empty* + * lat (lat) float64 35.0 40.0 42.0 + * lon (lon) float64 100.0 120.0 150.0 + * time (time) float64 30.0 60.0 Data variables: - var0 int64 0 - var1 int64 1 - var2 int64 2 - var3 int64 3 - var4 int64 4 + var1 (lat, lon) float64 1.0 2.0 nan 3.0 5.0 nan nan nan nan + var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 + var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 + + >>> xr.merge([x, y, z], join='exact') + Traceback (most recent call last): + ... + ValueError: indexes along dimension 'lat' are not equal Raises ------ From bd1069bd23d9eac9962d82eabbd1c4227581196c Mon Sep 17 00:00:00 2001 From: Gregory Gundersen Date: Sun, 29 Sep 2019 19:39:53 -0400 Subject: [PATCH 4/7] Add glossary to documentation (#3352) * First draft at terminology glossary. * Made name matching rules more explicit and hopefully clearer. * Amended what's new. * Changes based on feedback. * More changed based on feedback. --- doc/index.rst | 2 ++ doc/terminology.rst | 42 ++++++++++++++++++++++++++++++++++++++++++ doc/whats-new.rst | 4 ++++ 3 files changed, 48 insertions(+) create mode 100644 doc/terminology.rst diff --git a/doc/index.rst b/doc/index.rst index 4d0105f350a..4cf67a37f4c 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -46,6 +46,7 @@ Documentation **User Guide** +* :doc:`terminology` * :doc:`data-structures` * :doc:`indexing` * :doc:`interpolation` @@ -65,6 +66,7 @@ Documentation :hidden: :caption: User Guide + terminology data-structures indexing interpolation diff --git a/doc/terminology.rst b/doc/terminology.rst new file mode 100644 index 00000000000..138a99740fe --- /dev/null +++ b/doc/terminology.rst @@ -0,0 +1,42 @@ +.. _terminology: + +Terminology +=========== + +*Xarray terminology differs slightly from CF, mathematical conventions, and pandas; and therefore using xarray, understanding the documentation, and parsing error messages is easier once key terminology is defined. This glossary was designed so that more fundamental concepts come first. Thus for new users, this page is best read top-to-bottom. Throughout the glossary,* ``arr`` *will refer to an xarray* :py:class:`DataArray` *in any small examples. For more complete examples, please consult the relevant documentation.* + +---- + +**DataArray:** A multi-dimensional array with labeled or named dimensions. 
``DataArray`` objects add metadata such as dimension names, coordinates, and attributes (defined below) to underlying "unlabeled" data structures such as numpy and Dask arrays. If its optional ``name`` property is set, it is a *named DataArray*.
+
+----
+
+**Dataset:** A dict-like collection of ``DataArray`` objects with aligned dimensions. Thus, most operations that can be performed on the dimensions of a single ``DataArray`` can be performed on a dataset. Datasets have data variables (see **Variable** below), dimensions, coordinates, and attributes.
+
+----
+
+**Variable:** A `NetCDF-like variable `_ consisting of dimensions, data, and attributes which describe a single array. The main functional difference between variables and numpy arrays is that numerical operations on variables implement array broadcasting by dimension name. Each ``DataArray`` has an underlying variable that can be accessed via ``arr.variable``. However, a variable is not fully described outside of either a ``Dataset`` or a ``DataArray``.
+
+.. note::
+
+    The :py:class:`Variable` class is a low-level interface and can typically be ignored. However, the word "variable" appears often enough in the code and documentation that it is useful to understand.
+
+----
+
+**Dimension:** In mathematics, the *dimension* of data is loosely the number of degrees of freedom for it. A *dimension axis* is a set of all points in which all but one of these degrees of freedom is fixed. We can think of each dimension axis as having a name, for example the "x dimension". In xarray, a ``DataArray`` object's *dimensions* are its named dimension axes, and the name of the ``i``-th dimension is ``arr.dims[i]``. If an array is created without dimensions, the default dimension names are ``dim_0``, ``dim_1``, and so forth.
+
+----
+
+**Coordinate:** An array that labels a dimension of another ``DataArray``. Loosely, the coordinate array's values can be thought of as tick labels along a dimension. There are two types of coordinate arrays: *dimension coordinates* and *non-dimension coordinates* (see below). A coordinate named ``x`` can be retrieved from ``arr.coords[x]``. A ``DataArray`` can have more coordinates than dimensions because a single dimension can be assigned multiple coordinate arrays. However, only one coordinate array can be assigned as a particular dimension's dimension coordinate array. As a consequence, ``len(arr.dims) <= len(arr.coords)`` in general.
+
+----
+
+**Dimension coordinate:** A coordinate array assigned to ``arr`` with both a name and dimension name in ``arr.dims``. Dimension coordinates are used for label-based indexing and alignment, like the index found on a :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. In fact, dimension coordinates use :py:class:`pandas.Index` objects under the hood for efficient computation. Dimension coordinates are marked by ``*`` when printing a ``DataArray`` or ``Dataset``.
+
+----
+
+**Non-dimension coordinate:** A coordinate array assigned to ``arr`` with a name in ``arr.coords`` but *not* in ``arr.dims``. These coordinate arrays are useful for auxiliary labeling. However, non-dimension coordinates are not indexed, and any operation on non-dimension coordinates that leverages indexing will fail. Printing ``arr.coords`` will print all of ``arr``'s coordinate names, with the assigned dimensions in parentheses. For example, ``coord_name (dim_name) 1 2 3 ...``.
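+
+For example, here is a minimal sketch distinguishing the two kinds of coordinates (the names ``x``, ``y``, and ``label`` are invented for this illustration):
+
+.. code-block:: python
+
+    import numpy as np
+    import xarray as xr
+
+    arr = xr.DataArray(
+        np.zeros((2, 3)),
+        dims=("x", "y"),
+        # "x" is a dimension coordinate: its name matches a dimension name.
+        # "label" is a non-dimension coordinate: it labels the "x" dimension,
+        # but "label" itself is not in arr.dims.
+        coords={"x": [10, 20], "label": ("x", ["a", "b"])},
+    )
+
+    arr.coords["x"]      # dimension coordinate, backed by a pandas.Index
+    arr.coords["label"]  # non-dimension coordinate, not indexed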
+ +---- + +**Index:** An *index* is a data structure optimized for efficient selecting and slicing of an associated array. Xarray creates indexes for dimension coordinates so that operations along dimensions are fast, while non-dimension coordinates are not indexed. Under the hood, indexes are implemented as :py:class:`pandas.Index` objects. The index associated with dimension name ``x`` can be retrieved by ``arr.indexes[x]``. By construction, ``len(arr.dims) == len(arr.indexes)`` \ No newline at end of file diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 760ce66ca04..33410f82c2c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,6 +30,10 @@ Bug fixes Documentation ~~~~~~~~~~~~~ + +- Created a glossary of important xarray terms (:issue:`2410`, :pull:`3352`). + By `Gregory Gundersen `_. + - Add examples for :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims`. By `Justus Magin `_. - Add examples for :py:meth:`align`, :py:meth:`merge`, :py:meth:`combine_by_coords`, From 1ab2279769554b7d18e9e33763bee31aa8329e85 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 30 Sep 2019 21:12:22 +0000 Subject: [PATCH 5/7] Add how do I ... section (#3357) * Add how do I ... section * Bbugfix. * Update doc/howdoi.rst Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * Update doc/howdoi.rst Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> * small updates. * Add more. --- doc/_static/style.css | 9 +++++++ doc/howdoi.rst | 59 +++++++++++++++++++++++++++++++++++++++++++ doc/index.rst | 2 ++ doc/time-series.rst | 2 ++ doc/whats-new.rst | 3 ++- 5 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 doc/howdoi.rst diff --git a/doc/_static/style.css b/doc/_static/style.css index 7257d57db66..b7d30f429cf 100644 --- a/doc/_static/style.css +++ b/doc/_static/style.css @@ -16,3 +16,12 @@ .wy-nav-top { background-color: #555; } + +table.colwidths-given { + table-layout: fixed; + width: 100%; +} +table.docutils td { + white-space: unset; + word-wrap: break-word; +} diff --git a/doc/howdoi.rst b/doc/howdoi.rst new file mode 100644 index 00000000000..721d1323e73 --- /dev/null +++ b/doc/howdoi.rst @@ -0,0 +1,59 @@ +.. currentmodule:: xarray + +.. _howdoi: + +How do I ... +============ + +.. list-table:: + :header-rows: 1 + :widths: 40 60 + + * - How do I... 
+     - Solution
+   * - add variables from other datasets to my dataset
+     - :py:meth:`Dataset.merge`
+   * - add a new dimension and/or coordinate
+     - :py:meth:`DataArray.expand_dims`, :py:meth:`Dataset.expand_dims`
+   * - add a new coordinate variable
+     - :py:meth:`DataArray.assign_coords`
+   * - change a data variable to a coordinate variable
+     - :py:meth:`Dataset.set_coords`
+   * - change the order of dimensions
+     - :py:meth:`DataArray.transpose`, :py:meth:`Dataset.transpose`
+   * - remove a variable from my object
+     - :py:meth:`Dataset.drop`, :py:meth:`DataArray.drop`
+   * - remove dimensions of length 1 or 0
+     - :py:meth:`DataArray.squeeze`, :py:meth:`Dataset.squeeze`
+   * - remove all variables with a particular dimension
+     - :py:meth:`Dataset.drop_dims`
+   * - convert non-dimension coordinates to data variables or remove them
+     - :py:meth:`DataArray.reset_coords`, :py:meth:`Dataset.reset_coords`
+   * - rename a variable, dimension or coordinate
+     - :py:meth:`Dataset.rename`, :py:meth:`DataArray.rename`, :py:meth:`Dataset.rename_vars`, :py:meth:`Dataset.rename_dims`
+   * - convert a DataArray to Dataset or vice versa
+     - :py:meth:`DataArray.to_dataset`, :py:meth:`Dataset.to_array`
+   * - extract the underlying array (e.g. numpy or Dask arrays)
+     - :py:attr:`DataArray.data`
+   * - convert to and extract the underlying numpy array
+     - :py:attr:`DataArray.values`
+   * - find out if my xarray object is wrapping a Dask Array
+     - :py:func:`dask.is_dask_collection`
+   * - know how much memory my object requires
+     - :py:attr:`DataArray.nbytes`, :py:attr:`Dataset.nbytes`
+   * - convert a possibly irregularly sampled timeseries to a regularly sampled timeseries
+     - :py:meth:`DataArray.resample`, :py:meth:`Dataset.resample` (see :ref:`resampling` for more)
+   * - apply a function on all data variables in a Dataset
+     - :py:meth:`Dataset.apply`
+   * - write xarray objects with complex values to a netCDF file
+     - :py:func:`Dataset.to_netcdf`, :py:func:`DataArray.to_netcdf` specifying ``engine="h5netcdf", invalid_netcdf=True``
+   * - make xarray objects look like other xarray objects
+     - :py:func:`~xarray.ones_like`, :py:func:`~xarray.zeros_like`, :py:func:`~xarray.full_like`, :py:meth:`Dataset.reindex_like`, :py:meth:`Dataset.interp_like`, :py:meth:`Dataset.broadcast_like`, :py:meth:`DataArray.reindex_like`, :py:meth:`DataArray.interp_like`, :py:meth:`DataArray.broadcast_like`
+   * - replace NaNs with other values
+     - :py:meth:`Dataset.fillna`, :py:meth:`Dataset.ffill`, :py:meth:`Dataset.bfill`, :py:meth:`Dataset.interpolate_na`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.interpolate_na`
+   * - extract the year, month, day or similar from a DataArray of time values
+     - ``obj.dt.month`` for example where ``obj`` is a :py:class:`~xarray.DataArray` containing ``datetime64`` or ``cftime`` values. See :ref:`dt_accessor` for more.
+   * - round off time values to a specified frequency
+     - ``obj.dt.ceil``, ``obj.dt.floor``, ``obj.dt.round``. See :ref:`dt_accessor` for more.
+   * - make a mask that is ``True`` where an object contains any of the values in an array
+     - :py:meth:`Dataset.isin`, :py:meth:`DataArray.isin`
diff --git a/doc/index.rst b/doc/index.rst
index 4cf67a37f4c..e5bd03801ff 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -84,6 +84,7 @@ Documentation
 **Help & reference**
 
 * :doc:`whats-new`
+* :doc:`howdoi`
 * :doc:`api`
 * :doc:`internals`
 * :doc:`roadmap`
@@ -96,6 +97,7 @@ Documentation
    :caption: Help & reference
 
    whats-new
+   howdoi
    api
    internals
    roadmap
diff --git a/doc/time-series.rst b/doc/time-series.rst
index e198887dd0d..1cb535ea886 100644
--- a/doc/time-series.rst
+++ b/doc/time-series.rst
@@ -101,6 +101,8 @@ You can also select a particular time by indexing with a
 
 For more details, read the pandas documentation.
 
+.. _dt_accessor:
+
 Datetime components
 -------------------
 
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 33410f82c2c..f9c952f6752 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -33,7 +33,8 @@ Documentation
 
 - Created a glossary of important xarray terms (:issue:`2410`, :pull:`3352`).
   By `Gregory Gundersen `_.
-
+- Created a "How do I..." section (:ref:`howdoi`) for solutions to common questions (:pull:`3357`).
+  By `Deepak Cherian `_.
 - Add examples for :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims`.
   By `Justus Magin `_.
 - Add examples for :py:meth:`align`, :py:meth:`merge`, :py:meth:`combine_by_coords`,

From fb575eb8ceb85a3c12df87bad278748ab9271985 Mon Sep 17 00:00:00 2001
From: crusaderky
Date: Tue, 1 Oct 2019 15:11:21 +0100
Subject: [PATCH 6/7] Fix codecov.io upload on Windows (#3360)

---
 ci/azure/unit-tests.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ci/azure/unit-tests.yml b/ci/azure/unit-tests.yml
index 43be5a79d47..7ee5132632f 100644
--- a/ci/azure/unit-tests.yml
+++ b/ci/azure/unit-tests.yml
@@ -19,7 +19,8 @@ steps:
   displayName: Run tests
 
 - bash: |
-    bash <(curl https://codecov.io/bash) -t 688f4d53-31bb-49b5-8370-4ce6f792cf3d
+    curl https://codecov.io/bash > codecov.sh
+    bash codecov.sh -t 688f4d53-31bb-49b5-8370-4ce6f792cf3d
  displayName: Upload coverage to codecov.io

# TODO: publish coverage results to Azure, once we can merge them across

From 21705e61503fb49f000186c0d556e5623bd5ac82 Mon Sep 17 00:00:00 2001
From: crusaderky
Date: Tue, 1 Oct 2019 19:13:55 +0100
Subject: [PATCH 7/7] Revisit # noqa annotations (#3359)

---
 asv_bench/benchmarks/__init__.py          |  2 +-
 asv_bench/benchmarks/dataarray_missing.py |  2 +-
 doc/examples/_code/weather_data_setup.py  |  2 +-
 doc/gallery/plot_cartopy_facetgrid.py     |  2 +-
 setup.py                                  |  2 +-
 xarray/backends/api.py                    | 73 ++++++++++-----------
 xarray/backends/locks.py                  |  4 +-
 xarray/conventions.py                     |  2 +-
 xarray/core/alignment.py                  | 16 ++---
 xarray/core/common.py                     | 10 +--
 xarray/core/dataarray.py                  |  2 +-
 xarray/core/dataset.py                    |  8 +--
 xarray/core/indexing.py                   |  4 +-
 xarray/core/merge.py                      | 24 +++----
 xarray/core/rolling_exp.py                |  2 +-
 xarray/testing.py                         | 12 ++--
 xarray/tests/__init__.py                  |  6 +-
 xarray/tests/test_accessor_str.py         |  4 +-
 xarray/tests/test_backends.py             |  8 +--
 xarray/tests/test_coding_times.py         |  4 +-
 xarray/tests/test_dataarray.py            | 44 ++++++------
 xarray/tests/test_dataset.py              |  4 +-
 xarray/tests/test_distributed.py          | 13 ++--
 xarray/tests/test_indexing.py             | 80 +++++++++++------------
 xarray/tests/test_sparse.py               | 10 +--
 xarray/tests/test_ufuncs.py               |  2 +-
 xarray/tests/test_variable.py             |  2 +-
 27 files changed, 167 insertions(+), 177 deletions(-)

diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py
index 
ef647c5a74a..1ffd3afa4ae 100644 --- a/asv_bench/benchmarks/__init__.py +++ b/asv_bench/benchmarks/__init__.py @@ -16,7 +16,7 @@ def decorator(func): def requires_dask(): try: - import dask # noqa + import dask # noqa: F401 except ImportError: raise NotImplementedError diff --git a/asv_bench/benchmarks/dataarray_missing.py b/asv_bench/benchmarks/dataarray_missing.py index 9711e2bbcd0..d79d2558b35 100644 --- a/asv_bench/benchmarks/dataarray_missing.py +++ b/asv_bench/benchmarks/dataarray_missing.py @@ -5,7 +5,7 @@ from . import randn, requires_dask try: - import dask # noqa + import dask # noqa: F401 except ImportError: pass diff --git a/doc/examples/_code/weather_data_setup.py b/doc/examples/_code/weather_data_setup.py index 385f5366ef7..4e4e2ab176e 100644 --- a/doc/examples/_code/weather_data_setup.py +++ b/doc/examples/_code/weather_data_setup.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -import seaborn as sns # noqa, pandas aware plotting library +import seaborn as sns import xarray as xr diff --git a/doc/gallery/plot_cartopy_facetgrid.py b/doc/gallery/plot_cartopy_facetgrid.py index af04ad6856a..11db9b800b5 100644 --- a/doc/gallery/plot_cartopy_facetgrid.py +++ b/doc/gallery/plot_cartopy_facetgrid.py @@ -12,7 +12,7 @@ For more details see `this discussion`_ on github. .. _this discussion: https://github.com/pydata/xarray/issues/1397#issuecomment-299190567 -""" # noqa +""" import cartopy.crs as ccrs diff --git a/setup.py b/setup.py index b829f6e1f98..5cfa4d9f9df 100644 --- a/setup.py +++ b/setup.py @@ -86,7 +86,7 @@ - Issue tracker: http://github.com/pydata/xarray/issues - Source code: http://github.com/pydata/xarray - SciPy2015 talk: https://www.youtube.com/watch?v=X0pAhJgySxk -""" # noqa +""" setup( diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 0d6dedac57e..458a2d0cc42 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -42,12 +42,12 @@ def _get_default_engine_remote_uri(): try: - import netCDF4 # noqa + import netCDF4 # noqa: F401 engine = "netcdf4" except ImportError: # pragma: no cover try: - import pydap # noqa + import pydap # noqa: F401 engine = "pydap" except ImportError: @@ -61,13 +61,13 @@ def _get_default_engine_remote_uri(): def _get_default_engine_grib(): msgs = [] try: - import Nio # noqa + import Nio # noqa: F401 msgs += ["set engine='pynio' to access GRIB files with PyNIO"] except ImportError: # pragma: no cover pass try: - import cfgrib # noqa + import cfgrib # noqa: F401 msgs += ["set engine='cfgrib' to access GRIB files with cfgrib"] except ImportError: # pragma: no cover @@ -80,7 +80,7 @@ def _get_default_engine_grib(): def _get_default_engine_gz(): try: - import scipy # noqa + import scipy # noqa: F401 engine = "scipy" except ImportError: # pragma: no cover @@ -90,12 +90,12 @@ def _get_default_engine_gz(): def _get_default_engine_netcdf(): try: - import netCDF4 # noqa + import netCDF4 # noqa: F401 engine = "netcdf4" except ImportError: # pragma: no cover try: - import scipy.io.netcdf # noqa + import scipy.io.netcdf # noqa: F401 engine = "scipy" except ImportError: @@ -722,44 +722,41 @@ def open_mfdataset( ): """Open multiple files as a single dataset. - If combine='by_coords' then the function ``combine_by_coords`` is used to - combine the datasets into one before returning the result, and if - combine='nested' then ``combine_nested`` is used. 
The filepaths must be - structured according to which combining function is used, the details of - which are given in the documentation for ``combine_by_coords`` and - ``combine_nested``. By default the old (now deprecated) ``auto_combine`` - will be used, please specify either ``combine='by_coords'`` or - ``combine='nested'`` in future. Requires dask to be installed. See - documentation for details on dask [1]. Attributes from the first dataset - file are used for the combined dataset. + If combine='by_coords' then the function ``combine_by_coords`` is used to combine + the datasets into one before returning the result, and if combine='nested' then + ``combine_nested`` is used. The filepaths must be structured according to which + combining function is used, the details of which are given in the documentation for + ``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated) + ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or + ``combine='nested'`` in future. Requires dask to be installed. See documentation for + details on dask [1]. Attributes from the first dataset file are used for the + combined dataset. Parameters ---------- paths : str or sequence - Either a string glob in the form "path/to/my/files/*.nc" or an explicit - list of files to open. Paths can be given as strings or as pathlib - Paths. If concatenation along more than one dimension is desired, then - ``paths`` must be a nested list-of-lists (see ``manual_combine`` for - details). (A string glob will be expanded to a 1-dimensional list.) + Either a string glob in the form "path/to/my/files/*.nc" or an explicit list of + files to open. Paths can be given as strings or as pathlib Paths. If + concatenation along more than one dimension is desired, then ``paths`` must be a + nested list-of-lists (see ``manual_combine`` for details). (A string glob will + be expanded to a 1-dimensional list.) chunks : int or dict, optional - Dictionary with keys given by dimension names and values given by chunk - sizes. In general, these should divide the dimensions of each dataset. - If int, chunk each dimension by ``chunks``. - By default, chunks will be chosen to load entire input files into - memory at once. This has a major impact on performance: please see the - full documentation for more details [2]. + Dictionary with keys given by dimension names and values given by chunk sizes. + In general, these should divide the dimensions of each dataset. If int, chunk + each dimension by ``chunks``. By default, chunks will be chosen to load entire + input files into memory at once. This has a major impact on performance: please + see the full documentation for more details [2]. concat_dim : str, or list of str, DataArray, Index or None, optional - Dimensions to concatenate files along. You only - need to provide this argument if any of the dimensions along which you - want to concatenate is not a dimension in the original datasets, e.g., - if you want to stack a collection of 2D arrays along a third dimension. - Set ``concat_dim=[..., None, ...]`` explicitly to + Dimensions to concatenate files along. You only need to provide this argument + if any of the dimensions along which you want to concatenate is not a dimension + in the original datasets, e.g., if you want to stack a collection of 2D arrays + along a third dimension. Set ``concat_dim=[..., None, ...]`` explicitly to disable concatenation along a particular dimension. 
combine : {'by_coords', 'nested'}, optional - Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is - used to combine all the data. If this argument is not provided, - `xarray.auto_combine` is used, but in the future this behavior will - switch to use `xarray.combine_by_coords` by default. + Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is used to + combine all the data. If this argument is not provided, `xarray.auto_combine` is + used, but in the future this behavior will switch to use + `xarray.combine_by_coords` by default. compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for @@ -854,7 +851,7 @@ def open_mfdataset( .. [1] http://xarray.pydata.org/en/stable/dask.html .. [2] http://xarray.pydata.org/en/stable/dask.html#chunking-and-performance - """ # noqa + """ if isinstance(paths, str): if is_remote_uri(paths): raise ValueError( diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py index 1c5edc215fc..865ce1ddccd 100644 --- a/xarray/backends/locks.py +++ b/xarray/backends/locks.py @@ -21,9 +21,7 @@ NETCDFC_LOCK = SerializableLock() -_FILE_LOCKS = ( - weakref.WeakValueDictionary() -) # type: MutableMapping[Any, threading.Lock] # noqa +_FILE_LOCKS = weakref.WeakValueDictionary() # type: MutableMapping[Any, threading.Lock] def _get_threaded_lock(key): diff --git a/xarray/conventions.py b/xarray/conventions.py index 1e40d254e96..a3424db66ee 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -753,7 +753,7 @@ def cf_encoder(variables, attributes): for var in new_vars.values(): bounds = var.attrs["bounds"] if "bounds" in var.attrs else None if bounds and bounds in new_vars: - # see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries # noqa + # see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries for attr in [ "units", "standard_name", diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index c26b879d839..4529fa509d9 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -13,8 +13,8 @@ from .variable import IndexVariable, Variable if TYPE_CHECKING: - from .dataarray import DataArray # noqa: F401 - from .dataset import Dataset # noqa: F401 + from .dataarray import DataArray + from .dataset import Dataset def _get_joiner(join): @@ -350,8 +350,8 @@ def deep_align( This function is not public API. """ - from .dataarray import DataArray # noqa: F811 - from .dataset import Dataset # noqa: F811 + from .dataarray import DataArray + from .dataset import Dataset if indexes is None: indexes = {} @@ -411,7 +411,7 @@ def is_alignable(obj): def reindex_like_indexers( - target: Union["DataArray", "Dataset"], other: Union["DataArray", "Dataset"] + target: "Union[DataArray, Dataset]", other: "Union[DataArray, Dataset]" ) -> Dict[Hashable, pd.Index]: """Extract indexers to align target with other. @@ -503,7 +503,7 @@ def reindex_variables( new_indexes : OrderedDict Dict of indexes associated with the reindexed variables. 
""" - from .dataarray import DataArray # noqa: F811 + from .dataarray import DataArray # create variables for the new dataset reindexed = OrderedDict() # type: OrderedDict[Any, Variable] @@ -600,8 +600,8 @@ def _get_broadcast_dims_map_common_coords(args, exclude): def _broadcast_helper(arg, exclude, dims_map, common_coords): - from .dataarray import DataArray # noqa: F811 - from .dataset import Dataset # noqa: F811 + from .dataarray import DataArray + from .dataset import Dataset def _set_dims(var): # Add excluded dims to a copy of dims_map diff --git a/xarray/core/common.py b/xarray/core/common.py index 5b166890575..a8fac245c02 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -293,7 +293,7 @@ def _ipython_key_completions_(self) -> List[str]: """Provide method for the key-autocompletions in IPython. See http://ipython.readthedocs.io/en/stable/config/integrating.html#tab-completion For the details. - """ # noqa + """ item_lists = [ item for sublist in self._item_sources @@ -669,7 +669,7 @@ def groupby(self, group, squeeze: bool = True, restore_coord_dims: bool = None): -------- core.groupby.DataArrayGroupBy core.groupby.DatasetGroupBy - """ # noqa + """ return self._groupby_cls( self, group, squeeze=squeeze, restore_coord_dims=restore_coord_dims ) @@ -732,7 +732,7 @@ def groupby_bins( References ---------- .. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html - """ # noqa + """ return self._groupby_cls( self, group, @@ -808,7 +808,7 @@ def rolling( -------- core.rolling.DataArrayRolling core.rolling.DatasetRolling - """ # noqa + """ dim = either_dict_or_kwargs(dim, window_kwargs, "rolling") return self._rolling_cls(self, dim, min_periods=min_periods, center=center) @@ -1005,7 +1005,7 @@ def resample( ---------- .. [1] http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases - """ # noqa + """ # TODO support non-string indexer after removing the old API. 
from .dataarray import DataArray diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3becce7e432..68bfe301bfc 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3054,7 +3054,7 @@ def integrate( return self._from_temp_dataset(ds) # this needs to be at the end, or mypy will confuse with `str` - # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names # noqa + # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = property(StringAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5fa96216ba0..9a1339cf528 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1063,7 +1063,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": See Also -------- pandas.DataFrame.copy - """ # noqa + """ if data is None: variables = OrderedDict( (k, v.copy(deep=deep)) for k, v in self._variables.items() @@ -1714,7 +1714,7 @@ def chunk( from dask.base import tokenize except ImportError: # raise the usual error if dask is entirely missing - import dask # noqa + import dask # noqa: F401 raise ImportError("xarray requires dask version 0.9 or newer") @@ -4178,7 +4178,7 @@ def apply( Data variables: foo (dim_0, dim_1) float64 0.3751 1.951 1.945 0.2948 0.711 0.3948 bar (x) float64 1.0 2.0 - """ # noqa + """ variables = OrderedDict( (k, maybe_wrap_array(v, func(v, *args, **kwargs))) for k, v in self.data_vars.items() @@ -5381,7 +5381,7 @@ def filter_by_attrs(self, **kwargs): temperature (x, y, time) float64 25.86 20.82 6.954 23.13 10.25 11.68 ... precipitation (x, y, time) float64 5.702 0.9422 2.075 1.178 3.284 ... - """ # noqa + """ selection = [] for var_name, variable in self.variables.items(): has_value_flag = False diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c6a8f6f35e4..6d42c254438 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -331,7 +331,7 @@ class ExplicitIndexer: __slots__ = ("_key",) def __init__(self, key): - if type(self) is ExplicitIndexer: # noqa + if type(self) is ExplicitIndexer: raise TypeError("cannot instantiate base ExplicitIndexer objects") self._key = tuple(key) @@ -1261,7 +1261,7 @@ def _indexing_array_and_key(self, key): array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see - # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#detailed-notes). # noqa + # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#detailed-notes). key = key.tuple + (Ellipsis,) else: raise TypeError("unexpected key type: {}".format(type(key))) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index ca753c588d4..ceeb7db09f1 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -94,7 +94,7 @@ def unique_variable(name, variables, compat="broadcast_equals", equals=None): Raises ------ MergeError: if any of the variables are not equal. - """ # noqa + """ out = variables[0] if len(variables) == 1 or compat == "override": @@ -171,7 +171,7 @@ def merge_variables( OrderedDict with keys taken by the union of keys on list_of_variable_dicts, and Variable values corresponding to those that should be found on the merged result. - """ # noqa + """ if priority_vars is None: priority_vars = {} @@ -231,7 +231,7 @@ def expand_variable_dicts( an input's values. The values of each ordered dictionary are all xarray.Variable objects. 
""" - from .dataarray import DataArray # noqa: F811 + from .dataarray import DataArray from .dataset import Dataset var_dicts = [] @@ -278,7 +278,7 @@ def determine_coords( All variable found in the input should appear in either the set of coordinate or non-coordinate names. """ - from .dataarray import DataArray # noqa: F811 + from .dataarray import DataArray from .dataset import Dataset coord_names = set() # type: set @@ -313,7 +313,7 @@ def coerce_pandas_values(objects: Iterable["DatasetLike"]) -> List["DatasetLike" List of Dataset or OrderedDict objects. Any inputs or values in the inputs that were pandas objects have been converted into native xarray objects. """ - from .dataarray import DataArray # noqa: F811 + from .dataarray import DataArray from .dataset import Dataset out = [] @@ -363,7 +363,7 @@ def _get_priority_vars(objects, priority_arg, compat="equals"): ------- None, if priority_arg is None, or an OrderedDict with Variable objects as values indicating priority variables. - """ # noqa + """ if priority_arg is None: priority_vars = {} else: @@ -485,7 +485,7 @@ def merge_core( Raises ------ MergeError if the merge cannot be done successfully. - """ # noqa + """ from .dataset import calculate_dimensions _assert_compat_valid(compat) @@ -592,7 +592,7 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA): Coordinates: * lat (lat) float64 35.0 40.0 * lon (lon) float64 100.0 120.0 - + >>> y array([[5., 6.], @@ -632,7 +632,7 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA): var1 (lat, lon) float64 1.0 2.0 nan 3.0 5.0 nan nan nan nan var2 (lat, lon) float64 5.0 nan 6.0 nan nan nan 7.0 nan 8.0 var3 (time, lon) float64 0.0 nan 3.0 4.0 nan 9.0 - + >>> xr.merge([x, y, z], compat='equals') Dimensions: (lat: 3, lon: 3, time: 2) @@ -718,8 +718,8 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA): See also -------- concat - """ # noqa - from .dataarray import DataArray # noqa: F811 + """ + from .dataarray import DataArray from .dataset import Dataset dict_like_objects = list() @@ -793,7 +793,7 @@ def dataset_update_method( `xarray.Dataset`, e.g., if it's a dict with DataArray values (GH2068, GH2180). 
""" - from .dataarray import DataArray # noqa: F811 + from .dataarray import DataArray from .dataset import Dataset if not isinstance(other, Dataset): diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 057884fef85..2139d246f46 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -81,7 +81,7 @@ class RollingExp: Returns ------- RollingExp : type of input argument - """ # noqa + """ def __init__(self, obj, windows, window_type="span"): self.obj = obj diff --git a/xarray/testing.py b/xarray/testing.py index 9fa58b64001..787ec1aadb0 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -50,8 +50,8 @@ def assert_equal(a, b): assert_identical, assert_allclose, Dataset.equals, DataArray.equals, numpy.testing.assert_array_equal """ - __tracebackhide__ = True # noqa: F841 - assert type(a) == type(b) # noqa + __tracebackhide__ = True + assert type(a) == type(b) if isinstance(a, (Variable, DataArray)): assert a.equals(b), formatting.diff_array_repr(a, b, "equals") elif isinstance(a, Dataset): @@ -77,8 +77,8 @@ def assert_identical(a, b): -------- assert_equal, assert_allclose, Dataset.equals, DataArray.equals """ - __tracebackhide__ = True # noqa: F841 - assert type(a) == type(b) # noqa + __tracebackhide__ = True + assert type(a) == type(b) if isinstance(a, Variable): assert a.identical(b), formatting.diff_array_repr(a, b, "identical") elif isinstance(a, DataArray): @@ -115,8 +115,8 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True): -------- assert_identical, assert_equal, numpy.testing.assert_allclose """ - __tracebackhide__ = True # noqa: F841 - assert type(a) == type(b) # noqa + __tracebackhide__ = True + assert type(a) == type(b) kwargs = dict(rtol=rtol, atol=atol, decode_bytes=decode_bytes) if isinstance(a, Variable): assert a.dims == b.dims diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index ab1d2714b9d..4f5a3e37888 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -4,7 +4,7 @@ import warnings from contextlib import contextmanager from distutils import version -from unittest import mock # noqa +from unittest import mock # noqa: F401 import numpy as np import pytest @@ -12,7 +12,7 @@ import xarray.testing from xarray.core import utils -from xarray.core.duck_array_ops import allclose_or_equiv # noqa +from xarray.core.duck_array_ops import allclose_or_equiv # noqa: F401 from xarray.core.indexing import ExplicitlyIndexed from xarray.core.options import set_options from xarray.plot.utils import import_seaborn @@ -127,7 +127,7 @@ def LooseVersion(vstring): @contextmanager def raises_regex(error, pattern): - __tracebackhide__ = True # noqa: F841 + __tracebackhide__ = True with pytest.raises(error) as excinfo: yield message = str(excinfo.value) diff --git a/xarray/tests/test_accessor_str.py b/xarray/tests/test_accessor_str.py index 360653b229b..56bf6dbb3a2 100644 --- a/xarray/tests/test_accessor_str.py +++ b/xarray/tests/test_accessor_str.py @@ -142,14 +142,14 @@ def test_replace(dtype): def test_replace_callable(): values = xr.DataArray(["fooBAD__barBAD"]) # test with callable - repl = lambda m: m.group(0).swapcase() # noqa + repl = lambda m: m.group(0).swapcase() result = values.str.replace("[a-z][A-Z]{2}", repl, n=2) exp = xr.DataArray(["foObaD__baRbaD"]) assert_equal(result, exp) # test regex named groups values = xr.DataArray(["Foo Bar Baz"]) pat = r"(?P\w+) (?P\w+) (?P\w+)" - repl = lambda m: m.group("middle").swapcase() # noqa + repl = lambda m: m.group("middle").swapcase() result = 
values.str.replace(pat, repl) exp = xr.DataArray(["bAR"]) assert_equal(result, exp) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 04801a64c7f..87958824c7b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2861,11 +2861,9 @@ def test_encoding_mfdataset(self): ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) with open_mfdataset([tmp1, tmp2], combine="nested") as actual: - assert ( - actual.t.encoding["units"] == original.t.encoding["units"] - ) # noqa - assert actual.t.encoding["units"] == ds1.t.encoding["units"] # noqa - assert actual.t.encoding["units"] != ds2.t.encoding["units"] # noqa + assert actual.t.encoding["units"] == original.t.encoding["units"] + assert actual.t.encoding["units"] == ds1.t.encoding["units"] + assert actual.t.encoding["units"] != ds2.t.encoding["units"] def test_preprocess_mfdataset(self): original = Dataset({"foo": ("x", np.random.randn(10))}) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 615a7e00172..a778ff8147f 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -738,9 +738,7 @@ def test_encode_time_bounds(): with pytest.raises(AssertionError): assert_equal(encoded["time_bounds"], expected["time_bounds"]) assert "calendar" not in encoded["time_bounds"].attrs - assert ( - encoded["time_bounds"].attrs["units"] == ds.time_bounds.encoding["units"] - ) # noqa + assert encoded["time_bounds"].attrs["units"] == ds.time_bounds.encoding["units"] ds.time.encoding = {} with pytest.warns(UserWarning): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 9ba3eecc5a0..717025afb23 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -486,32 +486,32 @@ def test_getitem(self): assert_identical(self.ds["x"], x) assert_identical(self.ds["y"], y) - I = ReturnItem() # noqa + arr = ReturnItem() for i in [ - I[:], - I[...], - I[x.values], - I[x.variable], - I[x], - I[x, y], - I[x.values > -1], - I[x.variable > -1], - I[x > -1], - I[x > -1, y > -1], + arr[:], + arr[...], + arr[x.values], + arr[x.variable], + arr[x], + arr[x, y], + arr[x.values > -1], + arr[x.variable > -1], + arr[x > -1], + arr[x > -1, y > -1], ]: assert_equal(self.dv, self.dv[i]) for i in [ - I[0], - I[:, 0], - I[:3, :2], - I[x.values[:3]], - I[x.variable[:3]], - I[x[:3]], - I[x[:3], y[:4]], - I[x.values > 3], - I[x.variable > 3], - I[x > 3], - I[x > 3, y > 3], + arr[0], + arr[:, 0], + arr[:3, :2], + arr[x.values[:3]], + arr[x.variable[:3]], + arr[x[:3]], + arr[x[:3], y[:4]], + arr[x.values > 3], + arr[x.variable > 3], + arr[x > 3], + arr[x > 3, y > 3], ]: assert_array_equal(self.v[i], self.dv[i]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f02990a1be9..5d856c9f323 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -557,7 +557,7 @@ def test_properties(self): # change them inadvertently: assert isinstance(ds.dims, utils.Frozen) assert isinstance(ds.dims.mapping, utils.SortedKeysDict) - assert type(ds.dims.mapping.mapping) is dict # noqa + assert type(ds.dims.mapping.mapping) is dict assert list(ds) == list(ds.data_vars) assert list(ds.keys()) == list(ds.data_vars) @@ -2359,7 +2359,7 @@ def test_rename(self): renamed[k].variable.to_base_variable(), ) assert v.encoding == renamed[k].encoding - assert type(v) == type(renamed.variables[k]) # noqa: E721 + assert type(v) is type(renamed.variables[k]) # noqa: E721 assert "var1" not in renamed assert "dim2" not in 
renamed diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index 0929efc56f2..a3bea6db85f 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -1,5 +1,4 @@ """ isort:skip_file """ -# flake8: noqa: E402 - ignore linters re order of imports import pickle import pytest @@ -9,7 +8,7 @@ from dask.distributed import Client, Lock from distributed.utils_test import cluster, gen_cluster -from distributed.utils_test import loop # noqa +from distributed.utils_test import loop from distributed.client import futures_of import xarray as xr @@ -74,7 +73,7 @@ def tmp_netcdf_filename(tmpdir): ] -@pytest.mark.parametrize("engine,nc_format", ENGINES_AND_FORMATS) # noqa +@pytest.mark.parametrize("engine,nc_format", ENGINES_AND_FORMATS) def test_dask_distributed_netcdf_roundtrip( loop, tmp_netcdf_filename, engine, nc_format ): @@ -106,7 +105,7 @@ def test_dask_distributed_netcdf_roundtrip( assert_allclose(original, computed) -@pytest.mark.parametrize("engine,nc_format", ENGINES_AND_FORMATS) # noqa +@pytest.mark.parametrize("engine,nc_format", ENGINES_AND_FORMATS) def test_dask_distributed_read_netcdf_integration_test( loop, tmp_netcdf_filename, engine, nc_format ): @@ -130,7 +129,7 @@ def test_dask_distributed_read_netcdf_integration_test( assert_allclose(original, computed) -@requires_zarr # noqar +@requires_zarr @pytest.mark.parametrize("consolidated", [True, False]) @pytest.mark.parametrize("compute", [True, False]) def test_dask_distributed_zarr_integration_test(loop, consolidated, compute): @@ -158,7 +157,7 @@ def test_dask_distributed_zarr_integration_test(loop, consolidated, compute): assert_allclose(original, computed) -@requires_rasterio # noqa +@requires_rasterio def test_dask_distributed_rasterio_integration_test(loop): with create_tmp_geotiff() as (tmp_file, expected): with cluster() as (s, [a, b]): @@ -169,7 +168,7 @@ def test_dask_distributed_rasterio_integration_test(loop): assert_allclose(actual, expected) -@requires_cfgrib # noqa +@requires_cfgrib def test_dask_distributed_cfgrib_integration_test(loop): with cluster() as (s, [a, b]): with Client(s["address"], loop=loop): diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 82ee9b63f9d..ba108b2dbaf 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -21,24 +21,24 @@ def set_to_zero(self, x, i): def test_expanded_indexer(self): x = np.random.randn(10, 11, 12, 13, 14) y = np.arange(5) - I = ReturnItem() # noqa + arr = ReturnItem() for i in [ - I[:], - I[...], - I[0, :, 10], - I[..., 10], - I[:5, ..., 0], - I[..., 0, :], - I[y], - I[y, y], - I[..., y, y], - I[..., 0, 1, 2, 3, 4], + arr[:], + arr[...], + arr[0, :, 10], + arr[..., 10], + arr[:5, ..., 0], + arr[..., 0, :], + arr[y], + arr[y, y], + arr[..., y, y], + arr[..., 0, 1, 2, 3, 4], ]: j = indexing.expanded_indexer(i, x.ndim) assert_array_equal(x[i], x[j]) assert_array_equal(self.set_to_zero(x, i), self.set_to_zero(x, j)) with raises_regex(IndexError, "too many indices"): - indexing.expanded_indexer(I[1, 2, 3], 2) + indexing.expanded_indexer(arr[1, 2, 3], 2) def test_asarray_tuplesafe(self): res = indexing._asarray_tuplesafe(("a", 1)) @@ -184,27 +184,27 @@ def test_read_only_view(self): class TestLazyArray: def test_slice_slice(self): - I = ReturnItem() # noqa: E741 # allow ambiguous name + arr = ReturnItem() for size in [100, 99]: # We test even/odd size cases x = np.arange(size) slices = [ - I[:3], - I[:4], - I[2:4], - I[:1], - I[:-1], - I[5:-1], - I[-5:-1], - I[::-1], - 
I[5::-1], - I[:3:-1], - I[:30:-1], - I[10:4:], - I[::4], - I[4:4:4], - I[:4:-4], - I[::-2], + arr[:3], + arr[:4], + arr[2:4], + arr[:1], + arr[:-1], + arr[5:-1], + arr[-5:-1], + arr[::-1], + arr[5::-1], + arr[:3:-1], + arr[:30:-1], + arr[10:4:], + arr[::4], + arr[4:4:4], + arr[:4:-4], + arr[::-2], ] for i in slices: for j in slices: @@ -219,9 +219,9 @@ def test_lazily_indexed_array(self): v = Variable(["i", "j", "k"], original) lazy = indexing.LazilyOuterIndexedArray(x) v_lazy = Variable(["i", "j", "k"], lazy) - I = ReturnItem() # noqa: E741 # allow ambiguous name + arr = ReturnItem() # test orthogonally applied indexers - indexers = [I[:], 0, -2, I[:3], [0, 1, 2, 3], [0], np.arange(10) < 5] + indexers = [arr[:], 0, -2, arr[:3], [0, 1, 2, 3], [0], np.arange(10) < 5] for i in indexers: for j in indexers: for k in indexers: @@ -252,12 +252,12 @@ def test_lazily_indexed_array(self): # test sequentially applied indexers indexers = [ (3, 2), - (I[:], 0), - (I[:2], -1), - (I[:4], [0]), + (arr[:], 0), + (arr[:2], -1), + (arr[:4], [0]), ([4, 5], 0), ([0, 1, 2], [0, 1]), - ([0, 3, 5], I[:2]), + ([0, 3, 5], arr[:2]), ] for i, j in indexers: expected = v[i][j] @@ -288,7 +288,7 @@ def test_vectorized_lazily_indexed_array(self): v_eager = Variable(["i", "j", "k"], x) lazy = indexing.LazilyOuterIndexedArray(x) v_lazy = Variable(["i", "j", "k"], lazy) - I = ReturnItem() # noqa: E741 # allow ambiguous name + arr = ReturnItem() def check_indexing(v_eager, v_lazy, indexers): for indexer in indexers: @@ -307,7 +307,7 @@ def check_indexing(v_eager, v_lazy, indexers): v_lazy = actual # test orthogonal indexing - indexers = [(I[:], 0, 1), (Variable("i", [0, 1]),)] + indexers = [(arr[:], 0, 1), (Variable("i", [0, 1]),)] check_indexing(v_eager, v_lazy, indexers) # vectorized indexing @@ -637,13 +637,13 @@ def nonzero(x): original = np.random.rand(10, 20, 30) v = Variable(["i", "j", "k"], original) - I = ReturnItem() # noqa: E741 # allow ambiguous name + arr = ReturnItem() # test orthogonally applied indexers indexers = [ - I[:], + arr[:], 0, -2, - I[:3], + arr[:3], np.array([0, 1, 2, 3]), np.array([0]), np.arange(10) < 5, diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 9393d073cb7..4a0c6c58619 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -176,7 +176,7 @@ def test_variable_property(prop): ), param( do("pad_with_fill_value", pad_widths={"x": (1, 1)}, fill_value=5), - True, # noqa + True, marks=xfail(reason="Missing implementation for np.pad"), ), (do("prod"), False), @@ -430,7 +430,7 @@ def test_dataarray_property(prop): ), True, marks=xfail(reason="Indexing COO with more than one iterable index"), - ), # noqa + ), param(do("interpolate_na", "x"), True, marks=xfail(reason="Coercion to dense")), param( do("isin", [1, 2, 3]), @@ -477,13 +477,13 @@ def test_dataarray_property(prop): ), True, marks=xfail(reason="Indexing COO with more than one iterable index"), - ), # noqa + ), (do("roll", x=2, roll_coords=True), True), param( do("sel", x=[0, 1, 2], y=[2, 3]), True, marks=xfail(reason="Indexing COO with more than one iterable index"), - ), # noqa + ), param( do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd") ), @@ -495,7 +495,7 @@ def test_dataarray_property(prop): do("where", make_xrarray({"x": 10, "y": 5}) > 0.5), False, marks=xfail(reason="Conversion of dense to sparse when using sparse mask"), - ), # noqa + ), ], ids=repr, ) diff --git a/xarray/tests/test_ufuncs.py b/xarray/tests/test_ufuncs.py index dc8ba22f57c..1095cc360dd 
100644 --- a/xarray/tests/test_ufuncs.py +++ b/xarray/tests/test_ufuncs.py @@ -12,7 +12,7 @@ def assert_identical(a, b): - assert type(a) is type(b) or (float(a) == float(b)) # noqa + assert type(a) is type(b) or float(a) == float(b) if isinstance(a, (xr.DataArray, xr.Dataset, xr.Variable)): assert_identical_(a, b) else: diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 43551d62265..7f9538c9ea9 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -101,7 +101,7 @@ def test_getitem_1d_fancy(self): ind = Variable(("a", "b"), [[0, 1], [0, 1]]) v_new = v[ind] assert v_new.dims == ("a", "b") - expected = np.array(v._data)[([0, 1], [0, 1]),] # noqa + expected = np.array(v._data)[([0, 1], [0, 1]), ...] assert_array_equal(v_new, expected) # boolean indexing
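The final ``test_variable.py`` hunk swaps a bare trailing comma for an explicit ``...`` when building the expected array. The behaviour it relies on: a tuple used as a single index element acts as one advanced (fancy) index rather than as per-axis indices. A toy NumPy check; the array and shapes are made up for illustration:

>>> import numpy as np
>>> x = np.arange(12).reshape(3, 4)
>>> x[([0, 1], [0, 1]), ...].shape  # one (2, 2) fancy index on axis 0
(2, 2, 4)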