Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add head, tail and thin methods #3278

Merged
merged 5 commits into from
Sep 5, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ Indexing
Dataset.loc
Dataset.isel
Dataset.sel
Dataset.head
Dataset.tail
Dataset.thin
Dataset.squeeze
Dataset.interp
Dataset.interp_like
Expand Down Expand Up @@ -279,6 +282,9 @@ Indexing
DataArray.loc
DataArray.isel
DataArray.sel
DataArray.head
DataArray.tail
DataArray.thin
DataArray.squeeze
DataArray.interp
DataArray.interp_like
Expand Down
5 changes: 4 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ New functions/methods
Currently only :py:meth:`Dataset.plot.scatter` is implemented.
By `Yohai Bar Sinai <https://github.com/yohai>`_ and `Deepak Cherian <https://github.com/dcherian>`_

- Added `head`, `tail` and `thin` methods to `Dataset` and `DataArray`. (:issue:`319`)
By `Gerardo Rivera <https://github.com/dangomelon>`_.

Enhancements
~~~~~~~~~~~~

Expand All @@ -102,7 +105,7 @@ Enhancements

- Added the ability to initialize an empty or full DataArray
with a single value. (:issue:`277`)
By `Gerardo Rivera <http://github.com/dangomelon>`_.
By `Gerardo Rivera <https://github.com/dangomelon>`_.

- :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used
with ``engine="h5netcdf"``. It is passed to :py:func:`h5netcdf.File`.
Expand Down
49 changes: 49 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,55 @@ def sel(
)
return self._from_temp_dataset(ds)

def head(
    self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
) -> "DataArray":
    """Return a new DataArray whose data is given by the first `n`
    values along the specified dimension(s).

    Parameters
    ----------
    indexers : dict, optional
        A dict with keys matching dimensions and integer values `n`.
        One of indexers or indexers_kwargs must be provided.
    **indexers_kwargs : {dim: n, ...}, optional
        The keyword arguments form of ``indexers``.
        One of indexers or indexers_kwargs must be provided.

    See Also
    --------
    Dataset.head
    DataArray.tail
    DataArray.thin
    """
    # Delegate to Dataset.head on a temporary single-variable dataset,
    # then rebuild the DataArray from the result.
    indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head")
    ds = self._to_temp_dataset().head(indexers=indexers)
    return self._from_temp_dataset(ds)

def tail(
    self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
) -> "DataArray":
    """Return a new DataArray whose data is given by the last `n`
    values along the specified dimension(s).

    Parameters
    ----------
    indexers : dict, optional
        A dict with keys matching dimensions and integer values `n`.
        One of indexers or indexers_kwargs must be provided.
    **indexers_kwargs : {dim: n, ...}, optional
        The keyword arguments form of ``indexers``.
        One of indexers or indexers_kwargs must be provided.

    See Also
    --------
    Dataset.tail
    DataArray.head
    DataArray.thin
    """
    # Delegate to Dataset.tail on a temporary single-variable dataset,
    # then rebuild the DataArray from the result.
    indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail")
    ds = self._to_temp_dataset().tail(indexers=indexers)
    return self._from_temp_dataset(ds)

def thin(
    self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
) -> "DataArray":
    """Return a new DataArray whose data is given by every `n`th value
    along the specified dimension(s).

    Parameters
    ----------
    indexers : dict, optional
        A dict with keys matching dimensions and integer values `n`.
        One of indexers or indexers_kwargs must be provided.
    **indexers_kwargs : {dim: n, ...}, optional
        The keyword arguments form of ``indexers``.
        One of indexers or indexers_kwargs must be provided.

    See Also
    --------
    Dataset.thin
    DataArray.head
    DataArray.tail
    """
    # Delegate to Dataset.thin on a temporary single-variable dataset,
    # then rebuild the DataArray from the result.
    indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin")
    ds = self._to_temp_dataset().thin(indexers=indexers)
    return self._from_temp_dataset(ds)

def broadcast_like(
self, other: Union["DataArray", Dataset], exclude: Iterable[Hashable] = None
) -> "DataArray":
Expand Down
84 changes: 84 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2008,6 +2008,90 @@ def sel(
result = self.isel(indexers=pos_indexers, drop=drop)
return result._overwrite_indexes(new_indexes)

def head(
    self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
) -> "Dataset":
    """Returns a new dataset with the first `n` values of each array
    for the specified dimension(s).

    Parameters
    ----------
    indexers : dict, optional
        A dict with keys matching dimensions and integer values `n`.
        One of indexers or indexers_kwargs must be provided.
    **indexers_kwargs : {dim: n, ...}, optional
        The keyword arguments form of ``indexers``.
        One of indexers or indexers_kwargs must be provided.

    See Also
    --------
    Dataset.tail
    Dataset.thin
    DataArray.head
    """
    indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head")
    # slice(n) selects the first n elements along each requested dimension
    # (an empty selection when n == 0).
    indexers = {k: slice(val) for k, val in indexers.items()}
    return self.isel(indexers)

def tail(
    self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
) -> "Dataset":
    """Returns a new dataset with the last `n` values of each array
    for the specified dimension(s).

    Parameters
    ----------
    indexers : dict, optional
        A dict with keys matching dimensions and integer values `n`.
        One of indexers or indexers_kwargs must be provided.
    **indexers_kwargs : {dim: n, ...}, optional
        The keyword arguments form of ``indexers``.
        One of indexers or indexers_kwargs must be provided.

    See Also
    --------
    Dataset.head
    Dataset.thin
    DataArray.tail
    """
    indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail")
    # slice(-n, None) keeps the last n elements.  n == 0 needs a special
    # case because slice(-0, None) == slice(0, None) would select
    # everything; slice(0) gives the intended empty selection.
    indexers = {
        k: slice(-val, None) if val != 0 else slice(val)
        for k, val in indexers.items()
    }
    return self.isel(indexers)

def thin(
    self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
) -> "Dataset":
    """Returns a new dataset with each array indexed along every `n`th
    value for the specified dimension(s).

    Parameters
    ----------
    indexers : dict, optional
        A dict with keys matching dimensions and integer values `n`.
        One of indexers or indexers_kwargs must be provided.
    **indexers_kwargs : {dim: n, ...}, optional
        The keyword arguments form of ``indexers``.
        One of indexers or indexers_kwargs must be provided.

    Raises
    ------
    ValueError
        If any of the requested steps is zero.

    See Also
    --------
    Dataset.head
    Dataset.tail
    DataArray.thin
    """
    indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin")
    # A zero step is meaningless for slicing and would make isel raise a
    # confusing error downstream, so reject it up front.
    if 0 in indexers.values():
        raise ValueError("step cannot be zero")
    indexers = {k: slice(None, None, val) for k, val in indexers.items()}
    return self.isel(indexers)

def broadcast_like(
self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None
) -> "Dataset":
Expand Down
32 changes: 32 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1411,6 +1411,38 @@ def test_isel_drop(self):
selected = data.isel(x=0, drop=False)
assert_identical(expected, selected)

def test_head(self):
    # head must behave exactly like isel with a forward slice.
    data = create_test_data()

    # select along two dimensions at once
    assert_equal(
        data.isel(time=slice(5), dim2=slice(6)), data.head(time=5, dim2=6)
    )

    # n == 0 yields an empty selection along that dimension
    assert_equal(data.isel(time=slice(0)), data.head(time=0))

def test_tail(self):
    # tail must behave exactly like isel with a negative-start slice.
    data = create_test_data()

    # select along two dimensions at once
    assert_equal(
        data.isel(time=slice(-5, None), dim2=slice(-6, None)),
        data.tail(time=5, dim2=6),
    )

    # n == 0 must give an empty selection, not the whole dimension
    assert_equal(data.isel(dim1=slice(0)), data.tail(dim1=0))

def test_thin(self):
    # thin must behave exactly like isel with a step-only slice.
    data = create_test_data()

    assert_equal(
        data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6)),
        data.thin(time=5, dim2=6),
    )

    # a step of zero is rejected explicitly
    with raises_regex(ValueError, "cannot be zero"):
        data.thin(time=0)

@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_sel_fancy(self):
data = create_test_data()
Expand Down