Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accept int value in head, thin and tail #3298

Merged
merged 5 commits into from
Sep 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 15 additions & 13 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,52 +1035,54 @@ def sel(
return self._from_temp_dataset(ds)

def head(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "DataArray":
"""Return a new DataArray whose data is given by the the first `n`
values along the specified dimension(s).
values along the specified dimension(s). Default `n` = 5
See Also
--------
Dataset.head
DataArray.tail
DataArray.thin
"""

indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head")
ds = self._to_temp_dataset().head(indexers=indexers)
ds = self._to_temp_dataset().head(indexers, **indexers_kwargs)
return self._from_temp_dataset(ds)

def tail(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "DataArray":
"""Return a new DataArray whose data is given by the the last `n`
values along the specified dimension(s).
values along the specified dimension(s). Default `n` = 5
See Also
--------
Dataset.tail
DataArray.head
DataArray.thin
"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail")
ds = self._to_temp_dataset().tail(indexers=indexers)
ds = self._to_temp_dataset().tail(indexers, **indexers_kwargs)
return self._from_temp_dataset(ds)

def thin(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "DataArray":
"""Return a new DataArray whose data is given by each `n` value
along the specified dimension(s).
along the specified dimension(s). Default `n` = 5
See Also
--------
Dataset.thin
DataArray.head
DataArray.tail
"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin")
ds = self._to_temp_dataset().thin(indexers=indexers)
ds = self._to_temp_dataset().thin(indexers, **indexers_kwargs)
return self._from_temp_dataset(ds)

def broadcast_like(
Expand Down
99 changes: 81 additions & 18 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2009,15 +2009,18 @@ def sel(
return result._overwrite_indexes(new_indexes)

def head(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "Dataset":
"""Returns a new dataset with the first `n` values of each array
for the specified dimension(s).
Parameters
----------
indexers : dict, optional
A dict with keys matching dimensions and integer values `n`.
indexers : dict or int, default: 5
A dict with keys matching dimensions and integer values `n`
or a single integer `n` applied over all dimensions.
One of indexers or indexers_kwargs must be provided.
**indexers_kwargs : {dim: n, ...}, optional
The keyword arguments form of ``indexers``.
Expand All @@ -2030,20 +2033,41 @@ def head(
Dataset.thin
DataArray.head
"""
if not indexers_kwargs:
if indexers is None:
indexers = 5
if not isinstance(indexers, int) and not is_dict_like(indexers):
griverat marked this conversation as resolved.
Show resolved Hide resolved
raise TypeError("indexers must be either dict-like or a single integer")
if isinstance(indexers, int):
indexers = {dim: indexers for dim in self.dims}
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head")
indexers = {k: slice(val) for k, val in indexers.items()}
return self.isel(indexers)
for k, v in indexers.items():
if not isinstance(v, int):
raise TypeError(
"expected integer type indexer for "
"dimension %r, found %r" % (k, type(v))
)
elif v < 0:
raise ValueError(
"expected positive integer as indexer "
"for dimension %r, found %s" % (k, v)
)
indexers_slices = {k: slice(val) for k, val in indexers.items()}
return self.isel(indexers_slices)

def tail(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "Dataset":
"""Returns a new dataset with the last `n` values of each array
for the specified dimension(s).
Parameters
----------
indexers : dict, optional
A dict with keys matching dimensions and integer values `n`.
indexers : dict or int, default: 5
A dict with keys matching dimensions and integer values `n`
or a single integer `n` applied over all dimensions.
One of indexers or indexers_kwargs must be provided.
**indexers_kwargs : {dim: n, ...}, optional
The keyword arguments form of ``indexers``.
Expand All @@ -2056,24 +2080,44 @@ def tail(
Dataset.thin
DataArray.tail
"""

if not indexers_kwargs:
if indexers is None:
indexers = 5
if not isinstance(indexers, int) and not is_dict_like(indexers):
raise TypeError("indexers must be either dict-like or a single integer")
if isinstance(indexers, int):
indexers = {dim: indexers for dim in self.dims}
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail")
indexers = {
for k, v in indexers.items():
if not isinstance(v, int):
raise TypeError(
"expected integer type indexer for "
"dimension %r, found %r" % (k, type(v))
)
elif v < 0:
raise ValueError(
"expected positive integer as indexer "
"for dimension %r, found %s" % (k, v)
)
indexers_slices = {
k: slice(-val, None) if val != 0 else slice(val)
for k, val in indexers.items()
}
return self.isel(indexers)
return self.isel(indexers_slices)

def thin(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "Dataset":
"""Returns a new dataset with each array indexed along every `n`th
value for the specified dimension(s)
Parameters
----------
indexers : dict, optional
A dict with keys matching dimensions and integer values `n`.
indexers : dict or int, default: 5
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to thin by a factor of five by default? Or should we not have a default value? The use case for a default thinning value are less clear to me than defaults for head/tail.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think picking a default makes it very convenient to use. And this is a convenience method...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I set a default value to thin to make it even with the other two methods (really didn't think that much of it usage) but I agree with @dcherian that it might be ok to have it for convenience.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have a strong enough view. I agree it's a convenience method, but a convenient value significantly depends on the size of the array, unlike with head & tail

So whatever you think. I'm probably a -0.2

A dict with keys matching dimensions and integer values `n`
or a single integer `n` applied over all dimensions.
One of indexers or indexers_kwargs must be provided.
**indexers_kwargs : {dim: n, ...}, optional
The keyword arguments form of ``indexers``.
Expand All @@ -2086,11 +2130,30 @@ def thin(
Dataset.tail
DataArray.thin
"""
if (
not indexers_kwargs
and not isinstance(indexers, int)
and not is_dict_like(indexers)
):
raise TypeError("indexers must be either dict-like or a single integer")
if isinstance(indexers, int):
indexers = {dim: indexers for dim in self.dims}
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin")
if 0 in indexers.values():
raise ValueError("step cannot be zero")
indexers = {k: slice(None, None, val) for k, val in indexers.items()}
return self.isel(indexers)
for k, v in indexers.items():
if not isinstance(v, int):
raise TypeError(
"expected integer type indexer for "
"dimension %r, found %r" % (k, type(v))
)
elif v < 0:
raise ValueError(
"expected positive integer as indexer "
"for dimension %r, found %s" % (k, v)
)
elif v == 0:
raise ValueError("step cannot be zero")
indexers_slices = {k: slice(None, None, val) for k, val in indexers.items()}
return self.isel(indexers_slices)

def broadcast_like(
self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None
Expand Down
35 changes: 35 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,13 +1005,48 @@ def test_isel_drop(self):
def test_head(self):
assert_equal(self.dv.isel(x=slice(5)), self.dv.head(x=5))
assert_equal(self.dv.isel(x=slice(0)), self.dv.head(x=0))
assert_equal(
self.dv.isel({dim: slice(6) for dim in self.dv.dims}), self.dv.head(6)
)
assert_equal(
self.dv.isel({dim: slice(5) for dim in self.dv.dims}), self.dv.head()
)
with raises_regex(TypeError, "either dict-like or a single int"):
self.dv.head([3])
with raises_regex(TypeError, "expected integer type"):
self.dv.head(x=3.1)
with raises_regex(ValueError, "expected positive int"):
self.dv.head(-3)

def test_tail(self):
assert_equal(self.dv.isel(x=slice(-5, None)), self.dv.tail(x=5))
assert_equal(self.dv.isel(x=slice(0)), self.dv.tail(x=0))
assert_equal(
self.dv.isel({dim: slice(-6, None) for dim in self.dv.dims}),
self.dv.tail(6),
)
assert_equal(
self.dv.isel({dim: slice(-5, None) for dim in self.dv.dims}), self.dv.tail()
)
with raises_regex(TypeError, "either dict-like or a single int"):
self.dv.tail([3])
with raises_regex(TypeError, "expected integer type"):
self.dv.tail(x=3.1)
with raises_regex(ValueError, "expected positive int"):
self.dv.tail(-3)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very thorough tests! Thank you!


def test_thin(self):
assert_equal(self.dv.isel(x=slice(None, None, 5)), self.dv.thin(x=5))
assert_equal(
self.dv.isel({dim: slice(None, None, 6) for dim in self.dv.dims}),
self.dv.thin(6),
)
with raises_regex(TypeError, "either dict-like or a single int"):
self.dv.thin([3])
with raises_regex(TypeError, "expected integer type"):
self.dv.thin(x=3.1)
with raises_regex(ValueError, "expected positive int"):
self.dv.thin(-3)
with raises_regex(ValueError, "cannot be zero"):
self.dv.thin(time=0)

Expand Down
40 changes: 40 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1422,6 +1422,21 @@ def test_head(self):
actual = data.head(time=0)
assert_equal(expected, actual)

expected = data.isel({dim: slice(6) for dim in data.dims})
actual = data.head(6)
assert_equal(expected, actual)

expected = data.isel({dim: slice(5) for dim in data.dims})
actual = data.head()
assert_equal(expected, actual)

with raises_regex(TypeError, "either dict-like or a single int"):
data.head([3])
with raises_regex(TypeError, "expected integer type"):
data.head(dim2=3.1)
with raises_regex(ValueError, "expected positive int"):
data.head(time=-3)

def test_tail(self):
data = create_test_data()

Expand All @@ -1433,15 +1448,40 @@ def test_tail(self):
actual = data.tail(dim1=0)
assert_equal(expected, actual)

expected = data.isel({dim: slice(-6, None) for dim in data.dims})
actual = data.tail(6)
assert_equal(expected, actual)

expected = data.isel({dim: slice(-5, None) for dim in data.dims})
actual = data.tail()
assert_equal(expected, actual)

with raises_regex(TypeError, "either dict-like or a single int"):
data.tail([3])
with raises_regex(TypeError, "expected integer type"):
data.tail(dim2=3.1)
with raises_regex(ValueError, "expected positive int"):
data.tail(time=-3)

def test_thin(self):
data = create_test_data()

expected = data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6))
actual = data.thin(time=5, dim2=6)
assert_equal(expected, actual)

expected = data.isel({dim: slice(None, None, 6) for dim in data.dims})
actual = data.thin(6)
assert_equal(expected, actual)

with raises_regex(TypeError, "either dict-like or a single int"):
data.thin([3])
with raises_regex(TypeError, "expected integer type"):
data.thin(dim2=3.1)
with raises_regex(ValueError, "cannot be zero"):
data.thin(time=0)
with raises_regex(ValueError, "expected positive int"):
data.thin(time=-3)

@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_sel_fancy(self):
Expand Down