Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accept int value in head, thin and tail #3298

Merged
merged 5 commits into from
Sep 14, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 15 additions & 13 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,52 +1035,54 @@ def sel(
return self._from_temp_dataset(ds)

def head(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "DataArray":
"""Return a new DataArray whose data is given by the the first `n`
values along the specified dimension(s).
values along the specified dimension(s). Default `n` = 5

See Also
--------
Dataset.head
DataArray.tail
DataArray.thin
"""

indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head")
ds = self._to_temp_dataset().head(indexers=indexers)
ds = self._to_temp_dataset().head(indexers, **indexers_kwargs)
return self._from_temp_dataset(ds)

def tail(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "DataArray":
"""Return a new DataArray whose data is given by the the last `n`
values along the specified dimension(s).
values along the specified dimension(s). Default `n` = 5

See Also
--------
Dataset.tail
DataArray.head
DataArray.thin
"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail")
ds = self._to_temp_dataset().tail(indexers=indexers)
ds = self._to_temp_dataset().tail(indexers, **indexers_kwargs)
return self._from_temp_dataset(ds)

def thin(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "DataArray":
"""Return a new DataArray whose data is given by each `n` value
along the specified dimension(s).
along the specified dimension(s). Default `n` = 5

See Also
--------
Dataset.thin
DataArray.head
DataArray.tail
"""
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin")
ds = self._to_temp_dataset().thin(indexers=indexers)
ds = self._to_temp_dataset().thin(indexers, **indexers_kwargs)
return self._from_temp_dataset(ds)

def broadcast_like(
Expand Down
69 changes: 57 additions & 12 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2009,15 +2009,18 @@ def sel(
return result._overwrite_indexes(new_indexes)

def head(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "Dataset":
"""Returns a new dataset with the first `n` values of each array
for the specified dimension(s).

Parameters
----------
indexers : dict, optional
A dict with keys matching dimensions and integer values `n`.
indexers : dict or int, default: 5
A dict with keys matching dimensions and integer values `n`
or a single integer `n` applied over all dimensions.
One of indexers or indexers_kwargs must be provided.
**indexers_kwargs : {dim: n, ...}, optional
The keyword arguments form of ``indexers``.
Expand All @@ -2030,20 +2033,35 @@ def head(
Dataset.thin
DataArray.head
"""
if not indexers_kwargs:
if indexers is None:
indexers = 5
if not isinstance(indexers, int) and not is_dict_like(indexers):
griverat marked this conversation as resolved.
Show resolved Hide resolved
raise TypeError("indexers must be a dict or a single integer")
if isinstance(indexers, int):
indexers = {dim: indexers for dim in self.dims}
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head")
for v in indexers.values():
if not isinstance(v, int):
raise TypeError("indexer value must be an integer")
elif v < 0:
raise ValueError("indexer value must be positive")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is help to include a little more context in error messages if possible. In this case, you could include offending the name and value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, Something along these lines maybe?

"expected integer as indexer value, found type %r for dim %r" % (type(v), k)

and

"expected positive integer as indexer value for dim %r" % k

The k and v come from iterating over indexers.items()

indexers = {k: slice(val) for k, val in indexers.items()}
return self.isel(indexers)

def tail(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "Dataset":
"""Returns a new dataset with the last `n` values of each array
for the specified dimension(s).

Parameters
----------
indexers : dict, optional
A dict with keys matching dimensions and integer values `n`.
indexers : dict or int, default: 5
A dict with keys matching dimensions and integer values `n`
or a single integer `n` applied over all dimensions.
One of indexers or indexers_kwargs must be provided.
**indexers_kwargs : {dim: n, ...}, optional
The keyword arguments form of ``indexers``.
Expand All @@ -2056,24 +2074,38 @@ def tail(
Dataset.thin
DataArray.tail
"""

if not indexers_kwargs:
if indexers is None:
indexers = 5
if not isinstance(indexers, int) and not is_dict_like(indexers):
raise TypeError("indexers must be a dict or a single integer")
if isinstance(indexers, int):
indexers = {dim: indexers for dim in self.dims}
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail")
for v in indexers.values():
if not isinstance(v, int):
raise TypeError("indexer value must be an integer")
elif v < 0:
raise ValueError("indexer value must be positive")
indexers = {
k: slice(-val, None) if val != 0 else slice(val)
for k, val in indexers.items()
}
return self.isel(indexers)

def thin(
self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any
self,
indexers: Union[Mapping[Hashable, int], int] = None,
**indexers_kwargs: Any
) -> "Dataset":
"""Returns a new dataset with each array indexed along every `n`th
value for the specified dimension(s)

Parameters
----------
indexers : dict, optional
A dict with keys matching dimensions and integer values `n`.
indexers : dict or int, default: 5
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to thin by a factor of five by default? Or should we not have a default value? The use case for a default thinning value are less clear to me than defaults for head/tail.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think picking a default makes it very convenient to use. And this is a convenience method...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I set a default value to thin to make it even with the other two methods (really didn't think that much of it usage) but I agree with @dcherian that it might be ok to have it for convenience.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have a strong enough view. I agree it's a convenience method, but a convenient value significantly depends on the size of the array, unlike with head & tail

So whatever you think. I'm probably a -0.2

A dict with keys matching dimensions and integer values `n`
or a single integer `n` applied over all dimensions.
One of indexers or indexers_kwargs must be provided.
**indexers_kwargs : {dim: n, ...}, optional
The keyword arguments form of ``indexers``.
Expand All @@ -2086,9 +2118,22 @@ def thin(
Dataset.tail
DataArray.thin
"""
if (
not indexers_kwargs
and not isinstance(indexers, int)
and not is_dict_like(indexers)
):
raise TypeError("indexers must be a dict or a single integer")
if isinstance(indexers, int):
indexers = {dim: indexers for dim in self.dims}
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin")
if 0 in indexers.values():
raise ValueError("step cannot be zero")
for v in indexers.values():
if not isinstance(v, int):
raise TypeError("indexer value must be an integer")
elif v < 0:
raise ValueError("indexer value must be positive")
elif v == 0:
raise ValueError("step cannot be zero")
indexers = {k: slice(None, None, val) for k, val in indexers.items()}
return self.isel(indexers)

Expand Down
35 changes: 35 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,13 +1005,48 @@ def test_isel_drop(self):
def test_head(self):
assert_equal(self.dv.isel(x=slice(5)), self.dv.head(x=5))
assert_equal(self.dv.isel(x=slice(0)), self.dv.head(x=0))
assert_equal(
self.dv.isel({dim: slice(6) for dim in self.dv.dims}), self.dv.head(6)
)
assert_equal(
self.dv.isel({dim: slice(5) for dim in self.dv.dims}), self.dv.head()
)
with raises_regex(TypeError, "must be a dict or a single int"):
self.dv.head([3])
with raises_regex(TypeError, "must be an int"):
self.dv.head(x=3.1)
with raises_regex(ValueError, "must be positive"):
self.dv.head(-3)

def test_tail(self):
assert_equal(self.dv.isel(x=slice(-5, None)), self.dv.tail(x=5))
assert_equal(self.dv.isel(x=slice(0)), self.dv.tail(x=0))
assert_equal(
self.dv.isel({dim: slice(-6, None) for dim in self.dv.dims}),
self.dv.tail(6),
)
assert_equal(
self.dv.isel({dim: slice(-5, None) for dim in self.dv.dims}), self.dv.tail()
)
with raises_regex(TypeError, "must be a dict or a single int"):
self.dv.tail([3])
with raises_regex(TypeError, "must be an int"):
self.dv.tail(x=3.1)
with raises_regex(ValueError, "must be positive"):
self.dv.tail(-3)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very thorough tests! Thank you!


def test_thin(self):
assert_equal(self.dv.isel(x=slice(None, None, 5)), self.dv.thin(x=5))
assert_equal(
self.dv.isel({dim: slice(None, None, 6) for dim in self.dv.dims}),
self.dv.thin(6),
)
with raises_regex(TypeError, "must be a dict or a single int"):
self.dv.thin([3])
with raises_regex(TypeError, "must be an int"):
self.dv.thin(x=3.1)
with raises_regex(ValueError, "must be positive"):
self.dv.thin(-3)
with raises_regex(ValueError, "cannot be zero"):
self.dv.thin(time=0)

Expand Down
40 changes: 40 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1422,6 +1422,21 @@ def test_head(self):
actual = data.head(time=0)
assert_equal(expected, actual)

expected = data.isel({dim: slice(6) for dim in data.dims})
actual = data.head(6)
assert_equal(expected, actual)

expected = data.isel({dim: slice(5) for dim in data.dims})
actual = data.head()
assert_equal(expected, actual)

with raises_regex(TypeError, "must be a dict or a single int"):
data.head([3])
with raises_regex(TypeError, "must be an int"):
data.head(dim2=3.1)
with raises_regex(ValueError, "must be positive"):
data.head(time=-3)

def test_tail(self):
data = create_test_data()

Expand All @@ -1433,15 +1448,40 @@ def test_tail(self):
actual = data.tail(dim1=0)
assert_equal(expected, actual)

expected = data.isel({dim: slice(-6, None) for dim in data.dims})
actual = data.tail(6)
assert_equal(expected, actual)

expected = data.isel({dim: slice(-5, None) for dim in data.dims})
actual = data.tail()
assert_equal(expected, actual)

with raises_regex(TypeError, "must be a dict or a single int"):
data.tail([3])
with raises_regex(TypeError, "must be an int"):
data.tail(dim2=3.1)
with raises_regex(ValueError, "must be positive"):
data.tail(time=-3)

def test_thin(self):
data = create_test_data()

expected = data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6))
actual = data.thin(time=5, dim2=6)
assert_equal(expected, actual)

expected = data.isel({dim: slice(None, None, 6) for dim in data.dims})
actual = data.thin(6)
assert_equal(expected, actual)

with raises_regex(TypeError, "must be a dict or a single int"):
data.thin([3])
with raises_regex(TypeError, "must be an int"):
data.thin(dim2=3.1)
with raises_regex(ValueError, "cannot be zero"):
data.thin(time=0)
with raises_regex(ValueError, "must be positive"):
data.thin(time=-3)

@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_sel_fancy(self):
Expand Down