Skip to content

Commit

Permalink
ENH: consistency of input args for boundaries in DataFrame.between_ti…
Browse files Browse the repository at this point in the history
…me() #40245 (#43248)
  • Loading branch information
suyashgupta01 committed Sep 10, 2021
1 parent 449eaae commit df1de66
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 19 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ Other enhancements
- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`)
- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)


.. ---------------------------------------------------------------------------
.. _whatsnew_140.notable_bug_fixes:
Expand Down Expand Up @@ -278,6 +279,7 @@ Other Deprecations
- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`)
- Deprecated :meth:`.Styler.render` in favour of :meth:`.Styler.to_html` (:issue:`42140`)
- Deprecated passing in a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
- Deprecated the 'include_start' and 'include_end' arguments in :meth:`DataFrame.between_time`; in a future version passing 'include_start' or 'include_end' will raise (:issue:`40245`)
- Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`)

.. ---------------------------------------------------------------------------
Expand Down
57 changes: 54 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7606,8 +7606,9 @@ def between_time(
self: FrameOrSeries,
start_time,
end_time,
include_start: bool_t = True,
include_end: bool_t = True,
include_start: bool_t | lib.NoDefault = lib.no_default,
include_end: bool_t | lib.NoDefault = lib.no_default,
inclusive: str | None = None,
axis=None,
) -> FrameOrSeries:
"""
Expand All @@ -7624,8 +7625,20 @@ def between_time(
End time as a time filter limit.
include_start : bool, default True
Whether the start time needs to be included in the result.
.. deprecated:: 1.4.0
Arguments `include_start` and `include_end` have been deprecated
to standardize boundary inputs. Use `inclusive` instead, to set
each bound as closed or open.
include_end : bool, default True
Whether the end time needs to be included in the result.
.. deprecated:: 1.4.0
Arguments `include_start` and `include_end` have been deprecated
to standardize boundary inputs. Use `inclusive` instead, to set
each bound as closed or open.
inclusive : {"both", "neither", "left", "right"}, default "both"
Include boundaries; whether to set each bound as closed or open.
axis : {0 or 'index', 1 or 'columns'}, default 0
Determine range time on index or columns value.
Expand Down Expand Up @@ -7679,8 +7692,46 @@ def between_time(
if not isinstance(index, DatetimeIndex):
raise TypeError("Index must be DatetimeIndex")

if (include_start != lib.no_default or include_end != lib.no_default) and (
inclusive is not None
):
raise ValueError(
"Deprecated arguments `include_start` and `include_end` "
"cannot be passed if `inclusive` has been given."
)
# If any of the deprecated arguments ('include_start', 'include_end')
# have been passed
elif (include_start != lib.no_default) or (include_end != lib.no_default):
warnings.warn(
"`include_start` and `include_end` are deprecated in "
"favour of `inclusive`.",
FutureWarning,
stacklevel=2,
)
left = True if isinstance(include_start, lib.NoDefault) else include_start
right = True if isinstance(include_end, lib.NoDefault) else include_end

inc_dict = {
(True, True): "both",
(True, False): "left",
(False, True): "right",
(False, False): "neither",
}
inclusive = inc_dict[(left, right)]
else: # On arg removal inclusive can default to "both"
if inclusive is None:
inclusive = "both"
elif inclusive not in ["both", "neither", "left", "right"]:
raise ValueError(
f"Inclusive has to be either string of 'both', "
f"'left', 'right', or 'neither'. Got {inclusive}."
)

indexer = index.indexer_between_time(
start_time, end_time, include_start=include_start, include_end=include_end
start_time,
end_time,
include_start=inclusive in ["both", "left"],
include_end=inclusive in ["both", "right"],
)
return self._take_with_is_copy(indexer, axis=axis)

Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/frame/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from itertools import product

import numpy as np
import pytest

Expand All @@ -11,8 +9,8 @@
import pandas._testing as tm


@pytest.fixture(params=product([True, False], [True, False]))
def close_open_fixture(request):
@pytest.fixture(params=["both", "neither", "left", "right"])
def inclusive_endpoints_fixture(request):
return request.param


Expand Down
113 changes: 101 additions & 12 deletions pandas/tests/frame/methods/test_between_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,32 +69,33 @@ def test_between_time_types(self, frame_or_series):
with pytest.raises(ValueError, match=msg):
obj.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))

def test_between_time(self, close_open_fixture, frame_or_series):
def test_between_time(self, inclusive_endpoints_fixture, frame_or_series):
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
if frame_or_series is not DataFrame:
ts = ts[0]

stime = time(0, 0)
etime = time(1, 0)
inc_start, inc_end = close_open_fixture
inclusive = inclusive_endpoints_fixture

filtered = ts.between_time(stime, etime, inc_start, inc_end)
filtered = ts.between_time(stime, etime, inclusive=inclusive)
exp_len = 13 * 4 + 1
if not inc_start:

if inclusive in ["right", "neither"]:
exp_len -= 5
if not inc_end:
if inclusive in ["left", "neither"]:
exp_len -= 4

assert len(filtered) == exp_len
for rs in filtered.index:
t = rs.time()
if inc_start:
if inclusive in ["left", "both"]:
assert t >= stime
else:
assert t > stime

if inc_end:
if inclusive in ["right", "both"]:
assert t <= etime
else:
assert t < etime
Expand All @@ -111,22 +112,22 @@ def test_between_time(self, close_open_fixture, frame_or_series):
stime = time(22, 0)
etime = time(9, 0)

filtered = ts.between_time(stime, etime, inc_start, inc_end)
filtered = ts.between_time(stime, etime, inclusive=inclusive)
exp_len = (12 * 11 + 1) * 4 + 1
if not inc_start:
if inclusive in ["right", "neither"]:
exp_len -= 4
if not inc_end:
if inclusive in ["left", "neither"]:
exp_len -= 4

assert len(filtered) == exp_len
for rs in filtered.index:
t = rs.time()
if inc_start:
if inclusive in ["left", "both"]:
assert (t >= stime) or (t <= etime)
else:
assert (t > stime) or (t <= etime)

if inc_end:
if inclusive in ["right", "both"]:
assert (t <= etime) or (t >= stime)
else:
assert (t < etime) or (t >= stime)
Expand Down Expand Up @@ -207,3 +208,91 @@ def test_between_time_datetimeindex(self):
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected2)
assert len(result) == 12

@pytest.mark.parametrize("include_start", [True, False])
@pytest.mark.parametrize("include_end", [True, False])
def test_between_time_warn(self, include_start, include_end, frame_or_series):
# GH40245
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
if frame_or_series is not DataFrame:
ts = ts[0]

stime = time(0, 0)
etime = time(1, 0)

match = (
"`include_start` and `include_end` "
"are deprecated in favour of `inclusive`."
)
with tm.assert_produces_warning(FutureWarning, match=match):
_ = ts.between_time(stime, etime, include_start, include_end)

def test_between_time_incorr_arg_inclusive(self):
# GH40245
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)

stime = time(0, 0)
etime = time(1, 0)
inclusive = "bad_string"
msg = (
"Inclusive has to be either string of 'both', 'left', 'right', "
"or 'neither'. Got bad_string."
)
with pytest.raises(ValueError, match=msg):
ts.between_time(stime, etime, inclusive=inclusive)

@pytest.mark.parametrize(
"include_start, include_end", [(True, None), (True, True), (None, True)]
)
def test_between_time_incompatiable_args_given(self, include_start, include_end):
# GH40245
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)

stime = time(0, 0)
etime = time(1, 0)
msg = (
"Deprecated arguments `include_start` and `include_end` cannot be "
"passed if `inclusive` has been given."
)
with pytest.raises(ValueError, match=msg):
ts.between_time(stime, etime, include_start, include_end, inclusive="left")

def test_between_time_same_functionality_old_and_new_args(self):
# GH40245
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
stime = time(0, 0)
etime = time(1, 0)
match = (
"`include_start` and `include_end` "
"are deprecated in favour of `inclusive`."
)

result = ts.between_time(stime, etime)
expected = ts.between_time(stime, etime, inclusive="both")
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning, match=match):
result = ts.between_time(stime, etime, include_start=False)
expected = ts.between_time(stime, etime, inclusive="right")
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning, match=match):
result = ts.between_time(stime, etime, include_end=False)
expected = ts.between_time(stime, etime, inclusive="left")
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning, match=match):
result = ts.between_time(
stime, etime, include_start=False, include_end=False
)
expected = ts.between_time(stime, etime, inclusive="neither")
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning, match=match):
result = ts.between_time(stime, etime, include_start=True, include_end=True)
expected = ts.between_time(stime, etime, inclusive="both")
tm.assert_frame_equal(result, expected)

0 comments on commit df1de66

Please sign in to comment.