Skip to content

Commit

Permalink
Update for pandas 2.1.0
Browse files Browse the repository at this point in the history
Updates for use with pandas 2.1.0
* Fixes required to retain behaviors.
* Changes required in light of FutureWarning concerning deprecations.
  • Loading branch information
maread99 committed Sep 9, 2023
1 parent 12ccca2 commit 4a3e2de
Show file tree
Hide file tree
Showing 15 changed files with 130 additions and 102 deletions.
25 changes: 23 additions & 2 deletions src/market_prices/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,10 @@ def volume_to_na(df: pd.DataFrame) -> pd.DataFrame:

def resample(
resample_me: pd.DataFrame | pd.core.groupby.groupby.GroupBy,
rule: pd.offsets.BaseOffset,
rule: pd.offsets.BaseOffset | str,
data: pd.DataFrame | None = None,
origin: str = "start",
nominal_start: pd.Timestamp | None = None,
) -> pd.DataFrame:
"""Resample ohlcv data to a pandas rule.
Expand All @@ -339,7 +340,7 @@ def resample(
Pandas object to be resampled. Object must have .resample method.
rule
Pandas offset to which data to be resampled.
Pandas frequency or offset to which data to be resampled.
data
If resample_me is not a DataFrame (but, for example, a GroupBy
Expand All @@ -348,6 +349,16 @@ def resample(
origin
As `pd.DataFrame.resample` method.
nominal_start
The earliest date prior to the first index of `resample_me` on and
subsequent to which there are no trading sessions until the first
index of `resample_me`.
Only useful when `rule` describes a frequency greater than daily
and there are no sessions between the first index and the date to
which that first index would be rolled back to coincide with the
nearest occurrence of 'rule'.
"""
if isinstance(resample_me, pd.DataFrame):
resample_me = resample_me.copy()
Expand All @@ -367,6 +378,16 @@ def resample(

resampler = resample_me.resample(rule, closed="left", label="left", origin=origin)
resampled = resampler.agg(agg_f)

This comment has been minimized.

Copy link
@maread99

maread99 Sep 9, 2023

Author Owner

This workaround can be reverted if pandas reverts the change in behavior that was introduced in 2.1.0 (a revert is anticipated in 2.1.1). See pandas-dev/pandas#55064. At that point nominal_start can also be removed from the signature (clients will need revising).

# NOTE START... required for at least pandas 2.1.0.
# See https://github.com/pandas-dev/pandas/issues/55064
offset = pdutils.pdfreq_to_offset(rule) if isinstance(rule, str) else rule
first_index = data.index[0] if nominal_start is None else nominal_start
cut_off = first_index - offset
if resampled.index[0] <= cut_off:
resampled = resampled[resampled.index > cut_off]
# required for at least pandas 2.1.0. ...END

resampled.columns = columns_
resampled = volume_to_na(resampled)
return resampled
9 changes: 5 additions & 4 deletions src/market_prices/prices/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1141,7 +1141,7 @@ def _set_indexes_status(self):
for bi in self.bis_intraday:
start_session, end_session = self.limits_sessions[bi]
sessions = self.cc.sessions_in_range(start_session, end_session)
status = pd.Series(True, index=sessions)
status = pd.Series(True, index=sessions, dtype="object")

if bi.is_one_minute:
# shortcut, cannot have partial indices or conflicts at T1
Expand Down Expand Up @@ -1827,7 +1827,7 @@ def _downsample_bi_table(self, df: pd.DataFrame, bi: intervals.BI) -> pd.DataFra
target_indices = pd.cut(bi_index.to_list(), target_index)
target_indices = target_indices.remove_unused_categories()
agg_f = helpers.agg_funcs(df)
df = df.groupby(target_indices).agg(agg_f)
df = df.groupby(target_indices, observed=False).agg(agg_f)
df.index = pd.IntervalIndex(df.index) # convert from CategoricalIndex
df = helpers.volume_to_na(df)
df.index = pdutils.interval_index_new_tz(df.index, UTC)
Expand Down Expand Up @@ -2025,8 +2025,9 @@ def _get_table_daily(self, force_ds_daily: bool = False) -> pd.DataFrame:
df.index = index
else: # downsample for monthly
pdfreq = ds_interval.as_pdfreq
df = helpers.resample(df_bi, pdfreq, origin="start")
df.index = pdutils.get_interval_index(df.index, pdfreq)
df = df_bi.pt.downsample(

This comment has been minimized.

Copy link
@maread99

maread99 Sep 9, 2023

Author Owner

Optionally, this can be reverted if pandas reverts the change in behavior that was introduced in 2.1.0 (a revert is anticipated in 2.1.1). See pandas-dev/pandas#55064.

pdfreq, calendar, drop_incomplete_last_indice=False
)
if df.pt.first_ts < self.limits[intervals.BI_ONE_DAY][0]:
# This can happen if getting all data. As the Getter's .daterange
# can return start as None (at least as at April 22). Ideal would
Expand Down
2 changes: 1 addition & 1 deletion src/market_prices/prices/yahoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ def _fill_reindexed_daily(
return df

delay = self.delays[symbol]
if na_rows[-1] and helpers.now() <= cal.session_open(df.index[-1]) + delay:
if na_rows.iloc[-1] and helpers.now() <= cal.session_open(df.index[-1]) + delay:
na_rows.iloc[-1] = False
if not na_rows.any():
return df
Expand Down
6 changes: 3 additions & 3 deletions src/market_prices/pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,11 +857,11 @@ def fill(s: str | None):
if closes_missing.all() or not closes_missing.any():
return
if method != "bfill":
df.loc[:, close_key] = df[close_key].fillna(method="ffill")
df.loc[:, close_key] = df[close_key].ffill()
df.loc[bv, open_key] = df.loc[bv, close_key]
bv = df[close_key].isna()
if method != "ffill":
df.loc[:, open_key] = df[open_key].fillna(method="bfill")
df.loc[:, open_key] = df[open_key].bfill()
df.loc[bv, close_key] = df.loc[bv, open_key]

closes_still_missing = df[close_key].isna()
Expand Down Expand Up @@ -1350,7 +1350,7 @@ def _downsample_months(
if not pre_table_sessions.empty:
start_ds = pd_offset.rollforward(start_table)
df = df[start_ds:]
resampled = helpers.resample(df, pdfreq, origin="start")
resampled = helpers.resample(df, pdfreq, origin="start", nominal_start=start_ds)

This comment has been minimized.

Copy link
@maread99

maread99 Sep 9, 2023

Author Owner
resampled.index = pdutils.get_interval_index(resampled.index, pdfreq)

if drop_incomplete_last_indice:
Expand Down
6 changes: 3 additions & 3 deletions src/market_prices/utils/calendar_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,12 +379,12 @@ def last_session(self) -> pd.Timestamp:
@property
def first_minute(self) -> pd.Timestamp:
"""First composite calendar minute."""
return self.first_minutes[0]
return self.first_minutes.iloc[0]

@property
def last_minute(self) -> pd.Timestamp:
"""Last composite calendar minute."""
return self.last_minutes[-1]
return self.last_minutes.iloc[-1]

def _parse_session(self, session: Session) -> pd.Timestamp:
"""Parse client input representing a session."""
Expand Down Expand Up @@ -1149,7 +1149,7 @@ def _add_to_index(self, last_close: pd.Series, next_open: pd.Series):
except ValueError:
last_close_ = last_close.dropna()
# last value of last close is last calendar close (there is no next open)
if last_close_.iloc[-1] == self.cc.closes[-1].tz_convert(None):
if last_close_.iloc[-1] == self.cc.closes.iloc[-1].tz_convert(None):
index = pd.IntervalIndex.from_arrays(
last_close_.iloc[:-1], next_open.dropna(), "left"
)
Expand Down
14 changes: 7 additions & 7 deletions tests/hypstrtgy.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def start_minutes(
l_limit, r_limit = limit

if r_limit is None:
r_limit = calendar.last_minutes[-2]
r_limit = calendar.last_minutes.iloc[-2]

if l_limit is None:
if calendar_name in _24h_calendars:
Expand Down Expand Up @@ -240,14 +240,14 @@ def end_minutes(
l_limit, r_limit = limit

if r_limit is None:
r_limit = calendar.closes[-2]
r_limit = calendar.closes.iloc[-2]

if l_limit is None:
if calendar_name in _24h_calendars:
offset = pd.DateOffset(months=6)
else:
offset = pd.DateOffset(years=2)
last_close = calendar.closes[0]
last_close = calendar.closes.iloc[0]
alt_limit = r_limit - offset # type: ignore[operator] # is a valid operation
l_limit = max(last_close, alt_limit)

Expand Down Expand Up @@ -441,7 +441,7 @@ def pp_days_start_minute(
"""
pp = get_pp_default()
calendar = get_calendar(calendar_name)
start = draw(start_minutes(calendar_name, (None, calendar.last_minutes[-3])))
start = draw(start_minutes(calendar_name, (None, calendar.last_minutes.iloc[-3])))
start_session_i = calendar.sessions.get_loc(calendar.minute_to_session(start))
max_days = len(calendar.sessions) - 2 - start_session_i
pp["days"] = draw(st.integers(1, max_days))
Expand Down Expand Up @@ -578,10 +578,10 @@ def pp_caldur_start_minute(
months=pp["months"],
years=pp["years"],
)
limit = (None, calendar.last_minutes[-2] - duration)
limit = (None, calendar.last_minutes.iloc[-2] - duration)
start = draw(start_minutes(calendar_name, limit))
# See `pp_caldur_end_session` for note on need for this assume guard
assume(start + duration <= calendar.last_minutes[-2])
assume(start + duration <= calendar.last_minutes.iloc[-2])
pp["start"] = start
return pp

Expand Down Expand Up @@ -641,7 +641,7 @@ def pp_intraday_start_minute(
"""
pp = draw(pp_intraday())
calendar = get_calendar(calendar_name)
i = calendar.minutes.get_loc(calendar.last_minutes[-2])
i = calendar.minutes.get_loc(calendar.last_minutes.iloc[-2])
i -= pp["minutes"] + (pp["hours"] * 60)
limit = (None, calendar.minutes[i])
pp["start"] = draw(start_minutes(calendar_name, limit))
Expand Down
12 changes: 6 additions & 6 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1312,7 +1312,7 @@ def assert_all_same(
prices: m.PricesBase, bi: intervals.BI, value: bool | float
):
sessions = get_sessions(prices, bi)
expected = pd.Series(value, index=sessions)
expected = pd.Series(value, index=sessions, dtype="object")
assert_series_equal(prices._indexes_status[bi], expected)

drg = GetterMock(
Expand Down Expand Up @@ -1353,7 +1353,7 @@ def assert_all_same(
# ...1H conflict every day
bi = prices.bis.H1
sessions = get_sessions(prices, bi)
expected = pd.Series(np.nan, index=sessions)
expected = pd.Series(np.nan, index=sessions, dtype="object")
# ...other than those sessions when xnys closed
x247_sessions, xnys_sessions = get_calendars_sessions(prices, bi, [x247, xnys])
expected[x247_sessions.difference(xnys_sessions)] = True
Expand All @@ -1366,7 +1366,7 @@ def assert_all_same(
bi = prices.bis.H1
sessions = get_sessions(prices, bi)
# on a normal day, no partial indices
expected = pd.Series(True, index=sessions)
expected = pd.Series(True, index=sessions, dtype="object")
# although there are a couple of early closes that are not aligned with 1H
dates = ["2021-12-24", "2021-12-31"]
expected[dates] = False
Expand All @@ -1380,7 +1380,7 @@ def assert_all_same(
sessions = get_sessions(prices, bi)
xasx_sessions, xlon_sessions = get_calendars_sessions(prices, bi, [xasx, xlon])
# ...1H partial indices every session
expected = pd.Series(False, index=sessions)
expected = pd.Series(False, index=sessions, dtype="object")
# ...save when xlon closed
expected[xasx_sessions.difference(xlon_sessions)] = True
assert_series_equal(prices._indexes_status[bi], expected)
Expand All @@ -1392,7 +1392,7 @@ def assert_all_same(
bi = prices.bis.H1
sessions = get_sessions(prices, bi)
# ...on a normal day, True (xasx enveloped by cmes and indices align)
expected = pd.Series(True, index=sessions)
expected = pd.Series(True, index=sessions, dtype="object")
# ...except when xasx early close (unaligned with 1H) coincides with CMES hol.
expected["2021-12-24"] = False
assert_series_equal(prices._indexes_status[bi], expected)
Expand All @@ -1405,7 +1405,7 @@ def assert_all_same(
sessions = get_sessions(prices, bi)
xasx_sessions, xhkg_sessions = get_calendars_sessions(prices, bi, [xasx, xhkg])
# ...on a normal day sessions will conflict
expected = pd.Series(np.NaN, index=sessions)
expected = pd.Series(np.NaN, index=sessions, dtype="object")
# ...but if xasx open and xhkg closed, no partial indices
expected[xasx_sessions.difference(xhkg_sessions)] = True
# ...whilst if xhkg open and xasx closed, always partial indices
Expand Down
10 changes: 6 additions & 4 deletions tests/test_base_prices.py
Original file line number Diff line number Diff line change
Expand Up @@ -1710,8 +1710,8 @@ def assertions_downsample_bi_table(
assert subset_s.volume.sum() == row_s.volume
assert subset_s.high.max() == row_s.high
assert subset_s.low.min() == row_s.low
assert subset_s.bfill().open[0] == row_s.open
assert subset_s.ffill().close[-1] == row_s.close
assert subset_s.bfill().open.iloc[0] == row_s.open
assert subset_s.ffill().close.iloc[-1] == row_s.close

def test__downsample_bi_table_lon_us(self, prices_lon_us, one_min):
"""Tests `_downsample_bi_table` for symbols on overlapping exchanges.
Expand Down Expand Up @@ -2227,7 +2227,9 @@ def test__get_table_composite_daily_intraday(
_start_session, end_session = get_sessions_daterange_for_bi(
prices, prices.bis.T2, length_end_session=length
)
while not (prices.cc.sessions_length(end_session, end_session) == length)[0]:
while not (
prices.cc.sessions_length(end_session, end_session) == length
).iloc[0]:
end_session = prices.cc.previous_session(end_session)
if end_session == _start_session:
raise ValueError(f"Unable to get a 'T2' session of length {length}.")
Expand Down Expand Up @@ -4426,7 +4428,7 @@ def assertions(
assert df.index[0] == indice
assert df.index.tz is tz
for s, (session, col) in values.items():
assert df[s][0] == self.get_cell(table, s, session, col)
assert df[s].iloc[0] == self.get_cell(table, s, session, col)

def test_oob(self, prices_us_lon_hk, one_min):
"""Test raises errors when minute out-of-bounds.
Expand Down
12 changes: 6 additions & 6 deletions tests/test_calendar_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,12 +460,12 @@ def session(self) -> pd.Series:
@property
def session_open(self) -> pd.Series:
"""Open time of `self.session`."""
return self.opens[self._session_idx]
return self.opens.iloc[self._session_idx]

@property
def session_close(self) -> pd.Series:
"""Close time of `self.session`."""
return self.closes[self._session_idx]
return self.closes.iloc[self._session_idx]

@property
def next_session(self) -> pd.Series:
Expand Down Expand Up @@ -881,8 +881,8 @@ def test_misc_properties(self, composite_calendars_with_answers, calendar_groups
cc, answers = composite_calendars_with_answers
assert cc.first_session == answers.sessions[0]
assert cc.last_session == answers.sessions[-1]
assert cc.first_minute == answers.first_minutes[0]
assert cc.last_minute == answers.last_minutes[-1]
assert cc.first_minute == answers.first_minutes.iloc[0]
assert cc.last_minute == answers.last_minutes.iloc[-1]
assert cc.side == "left"
assert len(cc.calendars) in (2, 3)
i = 1 if len(cc.calendars) == 3 else 2
Expand Down Expand Up @@ -978,8 +978,8 @@ def test_non_trading_index1(self, composite_calendars):
# test full index, with no arguments
full_index = f()
assert isinstance(full_index, pd.IntervalIndex)
assert cc.closes[0] in full_index[:6].left
assert cc.opens[-1] in full_index[-6:].right
assert cc.closes.iloc[0] in full_index[:6].left
assert cc.opens.iloc[-1] in full_index[-6:].right

# test utc option
args = ("2021-02", "2021-03")
Expand Down
12 changes: 8 additions & 4 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,24 +235,28 @@ def get_data(delay, left_limit=None, right_limit=None) -> m.Data:
assert data.cc is cc
assert data.bi == bi

pool: pd.Series | pd.DatetimeIndex
if bi.is_intraday:
pool = ans.first_minutes
r_edge = pd.Timestamp.now(tz=UTC) + bi
l_edge = pool.iloc[0]
else:
pool = ans.sessions
r_edge = today
l_edge = pool[0]

l_edge = pool[0]
delta = get_delta(pool[0])
def get_pool_value(idx: int) -> pd.Timestamp:
return pool.iloc[idx] if isinstance(pool, pd.Series) else pool[idx]

delta = get_delta(get_pool_value(0))
assert data.ll is None
assert data.rl == r_edge

assert_rng_available_unknown(data, l_edge, r_edge)
assert_ts_not_available(data, r_edge + delta)

# define left_limit, right_limit as default
left_limit = pool[-30]
left_limit = get_pool_value(-30)
data = get_data(delay, left_limit)

assert_empty(data)
Expand All @@ -264,7 +268,7 @@ def get_data(delay, left_limit=None, right_limit=None) -> m.Data:
assert_ts_not_available(data, [left_limit - delta, r_edge + delta])

# define left_limit and right_limit
right_limit = pool[-5]
right_limit = get_pool_value(-5)
data = get_data(delay, left_limit, right_limit)

assert_empty(data)
Expand Down
Loading

0 comments on commit 4a3e2de

Please sign in to comment.