diff --git a/src/market_prices/helpers.py b/src/market_prices/helpers.py
index c0171e9..49b5fc2 100644
--- a/src/market_prices/helpers.py
+++ b/src/market_prices/helpers.py
@@ -327,9 +327,10 @@ def volume_to_na(df: pd.DataFrame) -> pd.DataFrame:
 
 def resample(
     resample_me: pd.DataFrame | pd.core.groupby.groupby.GroupBy,
-    rule: pd.offsets.BaseOffset,
+    rule: pd.offsets.BaseOffset | str,
     data: pd.DataFrame | None = None,
     origin: str = "start",
+    nominal_start: pd.Timestamp | None = None,
 ) -> pd.DataFrame:
     """Resample ohlcv data to a pandas rule.
 
@@ -339,7 +340,7 @@ def resample(
         Pandas object to be resampled. Object must have .resample method.
 
     rule
-        Pandas offset to which data to be resampled.
+        Pandas frequency or offset to which data to be resampled.
 
     data
         If resample_me is not a DataFrame (but, for example, a GroupBy
@@ -348,6 +349,16 @@ def resample(
 
     origin
         As `pd.DataFrame.resample` method.
+
+    nominal_start
+        The earliest date prior to the first index of `resample_me` on and
+        subsequent to which there are no trading sessions until that first
+        index.
+
+        Only useful when `rule` describes a frequency greater than daily
+        and there are no sessions between the first index and the date to
+        which that first index would be rolled back to coincide with the
+        nearest occurrence of `rule`.
     """
     if isinstance(resample_me, pd.DataFrame):
         resample_me = resample_me.copy()
@@ -367,6 +378,16 @@ def resample(
 
     resampler = resample_me.resample(rule, closed="left", label="left", origin=origin)
    resampled = resampler.agg(agg_f)
+
+    # NOTE START... required for at least pandas 2.1.0.
+    # See https://github.com/pandas-dev/pandas/issues/55064
+    offset = pdutils.pdfreq_to_offset(rule) if isinstance(rule, str) else rule
+    first_index = data.index[0] if nominal_start is None else nominal_start
+    cut_off = first_index - offset
+    if resampled.index[0] <= cut_off:
+        resampled = resampled[resampled.index > cut_off]
+    # required for at least pandas 2.1.0. ...END
+
     resampled.columns = columns_
     resampled = volume_to_na(resampled)
     return resampled
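The guard added to `helpers.resample` above trims any leading bin that the resampler labels a full period (or more) before the data's first index. Below is a minimal standalone sketch of the same trimming logic; the toy frame, the "MS" rule and `pd.tseries.frequencies.to_offset` (standing in for the project's `pdutils.pdfreq_to_offset`) are illustrative assumptions, not code from the patch:

    import pandas as pd

    # Toy daily frame starting exactly on a month start (hypothetical data).
    index = pd.date_range("2023-07-01", periods=92, freq="D")
    df = pd.DataFrame({"close": range(92)}, index=index)

    rule = "MS"  # a monthly, greater-than-daily frequency (illustrative)
    resampled = df.resample(rule, closed="left", label="left", origin="start").agg("last")

    # Same trim as the hunk above: drop any bin labelled at or before
    # first_index - offset. A no-op when the resampler behaves; effective on
    # pandas versions affected by the issue referenced in the NOTE.
    offset = pd.tseries.frequencies.to_offset(rule)
    cut_off = df.index[0] - offset
    if resampled.index[0] <= cut_off:
        resampled = resampled[resampled.index > cut_off]

When `nominal_start` is passed (as `_downsample_months` now does, further down), it simply replaces `data.index[0]` as the reference point for the cut-off.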
diff --git a/src/market_prices/prices/base.py b/src/market_prices/prices/base.py
index 18c78b7..9057068 100644
--- a/src/market_prices/prices/base.py
+++ b/src/market_prices/prices/base.py
@@ -1141,7 +1141,7 @@ def _set_indexes_status(self):
         for bi in self.bis_intraday:
             start_session, end_session = self.limits_sessions[bi]
             sessions = self.cc.sessions_in_range(start_session, end_session)
-            status = pd.Series(True, index=sessions)
+            status = pd.Series(True, index=sessions, dtype="object")
 
             if bi.is_one_minute:
                 # shortcut, cannot have partial indices or conflicts at T1
@@ -1827,7 +1827,7 @@ def _downsample_bi_table(self, df: pd.DataFrame, bi: intervals.BI) -> pd.DataFra
         target_indices = pd.cut(bi_index.to_list(), target_index)
         target_indices = target_indices.remove_unused_categories()
         agg_f = helpers.agg_funcs(df)
-        df = df.groupby(target_indices).agg(agg_f)
+        df = df.groupby(target_indices, observed=False).agg(agg_f)
         df.index = pd.IntervalIndex(df.index)  # convert from CategoricalIndex
         df = helpers.volume_to_na(df)
         df.index = pdutils.interval_index_new_tz(df.index, UTC)
@@ -2025,8 +2025,9 @@ def _get_table_daily(self, force_ds_daily: bool = False) -> pd.DataFrame:
             df.index = index
         else:  # downsample for monthly
             pdfreq = ds_interval.as_pdfreq
-            df = helpers.resample(df_bi, pdfreq, origin="start")
-            df.index = pdutils.get_interval_index(df.index, pdfreq)
+            df = df_bi.pt.downsample(
+                pdfreq, calendar, drop_incomplete_last_indice=False
+            )
             if df.pt.first_ts < self.limits[intervals.BI_ONE_DAY][0]:
                 # This can happen if getting all data. As the Getter's .daterange
                 # can return start as None (at least as at April 22). Ideal would
diff --git a/src/market_prices/prices/yahoo.py b/src/market_prices/prices/yahoo.py
index 1df3819..d041f61 100644
--- a/src/market_prices/prices/yahoo.py
+++ b/src/market_prices/prices/yahoo.py
@@ -673,7 +673,7 @@ def _fill_reindexed_daily(
             return df
 
         delay = self.delays[symbol]
-        if na_rows[-1] and helpers.now() <= cal.session_open(df.index[-1]) + delay:
+        if na_rows.iloc[-1] and helpers.now() <= cal.session_open(df.index[-1]) + delay:
             na_rows.iloc[-1] = False
         if not na_rows.any():
             return df
diff --git a/src/market_prices/pt.py b/src/market_prices/pt.py
index 14cac6e..601cd04 100644
--- a/src/market_prices/pt.py
+++ b/src/market_prices/pt.py
@@ -857,11 +857,11 @@ def fill(s: str | None):
         if closes_missing.all() or not closes_missing.any():
             return
         if method != "bfill":
-            df.loc[:, close_key] = df[close_key].fillna(method="ffill")
+            df.loc[:, close_key] = df[close_key].ffill()
             df.loc[bv, open_key] = df.loc[bv, close_key]
             bv = df[close_key].isna()
         if method != "ffill":
-            df.loc[:, open_key] = df[open_key].fillna(method="bfill")
+            df.loc[:, open_key] = df[open_key].bfill()
             df.loc[bv, close_key] = df.loc[bv, open_key]
 
         closes_still_missing = df[close_key].isna()
@@ -1350,7 +1350,7 @@ def _downsample_months(
         if not pre_table_sessions.empty:
             start_ds = pd_offset.rollforward(start_table)
             df = df[start_ds:]
-        resampled = helpers.resample(df, pdfreq, origin="start")
+        resampled = helpers.resample(df, pdfreq, origin="start", nominal_start=start_ds)
         resampled.index = pdutils.get_interval_index(resampled.index, pdfreq)
 
         if drop_incomplete_last_indice:
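Most of the remaining hunks, here and in the test modules below, apply the same three pandas 2.x migrations. A minimal sketch, on made-up data, of what each older spelling becomes (behaviour is unchanged, only the spelling is):

    import pandas as pd

    s = pd.Series([1.0, None, 3.0], index=pd.date_range("2023-01-02", periods=3))

    # fillna(method=...) is deprecated in pandas 2.1 in favour of ffill/bfill.
    filled = s.ffill()                    # was s.fillna(method="ffill")

    # Integer keys to Series.__getitem__ are deprecated as positional lookups,
    # hence the switch to .iloc for positional access.
    first, last = s.iloc[0], s.iloc[-1]   # was s[0], s[-1]

    # For categorical groupers the default of `observed` is changing, so
    # _downsample_bi_table above now passes it explicitly.
    grouped = s.groupby(pd.cut(s, bins=2), observed=False).sum()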
diff --git a/src/market_prices/utils/calendar_utils.py b/src/market_prices/utils/calendar_utils.py
index fdc1003..2d38c4e 100644
--- a/src/market_prices/utils/calendar_utils.py
+++ b/src/market_prices/utils/calendar_utils.py
@@ -379,12 +379,12 @@ def last_session(self) -> pd.Timestamp:
     @property
     def first_minute(self) -> pd.Timestamp:
         """First composite calendar minute."""
-        return self.first_minutes[0]
+        return self.first_minutes.iloc[0]
 
     @property
     def last_minute(self) -> pd.Timestamp:
         """Last composite calendar minute."""
-        return self.last_minutes[-1]
+        return self.last_minutes.iloc[-1]
 
     def _parse_session(self, session: Session) -> pd.Timestamp:
         """Parse client input representing a session."""
@@ -1149,7 +1149,7 @@ def _add_to_index(self, last_close: pd.Series, next_open: pd.Series):
         except ValueError:
             last_close_ = last_close.dropna()
             # last value of last close is last calendar close (there is no next open)
-            if last_close_.iloc[-1] == self.cc.closes[-1].tz_convert(None):
+            if last_close_.iloc[-1] == self.cc.closes.iloc[-1].tz_convert(None):
                 index = pd.IntervalIndex.from_arrays(
                     last_close_.iloc[:-1], next_open.dropna(), "left"
                 )
diff --git a/tests/hypstrtgy.py b/tests/hypstrtgy.py
index c89409c..4f4ca72 100644
--- a/tests/hypstrtgy.py
+++ b/tests/hypstrtgy.py
@@ -190,7 +190,7 @@ def start_minutes(
     l_limit, r_limit = limit
 
     if r_limit is None:
-        r_limit = calendar.last_minutes[-2]
+        r_limit = calendar.last_minutes.iloc[-2]
 
     if l_limit is None:
         if calendar_name in _24h_calendars:
@@ -240,14 +240,14 @@ def end_minutes(
     l_limit, r_limit = limit
 
     if r_limit is None:
-        r_limit = calendar.closes[-2]
+        r_limit = calendar.closes.iloc[-2]
 
     if l_limit is None:
         if calendar_name in _24h_calendars:
             offset = pd.DateOffset(months=6)
         else:
             offset = pd.DateOffset(years=2)
-        last_close = calendar.closes[0]
+        last_close = calendar.closes.iloc[0]
         alt_limit = r_limit - offset  # type: ignore[operator]  # is a valid operation
         l_limit = max(last_close, alt_limit)
 
@@ -441,7 +441,7 @@ def pp_days_start_minute(
     """
     pp = get_pp_default()
     calendar = get_calendar(calendar_name)
-    start = draw(start_minutes(calendar_name, (None, calendar.last_minutes[-3])))
+    start = draw(start_minutes(calendar_name, (None, calendar.last_minutes.iloc[-3])))
     start_session_i = calendar.sessions.get_loc(calendar.minute_to_session(start))
     max_days = len(calendar.sessions) - 2 - start_session_i
     pp["days"] = draw(st.integers(1, max_days))
@@ -578,10 +578,10 @@ def pp_caldur_start_minute(
         months=pp["months"],
         years=pp["years"],
     )
-    limit = (None, calendar.last_minutes[-2] - duration)
+    limit = (None, calendar.last_minutes.iloc[-2] - duration)
     start = draw(start_minutes(calendar_name, limit))
     # See `pp_caldur_end_session` for note on need for this assume guard
-    assume(start + duration <= calendar.last_minutes[-2])
+    assume(start + duration <= calendar.last_minutes.iloc[-2])
     pp["start"] = start
     return pp
 
@@ -641,7 +641,7 @@ def pp_intraday_start_minute(
     """
     pp = draw(pp_intraday())
     calendar = get_calendar(calendar_name)
-    i = calendar.minutes.get_loc(calendar.last_minutes[-2])
+    i = calendar.minutes.get_loc(calendar.last_minutes.iloc[-2])
     i -= pp["minutes"] + (pp["hours"] * 60)
     limit = (None, calendar.minutes[i])
     pp["start"] = draw(start_minutes(calendar_name, limit))
diff --git a/tests/test_base.py b/tests/test_base.py
index c9f27ac..b3686ef 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -1312,7 +1312,7 @@ def assert_all_same(
             prices: m.PricesBase, bi: intervals.BI, value: bool | float
         ):
             sessions = get_sessions(prices, bi)
-            expected = pd.Series(value, index=sessions)
+            expected = pd.Series(value, index=sessions, dtype="object")
             assert_series_equal(prices._indexes_status[bi], expected)
 
         drg = GetterMock(
@@ -1353,7 +1353,7 @@ def assert_all_same(
         # ...1H conflict every day
         bi = prices.bis.H1
         sessions = get_sessions(prices, bi)
-        expected = pd.Series(np.nan, index=sessions)
+        expected = pd.Series(np.nan, index=sessions, dtype="object")
         # ...other than those sessions when xnys closed
         x247_sessions, xnys_sessions = get_calendars_sessions(prices, bi, [x247, xnys])
         expected[x247_sessions.difference(xnys_sessions)] = True
@@ -1366,7 +1366,7 @@ def assert_all_same(
         bi = prices.bis.H1
         sessions = get_sessions(prices, bi)
         # on a normal day, no partial indices
-        expected = pd.Series(True, index=sessions)
+        expected = pd.Series(True, index=sessions, dtype="object")
         # although there are a couple of early closes that are not aligned with 1H
         dates = ["2021-12-24", "2021-12-31"]
         expected[dates] = False
@@ -1380,7 +1380,7 @@ def assert_all_same(
         sessions = get_sessions(prices, bi)
         xasx_sessions, xlon_sessions = get_calendars_sessions(prices, bi, [xasx, xlon])
         # ...IH partial indices every session
-        expected = pd.Series(False, index=sessions)
+        expected = pd.Series(False, index=sessions, dtype="object")
         # ...save when xlon closed
         expected[xasx_sessions.difference(xlon_sessions)] = True
         assert_series_equal(prices._indexes_status[bi], expected)
@@ -1392,7 +1392,7 @@ def assert_all_same(
         bi = prices.bis.H1
         sessions = get_sessions(prices, bi)
         # ...on a normal day, True (xasx enveloped by cmes and indices align)
-        expected = pd.Series(True, index=sessions)
+        expected = pd.Series(True, index=sessions, dtype="object")
         # ...except when axsx early close (unaligned with !H) coincides with CMES hol.
         expected["2021-12-24"] = False
         assert_series_equal(prices._indexes_status[bi], expected)
@@ -1405,7 +1405,7 @@ def assert_all_same(
         sessions = get_sessions(prices, bi)
         xasx_sessions, xhkg_sessions = get_calendars_sessions(prices, bi, [xasx, xhkg])
         # ...on a normal day sessions will conflict
-        expected = pd.Series(np.NaN, index=sessions)
+        expected = pd.Series(np.NaN, index=sessions, dtype="object")
         # ...but if xasx open and xhkg closed, no partial indices
         expected[xasx_sessions.difference(xhkg_sessions)] = True
         # ...whilst if xhkg open and xasx closed, always partial indices
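These `expected` series mix True, False and NaN, which is why both the source (`_set_indexes_status`, above) and the tests now build them as object dtype up front: pandas 2.1 deprecates the silent upcasting that assigning an incompatible value (here NaN into a bool Series) used to trigger. A small sketch of the pattern, with illustrative dates only:

    import numpy as np
    import pandas as pd

    sessions = pd.DatetimeIndex(["2021-12-23", "2021-12-24"])  # illustrative sessions

    # Building as object dtype means later NaN assignments need no upcast...
    expected = pd.Series(True, index=sessions, dtype="object")
    expected["2021-12-24"] = np.nan  # True/False/NaN can coexist

    # ...whereas with a plain bool Series the same assignment would rely on the
    # upcasting behaviour that pandas 2.1 deprecates (and will later disallow).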
diff --git a/tests/test_base_prices.py b/tests/test_base_prices.py
index a42a357..877dbb0 100644
--- a/tests/test_base_prices.py
+++ b/tests/test_base_prices.py
@@ -1710,8 +1710,8 @@ def assertions_downsample_bi_table(
             assert subset_s.volume.sum() == row_s.volume
             assert subset_s.high.max() == row_s.high
             assert subset_s.low.min() == row_s.low
-            assert subset_s.bfill().open[0] == row_s.open
-            assert subset_s.ffill().close[-1] == row_s.close
+            assert subset_s.bfill().open.iloc[0] == row_s.open
+            assert subset_s.ffill().close.iloc[-1] == row_s.close
 
     def test__downsample_bi_table_lon_us(self, prices_lon_us, one_min):
         """Tests `_downsample_bi_table` for symbols on overlapping exchanges.
@@ -2227,7 +2227,9 @@ def test__get_table_composite_daily_intraday(
         _start_session, end_session = get_sessions_daterange_for_bi(
             prices, prices.bis.T2, length_end_session=length
         )
-        while not (prices.cc.sessions_length(end_session, end_session) == length)[0]:
+        while not (
+            prices.cc.sessions_length(end_session, end_session) == length
+        ).iloc[0]:
             end_session = prices.cc.previous_session(end_session)
             if end_session == _start_session:
                 raise ValueError(f"Unable to get a 'T2' session of length {length}.")
@@ -4426,7 +4428,7 @@ def assertions(
             assert df.index[0] == indice
             assert df.index.tz is tz
             for s, (session, col) in values.items():
-                assert df[s][0] == self.get_cell(table, s, session, col)
+                assert df[s].iloc[0] == self.get_cell(table, s, session, col)
 
     def test_oob(self, prices_us_lon_hk, one_min):
         """Test raises errors when minute out-of-bounds.
diff --git a/tests/test_calendar_utils.py b/tests/test_calendar_utils.py
index 0f08957..612334e 100644
--- a/tests/test_calendar_utils.py
+++ b/tests/test_calendar_utils.py
@@ -460,12 +460,12 @@ def session(self) -> pd.Series:
     @property
     def session_open(self) -> pd.Series:
         """Open time of `self.session`."""
-        return self.opens[self._session_idx]
+        return self.opens.iloc[self._session_idx]
 
     @property
     def session_close(self) -> pd.Series:
         """Close time of `self.session`."""
-        return self.closes[self._session_idx]
+        return self.closes.iloc[self._session_idx]
 
     @property
     def next_session(self) -> pd.Series:
@@ -881,8 +881,8 @@ def test_misc_properties(self, composite_calendars_with_answers, calendar_groups
         cc, answers = composite_calendars_with_answers
         assert cc.first_session == answers.sessions[0]
         assert cc.last_session == answers.sessions[-1]
-        assert cc.first_minute == answers.first_minutes[0]
-        assert cc.last_minute == answers.last_minutes[-1]
+        assert cc.first_minute == answers.first_minutes.iloc[0]
+        assert cc.last_minute == answers.last_minutes.iloc[-1]
         assert cc.side == "left"
         assert len(cc.calendars) in (2, 3)
         i = 1 if len(cc.calendars) == 3 else 2
@@ -978,8 +978,8 @@ def test_non_trading_index1(self, composite_calendars):
         # test full index, with no arguments
         full_index = f()
         assert isinstance(full_index, pd.IntervalIndex)
-        assert cc.closes[0] in full_index[:6].left
-        assert cc.opens[-1] in full_index[-6:].right
+        assert cc.closes.iloc[0] in full_index[:6].left
+        assert cc.opens.iloc[-1] in full_index[-6:].right
 
         # test utc option
         args = ("2021-02", "2021-03")
diff --git a/tests/test_data.py b/tests/test_data.py
index d117c79..7dfbbc4 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -235,16 +235,20 @@ def get_data(delay, left_limit=None, right_limit=None) -> m.Data:
     assert data.cc is cc
     assert data.bi == bi
 
+    pool: pd.Series | pd.DatetimeIndex
     if bi.is_intraday:
         pool = ans.first_minutes
         r_edge = pd.Timestamp.now(tz=UTC) + bi
+        l_edge = pool.iloc[0]
     else:
         pool = ans.sessions
         r_edge = today
+        l_edge = pool[0]
 
-    l_edge = pool[0]
-    delta = get_delta(pool[0])
+    def get_pool_value(idx: int) -> pd.Timestamp:
+        return pool.iloc[idx] if isinstance(pool, pd.Series) else pool[idx]
 
+    delta = get_delta(get_pool_value(0))
     assert data.ll is None
     assert data.rl == r_edge
 
@@ -252,7 +256,7 @@ def get_data(delay, left_limit=None, right_limit=None) -> m.Data:
     assert_ts_not_available(data, r_edge + delta)
 
     # define left_limit, right_limit as default
-    left_limit = pool[-30]
+    left_limit = get_pool_value(-30)
     data = get_data(delay, left_limit)
     assert_empty(data)
 
@@ -264,7 +268,7 @@
     assert_ts_not_available(data, [left_limit - delta, r_edge + delta])
 
     # define left_limit and right_limit
-    right_limit = pool[-5]
+    right_limit = get_pool_value(-5)
     data = get_data(delay, left_limit, right_limit)
     assert_empty(data)
 
diff --git a/tests/test_daterange.py b/tests/test_daterange.py
index 6d3b400..4c549da 100644
--- a/tests/test_daterange.py
+++ b/tests/test_daterange.py
@@ -1790,7 +1790,7 @@ def test_get_end_high_interval2(
     def test_get_end_ool(self, calendars_with_answers_extended, pp_default, one_min):
         """Test `get_end` with ool input."""
         cal, ans = calendars_with_answers_extended
-        limit = ans.opens[len(ans.sessions) // 2]
+        limit = ans.opens.iloc[len(ans.sessions) // 2]
         too_early = limit - one_min
         match = re.escape(
             f"Prices unavailable as end ({helpers.fts(too_early)}) is earlier"
@@ -1909,20 +1909,20 @@ def test_daterange_start_end(self, calendars_extended, data, base_ds_interval):
 
             start_session = cal.minute_to_session(start, _parse=False)
             i = cal.sessions.get_loc(start_session)
-            if start >= cal.first_pm_minutes[i]:
-                session_start = cal.first_pm_minutes[i]
+            if start >= cal.first_pm_minutes.iloc[i]:
+                session_start = cal.first_pm_minutes.iloc[i]
             else:
-                session_start = cal.first_minutes[i]
+                session_start = cal.first_minutes.iloc[i]
             minutes_i = cal.minutes.get_loc(start)
             start_ = cal.minutes[minutes_i - interval.as_minutes]
 
             if start == session_start:
                 start_ = drg.get_start(start_)
-                if session_start == cal.first_pm_minutes[i]:
-                    prev_session_start = cal.first_minutes[i]
+                if session_start == cal.first_pm_minutes.iloc[i]:
+                    prev_session_start = cal.first_minutes.iloc[i]
                 else:
                     prev_session_start = max(
-                        cal.first_minutes[i - 1], cal.first_pm_minutes[i - 1]
+                        cal.first_minutes.iloc[i - 1], cal.first_pm_minutes.iloc[i - 1]
                     )
                 start_ = max(prev_session_start, start_)
             else:
@@ -1977,8 +1977,8 @@ def match(
 
         session = ans.sessions_sample[1]
         i = ans.sessions.get_loc(session)
-        session_open = ans.opens[i]
-        prev_session_close = ans.closes[i - 1]
+        session_open = ans.opens.iloc[i]
+        prev_session_close = ans.closes.iloc[i - 1]
 
         bi = TDInterval.T5
         dsi = TDInterval.T15
@@ -2507,10 +2507,10 @@ def test_daterange_duration_days_end_oolb_minute(
 
         drg_kwargs = dict(interval=bi)
 
-        start = ans.opens[0]
+        start = ans.opens.iloc[0]
         for i in range(3):
             pp["days"] = i + 1
-            pp["end"] = end = ans.closes[i]
+            pp["end"] = end = ans.closes.iloc[i]
 
             # on left bound
             for strict in [True, False]:
@@ -2533,13 +2533,13 @@ def test_daterange_duration_days_end_oolb_minute(
                 _ = drg.daterange
 
         limit_i = 30
-        limit = ans.opens[limit_i]
+        limit = ans.opens.iloc[limit_i]
 
         drg_kwargs["limit"] = limit
 
         for i in range(3):
             pp["days"] = i + 1
-            pp["end"] = end = ans.closes[limit_i + i]
+            pp["end"] = end = ans.closes.iloc[limit_i + i]
             for strict in [True, False]:
                 # on left limit
                 drg = self.get_drg(cal, pp, strict=strict, **drg_kwargs)
@@ -2576,7 +2576,7 @@ def test_daterange_duration_days_intervalperiod_error(
             if session > today:
                 continue
             i = ans.sessions.get_loc(session)
-            open_, close = ans.opens[i], ans.closes[i]
+            open_, close = ans.opens.iloc[i], ans.closes.iloc[i]
             length = close - open_
 
             if length > TDInterval.H22 or ans.session_has_break(session):
@@ -2820,8 +2820,8 @@ def test_daterange_duration_intraday_intervalduration_error(
 
         # on limit, where intraday duration == final interval
         pp["minutes"] = base_interval.as_minutes
-        pp["start"] = start = ans.first_minutes[1]
-        end = ans.first_minutes[1] + base_interval
+        pp["start"] = start = ans.first_minutes.iloc[1]
+        end = ans.first_minutes.iloc[1] + base_interval
         drg = self.get_drg(cal, pp, **drg_kwargs)
         assert drg.daterange == ((start, end), end)
 
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
index 703f81b..0821745 100644
--- a/tests/test_parsing.py
+++ b/tests/test_parsing.py
@@ -267,7 +267,7 @@ def test_start_end_as_minutes(
         assert f(None, minute, as_times) == (None, ans.closes[session])
 
         # verify if start/end are not minute accurate then rounded up/down respectively
-        start, end = first_mins[0], last_mins[0]
+        start, end = first_mins.iloc[0], last_mins.iloc[0]
         assert f(start + one_sec, end - one_sec, True) == (
             start + one_min,
             end - one_min,
diff --git a/tests/test_pt.py b/tests/test_pt.py
index 55822c8..0679f27 100644
--- a/tests/test_pt.py
+++ b/tests/test_pt.py
@@ -787,7 +787,7 @@ def test_intraday_1h_pt(self, intraday_1h_pt, symbols, calendars, side):
             bv = df[symbol].notna().all(axis=1)  # rows for which have prices
             # partial indices are the last indice of each session.
             bv_partial_trading = (bv + bv.shift(-1) == 1) & bv
-            bv_trading_status = bv.copy()
+            bv_trading_status = pd.Series(bv, dtype="object")
             bv_trading_status.loc[bv_partial_trading] = np.nan
             assert_series_equal(df.pt.indices_trading_status(cal), bv_trading_status)
             assert_index_equal(
@@ -814,7 +814,7 @@ def test_intraday_1h_pt(self, intraday_1h_pt, symbols, calendars, side):
             bv = df[symbol].notna().all(axis=1)
             # normally, partial indices are first indice of each session...
             bv_partial_trading = (bv + bv.shift(1) == 1) & bv
-            bv_trading_status = bv.copy()
+            bv_trading_status = pd.Series(bv, dtype="object")
             bv_trading_status.loc[bv_partial_trading] = np.nan
 
             # ...but 2021-12-24 is irregular due to different exchange hours.
@@ -860,7 +860,7 @@ def test_multiple_sessions_pt(
         df = multiple_sessions_alldays_pt
         symbols, calendars = symbols_alldays, calendars_alldays
         for symbol, cal in zip(symbols, calendars):
-            indices_trading_status = pd.Series(np.nan, index=df.index)
+            indices_trading_status = pd.Series(np.nan, index=df.index, dtype="object")
             bv = df[symbol].notna().all(axis=1)
             indices_non_trading = df.index[~bv]
             indices_trading_status.loc[indices_non_trading] = False
@@ -1305,7 +1305,7 @@ def test_intraday_pt(self, intraday_pt, tz_default, one_sec, one_min):
         # but prices not available for MSFT and AZN.L, so...
         expected_ = self.get_expected(df.ffill(), i - 1, "close")
         for s in ["MSFT", "AZN.L"]:
-            expected[s] = expected_[s][0]
+            expected[s] = expected_[s].iloc[0]
 
         for ts in (gap_left - one_sec, gap_left - one_min):
             assert_frame_equal(f(ts), expected)
@@ -1316,7 +1316,7 @@ def test_intraday_pt(self, intraday_pt, tz_default, one_sec, one_min):
         # but prices not available for MSFT and ES=F, so...
         expected_ = self.get_expected(df.ffill(), i, "close")
         for s in ["MSFT", "ES=F"]:
-            expected[s] = expected_[s][0]
+            expected[s] = expected_[s].iloc[0]
 
         for ts in (gap_right, gap_right + one_sec, gap_right + one_min):
             assert_frame_equal(f(ts), expected)
@@ -1651,16 +1651,16 @@ def assertions(df: pd.DataFrame, symbols: list[str]):
         df_test = df.iloc[i_start : i_end + 1].copy()
 
         rtrn_ff = df_test.pt.fillna("ffill")
-        assert rtrn_ff.isna().any(axis=1)[0]
+        assert rtrn_ff.isna().any(axis=1).iloc[0]
         rtrn_bf = df_test.pt.fillna("bfill")
-        assert rtrn_bf.isna().any(axis=1)[-1]
+        assert rtrn_bf.isna().any(axis=1).iloc[-1]
         rtrn_both = df_test.pt.fillna("both")
         assert rtrn_both.notna().all(axis=None)
 
         # for those symbols that have missing values in first row of df_test, make sure
         # that "both" is filling initial na rows backwards and everything else forwards.
         for s in symbols:
-            if df_test[s].notna().all(axis=1)[0]:
+            if df_test[s].notna().all(axis=1).iloc[0]:
                 continue
             df_notna = df_test[s][df_test.notna().all(axis=1)]
             start_label = df_notna.index[0]
@@ -1954,8 +1954,8 @@ def assert_aggregations(symbols, subset: pd.DataFrame, row: pd.Series):
         assert subset_s.volume.sum() == row_s.volume
         assert subset_s.high.max() == row_s.high
         assert subset_s.low.min() == row_s.low
-        assert subset_s.bfill().open[0] == row_s.open
-        assert subset_s.ffill().close[-1] == row_s.close
+        assert subset_s.bfill().open.iloc[0] == row_s.open
+        assert subset_s.ffill().close.iloc[-1] == row_s.close
 
 
 class TestDownsampleDaily:
@@ -3036,9 +3036,9 @@ def test_sessions(self, intraday_pt, calendars, cc):
         # verify `direction` as default / "previous"
         rtrn = f(cal)
         srs_ = srs.copy()
-        if pd.isna(srs_[0]):
+        if pd.isna(srs_.iloc[0]):
             srs_.iloc[0] = sessions[opens_arr[0]]
-        expected = srs_.fillna(method="ffill")
+        expected = srs_.ffill()
         assert_series_equal(rtrn, expected)
 
         rtrn_previous = f(cal, direction="previous")
@@ -3047,9 +3047,9 @@ def test_sessions(self, intraday_pt, calendars, cc):
         # verify `direction` as "previous"
         rtrn_next = f(cal, direction="next")
         srs_ = srs.copy()
-        if pd.isna(srs_[-1]):
+        if pd.isna(srs_.iloc[-1]):
             srs_.iloc[-1] = sessions[opens_arr[-1] + 1]
-        assert_series_equal(rtrn_next, srs_.fillna(method="bfill"))
+        assert_series_equal(rtrn_next, srs_.bfill())
 
         # verify `direction` as None
         rtrn_none = f(cal, direction=None)
@@ -3090,13 +3090,13 @@ def test_indices_trading_minutes(self, intraday_1h_pt, calendars, one_sec):
         assert indice_mins in possible_indice_mins
 
         # create df_test where all indices have same number of trading minutes
-        indice_mins_change = expected[expected != expected[0]].index[0]
+        indice_mins_change = expected[expected != expected.iloc[0]].index[0]
         constant_trading_mins = expected[: indice_mins_change.left - one_sec]
         start = constant_trading_mins.index[0].left
         end = constant_trading_mins.index[-1].right - one_sec
         df_test = df[start:end]
         rtrn = df_test.pt.trading_minutes_interval(cal)
-        expected_interval = TDInterval(pd.Timedelta(minutes=expected[0]))
+        expected_interval = TDInterval(pd.Timedelta(minutes=expected.iloc[0]))
         assert rtrn == expected_interval
         assert df_test.pt.indices_have_regular_trading_minutes(cal)
 
diff --git a/tests/test_yahoo.py b/tests/test_yahoo.py
index 5a9544a..c4e5f5f 100644
--- a/tests/test_yahoo.py
+++ b/tests/test_yahoo.py
@@ -431,13 +431,13 @@ def test__adjust_high_low():
     """Verify staticmethod PricesYahoo._adjust_high_low."""
     columns = pd.Index(["open", "high", "low", "close", "volume"])
     ohlcv = (
-        [100, 103, 98, 103.4, 0],  # close higher than high
-        [104, 109, 104, 107, 0],
-        [106, 108, 104, 107, 0],
-        [106, 110, 107, 109, 0],  # open lower than low
-        [108, 112, 108, 112, 0],
-        [112, 114, 107, 106.4, 0],  # close lower than low
-        [112, 108, 104, 105, 0],  # open higher than high
+        [100.0, 103.0, 98.0, 103.4, 0],  # close higher than high
+        [104.0, 109.0, 104.0, 107.0, 0],
+        [106.0, 108.0, 104.0, 107.0, 0],
+        [106.0, 110.0, 107.0, 109.0, 0],  # open lower than low
+        [108.0, 112.0, 108.0, 112.0, 0],
+        [112.0, 114.0, 107.0, 106.4, 0],  # close lower than low
+        [112.0, 108.0, 104.0, 105.0, 0],  # open higher than high
     )
     index = pd.date_range(
         start=pd.Timestamp("2022-01-01"), freq="D", periods=len(ohlcv)
@@ -446,13 +446,13 @@ def test__adjust_high_low():
     rtrn = m.PricesYahoo._adjust_high_low(df)
 
     ohlcv_expected = (
-        [100, 103.4, 98, 103.4, 0],  # close was higher than high
-        [104, 109, 104, 107, 0],
-        [106, 108, 104, 107, 0],
-        [107, 110, 107, 109, 0],  # open was lower than low
-        [108, 112, 108, 112, 0],
-        [112, 114, 106.4, 106.4, 0],  # close was lower than low
-        [108, 108, 104, 105, 0],  # open was higher than high
+        [100.0, 103.4, 98, 103.4, 0],  # close was higher than high
+        [104.0, 109.0, 104.0, 107.0, 0],
+        [106.0, 108.0, 104.0, 107.0, 0],
+        [107.0, 110.0, 107.0, 109.0, 0],  # open was lower than low
+        [108.0, 112.0, 108.0, 112.0, 0],
+        [112.0, 114.0, 106.4, 106.4, 0],  # close was lower than low
+        [108.0, 108.0, 104.0, 105.0, 0],  # open was higher than high
     )
     expected = pd.DataFrame(ohlcv_expected, index=index, columns=columns)
     assert (expected.open >= expected.low).all()
@@ -1282,8 +1282,8 @@ def expected_table_structure_us(
 
     expected_num_rows = int(sessions_rows.sum())
     sessions_end = cc.opens[slc] + (interval.as_pdtd * sessions_rows)
-    start = cc.opens[slc][0]
-    end = sessions_end[-1]
+    start = cc.opens[slc].iloc[0]
+    end = sessions_end.iloc[-1]
 
     return (start, end), expected_num_rows, sessions_end
 
@@ -1446,8 +1446,8 @@ def test_prices_us_lon(self, pricess):
             sessions_last_indice = cc.opens[slc] + (
                 interval.as_pdtd * sessions_rows_gross
             )
-            start = cc.opens[slc][0]
-            end = sessions_last_indice[-1]
+            start = cc.opens[slc].iloc[0]
+            end = sessions_last_indice.iloc[-1]
             assertions_intraday(df, interval, prices, start, end, expected_num_rows)
 
             assert cc.opens[slc].isin(df.index.left).all()
@@ -1522,8 +1522,8 @@ def test_prices_inc_245(self, pricess):
 
             expected_num_rows = int(sessions_rows.sum())
             sessions_last_indice = cc.opens[slc] + (interval.as_pdtd * sessions_rows)
-            start = cc.opens[slc][0]
-            end = sessions_last_indice[-1]
+            start = cc.opens[slc].iloc[0]
+            end = sessions_last_indice.iloc[-1]
 
             assertions_intraday(df, interval, prices, start, end, expected_num_rows)
             assert cc.opens[slc].isin(df.index.left).all()
@@ -1556,8 +1556,8 @@ def test_prices_inc_247(self, pricess):
 
             expected_num_rows = int(sessions_rows.sum())
             sessions_last_indice = cc.opens[slc] + (interval.as_pdtd * sessions_rows)
-            start = cc.opens[slc][0]
-            end = sessions_last_indice[-1]
+            start = cc.opens[slc].iloc[0]
+            end = sessions_last_indice.iloc[-1]
 
             assertions_intraday(df, interval, prices, start, end, expected_num_rows)
             assert cc.opens[slc].isin(df.index.left).all()
@@ -1604,8 +1604,8 @@ def test_start_end_session_minutes(self, pricess, one_min):
         _, slc = get_data_bounds(prices, interval)
 
         delta = pd.Timedelta(20, "T")
-        start = cc.opens[slc][0] + delta
-        end = cc.closes[slc][-1] - delta
+        start = cc.opens[slc].iloc[0] + delta
+        end = cc.closes[slc].iloc[-1] - delta
 
         expected_num_rows, _ = self.get_expected_num_rows_us_lon(interval, cc, slc)
         expected_num_rows -= (delta // interval) * 2
@@ -1781,7 +1781,7 @@ def assertions(
 
             indice = hist0.name
             df = prices._request_data(interval, start, end)[symbol]
-            df0_vol = df[indice:indice].volume[0]
+            df0_vol = df[indice:indice].volume.iloc[0]
 
             # verify glitch in hist not present in df
             if prev_close is None:
@@ -1988,7 +1988,7 @@ def test__get_bi_table(pricess):
     to = pd.Timestamp.now()
    from_ = to - pd.Timedelta(21, "D")
     (start, _), slc = get_data_bounds(prices, interval, (from_, to))
-    end = prices.cc.closes[slc][-1]
+    end = prices.cc.closes[slc].iloc[-1]
 
     table = prices._get_bi_table(interval, (start, end))
     bounds, num_rows, sessions_end = expected_table_structure_us(prices, interval, slc)
diff --git a/tests/utils.py b/tests/utils.py
index 0b27195..f7f8dab 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -571,12 +571,12 @@ def sessions_range(self) -> tuple[pd.Timestamp, pd.Timestamp]:
     @property
     def first_session_open(self) -> pd.Timestamp:
         """Open time of first session covered by answers."""
-        return self.opens[0]
+        return self.opens.iloc[0]
 
     @property
     def last_session_close(self) -> pd.Timestamp:
         """Close time of last session covered by answers."""
-        return self.closes[-1]
+        return self.closes.iloc[-1]
 
     @property
     def first_minute(self) -> pd.Timestamp:
@@ -882,7 +882,7 @@ def _get_sessions_with_times_different_to_next_session(
         if is_break_col:
             if column_.isna().all():
                 return [pd.DatetimeIndex([])] * 2
-            column_ = column_.fillna(method="ffill").fillna(method="bfill")
+            column_ = column_.ffill().bfill()
 
         diff = (column_.shift(-1) - column_)[:-1]
         remainder = diff % pd.Timedelta(24, "H")