Skip to content

Commit

Permalink
Remove 'repair_intervals'
Browse files Browse the repository at this point in the history
  • Loading branch information
ValueRaider committed Jan 21, 2023
1 parent 1636839 commit 2b0ae5a
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 114 deletions.
38 changes: 0 additions & 38 deletions tests/prices.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,44 +261,6 @@ def test_dst_fix(self):
print("Weekly data not aligned to Monday")
raise

def test_correct_early_close(self):
    """Verify repair_intervals=True merges Yahoo's unaligned early-close rows.

    Stockholm exchange closed early on 2022-12-23 @ 13:02.
    For hourly intervals, Yahoo returns:
    - 13:00 filled with NaNs
    - 13:02 containing the data that belongs to 13:00
    Test that the repair fixes this without affecting other intervals.
    """
    tkr = "AEC.ST"
    start = "2022-12-01"
    end = "2023-01-01"
    data_cols = ["Open", "High", "Low", "Close", "Volume", "Dividends", "Stock Splits"]

    dat = yf.Ticker(tkr, session=self.session)
    df_old = dat.history(start=start, end=end, interval="1h", keepna=True)
    df_repair = dat.history(start=start, end=end, interval="1h", keepna=True, repair_intervals=True)

    tz = df_old.index.tz
    expected_intervals_fixed = [tz.localize(_dt.datetime(2022, 12, 23, 13, 0))]
    expected_intervals_lost = [tz.localize(_dt.datetime(2022, 12, 23, 13, 2))]

    # Test no unexpected intervals lost.
    # NB: _np.equal() returns an element-wise array (ValueError inside
    # assertTrue for multi-element arrays); use array_equal for a scalar.
    dts_lost = df_old.index[~df_old.index.isin(df_repair.index)]
    self.assertTrue(_np.array_equal(dts_lost.to_numpy(), _np.array(expected_intervals_lost)),
                    "Unexpected intervals were lost")

    # Test only the expected interval changed
    dts_shared = df_old.index[df_old.index.isin(df_repair.index)]
    f_changed = (df_old.loc[dts_shared, data_cols].to_numpy() != df_repair.loc[dts_shared, data_cols].to_numpy()).any(axis=1)
    self.assertTrue(f_changed.any(), "Expected data to change")
    dts_changed = dts_shared[f_changed]
    self.assertEqual(len(dts_changed), len(expected_intervals_fixed), "Different number of intervals changed")
    self.assertTrue(_np.array_equal(dts_changed.to_numpy(), _np.array(expected_intervals_fixed)),
                    "Unexpected intervals were changed")

    # Test the repaired interval now holds valid (non-NaN) data
    f_na = df_repair.loc[expected_intervals_fixed, data_cols].isna().any(axis=1)
    self.assertFalse(f_na.any(), "Repaired interval still contains NaNs")

def test_weekly_2rows_fix(self):
tkr = "AMZN"
start = _dt.date.today() - _dt.timedelta(days=14)
Expand Down
22 changes: 4 additions & 18 deletions yfinance/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,7 @@ def stats(self, proxy=None):

def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False, actions=True,
auto_adjust=True, back_adjust=False,
repair=None, # deprecated
repair_prices=False, repair_intervals=False,
auto_adjust=True, back_adjust=False, repair=None,
keepna=False,
proxy=None, rounding=False, timeout=10,
debug=True, raise_errors=False) -> pd.DataFrame:
Expand All @@ -113,12 +111,9 @@ def history(self, period="1mo", interval="1d",
Adjust all OHLC automatically? Default is True
back_adjust: bool
Back-adjusted data to mimic true historical prices
repair_prices: bool
repair: bool
Detect currency unit 100x mixups and attempt repair
Default is False
repair_intervals: bool
Detect unaligned intraday intervals (e.g. early-close timestamps) and attempt repair by merging
Default is False
keepna: bool
Keep NaN rows returned by Yahoo?
Default is False
Expand All @@ -139,11 +134,6 @@ def history(self, period="1mo", interval="1d",
exceptions instead of printing to console.
"""

# Handle deprecated arguments
if repair is not None:
print("WARNING: 'repair' is deprecated and will be removed in future version. Use 'repair_prices' instead")
repair_prices = repair

if start or period is None or period.lower() == "max":
# Check can get TZ. Fail => probably delisted
tz = self._get_ticker_tz(debug, proxy, timeout)
Expand Down Expand Up @@ -302,9 +292,6 @@ def history(self, period="1mo", interval="1d",
quotes = utils.fix_Yahoo_dst_issue(quotes, params["interval"])
quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)

if repair_intervals:
quotes = utils.fix_Yahoo_including_unaligned_intervals(quotes, params["interval"])

# actions
dividends, splits, capital_gains = utils.parse_actions(data["chart"]["result"][0])
if not expect_capital_gains:
Expand Down Expand Up @@ -368,8 +355,7 @@ def history(self, period="1mo", interval="1d",
else:
df["Capital Gains"] = 0.0


if repair_prices:
if repair:
# Do this before auto/back adjust
df = self._fix_zeroes(df, interval, tz_exchange, prepost)
df = self._fix_unit_mixups(df, interval, tz_exchange, prepost)
Expand Down Expand Up @@ -553,7 +539,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1):
fetch_start = g[0]
fetch_end = g[-1] + td_range

df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair_prices=False, keepna=True)
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair=False, keepna=True)
if df_fine is None or df_fine.empty:
print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval")
continue
Expand Down
58 changes: 0 additions & 58 deletions yfinance/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,64 +472,6 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
return quotes


def fix_Yahoo_including_unaligned_intervals(quotes, interval):
    """Merge intraday rows whose timestamps fall inside the preceding interval.

    Yahoo sometimes returns an extra row at an unaligned timestamp,
    e.g. 13:02 alongside the 13:00 row with a 1h interval (seen on early
    exchange closes). Each such row is merged into the preceding row via
    merge_two_prices_intervals() and then dropped.

    quotes: DataFrame of prices indexed by ascending interval-start timestamps.
    interval: Yahoo interval string, e.g. "1h", "30m", "1d".
    Returns quotes, possibly with merged rows dropped.
    """
    # Only intraday data can contain unaligned timestamps.
    # NB: test the LAST character so multi-digit intervals ("15m", "30m",
    # "90m") are included — interval[1] would misclassify them — and so
    # "1mo" is excluded without a separate check.
    if interval[-1] not in ("m", "h"):
        return quotes

    n = quotes.shape[0]
    if n < 2:
        # Nothing to merge
        return quotes

    itd = _interval_to_timedelta(interval)
    td0 = _pd.Timedelta(0)
    iend = quotes.index + itd  # end timestamp of each interval

    # steps[i] < 0  =>  row i starts before row i-1's interval ends => overlap
    steps = _np.full(n, td0)
    steps[1:] = quotes.index[1:] - iend[0:n - 1]
    f_overlap = steps < td0
    if f_overlap.any():
        # Process overlaps one-at-a-time because some may be false positives.
        # Recalculate the subsequent step after resolving each overlap.
        overlaps_exist = True
        dts_to_drop = []
        dts_merged = []
        while overlaps_exist:
            i = _np.where(f_overlap)[0][0]
            dt1 = quotes.index[i - 1]  # aligned interval, kept
            dt2 = quotes.index[i]      # unaligned row, merged then dropped

            quotes.loc[dt1] = merge_two_prices_intervals(quotes.iloc[i - 1], quotes.iloc[i])
            dts_merged.append((dt2, dt1))
            dts_to_drop.append(dt2)

            # Clear record of i:
            f_overlap[i] = False
            steps[i] = td0
            # Recalc the following row's step relative to dt1's interval end
            # (guard: the overlapping row may be the last one):
            if i + 1 < n:
                steps[i + 1] = quotes.index[i + 1] - iend[i - 1]
                f_overlap[i + 1] = steps[i + 1] < td0
            overlaps_exist = f_overlap[i + 1:].any()

        print(f"Removed {len(dts_merged)} unaligned intervals by merging:")
        for dt_src, dt_dst in dts_merged:
            print(f"- {dt_src.date()}: {dt_src.time()} -> {dt_dst.time()}")
        quotes = quotes.drop(dts_to_drop)
    return quotes


def merge_two_prices_intervals(i1, i2):
TypeCheckSeries(i1, "i1")
TypeCheckSeries(i2, "i2")
Expand Down

0 comments on commit 2b0ae5a

Please sign in to comment.