Skip to content

Commit

Permalink
Price repair: improve 'sudden change' repair for splits & currency
Browse files Browse the repository at this point in the history
Original logic for repairing missing split adjustment only checked latest split.
Improved logic checks ALL splits in data, because any can be missing.

Then related changes to 'sudden change detection':
- use median of prices, not mean, to reduce sensitivity to noise.
- handle the Kuwaiti Dinar, which subdivides into 1000 subunits, not 100.
  • Loading branch information
ValueRaider committed May 19, 2024
1 parent 7c41434 commit 036f393
Showing 1 changed file with 40 additions and 19 deletions.
59 changes: 40 additions & 19 deletions yfinance/scrapers/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def history(self, period="1mo", interval="1d",
# Do this before auto/back adjust
logger.debug(f'{self.ticker}: checking OHLC for repairs ...')
df = self._fix_unit_mixups(df, interval, tz_exchange, prepost)
df = self._fix_bad_stock_split(df, interval, tz_exchange)
df = self._fix_bad_stock_splits(df, interval, tz_exchange)
# Must repair 100x and split errors before price reconstruction
df = self._fix_zeroes(df, interval, tz_exchange, prepost)
df = self._fix_missing_div_adjust(df, interval, tz_exchange)
Expand Down Expand Up @@ -981,7 +981,12 @@ def _fix_unit_switch(self, df, interval, tz_exchange):
# This function fixes the second.
# Eventually Yahoo fixes but could take them 2 weeks.

return self._fix_prices_sudden_change(df, interval, tz_exchange, 100.0)
if self._history_metadata['currency'] == 'KWF':
# Kuwaiti Dinar divided into 1000 not 100
n = 1000
else:
n = 100
return self._fix_prices_sudden_change(df, interval, tz_exchange, n)

@utils.log_indent_decorator
def _fix_zeroes(self, df, interval, tz_exchange, prepost):
Expand Down Expand Up @@ -1171,9 +1176,12 @@ def _fix_missing_div_adjust(self, df, interval, tz_exchange):
return df2

@utils.log_indent_decorator
def _fix_bad_stock_split(self, df, interval, tz_exchange):
# Repair idea is to look for BIG daily price changes that closely match the
# most recent stock split ratio. This indicates Yahoo failed to apply a new
def _fix_bad_stock_splits(self, df, interval, tz_exchange):
# Original logic only considered latest split adjustment could be missing, but
# actually **any** split adjustment can be missing. So check all splits in df.
#
# Improved logic looks for BIG daily price changes that closely match the
# **nearest future** stock split ratio. This indicates Yahoo failed to apply a new
# stock split to old price data.
#
# There is a slight complication, because Yahoo does another stupid thing.
Expand All @@ -1190,22 +1198,33 @@ def _fix_bad_stock_split(self, df, interval, tz_exchange):
if not interday:
return df

# Find the most recent stock split
df = df.sort_index(ascending=False)
df = df.sort_index() # scan splits oldest -> newest
split_f = df['Stock Splits'].to_numpy() != 0
if not split_f.any():
logger.debug('price-repair-split: No splits in data')
return df
most_recent_split_day = df.index[split_f].max()
split = df.loc[most_recent_split_day, 'Stock Splits']
if most_recent_split_day == df.index[0]:
logger.info(
"price-repair-split: Need 1+ day of price data after split to determine true price. Won't repair")
return df

logger.debug(f'price-repair-split: Most recent split = {split:.4f} @ {most_recent_split_day.date()}')
for split_idx in np.where(split_f)[0]:
split_dt = df.index[split_idx]
split = df.loc[split_dt, 'Stock Splits']
if split_dt == df.index[0]:
# logger.info(
# "price-repair-split: Need 1+ day of price data after split to determine true price. Won't repair")
# return df
continue

logger.debug(f'price-repair-split: Checking split {split:.4f} @ {split_dt.date()} for possible repair')

cutoff_idx = min(df.shape[0], split_idx+1) # add one row after to detect big change
df_pre_split = df.iloc[0:cutoff_idx+1]

return self._fix_prices_sudden_change(df, interval, tz_exchange, split, correct_volume=True)
df_pre_split_repaired = self._fix_prices_sudden_change(df_pre_split, interval, tz_exchange, split, correct_volume=True)
# Merge back in:
if cutoff_idx == df.shape[0]-1:
df = df_pre_split_repaired
else:
df = pd.concat([df_pre_split_repaired.sort_index(), df.iloc[cutoff_idx+1:]])
return df

@utils.log_indent_decorator
def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_volume=False):
Expand Down Expand Up @@ -1302,10 +1321,12 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
# average change
_1d_change_minx = np.average(_1d_change_x, axis=1)
else:
# change nearest to 1.0
diff = np.abs(_1d_change_x - 1.0)
j_indices = np.argmin(diff, axis=1)
_1d_change_minx = _1d_change_x[np.arange(n), j_indices]
# # change nearest to 1.0
# diff = np.abs(_1d_change_x - 1.0)
# j_indices = np.argmin(diff, axis=1)
# _1d_change_minx = _1d_change_x[np.arange(n), j_indices]
# Still sensitive to extreme-low low. Try median:
_1d_change_minx = np.median(_1d_change_x, axis=1)
f_na = np.isnan(_1d_change_minx)
if f_na.any():
# Possible if data was too old for reconstruction.
Expand Down

0 comments on commit 036f393

Please sign in to comment.