Skip to content

Commit

Permalink
Price-repair-split-errors: stop zeroes, big-divs, and 100x errors tri…
Browse files Browse the repository at this point in the history
…ggering false split errors
  • Loading branch information
ValueRaider committed Sep 20, 2023
1 parent 965d1d5 commit 16d3bc3
Showing 1 changed file with 39 additions and 10 deletions.
49 changes: 39 additions & 10 deletions yfinance/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,14 +994,6 @@ def _fix_unit_switch(self, df, interval, tz_exchange):
# This function fixes the second.
# Eventually Yahoo fixes but could take them 2 weeks.

# To detect, use 'bad split adjustment' algorithm. But only correct
# if no stock splits in data

f_splits = df['Stock Splits'].to_numpy() != 0.0
if f_splits.any():
utils.get_yf_logger().debug('price-repair-100x: Cannot check for chunked 100x errors because splits present')
return df

return self._fix_prices_sudden_change(df, interval, tz_exchange, 100.0)

@utils.log_indent_decorator
Expand Down Expand Up @@ -1291,11 +1283,25 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
# Avoid using 'Low' and 'High'. For multiday intervals, these can be
# very volatile so reduce ability to detect genuine stock split errors
_1d_change_x = np.full((n, 2), 1.0)
price_data = df2[['Open','Close']].replace(0.0, 1.0).to_numpy()
price_data = df2[['Open','Close']].to_numpy()
f_zero = price_data == 0.0
else:
_1d_change_x = np.full((n, 4), 1.0)
price_data = df2[OHLC].replace(0.0, 1.0).to_numpy()
price_data = df2[OHLC].to_numpy()
f_zero = price_data == 0.0
if f_zero.any():
price_data[f_zero] = 1.0

# Update: if a VERY large dividend is paid out, then can be mistaken for a 1:2 stock split.
# Fix = use adjusted prices
adj = df2['Adj Close'].to_numpy() / df2['Close'].to_numpy()
for j in range(price_data.shape[1]):
price_data[:,j] *= adj

_1d_change_x[1:] = price_data[1:, ] / price_data[:-1, ]
f_zero_num_denom = f_zero | np.roll(f_zero, 1, axis=0)
if f_zero_num_denom.any():
_1d_change_x[f_zero_num_denom] = 1.0
if interday and interval != '1d':
# average change
_1d_change_minx = np.average(_1d_change_x, axis=1)
Expand Down Expand Up @@ -1394,6 +1400,29 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
logger.info(f'price-repair-split: No {fix_type}s detected')
return df

# Update: if any 100x changes are soon after a stock split, so could be confused with split error, then abort
threshold_days = 30
f_splits = df['Stock Splits'].to_numpy() != 0.0
if change in [100.0, 0.01] and f_splits.any():
indices_A = np.where(f_splits)[0]
indices_B = np.where(f)[0]
if not len(indices_A) or not len(indices_B):
return None
gaps = indices_B[:, None] - indices_A
# Because data is sorted in DEscending order, need to flip gaps
gaps *= -1
f_pos = gaps > 0
if f_pos.any():
gap_min = gaps[f_pos].min()
gap_td = utils._interval_to_timedelta(interval) * gap_min
if isinstance(gap_td, _dateutil.relativedelta.relativedelta):
threshold = _dateutil.relativedelta.relativedelta(days=threshold_days)
else:
threshold = _datetime.timedelta(days=threshold_days)
if gap_td < threshold:
logger.info(f'price-repair-split: 100x changes are too soon after stock split events, aborting')
return df

# if logger.isEnabledFor(logging.DEBUG):
# df_debug['i'] = list(range(0, df_debug.shape[0]))
# df_debug['i_rev'] = df_debug.shape[0]-1 - df_debug['i']
Expand Down

0 comments on commit 16d3bc3

Please sign in to comment.