From 37733023ad82ed7761a27d971ad2e1328d25a754 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sat, 24 Aug 2024 17:38:38 +0100 Subject: [PATCH] Fix price-repair-currency, logic was inverted --- yfinance/scrapers/history.py | 97 ++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 42 deletions(-) diff --git a/yfinance/scrapers/history.py b/yfinance/scrapers/history.py index e267a663..84d15d2f 100644 --- a/yfinance/scrapers/history.py +++ b/yfinance/scrapers/history.py @@ -368,48 +368,7 @@ def history(self, period="1mo", interval="1d", # Must fix bad 'Adj Close' & dividends before 100x/split errors. # First make currency consistent. On some exchanges, dividends often in different currency # to prices, e.g. £ vs pence. - if currency in ["GBp", "ZAc", "ILA"]: - if currency == 'GBp': - # UK £/pence - currency = 'GBP' - m = 0.01 - elif currency == 'ZAc': - # South Africa Rand/cents - currency = 'ZAR' - m = 0.01 - elif currency == 'ILA': - # Israel Shekels/Agora - currency = 'ILS' - m = 0.01 - - prices_in_subunits = True # usually is true - if df.index[-1] > (pd.Timestamp.utcnow() - _datetime.timedelta(days=30)): - try: - ratio = self._history_metadata['regularMarketPrice'] / self._history_metadata['chartPreviousClose'] - if abs((ratio*m)-1) < 0.1: - # within 10% of 100x - prices_in_subunits = True - except Exception: - pass - if prices_in_subunits: - for c in _PRICE_COLNAMES_: - df[c] *= m - self._history_metadata["currency"] = currency - - f_div = df['Dividends']!=0.0 - if f_div.any(): - # But sometimes the dividend was in pence. - # Heuristic is: if dividend yield is ridiculous high vs converted prices, then - # assume dividend was also in pence and convert to GBP. - # Threshold for "ridiculous" based on largest yield I've seen anywhere - 63.4% - # If this simple heuritsic generates a false positive, then _fix_bad_div_adjust() - # will detect and repair. - divs = df[['Close','Dividends']].copy() - divs['Close'] = divs['Close'].ffill().shift(1, fill_value=divs['Close'].iloc[0]) - divs = divs[f_div] - div_pcts = (divs['Dividends'] / divs['Close']).to_numpy() - if len(div_pcts) > 0 and np.average(div_pcts) > 1: - df['Dividends'] *= m + df = self._standardise_currency(df, currency) df = self._fix_bad_div_adjust(df, interval, currency) @@ -933,6 +892,60 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): return df_v2 + def _standardise_currency(self, df, currency): + if currency not in ["GBp", "ZAc", "ILA"]: + return df + if currency == 'GBp': + # UK £/pence + currency = 'GBP' + m = 0.01 + elif currency == 'ZAc': + # South Africa Rand/cents + currency = 'ZAR' + m = 0.01 + elif currency == 'ILA': + # Israel Shekels/Agora + currency = 'ILS' + m = 0.01 + + prices_in_subunits = True # usually is true + # Use latest row with actual volume, because volume=0 rows can be 0.01x the other rows. + # _fix_unit_switch() will ensure all rows are on same scale. + f_volume = df['Volume']>0 + if not f_volume.any(): + return df + last_row = df.iloc[np.where(f_volume)[0][-1]] + if last_row.name > (pd.Timestamp.utcnow() - _datetime.timedelta(days=30)): + try: + ratio = self._history_metadata['regularMarketPrice'] / last_row['Close'] + if abs((ratio*m)-1) < 0.1: + # within 10% of 100x + prices_in_subunits = False + except Exception: + # Should never happen but just-in-case + pass + if prices_in_subunits: + for c in _PRICE_COLNAMES_: + df[c] *= m + self._history_metadata["currency"] = currency + + f_div = df['Dividends']!=0.0 + if f_div.any(): + # But sometimes the dividend was in pence. + # Heuristic is: if dividend yield is ridiculous high vs converted prices, then + # assume dividend was also in pence and convert to GBP. + # Threshold for "ridiculous" based on largest yield I've seen anywhere - 63.4% + # If this simple heuristic generates a false positive, then _fix_bad_div_adjust() + # will detect and repair. + divs = df[['Close','Dividends']].copy() + divs['Close'] = divs['Close'].ffill().shift(1, fill_value=divs['Close'].iloc[0]) + divs = divs[f_div] + div_pcts = (divs['Dividends'] / divs['Close']).to_numpy() + if len(div_pcts) > 0 and np.average(div_pcts) > 1: + df['Dividends'] *= m + + return df + @utils.log_indent_decorator def _fix_unit_mixups(self, df, interval, tz_exchange, prepost): if df.empty: