From 4c1669ad9d41f8383c27c4e398ebd7424580a5ad Mon Sep 17 00:00:00 2001 From: rickturner2001 Date: Sun, 1 Oct 2023 21:06:13 -0400 Subject: [PATCH 01/25] Refactored tests for Ticker with proxy Ticker proxy refactor --- tests/ticker.py | 128 +++------------------------------------------ yfinance/base.py | 44 ++++++++-------- yfinance/ticker.py | 14 ++--- 3 files changed, 37 insertions(+), 149 deletions(-) diff --git a/tests/ticker.py b/tests/ticker.py index 681f0387d..c70e2b606 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -126,132 +126,18 @@ def test_goodTicker(self): for attribute_name, attribute_type in ticker_attributes: assert_attribute_type(self, dat, attribute_name, attribute_type) - #TODO:: Refactor with `assert_attribute` once proxy is accepted as a parameter of `Ticker` def test_goodTicker_withProxy(self): - # that yfinance works when full api is called on same instance of ticker - tkr = "IBM" - dat = yf.Ticker(tkr, session=self.session) - - dat._fetch_ticker_tz(proxy=self.proxy, timeout=5) - dat._get_ticker_tz(proxy=self.proxy, timeout=5) - dat.history(period="1wk", proxy=self.proxy) - - v = dat.get_major_holders(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_institutional_holders(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_mutualfund_holders(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_info(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertTrue(len(v) > 0) - - v = dat.get_income_stmt(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_incomestmt(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_financials(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_balance_sheet(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_balancesheet(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_cash_flow(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_cashflow(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_shares_full(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - v = dat.get_isin(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertTrue(v != "") - - v = dat.get_news(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertTrue(len(v) > 0) + dat = yf.Ticker(tkr, session=self.session, proxy=self.proxy) - v = dat.get_earnings_dates(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertFalse(v.empty) - - dat.get_history_metadata(proxy=self.proxy) - self.assertIsNotNone(v) - self.assertTrue(len(v) > 0) - - # Below will fail because not ported to Yahoo API - - # v = dat.stats(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertTrue(len(v) > 0) - - # v = dat.get_recommendations(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertFalse(v.empty) - - # v = dat.get_calendar(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertFalse(v.empty) - - # v = dat.get_sustainability(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertFalse(v.empty) - - # v = dat.get_recommendations_summary(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertFalse(v.empty) - - # v = dat.get_analyst_price_target(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertFalse(v.empty) - - # v = dat.get_rev_forecast(proxy=self.proxy) - # 
self.assertIsNotNone(v) - # self.assertFalse(v.empty) - - # v = dat.get_earnings_forecast(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertFalse(v.empty) - - # v = dat.get_trend_details(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertFalse(v.empty) - - # v = dat.get_earnings_trend(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertFalse(v.empty) - - # v = dat.get_earnings(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertFalse(v.empty) - - # v = dat.get_shares(proxy=self.proxy) - # self.assertIsNotNone(v) - # self.assertFalse(v.empty) + dat._fetch_ticker_tz(timeout=5) + dat._get_ticker_tz(timeout=5) + dat.history(period="1wk") + for attribute_name, attribute_type in ticker_attributes: + assert_attribute_type(self, dat, attribute_name, attribute_type) + class TestTickerHistory(unittest.TestCase): session = None diff --git a/yfinance/base.py b/yfinance/base.py index 21af2bb51..8a021bd61 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -46,8 +46,9 @@ class TickerBase: - def __init__(self, ticker, session=None): + def __init__(self, ticker, session=None, proxy=None): self.ticker = ticker.upper() + self.proxy = proxy self.session = session self._history = None self._history_metadata = None @@ -132,6 +133,7 @@ def history(self, period="1mo", interval="1d", If True, then raise errors as Exceptions instead of logging. """ logger = utils.get_yf_logger() + proxy = proxy or self.proxy or self.proxy if debug is not None: if debug: @@ -1695,28 +1697,28 @@ def _fetch_ticker_tz(self, proxy, timeout): return None def get_recommendations(self, proxy=None, as_dict=False): - self._quote.proxy = proxy + self._quote.proxy = proxy or self.proxy data = self._quote.recommendations if as_dict: return data.to_dict() return data def get_calendar(self, proxy=None, as_dict=False): - self._quote.proxy = proxy + self._quote.proxy = proxy or self.proxy data = self._quote.calendar if as_dict: return data.to_dict() return data def get_major_holders(self, proxy=None, as_dict=False): - self._holders.proxy = proxy + self._holders.proxy = proxy or self.proxy data = self._holders.major if as_dict: return data.to_dict() return data def get_institutional_holders(self, proxy=None, as_dict=False): - self._holders.proxy = proxy + self._holders.proxy = proxy or self.proxy data = self._holders.institutional if data is not None: if as_dict: @@ -1724,7 +1726,7 @@ def get_institutional_holders(self, proxy=None, as_dict=False): return data def get_mutualfund_holders(self, proxy=None, as_dict=False): - self._holders.proxy = proxy + self._holders.proxy = proxy or self.proxy data = self._holders.mutualfund if data is not None: if as_dict: @@ -1732,7 +1734,7 @@ def get_mutualfund_holders(self, proxy=None, as_dict=False): return data def get_info(self, proxy=None) -> dict: - self._quote.proxy = proxy + self._quote.proxy = proxy or self.proxy data = self._quote.info return data @@ -1747,49 +1749,49 @@ def basic_info(self): return self.fast_info def get_sustainability(self, proxy=None, as_dict=False): - self._quote.proxy = proxy + self._quote.proxy = proxy or self.proxy data = self._quote.sustainability if as_dict: return data.to_dict() return data def get_recommendations_summary(self, proxy=None, as_dict=False): - self._quote.proxy = proxy + self._quote.proxy = proxy or self.proxy data = self._quote.recommendations if as_dict: return data.to_dict() return data def get_analyst_price_target(self, proxy=None, as_dict=False): - self._analysis.proxy = proxy + self._analysis.proxy = proxy or 
self.proxy data = self._analysis.analyst_price_target if as_dict: return data.to_dict() return data def get_rev_forecast(self, proxy=None, as_dict=False): - self._analysis.proxy = proxy + self._analysis.proxy = proxy or self.proxy data = self._analysis.rev_est if as_dict: return data.to_dict() return data def get_earnings_forecast(self, proxy=None, as_dict=False): - self._analysis.proxy = proxy + self._analysis.proxy = proxy or self.proxy data = self._analysis.eps_est if as_dict: return data.to_dict() return data def get_trend_details(self, proxy=None, as_dict=False): - self._analysis.proxy = proxy + self._analysis.proxy = proxy or self.proxy data = self._analysis.analyst_trend_details if as_dict: return data.to_dict() return data def get_earnings_trend(self, proxy=None, as_dict=False): - self._analysis.proxy = proxy + self._analysis.proxy = proxy or self.proxy data = self._analysis.earnings_trend if as_dict: return data.to_dict() @@ -1808,7 +1810,7 @@ def get_earnings(self, proxy=None, as_dict=False, freq="yearly"): Optional. Proxy server URL scheme Default is None """ - self._fundamentals.proxy = proxy + self._fundamentals.proxy = proxy or self.proxy data = self._fundamentals.earnings[freq] if as_dict: dict_data = data.to_dict() @@ -1833,7 +1835,7 @@ def get_income_stmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly" Optional. Proxy server URL scheme Default is None """ - self._fundamentals.proxy = proxy + self._fundamentals.proxy = proxy or self.proxy data = self._fundamentals.financials.get_income_time_series(freq=freq, proxy=proxy) @@ -1866,7 +1868,7 @@ def get_balance_sheet(self, proxy=None, as_dict=False, pretty=False, freq="yearl Optional. Proxy server URL scheme Default is None """ - self._fundamentals.proxy = proxy + self._fundamentals.proxy = proxy or self.proxy data = self._fundamentals.financials.get_balance_sheet_time_series(freq=freq, proxy=proxy) @@ -1896,7 +1898,7 @@ def get_cash_flow(self, proxy=None, as_dict=False, pretty=False, freq="yearly"): Optional. 
Proxy server URL scheme Default is None """ - self._fundamentals.proxy = proxy + self._fundamentals.proxy = proxy or self.proxy data = self._fundamentals.financials.get_cash_flow_time_series(freq=freq, proxy=proxy) @@ -1946,7 +1948,7 @@ def get_actions(self, proxy=None): return [] def get_shares(self, proxy=None, as_dict=False): - self._fundamentals.proxy = proxy + self._fundamentals.proxy = proxy or self.proxy data = self._fundamentals.shares if as_dict: return data.to_dict() @@ -2020,7 +2022,7 @@ def get_isin(self, proxy=None) -> Optional[str]: q = ticker - self._quote.proxy = proxy + self._quote.proxy = proxy or self.proxy or self.proxy if self._quote.info is None: # Don't print error message cause self._quote.info will print one return None @@ -2141,7 +2143,7 @@ def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]: dates[cn] = dates[cn] + ' ' + tzinfo["AM/PM"] dates[cn] = pd.to_datetime(dates[cn], format="%b %d, %Y, %I %p") # - instead of attempting decoding of ambiguous timezone abbreviation, just use 'info': - self._quote.proxy = proxy + self._quote.proxy = proxy or self.proxy tz = self._get_ticker_tz(proxy=proxy, timeout=30) dates[cn] = dates[cn].dt.tz_localize(tz) diff --git a/yfinance/ticker.py b/yfinance/ticker.py index 241638a5c..af8dd750c 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -30,21 +30,21 @@ class Ticker(TickerBase): - def __init__(self, ticker, session=None): - super(Ticker, self).__init__(ticker, session=session) + def __init__(self, ticker, session=None, proxy=None): + super(Ticker, self).__init__(ticker, session=session, proxy=proxy) self._expirations = {} self._underlying = {} def __repr__(self): return f'yfinance.Ticker object <{self.ticker}>' - def _download_options(self, date=None, proxy=None): + def _download_options(self, date=None): if date is None: url = f"{self._base_url}/v7/finance/options/{self.ticker}" else: url = f"{self._base_url}/v7/finance/options/{self.ticker}?date={date}" - r = self._data.get(url=url, proxy=proxy).json() + r = self._data.get(url=url, proxy=self.proxy).json() if len(r.get('optionChain', {}).get('result', [])) > 0: for exp in r['optionChain']['result'][0]['expirationDates']: self._expirations[_datetime.datetime.utcfromtimestamp( @@ -80,9 +80,9 @@ def _options2df(self, opt, tz=None): data['lastTradeDate'] = data['lastTradeDate'].dt.tz_convert(tz) return data - def option_chain(self, date=None, proxy=None, tz=None): + def option_chain(self, date=None, tz=None): if date is None: - options = self._download_options(proxy=proxy) + options = self._download_options() else: if not self._expirations: self._download_options() @@ -91,7 +91,7 @@ def option_chain(self, date=None, proxy=None, tz=None): f"Expiration `{date}` cannot be found. 
" f"Available expirations are: [{', '.join(self._expirations)}]") date = self._expirations[date] - options = self._download_options(date, proxy=proxy) + options = self._download_options(date) return _namedtuple('Options', ['calls', 'puts', 'underlying'])(**{ "calls": self._options2df(options['calls'], tz=tz), From d607c43967f8ac26f3935cc6185818278214f633 Mon Sep 17 00:00:00 2001 From: rickturner2001 Date: Sun, 1 Oct 2023 21:08:04 -0400 Subject: [PATCH 02/25] refactored Ticker proxy attribute --- yfinance/base.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 8a021bd61..52e0e8ecf 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -133,7 +133,7 @@ def history(self, period="1mo", interval="1d", If True, then raise errors as Exceptions instead of logging. """ logger = utils.get_yf_logger() - proxy = proxy or self.proxy or self.proxy + proxy = proxy or self.proxy if debug is not None: if debug: @@ -1640,7 +1640,8 @@ def map_signals_to_ranges(f, f_up, f_down): return df2 - def _get_ticker_tz(self, proxy, timeout): + def _get_ticker_tz(self,timeout, proxy=None): + proxy = proxy or self.proxy if self._tz is not None: return self._tz cache = utils.get_tz_cache() @@ -1664,9 +1665,9 @@ def _get_ticker_tz(self, proxy, timeout): return tz @utils.log_indent_decorator - def _fetch_ticker_tz(self, proxy, timeout): + def _fetch_ticker_tz(self, timeout, proxy=None): # Query Yahoo for fast price data just to get returned timezone - + proxy = proxy or self.proxy logger = utils.get_yf_logger() params = {"range": "1d", "interval": "1d"} @@ -2022,7 +2023,7 @@ def get_isin(self, proxy=None) -> Optional[str]: q = ticker - self._quote.proxy = proxy or self.proxy or self.proxy + self._quote.proxy = proxy or self.proxy if self._quote.info is None: # Don't print error message cause self._quote.info will print one return None From a3ac9fc72d7478ba1a4a3e873413cfe57bb1c644 Mon Sep 17 00:00:00 2001 From: Unit Date: Sat, 2 Dec 2023 12:00:18 +0100 Subject: [PATCH 03/25] added recommendations added valid modules for quote summary request added _fetch method for fetching quote summary added fetch recommendationTrend --- tests/ticker.py | 14 ++++++++------ yfinance/base.py | 10 +++++----- yfinance/const.py | 36 +++++++++++++++++++++++++++++++++++ yfinance/scrapers/quote.py | 39 +++++++++++++++++++++++++++----------- 4 files changed, 77 insertions(+), 22 deletions(-) diff --git a/tests/ticker.py b/tests/ticker.py index 0e769083f..871d01efe 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -627,13 +627,15 @@ def test_bad_freq_value_raises_exception(self): # data_cached = self.ticker.sustainability # self.assertIs(data, data_cached, "data not cached") - # def test_recommendations(self): - # data = self.ticker.recommendations - # self.assertIsInstance(data, pd.DataFrame, "data has wrong type") - # self.assertFalse(data.empty, "data is empty") + def test_recommendations(self): + data = self.ticker.recommendations + data_summary = self.ticker.recommendations_summary + self.assertTrue(data.equals(data_summary)) + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") - # data_cached = self.ticker.recommendations - # self.assertIs(data, data_cached, "data not cached") + data_cached = self.ticker.recommendations + self.assertIs(data, data_cached, "data not cached") # def test_recommendations_summary(self): # data = self.ticker.recommendations_summary diff --git a/yfinance/base.py b/yfinance/base.py 
index ab8195b95..568f5684a 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -1698,6 +1698,10 @@ def _fetch_ticker_tz(self, timeout, proxy=None): return None def get_recommendations(self, proxy=None, as_dict=False): + """ + Returns a DataFrame with the recommendations + Columns: period strongBuy buy hold sell strongSell + """ self._quote.proxy = proxy or self.proxy data = self._quote.recommendations if as_dict: @@ -1757,11 +1761,7 @@ def get_sustainability(self, proxy=None, as_dict=False): return data def get_recommendations_summary(self, proxy=None, as_dict=False): - self._quote.proxy = proxy or self.proxy - data = self._quote.recommendations - if as_dict: - return data.to_dict() - return data + return self.get_recommendations(proxy=proxy, as_dict=as_dict) def get_analyst_price_target(self, proxy=None, as_dict=False): self._analysis.proxy = proxy or self.proxy diff --git a/yfinance/const.py b/yfinance/const.py index 48a5e91dc..592737974 100644 --- a/yfinance/const.py +++ b/yfinance/const.py @@ -116,3 +116,39 @@ "OtherCashReceiptsfromOperatingActivities", "ReceiptsfromGovernmentGrants", "ReceiptsfromCustomers"]} price_colnames = ['Open', 'High', 'Low', 'Close', 'Adj Close'] + +quote_summary_valid_modules = ( + "summaryProfile", # contains general information about the company + "summaryDetail", # prices + volume + market cap + etc + "assetProfile", # summaryProfile + company officers + "fundProfile", + "price", # current prices + "quoteType", # quoteType + "esgScores", # Environmental, social, and governance (ESG) scores, sustainability and ethical performance of companies + "incomeStatementHistory", + "incomeStatementHistoryQuarterly", + "balanceSheetHistory", + "balanceSheetHistoryQuarterly", + "cashFlowStatementHistory", + "cashFlowStatementHistoryQuarterly", + "defaultKeyStatistics", # KPIs (PE, enterprise value, EPS, EBITA, and more) + "financialData", # Financial KPIs (revenue, gross margins, operating cash flow, free cash flow, and more) + "calendarEvents", # future earnings date + "secFilings", # SEC filings, such as 10K and 10Q reports + "upgradeDowngradeHistory", # upgrades and downgrades that analysts have given a company's stock + "institutionOwnership", # institutional ownership, holders and shares outstanding + "fundOwnership", # mutual fund ownership, holders and shares outstanding + "majorDirectHolders", + "majorHoldersBreakdown", + "insiderTransactions", # insider transactions, such as the number of shares bought and sold by company executives + "insiderHolders", # insider holders, such as the number of shares held by company executives + "netSharePurchaseActivity", # net share purchase activity, such as the number of shares bought and sold by company executives + "earnings", # earnings history + "earningsHistory", + "earningsTrend", # earnings trend + "industryTrend", + "indexTrend", + "sectorTrend", + "recommendationTrend", + "futuresChain", +) diff --git a/yfinance/scrapers/quote.py b/yfinance/scrapers/quote.py index 98e6bc50a..37b77f92a 100644 --- a/yfinance/scrapers/quote.py +++ b/yfinance/scrapers/quote.py @@ -9,7 +9,8 @@ from yfinance import utils from yfinance.data import YfData -from yfinance.exceptions import YFNotImplementedError +from yfinance.const import quote_summary_valid_modules, _BASE_URL_ +from yfinance.exceptions import YFNotImplementedError, YFinanceDataException, YFinanceException info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"} info_retired_keys_price.update({"regularMarket"+s for 
s in ["DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume"]}) @@ -21,7 +22,7 @@ info_retired_keys = info_retired_keys_price | info_retired_keys_exchange | info_retired_keys_marketCap | info_retired_keys_symbol -_BASIC_URL_ = "https://query2.finance.yahoo.com/v10/finance/quoteSummary" +_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary" class InfoDictWrapper(MutableMapping): @@ -569,7 +570,7 @@ def __init__(self, data: YfData, symbol: str, proxy=None): @property def info(self) -> dict: if self._info is None: - self._fetch(self.proxy) + self._fetch_info(self.proxy) self._fetch_complementary(self.proxy) return self._info @@ -583,7 +584,12 @@ def sustainability(self) -> pd.DataFrame: @property def recommendations(self) -> pd.DataFrame: if self._recommendations is None: - raise YFNotImplementedError('recommendations') + result = self._fetch(self.proxy, modules=['recommendationTrend']) + try: + data = result["quoteSummary"]["result"][0]["recommendationTrend"]["trend"] + except (KeyError, IndexError): + raise YFinanceDataException(f"Failed to parse json response from Yahoo Finance: {result}") + self._recommendations = pd.DataFrame(data) return self._recommendations @property @@ -592,16 +598,27 @@ def calendar(self) -> pd.DataFrame: raise YFNotImplementedError('calendar') return self._calendar - def _fetch(self, proxy): + @staticmethod + def valid_modules(): + return quote_summary_valid_modules + + def _fetch(self, proxy, modules: list): + if not isinstance(modules, list): + raise YFinanceException("Should provide a list of modules, see available modules using `valid_modules`") + + modules = ','.join([m for m in modules if m in quote_summary_valid_modules]) + if len(modules) == 0: + raise YFinanceException("No valid modules provided, see available modules using `valid_modules`") + params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "symbol": self._symbol} + result = self._data.get_raw_json(_QUOTE_SUMMARY_URL_ + f"/{self._symbol}", user_agent_headers=self._data.user_agent_headers, params=params_dict, proxy=proxy) + return result + + def _fetch_info(self, proxy): if self._already_fetched: return self._already_fetched = True modules = ['financialData', 'quoteType', 'defaultKeyStatistics', 'assetProfile', 'summaryDetail'] - modules = ','.join(modules) - params_dict = {"modules": modules, "ssl": "true"} - result = self._data.get_raw_json( - _BASIC_URL_ + f"/{self._symbol}", params=params_dict, proxy=proxy - ) + result = self._fetch(proxy, modules=modules) result["quoteSummary"]["result"][0]["symbol"] = self._symbol query1_info = next( (info for info in result.get("quoteSummary", {}).get("result", []) if info["symbol"] == self._symbol), @@ -643,7 +660,7 @@ def _fetch_complementary(self, proxy): self._already_fetched_complementary = True # self._scrape(proxy) # decrypt broken - self._fetch(proxy) + self._fetch_info(proxy) if self._info is None: return From 9b9158050a74793055eb5b8ea802be62fe16f195 Mon Sep 17 00:00:00 2001 From: "Julia L. 
Wang" Date: Fri, 8 Dec 2023 04:26:04 -0500 Subject: [PATCH 04/25] Pandas future proofing (tested) Changed fillna, iloc, and added test changes --- tests/ticker.py | 9 +++++++++ yfinance/base.py | 12 ++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/ticker.py b/tests/ticker.py index 871d01efe..17568332c 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -223,6 +223,15 @@ def test_actions(self): self.assertIsInstance(data, pd.DataFrame, "data has wrong type") self.assertFalse(data.empty, "data is empty") + def test_reconstruct_intervals_batch(self): + data = self.ticker.history(period="3mo", interval="1d", prepost=True, repair=True) + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + reconstructed = self.ticker._reconstruct_intervals_batch(data, "1wk", True) + self.assertIsInstance(reconstructed, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + class TestTickerEarnings(unittest.TestCase): session = None diff --git a/yfinance/base.py b/yfinance/base.py index ac766f928..e1128f2ae 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -706,33 +706,33 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): # But in case are repairing a chunk of bad 1d data, back/forward-fill the # good div-adjustments - not perfect, but a good backup. div_adjusts[f_tag] = np.nan - div_adjusts = div_adjusts.fillna(method='bfill').fillna(method='ffill') + div_adjusts = div_adjusts.ffill().bfill() for idx in np.where(f_tag)[0]: dt = df_new_calib.index[idx] n = len(div_adjusts) if df_new.loc[dt, "Dividends"] != 0: if idx < n - 1: # Easy, take div-adjustment from next-day - div_adjusts[idx] = div_adjusts[idx + 1] + div_adjusts[idx] = div_adjusts.iloc[idx + 1] else: # Take previous-day div-adjustment and reverse todays adjustment div_adj = 1.0 - df_new_calib["Dividends"].iloc[idx] / df_new_calib['Close'].iloc[ idx - 1] - div_adjusts[idx] = div_adjusts[idx - 1] / div_adj + div_adjusts[idx] = div_adjusts.iloc[idx - 1] / div_adj else: if idx > 0: # Easy, take div-adjustment from previous-day - div_adjusts[idx] = div_adjusts[idx - 1] + div_adjusts[idx] = div_adjusts.iloc[idx - 1] else: # Must take next-day div-adjustment - div_adjusts[idx] = div_adjusts[idx + 1] + div_adjusts[idx] = div_adjusts.iloc[idx + 1] if df_new_calib["Dividends"].iloc[idx + 1] != 0: div_adjusts[idx] *= 1.0 - df_new_calib["Dividends"].iloc[idx + 1] / \ df_new_calib['Close'].iloc[idx] f_close_bad = df_block_calib['Close'] == tag df_new['Adj Close'] = df_block['Close'] * div_adjusts if f_close_bad.any(): - df_new.loc[f_close_bad, 'Adj Close'] = df_new['Close'][f_close_bad] * div_adjusts[f_close_bad] + df_new.loc[f_close_bad, 'Adj Close'] = df_new['Close'].iloc[f_close_bad] * div_adjusts[f_close_bad] # Check whether 'df_fine' has different split-adjustment. 
# If different, then adjust to match 'df' From fb2006b814eda152d6c7c1a1a6ddcd62b2c1622c Mon Sep 17 00:00:00 2001 From: Marco Carvalho Date: Sat, 2 Dec 2023 12:32:52 -0300 Subject: [PATCH 05/25] add ruff --- .github/workflows/ruff.yml | 12 ++++ tests/context.py | 14 ++--- tests/prices.py | 89 ++++++---------------------- tests/ticker.py | 1 - tests/utils.py | 1 - yfinance/base.py | 97 ++++++++++++++++--------------- yfinance/data.py | 14 ++--- yfinance/multi.py | 4 +- yfinance/scrapers/analysis.py | 1 - yfinance/scrapers/fundamentals.py | 2 +- yfinance/scrapers/quote.py | 25 ++++---- yfinance/utils.py | 21 +++---- 12 files changed, 114 insertions(+), 167 deletions(-) create mode 100644 .github/workflows/ruff.yml diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 000000000..b5a6e0aab --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,12 @@ +name: Ruff +on: + pull_request: + branches: + - master + - main +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: chartboost/ruff-action@v1 \ No newline at end of file diff --git a/tests/context.py b/tests/context.py index 54d37b729..a9ec7d07e 100644 --- a/tests/context.py +++ b/tests/context.py @@ -4,18 +4,20 @@ import datetime as _dt import sys import os +import yfinance +from requests import Session +from requests_cache import CacheMixin, SQLiteCache +from requests_ratelimiter import LimiterMixin, MemoryQueueBucket +from pyrate_limiter import Duration, RequestRate, Limiter + _parent_dp = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) _src_dp = _parent_dp sys.path.insert(0, _src_dp) -import yfinance - - # Optional: see the exact requests that are made during tests: # import logging # logging.basicConfig(level=logging.DEBUG) - # Use adjacent cache folder for testing, delete if already exists and older than today testing_cache_dirpath = os.path.join(_ad.user_cache_dir(), "py-yfinance-testing") yfinance.set_tz_cache_location(testing_cache_dirpath) @@ -27,12 +29,8 @@ # Setup a session to rate-limit and cache persistently: -from requests import Session -from requests_cache import CacheMixin, SQLiteCache -from requests_ratelimiter import LimiterMixin, MemoryQueueBucket class CachedLimiterSession(CacheMixin, LimiterMixin, Session): pass -from pyrate_limiter import Duration, RequestRate, Limiter history_rate = RequestRate(1, Duration.SECOND*2) limiter = Limiter(history_rate) cache_fp = os.path.join(testing_cache_dirpath, "unittests-cache") diff --git a/tests/prices.py b/tests/prices.py index a3e485e18..7bcfd2fd4 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -132,7 +132,6 @@ def test_pricesEventsMerge(self): def test_pricesEventsMerge_bug(self): # Reproduce exception when merging intraday prices with future dividend - tkr = 'S32.AX' interval = '30m' df_index = [] d = 13 @@ -148,7 +147,7 @@ def test_pricesEventsMerge_bug(self): future_div_dt = _dt.datetime(2023, 9, 14, 10) divs = _pd.DataFrame(data={"Dividends":[div]}, index=[future_div_dt]) - df2 = yf.utils.safe_merge_dfs(df, divs, interval) + yf.utils.safe_merge_dfs(df, divs, interval) # No exception = test pass def test_intraDayWithEvents(self): @@ -223,8 +222,10 @@ def test_dailyWithEvents(self): self.assertTrue((df_divs.index.date == dates).all()) except AssertionError: print(f'- ticker = {tkr}') - print('- response:') ; print(df_divs.index.date) - print('- answer:') ; print(dates) + print('- response:') + print(df_divs.index.date) + print('- answer:') + print(dates) raise def 
test_dailyWithEvents_bugs(self): @@ -270,60 +271,6 @@ def test_dailyWithEvents_bugs(self): self.assertTrue(df_merged[df_prices.columns].iloc[1:].equals(df_prices)) self.assertEqual(df_merged.index[0], div_dt) - def test_intraDayWithEvents(self): - tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"] - test_run = False - for tkr in tkrs: - start_d = _dt.date.today() - _dt.timedelta(days=59) - end_d = None - df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True) - df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0] - if df_daily_divs.shape[0] == 0: - continue - - last_div_date = df_daily_divs.index[-1] - start_d = last_div_date.date() - end_d = last_div_date.date() + _dt.timedelta(days=1) - df_intraday = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True) - self.assertTrue((df_intraday["Dividends"] != 0.0).any()) - - df_intraday_divs = df_intraday["Dividends"][df_intraday["Dividends"] != 0] - df_intraday_divs.index = df_intraday_divs.index.floor('D') - self.assertTrue(df_daily_divs.equals(df_intraday_divs)) - - test_run = True - - if not test_run: - self.skipTest("Skipping test_intraDayWithEvents() because no tickers had a dividend in last 60 days") - - def test_intraDayWithEvents_tase(self): - # TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present - - tase_tkrs = ["ICL.TA", "ESLT.TA", "ONE.TA", "MGDL.TA"] - test_run = False - for tkr in tase_tkrs: - start_d = _dt.date.today() - _dt.timedelta(days=59) - end_d = None - df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True) - df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0] - if df_daily_divs.shape[0] == 0: - continue - - last_div_date = df_daily_divs.index[-1] - start_d = last_div_date.date() - end_d = last_div_date.date() + _dt.timedelta(days=1) - df_intraday = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True) - self.assertTrue((df_intraday["Dividends"] != 0.0).any()) - - df_intraday_divs = df_intraday["Dividends"][df_intraday["Dividends"] != 0] - df_intraday_divs.index = df_intraday_divs.index.floor('D') - self.assertTrue(df_daily_divs.equals(df_intraday_divs)) - - test_run = True - - if not test_run: - self.skipTest("Skipping test_intraDayWithEvents_tase() because no tickers had a dividend in last 60 days") - def test_weeklyWithEvents(self): # Reproduce issue #521 tkr1 = "QQQ" @@ -415,9 +362,9 @@ def test_tz_dst_ambiguous(self): raise Exception("Ambiguous DST issue not resolved") def test_dst_fix(self): - # Daily intervals should start at time 00:00. But for some combinations of date and timezone, + # Daily intervals should start at time 00:00. But for some combinations of date and timezone, # Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem. - # The clue is (a) minutes=0 and (b) hour near 0. + # The clue is (a) minutes=0 and (b) hour near 0. # Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion. # The correction is successful if no days are weekend, and weekly data begins Monday @@ -440,8 +387,8 @@ def test_dst_fix(self): raise def test_prune_post_intraday_us(self): - # Half-day before USA Thanksgiving. Yahoo normally - # returns an interval starting when regular trading closes, + # Half-day before USA Thanksgiving. 
Yahoo normally + # returns an interval starting when regular trading closes, # even if prepost=False. # Setup @@ -477,8 +424,8 @@ def test_prune_post_intraday_us(self): self.assertEqual(len(late_open_dates), 0) def test_prune_post_intraday_omx(self): - # Half-day before Sweden Christmas. Yahoo normally - # returns an interval starting when regular trading closes, + # Half-day before Sweden Christmas. Yahoo normally + # returns an interval starting when regular trading closes, # even if prepost=False. # If prepost=False, test that yfinance is removing prepost intervals. @@ -528,7 +475,6 @@ def test_prune_post_intraday_omx(self): def test_prune_post_intraday_asx(self): # Setup tkr = "BHP.AX" - interval = "1h" interval_td = _dt.timedelta(hours=1) time_open = _dt.time(10) time_close = _dt.time(16, 12) @@ -566,7 +512,7 @@ def test_aggregate_capital_gains(self): end = "2019-12-31" interval = "3mo" - df = dat.history(start=start, end=end, interval=interval) + dat.history(start=start, end=end, interval=interval) class TestPriceRepair(unittest.TestCase): @@ -589,7 +535,6 @@ def test_reconstruct_2m(self): tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"] dt_now = _pd.Timestamp.utcnow() - td_7d = _dt.timedelta(days=7) td_60d = _dt.timedelta(days=60) # Round time for 'requests_cache' reuse @@ -599,7 +544,7 @@ def test_reconstruct_2m(self): dat = yf.Ticker(tkr, session=self.session) end_dt = dt_now start_dt = end_dt - td_60d - df = dat.history(start=start_dt, end=end_dt, interval="2m", repair=True) + dat.history(start=start_dt, end=end_dt, interval="2m", repair=True) def test_repair_100x_random_weekly(self): # Setup: @@ -844,7 +789,7 @@ def test_repair_zeroes_daily(self): self.assertFalse(repaired_df["Repaired?"].isna().any()) def test_repair_zeroes_daily_adjClose(self): - # Test that 'Adj Close' is reconstructed correctly, + # Test that 'Adj Close' is reconstructed correctly, # particularly when a dividend occurred within 1 day. tkr = "INTC" @@ -914,7 +859,7 @@ def test_repair_zeroes_hourly(self): self.assertFalse(repaired_df["Repaired?"].isna().any()) def test_repair_bad_stock_split(self): - # Stocks that split in 2022 but no problems in Yahoo data, + # Stocks that split in 2022 but no problems in Yahoo data, # so repair should change nothing good_tkrs = ['AMZN', 'DXCM', 'FTNT', 'GOOG', 'GME', 'PANW', 'SHOP', 'TSLA'] good_tkrs += ['AEI', 'CHRA', 'GHI', 'IRON', 'LXU', 'NUZE', 'RSLS', 'TISI'] @@ -979,8 +924,8 @@ def test_repair_bad_stock_split(self): # print(repaired_df[c] - correct_df[c]) raise - # Had very high price volatility in Jan-2021 around split date that could - # be mistaken for missing stock split adjustment. And old logic did think + # Had very high price volatility in Jan-2021 around split date that could + # be mistaken for missing stock split adjustment. And old logic did think # column 'High' required fixing - wrong! 
sketchy_tkrs = ['FIZZ'] intervals = ['1wk'] diff --git a/tests/ticker.py b/tests/ticker.py index 871d01efe..8f41cca65 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -18,7 +18,6 @@ import unittest import requests_cache from typing import Union, Any -import re from urllib.parse import urlparse, parse_qs, urlencode, urlunparse ticker_attributes = ( diff --git a/tests/utils.py b/tests/utils.py index 1b1a6a2d4..f9e54c09d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -12,7 +12,6 @@ # import numpy as np from .context import yfinance as yf -from .context import session_gbl import unittest # import requests_cache diff --git a/yfinance/base.py b/yfinance/base.py index ac766f928..9b8f62d93 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -247,9 +247,9 @@ def history(self, period="1mo", interval="1d", err_msg += f' (period={period})' fail = False - if data is None or type(data) is not dict: + if data is None or not isinstance(data, dict): fail = True - elif type(data) is dict and 'status_code' in data: + elif isinstance(data, dict) and 'status_code' in data: err_msg += f"(Yahoo status_code = {data['status_code']})" fail = True elif "chart" in data and data["chart"]["error"]: @@ -395,7 +395,7 @@ def history(self, period="1mo", interval="1d", df = df[~df.index.duplicated(keep='first')] # must do before repair if isinstance(repair, str) and repair=='silent': - utils.log_once(logging.WARNING, f"yfinance: Ticker.history(repair='silent') value is deprecated and will be removed in future version. Repair now silent by default, use logging module to increase verbosity.") + utils.log_once(logging.WARNING, "yfinance: Ticker.history(repair='silent') value is deprecated and will be removed in future version. Repair now silent by default, use logging module to increase verbosity.") repair = True if repair: # Do this before auto/back adjust @@ -521,7 +521,6 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): return df dts_to_repair = df.index[f_repair_rows] - indices_to_repair = np.where(f_repair_rows)[0] if len(dts_to_repair) == 0: logger.info("Nothing needs repairing (dts_to_repair[] empty)") @@ -538,9 +537,6 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): # Group nearby NaN-intervals together to reduce number of Yahoo fetches dts_groups = [[dts_to_repair[0]]] - last_dt = dts_to_repair[0] - last_ind = indices_to_repair[0] - td = utils._interval_to_timedelta(interval) # Note on setting max size: have to allow space for adding good data if sub_interval == "1mo": grp_max_size = _dateutil.relativedelta.relativedelta(years=2) @@ -556,14 +552,11 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): grp_max_size = _datetime.timedelta(days=30) logger.debug(f"grp_max_size = {grp_max_size}") for i in range(1, len(dts_to_repair)): - ind = indices_to_repair[i] dt = dts_to_repair[i] if dt.date() < dts_groups[-1][0].date() + grp_max_size: dts_groups[-1].append(dt) else: dts_groups.append([dt]) - last_dt = dt - last_ind = ind logger.debug("Repair groups:") for g in dts_groups: @@ -687,7 +680,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): df_new.index = new_index logger.debug('df_new:' + '\n' + str(df_new)) - # Calibrate! + # Calibrate! 
common_index = np.intersect1d(df_block.index, df_new.index) if len(common_index) == 0: # Can't calibrate so don't attempt repair @@ -701,9 +694,9 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): f_tag = df_block_calib['Adj Close'] == tag if f_tag.any(): div_adjusts = df_block_calib['Adj Close'] / df_block_calib['Close'] - # The loop below assumes each 1d repair is isoloated, i.e. surrounded by - # good data. Which is case most of time. - # But in case are repairing a chunk of bad 1d data, back/forward-fill the + # The loop below assumes each 1d repair is isoloated, i.e. surrounded by + # good data. Which is case most of time. + # But in case are repairing a chunk of bad 1d data, back/forward-fill the # good div-adjustments - not perfect, but a good backup. div_adjusts[f_tag] = np.nan div_adjusts = div_adjusts.fillna(method='bfill').fillna(method='ffill') @@ -787,14 +780,14 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): no_fine_data_dts = [] for idx in bad_dts: if idx not in df_new.index: - # Yahoo didn't return finer-grain data for this interval, + # Yahoo didn't return finer-grain data for this interval, # so probably no trading happened. no_fine_data_dts.append(idx) if len(no_fine_data_dts) > 0: - logger.debug(f"Yahoo didn't return finer-grain data for these intervals: " + str(no_fine_data_dts)) + logger.debug("Yahoo didn't return finer-grain data for these intervals: " + str(no_fine_data_dts)) for idx in bad_dts: if idx not in df_new.index: - # Yahoo didn't return finer-grain data for this interval, + # Yahoo didn't return finer-grain data for this interval, # so probably no trading happened. continue df_new_row = df_new.loc[idx] @@ -929,11 +922,13 @@ def _fix_unit_random_mixups(self, df, interval, tz_exchange, prepost): if fi[j]: df2.loc[idx, c] = df.loc[idx, c] * 0.01 - c = "High" ; j = data_cols.index(c) + c = "High" + j = data_cols.index(c) if fi[j]: df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max() - c = "Low" ; j = data_cols.index(c) + c = "Low" + j = data_cols.index(c) if fi[j]: df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min() @@ -949,11 +944,13 @@ def _fix_unit_random_mixups(self, df, interval, tz_exchange, prepost): if fi[j]: df2.loc[idx, c] = df.loc[idx, c] * 100.0 - c = "High" ; j = data_cols.index(c) + c = "High" + j = data_cols.index(c) if fi[j]: df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max() - c = "Low" ; j = data_cols.index(c) + c = "Low" + j = data_cols.index(c) if fi[j]: df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min() @@ -1142,7 +1139,7 @@ def _fix_missing_div_adjust(self, df, interval, tz_exchange): div_indices = np.where(f_div)[0] last_div_idx = div_indices[-1] if last_div_idx == 0: - # Not enough data to recalculate the div-adjustment, + # Not enough data to recalculate the div-adjustment, # because need close day before logger.debug('div-adjust-repair: Insufficient data to recalculate div-adjustment') return df @@ -1186,13 +1183,13 @@ def _fix_bad_stock_split(self, df, interval, tz_exchange): # stock split to old price data. # # There is a slight complication, because Yahoo does another stupid thing. - # Sometimes the old data is adjusted twice. So cannot simply assume - # which direction to reverse adjustment - have to analyse prices and detect. + # Sometimes the old data is adjusted twice. So cannot simply assume + # which direction to reverse adjustment - have to analyse prices and detect. # Not difficult. 
if df.empty: return df - + logger = utils.get_yf_logger() interday = interval in ['1d', '1wk', '1mo', '3mo'] @@ -1220,7 +1217,7 @@ def _fix_bad_stock_split(self, df, interval, tz_exchange): def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_volume=False): if df.empty: return df - + logger = utils.get_yf_logger() df = df.sort_index(ascending=False) @@ -1239,9 +1236,8 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v logger.debug(f'price-repair-split: start_min={start_min}') OHLC = ['Open', 'High', 'Low', 'Close'] - OHLCA = OHLC + ['Adj Close'] - # Do not attempt repair of the split is small, + # Do not attempt repair of the split is small, # could be mistaken for normal price variance if 0.8 < split < 1.25: logger.info("price-repair-split: Split ratio too close to 1. Won't repair") @@ -1279,10 +1275,10 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v else: debug_cols = [] - # Calculate daily price % change. To reduce effect of price volatility, + # Calculate daily price % change. To reduce effect of price volatility, # calculate change for each OHLC column. if interday and interval != '1d' and split not in [100.0, 100, 0.001]: - # Avoid using 'Low' and 'High'. For multiday intervals, these can be + # Avoid using 'Low' and 'High'. For multiday intervals, these can be # very volatile so reduce ability to detect genuine stock split errors _1d_change_x = np.full((n, 2), 1.0) price_data = df2[['Open','Close']].to_numpy() @@ -1422,7 +1418,7 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v else: threshold = _datetime.timedelta(days=threshold_days) if gap_td < threshold: - logger.info(f'price-repair-split: 100x changes are too soon after stock split events, aborting') + logger.info('price-repair-split: 100x changes are too soon after stock split events, aborting') return df # if logger.isEnabledFor(logging.DEBUG): @@ -1434,9 +1430,12 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v def map_signals_to_ranges(f, f_up, f_down): # Ensure 0th element is False, because True is nonsense if f[0]: - f = np.copy(f) ; f[0] = False - f_up = np.copy(f_up) ; f_up[0] = False - f_down = np.copy(f_down) ; f_down[0] = False + f = np.copy(f) + f[0] = False + f_up = np.copy(f_up) + f_up[0] = False + f_down = np.copy(f_down) + f_down[0] = False if not f.any(): return [] @@ -1467,9 +1466,9 @@ def map_signals_to_ranges(f, f_up, f_down): if correct_columns_individually: f_corrected = np.full(n, False) if correct_volume: - # If Open or Close is repaired but not both, + # If Open or Close is repaired but not both, # then this means the interval has a mix of correct - # and errors. A problem for correcting Volume, + # and errors. A problem for correcting Volume, # so use a heuristic: # - if both Open & Close were Nx bad => Volume is Nx bad # - if only one of Open & Close are Nx bad => Volume is 0.5*Nx bad @@ -1483,7 +1482,7 @@ def map_signals_to_ranges(f, f_up, f_down): if appears_suspended and (idx_latest_active is not None and idx_latest_active >= idx_first_f): # Suspended midway during data date range. # 1: process data before suspension in index-ascending (date-descending) order. - # 2: process data after suspension in index-descending order. Requires signals to be reversed, + # 2: process data after suspension in index-descending order. 
Requires signals to be reversed, # then returned ranges to also be reversed, because this logic was originally written for # index-ascending (date-descending) order. fj = f[:, j] @@ -1508,7 +1507,8 @@ def map_signals_to_ranges(f, f_up, f_down): for i in range(len(ranges_after)): r = ranges_after[i] ranges_after[i] = (n-r[1], n-r[0], r[2]) - ranges = ranges_before ; ranges.extend(ranges_after) + ranges = ranges_before + ranges.extend(ranges_after) else: ranges = map_signals_to_ranges(f[:, j], f_up[:, j], f_down[:, j]) logger.debug(f"column '{c}' ranges: {ranges}") @@ -1541,9 +1541,11 @@ def map_signals_to_ranges(f, f_up, f_down): ranges = [] for r in ranges: if r[2] == 'split': - m = split ; m_rcp = split_rcp + m = split + m_rcp = split_rcp else: - m = split_rcp ; m_rcp = split + m = split_rcp + m_rcp = split if interday: logger.info(f"price-repair-split: Corrected {fix_type} on col={c} range=[{df2.index[r[1]-1].date()}:{df2.index[r[0]].date()}] m={m:.4f}") else: @@ -1573,7 +1575,7 @@ def map_signals_to_ranges(f, f_up, f_down): if appears_suspended and (idx_latest_active is not None and idx_latest_active >= idx_first_f): # Suspended midway during data date range. # 1: process data before suspension in index-ascending (date-descending) order. - # 2: process data after suspension in index-descending order. Requires signals to be reversed, + # 2: process data after suspension in index-descending order. Requires signals to be reversed, # then returned ranges to also be reversed, because this logic was originally written for # index-ascending (date-descending) order. ranges_before = map_signals_to_ranges(f[idx_latest_active:], f_up[idx_latest_active:], f_down[idx_latest_active:]) @@ -1595,7 +1597,8 @@ def map_signals_to_ranges(f, f_up, f_down): for i in range(len(ranges_after)): r = ranges_after[i] ranges_after[i] = (n-r[1], n-r[0], r[2]) - ranges = ranges_before ; ranges.extend(ranges_after) + ranges = ranges_before + ranges.extend(ranges_after) else: ranges = map_signals_to_ranges(f, f_up, f_down) if start_min is not None: @@ -1607,9 +1610,11 @@ def map_signals_to_ranges(f, f_up, f_down): del ranges[i] for r in ranges: if r[2] == 'split': - m = split ; m_rcp = split_rcp + m = split + m_rcp = split_rcp else: - m = split_rcp ; m_rcp = split + m = split_rcp + m_rcp = split logger.debug(f"price-repair-split: range={r} m={m}") for c in ['Open', 'High', 'Low', 'Close', 'Adj Close']: df2.iloc[r[0]:r[1], df2.columns.get_loc(c)] *= m @@ -1965,11 +1970,9 @@ def get_shares_full(self, start=None, end=None, proxy=None): if start is not None: start_ts = utils._parse_user_dt(start, tz) start = pd.Timestamp.fromtimestamp(start_ts).tz_localize("UTC").tz_convert(tz) - start_d = start.date() if end is not None: end_ts = utils._parse_user_dt(end, tz) end = pd.Timestamp.fromtimestamp(end_ts).tz_localize("UTC").tz_convert(tz) - end_d = end.date() if end is None: end = dt_now if start is None: @@ -1991,7 +1994,7 @@ def get_shares_full(self, start=None, end=None, proxy=None): return None try: fail = json_data["finance"]["error"]["code"] == "Bad Request" - except KeyError as e: + except KeyError: fail = False if fail: logger.error(f"{self.ticker}: Yahoo web request for share count failed") diff --git a/yfinance/data.py b/yfinance/data.py index 4906e6f4d..2365f66c7 100644 --- a/yfinance/data.py +++ b/yfinance/data.py @@ -8,6 +8,7 @@ from frozendict import frozendict from . 
import utils, cache +import threading cache_maxsize = 64 @@ -33,7 +34,6 @@ def wrapped(*args, **kwargs): return wrapped -import threading class SingletonMeta(type): """ Metaclass that creates a Singleton instance. @@ -68,8 +68,8 @@ def __init__(self, session=None): # Not caching self._session_is_caching = False else: - # Is caching. This is annoying. - # Can't simply use a non-caching session to fetch cookie & crumb, + # Is caching. This is annoying. + # Can't simply use a non-caching session to fetch cookie & crumb, # because then the caching-session won't have cookie. self._session_is_caching = True from requests_cache import DO_NOT_CACHE @@ -207,7 +207,7 @@ def _get_crumb_basic(self, proxy=None, timeout=30): utils.get_yf_logger().debug(f"crumb = '{self._crumb}'") return self._crumb - + @utils.log_indent_decorator def _get_cookie_and_crumb_basic(self, proxy, timeout): cookie = self._get_cookie_basic(proxy, timeout) @@ -257,10 +257,10 @@ def _get_cookie_csrf(self, proxy, timeout): 'originalDoneUrl': originalDoneUrl, 'namespace': namespace, } - post_args = {**base_args, + post_args = {**base_args, 'url': f'https://consent.yahoo.com/v2/collectConsent?sessionId={sessionId}', 'data': data} - get_args = {**base_args, + get_args = {**base_args, 'url': f'https://guce.yahoo.com/copyConsent?sessionId={sessionId}', 'data': data} if self._session_is_caching: @@ -288,7 +288,7 @@ def _get_crumb_csrf(self, proxy=None, timeout=30): return None get_args = { - 'url': 'https://query2.finance.yahoo.com/v1/test/getcrumb', + 'url': 'https://query2.finance.yahoo.com/v1/test/getcrumb', 'headers': self.user_agent_headers, 'proxies': proxy, 'timeout': timeout} diff --git a/yfinance/multi.py b/yfinance/multi.py index ece3cc3ea..f9e0332f3 100644 --- a/yfinance/multi.py +++ b/yfinance/multi.py @@ -172,7 +172,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_ rounding=rounding, timeout=timeout) if progress: shared._PROGRESS_BAR.animate() - + if progress: shared._PROGRESS_BAR.completed() @@ -262,7 +262,7 @@ def _download_one_threaded(ticker, start=None, end=None, actions=False, progress=True, period="max", interval="1d", prepost=False, proxy=None, keepna=False, rounding=False, timeout=10): - data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair, + _download_one(ticker, start, end, auto_adjust, back_adjust, repair, actions, period, interval, prepost, proxy, rounding, keepna, timeout) if progress: diff --git a/yfinance/scrapers/analysis.py b/yfinance/scrapers/analysis.py index 27c27c9a4..0f6cd5f15 100644 --- a/yfinance/scrapers/analysis.py +++ b/yfinance/scrapers/analysis.py @@ -1,6 +1,5 @@ import pandas as pd -from yfinance import utils from yfinance.data import YfData from yfinance.exceptions import YFNotImplementedError diff --git a/yfinance/scrapers/fundamentals.py b/yfinance/scrapers/fundamentals.py index 27623e1ec..2ff37924b 100644 --- a/yfinance/scrapers/fundamentals.py +++ b/yfinance/scrapers/fundamentals.py @@ -99,7 +99,7 @@ def _create_financials_table(self, name, timescale, proxy): try: return self.get_financials_time_series(timescale, keys, proxy) - except Exception as e: + except Exception: pass def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.DataFrame: diff --git a/yfinance/scrapers/quote.py b/yfinance/scrapers/quote.py index 37b77f92a..c09cc8d36 100644 --- a/yfinance/scrapers/quote.py +++ b/yfinance/scrapers/quote.py @@ -26,7 +26,7 @@ class InfoDictWrapper(MutableMapping): - """ Simple wrapper around info dict, 
intercepting 'gets' to + """ Simple wrapper around info dict, intercepting 'gets' to print how-to-migrate messages for specific keys. Requires override dict API""" @@ -68,7 +68,7 @@ def __delitem__(self, k): def __iter__(self): return iter(self.info) - + def __len__(self): return len(self.info) @@ -126,7 +126,7 @@ def __init__(self, tickerBaseObject, proxy=None): _properties += ["fifty_day_average", "two_hundred_day_average", "ten_day_average_volume", "three_month_average_volume"] _properties += ["year_high", "year_low", "year_change"] - # Because released before fixing key case, need to officially support + # Because released before fixing key case, need to officially support # camel-case but also secretly support snake-case base_keys = [k for k in _properties if '_' not in k] @@ -134,7 +134,7 @@ def __init__(self, tickerBaseObject, proxy=None): self._sc_to_cc_key = {k: utils.snake_case_2_camelCase(k) for k in sc_keys} self._cc_to_sc_key = {v: k for k, v in self._sc_to_cc_key.items()} - + self._public_keys = sorted(base_keys + list(self._sc_to_cc_key.values())) self._keys = sorted(self._public_keys + sc_keys) @@ -157,7 +157,7 @@ def get(self, key, default=None): def __getitem__(self, k): if not isinstance(k, str): - raise KeyError(f"key must be a string") + raise KeyError("key must be a string") if k not in self._keys: raise KeyError(f"'{k}' not valid key. Examine 'FastInfo.keys()'") if k in self._cc_to_sc_key: @@ -177,7 +177,6 @@ def __repr__(self): return self.__str__() def toJSON(self, indent=4): - d = {k: self[k] for k in self.keys()} return json.dumps({k: self[k] for k in self.keys()}, indent=indent) def _get_1y_prices(self, fullDaysOnly=False): @@ -337,7 +336,7 @@ def previous_close(self): else: prices = prices[["Close"]].groupby(prices.index.date).last() if prices.shape[0] < 2: - # Very few symbols have previousClose despite no + # Very few symbols have previousClose despite no # no trading data e.g. 'QCSTIX'. fail = True else: @@ -356,12 +355,12 @@ def regular_market_previous_close(self): return self._reg_prev_close prices = self._get_1y_prices() if prices.shape[0] == 1: - # Tiny % of tickers don't return daily history before last trading day, + # Tiny % of tickers don't return daily history before last trading day, # so backup option is hourly history: prices = self._get_1wk_1h_reg_prices() prices = prices[["Close"]].groupby(prices.index.date).last() if prices.shape[0] < 2: - # Very few symbols have regularMarketPreviousClose despite no + # Very few symbols have regularMarketPreviousClose despite no # no trading data. E.g. 'QCSTIX'. # So fallback to original info[] if available. 
self._tkr.info # trigger fetch @@ -630,10 +629,10 @@ def _fetch_info(self, proxy): if "maxAge" in query1_info[k] and query1_info[k]["maxAge"] == 1: query1_info[k]["maxAge"] = 86400 query1_info = { - k1: v1 - for k, v in query1_info.items() - if isinstance(v, dict) - for k1, v1 in v.items() + k1: v1 + for k, v in query1_info.items() + if isinstance(v, dict) + for k1, v1 in v.items() if v1 } # recursively format but only because of 'companyOfficers' diff --git a/yfinance/utils.py b/yfinance/utils.py index 2f28fdb88..0371ab5ff 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -29,7 +29,7 @@ from functools import lru_cache from inspect import getmembers from types import FunctionType -from typing import Dict, List, Optional +from typing import List, Optional import numpy as _np import pandas as _pd @@ -41,11 +41,6 @@ from yfinance import const from .const import _BASE_URL_ -try: - import ujson as _json -except ImportError: - import json as _json - user_agent_headers = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} @@ -62,7 +57,7 @@ def attributes(obj): @lru_cache(maxsize=20) def print_once(msg): - # 'warnings' module suppression of repeat messages does not work. + # 'warnings' module suppression of repeat messages does not work. # This function replicates correct behaviour print(msg) @@ -585,8 +580,8 @@ def fix_Yahoo_returning_prepost_unrequested(quotes, interval, tradingPeriods): def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange): - # Yahoo bug fix. If market is open today then Yahoo normally returns - # todays data as a separate row from rest-of week/month interval in above row. + # Yahoo bug fix. If market is open today then Yahoo normally returns + # todays data as a separate row from rest-of week/month interval in above row. # Seems to depend on what exchange e.g. crypto OK. # Fix = merge them together n = quotes.shape[0] @@ -650,7 +645,6 @@ def safe_merge_dfs(df_main, df_sub, interval): if df_main.empty: return df_main - df_sub_backup = df_sub.copy() data_cols = [c for c in df_sub.columns if c not in df_main] if len(data_cols) > 1: raise Exception("Expected 1 data col") @@ -704,7 +698,7 @@ def safe_merge_dfs(df_main, df_sub, interval): empty_row = _pd.DataFrame(data=empty_row_data, index=[dt]) df_main = _pd.concat([df_main, empty_row], sort=True) else: - # Else, only add out-of-range event dates if occurring in interval + # Else, only add out-of-range event dates if occurring in interval # immediately after last price row last_dt = df_main.index[-1] next_interval_start_dt = last_dt + td @@ -712,7 +706,6 @@ def safe_merge_dfs(df_main, df_sub, interval): for i in _np.where(f_outOfRange)[0]: dt = df_sub.index[i] if next_interval_start_dt <= dt < next_interval_end_dt: - new_dt = next_interval_start_dt get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs") empty_row = _pd.DataFrame(data=empty_row_data, index=[dt]) df_main = _pd.concat([df_main, empty_row], sort=True) @@ -772,9 +765,9 @@ def _reindex_events(df, new_index, data_col_name): def fix_Yahoo_dst_issue(df, interval): if interval in ["1d", "1w", "1wk"]: - # These intervals should start at time 00:00. But for some combinations of date and timezone, + # These intervals should start at time 00:00. But for some combinations of date and timezone, # Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem. 
- # The clue is (a) minutes=0 and (b) hour near 0. + # The clue is (a) minutes=0 and (b) hour near 0. # Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion: f_pre_midnight = (df.index.minute == 0) & (df.index.hour.isin([22, 23])) dst_error_hours = _np.array([0] * df.shape[0]) From 27ef2bcd1a316b473bcecec41c2d7d9fe47b0e6e Mon Sep 17 00:00:00 2001 From: Marco Carvalho Date: Sat, 2 Dec 2023 22:12:40 -0300 Subject: [PATCH 06/25] Update ruff.yml --- .github/workflows/ruff.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index b5a6e0aab..2e345cb19 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -4,9 +4,10 @@ on: branches: - master - main + - dev jobs: ruff: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: chartboost/ruff-action@v1 \ No newline at end of file + - uses: chartboost/ruff-action@v1 From f7825c1c3aa18f9c98ff5b5be1b9290e9056a65a Mon Sep 17 00:00:00 2001 From: Value Raider Date: Sat, 9 Dec 2023 19:37:10 +0000 Subject: [PATCH 07/25] Minor fixes for price repair and related tests Minor fixes for price repair and related tests: - update out-of-date test, remove delisted ticker - fix Numpy type mismatch error --- tests/data/CNE-L-1d-bad-stock-split-fixed.csv | 14 +++++++------- tests/prices.py | 2 +- yfinance/base.py | 5 +++++ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/data/CNE-L-1d-bad-stock-split-fixed.csv b/tests/data/CNE-L-1d-bad-stock-split-fixed.csv index b8ef1806d..b6627f509 100644 --- a/tests/data/CNE-L-1d-bad-stock-split-fixed.csv +++ b/tests/data/CNE-L-1d-bad-stock-split-fixed.csv @@ -2,10 +2,10 @@ Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits 2023-05-18 00:00:00+01:00,193.220001220703,200.839996337891,193.220001220703,196.839996337891,196.839996337891,653125,0,0 2023-05-17 00:00:00+01:00,199.740005493164,207.738006591797,190.121994018555,197.860000610352,197.860000610352,822268,0,0 2023-05-16 00:00:00+01:00,215.600006103516,215.600006103516,201.149993896484,205.100006103516,205.100006103516,451009,243.93939,0.471428571428571 -2023-05-15 00:00:00+01:00,215.399955531529,219.19995640346,210.599967302595,217.399987792969,102.39998147147,1761679.3939394,0,0 -2023-05-12 00:00:00+01:00,214.599988664899,216.199965558733,209.599965558733,211.399977329799,99.573855808803,1522298.48484849,0,0 -2023-05-11 00:00:00+01:00,219.999966430664,219.999966430664,212.199987357003,215.000000871931,101.269541277204,3568042.12121213,0,0 -2023-05-10 00:00:00+01:00,218.199954659598,223.000000435965,212.59995640346,215.399955531529,101.457929992676,5599908.78787879,0,0 -2023-05-09 00:00:00+01:00,224,227.688003540039,218.199996948242,218.399993896484,102.87100982666,1906090,0,0 -2023-05-05 00:00:00+01:00,220.999968174526,225.19996686663,220.799976457868,224.4,105.697140066964,964523.636363637,0,0 -2023-05-04 00:00:00+01:00,216.999989972796,222.799965558733,216.881988961356,221.399965994698,104.284055655343,880983.93939394,0,0 +2023-05-15 00:00:00+01:00,456.9090,464.9696,446.7272,461.1515,217.2121,830506.0000,0,0 +2023-05-12 00:00:00+01:00,455.2121,458.6060,444.6060,448.4242,211.2173,717655.0000,0,0 +2023-05-11 00:00:00+01:00,466.6666,466.6666,450.1212,456.0606,214.8142,1682077.0000,0,0 +2023-05-10 00:00:00+01:00,462.8484,473.0303,450.9696,456.9090,215.2138,2639957.0000,0,0 +2023-05-09 00:00:00+01:00,475.1515,482.9746,462.8485,463.2727,218.2112,898585.2857,0,0 +2023-05-05 
00:00:00+01:00,468.7878,477.6969,468.3636,476.0000,224.2061,454704.0000,0,0 +2023-05-04 00:00:00+01:00,460.3030,472.6060,460.0527,469.6363,221.2086,415321.0000,0,0 diff --git a/tests/prices.py b/tests/prices.py index a3e485e18..dc65858c5 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -917,7 +917,7 @@ def test_repair_bad_stock_split(self): # Stocks that split in 2022 but no problems in Yahoo data, # so repair should change nothing good_tkrs = ['AMZN', 'DXCM', 'FTNT', 'GOOG', 'GME', 'PANW', 'SHOP', 'TSLA'] - good_tkrs += ['AEI', 'CHRA', 'GHI', 'IRON', 'LXU', 'NUZE', 'RSLS', 'TISI'] + good_tkrs += ['AEI', 'GHI', 'IRON', 'LXU', 'NUZE', 'RSLS', 'TISI'] good_tkrs += ['BOL.ST', 'TUI1.DE'] intervals = ['1d', '1wk', '1mo', '3mo'] for tkr in good_tkrs: diff --git a/yfinance/base.py b/yfinance/base.py index ac766f928..c389bf30a 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -1297,8 +1297,13 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v # Update: if a VERY large dividend is paid out, then can be mistaken for a 1:2 stock split. # Fix = use adjusted prices adj = df2['Adj Close'].to_numpy() / df2['Close'].to_numpy() + df_dtype = price_data.dtype + if df_dtype == np.int64: + price_data = price_data.astype('float') for j in range(price_data.shape[1]): price_data[:,j] *= adj + if df_dtype == np.int64: + price_data = price_data.astype('int') _1d_change_x[1:] = price_data[1:, ] / price_data[:-1, ] f_zero_num_denom = f_zero | np.roll(f_zero, 1, axis=0) From 0bcd2dc725e9a40a306db96370c3be60b8454a33 Mon Sep 17 00:00:00 2001 From: "Julia L. Wang" Date: Sat, 9 Dec 2023 23:08:22 -0500 Subject: [PATCH 08/25] Removed unnecessary iloc --- yfinance/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yfinance/base.py b/yfinance/base.py index e1128f2ae..64ce7b3aa 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -732,7 +732,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): f_close_bad = df_block_calib['Close'] == tag df_new['Adj Close'] = df_block['Close'] * div_adjusts if f_close_bad.any(): - df_new.loc[f_close_bad, 'Adj Close'] = df_new['Close'].iloc[f_close_bad] * div_adjusts[f_close_bad] + df_new.loc[f_close_bad, 'Adj Close'] = df_new['Close'][f_close_bad] * div_adjusts[f_close_bad] # Check whether 'df_fine' has different split-adjustment. # If different, then adjust to match 'df' From 1863b211cdffbbd908ad759aa9edd7b636e86ea5 Mon Sep 17 00:00:00 2001 From: "Julia L. Wang" Date: Sun, 10 Dec 2023 10:36:02 -0500 Subject: [PATCH 09/25] Added extra dependencies --- README.md | 50 +++++++++++++++++++++++++++++++------------------- setup.py | 4 ++++ 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 6e356998a..0de313e11 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,32 @@ Yahoo! finance API is intended for personal use only.** [Changelog ยป](https://github.com/ranaroussi/yfinance/blob/main/CHANGELOG.rst) +--- + +## Installation + +Install `yfinance` using `pip`: + +``` {.sourceCode .bash} +$ pip install yfinance --upgrade --no-cache-dir +``` + +To install `yfinance` using `conda`, see +[this](https://anaconda.org/ranaroussi/yfinance). 
+ +Test new features by installing betas, provide feedback in [corresponding Discussion](https://github.com/ranaroussi/yfinance/discussions): +``` {.sourceCode .bash} +$ pip install yfinance --upgrade --no-cache-dir --pre +``` + +To install with extra dependencies, replace `extra` with `nospam` for custom requests or `repair` for price repairing functionality: +``` {.sourceCode .bash} +$ pip install yfinance[extra] +``` + +To view the full list of [required dependencies](https://github.com/ranaroussi/yfinance/blob/main/requirements.txt) and extra packages, see [this](https://github.com/ranaroussi/yfinance/blob/f08fe83290136d103d46d67524f5b6e7b6b827ff/setup.py#L62). + + --- ## Quick Start @@ -155,8 +181,9 @@ data = yf.download("SPY AAPL", period="1mo") ### Smarter scraping -To use a custom `requests` session (for example to cache calls to the -API or customize the `User-agent` header), pass a `session=` argument to +Install the `nospam` packages for smarter scraping using `pip` (see [Installation](#installation)). + +To use a custom `requests` session (for example to cache calls to the API or customize the `User-agent` header), pass a `session=` argument to the Ticker constructor. ```python @@ -230,23 +257,8 @@ yf.set_tz_cache_location("custom/cache/location") --- -## Installation - -Install `yfinance` using `pip`: - -``` {.sourceCode .bash} -$ pip install yfinance --upgrade --no-cache-dir -``` - -Test new features by installing betas, provide feedback in [corresponding Discussion](https://github.com/ranaroussi/yfinance/discussions): -``` {.sourceCode .bash} -$ pip install yfinance --upgrade --no-cache-dir --pre -``` - -To install `yfinance` using `conda`, see -[this](https://anaconda.org/ranaroussi/yfinance). -### Requirements + ## Developers: want to contribute? 
diff --git a/setup.py b/setup.py index d48340fd6..4e41e27b9 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,10 @@ 'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5', 'frozendict>=2.3.4', 'peewee>=3.16.2', 'beautifulsoup4>=4.11.1', 'html5lib>=1.1'], + extras_require={ + 'nospam': ['requests_cache>=1.1.1', 'requests_ratelimiter>=0.4.2'], + 'repair': ['scipy>=1.10.1'], + }, # Note: Pandas.read_html() needs html5lib & beautifulsoup4 entry_points={ 'console_scripts': [ From 41758857476ac91525e55578cc000ae1008b63b8 Mon Sep 17 00:00:00 2001 From: Unit Date: Sun, 10 Dec 2023 22:35:53 +0100 Subject: [PATCH 10/25] add upgrades downgrades add upgrades/downgrades (recommendations history) return data is pandas dataframe add test for upgrades/downgrades data --- tests/ticker.py | 19 +++++++++++++++++-- yfinance/base.py | 18 +++++++++++++++--- yfinance/scrapers/quote.py | 18 ++++++++++++++++++ yfinance/ticker.py | 16 ++++++++++++---- 4 files changed, 62 insertions(+), 9 deletions(-) diff --git a/tests/ticker.py b/tests/ticker.py index 871d01efe..b04354d78 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -31,9 +31,11 @@ ("info", dict), ("calendar", pd.DataFrame), ("recommendations", Union[pd.DataFrame, dict]), + ("recommendations_summary", Union[pd.DataFrame, dict]), + ("upgrades_downgrades", Union[pd.DataFrame, dict]), + ("recommendations_history", Union[pd.DataFrame, dict]), ("earnings", pd.DataFrame), ("quarterly_earnings", pd.DataFrame), - ("recommendations_summary", Union[pd.DataFrame, dict]), ("quarterly_cashflow", pd.DataFrame), ("cashflow", pd.DataFrame), ("quarterly_balance_sheet", pd.DataFrame), @@ -637,7 +639,7 @@ def test_recommendations(self): data_cached = self.ticker.recommendations self.assertIs(data, data_cached, "data not cached") - # def test_recommendations_summary(self): + # def test_recommendations_summary(self): # currently alias for recommendations # data = self.ticker.recommendations_summary # self.assertIsInstance(data, pd.DataFrame, "data has wrong type") # self.assertFalse(data.empty, "data is empty") @@ -645,6 +647,19 @@ def test_recommendations(self): # data_cached = self.ticker.recommendations_summary # self.assertIs(data, data_cached, "data not cached") + def test_recommendations_history(self): # alias for upgrades_downgrades + data = self.ticker.upgrades_downgrades + data_history = self.ticker.recommendations_history + self.assertTrue(data.equals(data_history)) + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + self.assertTrue(len(data.columns) == 4, "data has wrong number of columns") + self.assertEqual(data.columns.values.tolist(), ['Firm', 'ToGrade', 'FromGrade', 'Action'], "data has wrong column names") + self.assertIsInstance(data.index, pd.DatetimeIndex, "data has wrong index type") + + data_cached = self.ticker.upgrades_downgrades + self.assertIs(data, data_cached, "data not cached") + # def test_analyst_price_target(self): # data = self.ticker.analyst_price_target # self.assertIsInstance(data, pd.DataFrame, "data has wrong type") diff --git a/yfinance/base.py b/yfinance/base.py index 568f5684a..ecf481687 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -1708,6 +1708,21 @@ def get_recommendations(self, proxy=None, as_dict=False): return data.to_dict() return data + def get_recommendations_summary(self, proxy=None, as_dict=False): + return self.get_recommendations(proxy=proxy, as_dict=as_dict) + + def get_upgrades_downgrades(self, proxy=None, as_dict=False): + """ + Returns a DataFrame with 
the recommendations changes (upgrades/downgrades) + Index: date of grade + Columns: firm toGrade fromGrade action + """ + self._quote.proxy = proxy or self.proxy + data = self._quote.upgrades_downgrades + if as_dict: + return data.to_dict() + return data + def get_calendar(self, proxy=None, as_dict=False): self._quote.proxy = proxy or self.proxy data = self._quote.calendar @@ -1760,9 +1775,6 @@ def get_sustainability(self, proxy=None, as_dict=False): return data.to_dict() return data - def get_recommendations_summary(self, proxy=None, as_dict=False): - return self.get_recommendations(proxy=proxy, as_dict=as_dict) - def get_analyst_price_target(self, proxy=None, as_dict=False): self._analysis.proxy = proxy or self.proxy data = self._analysis.analyst_price_target diff --git a/yfinance/scrapers/quote.py b/yfinance/scrapers/quote.py index 37b77f92a..fa6cdadb9 100644 --- a/yfinance/scrapers/quote.py +++ b/yfinance/scrapers/quote.py @@ -561,6 +561,7 @@ def __init__(self, data: YfData, symbol: str, proxy=None): self._retired_info = None self._sustainability = None self._recommendations = None + self._upgrades_downgrades = None self._calendar = None self._already_scraped = False @@ -592,6 +593,23 @@ def recommendations(self) -> pd.DataFrame: self._recommendations = pd.DataFrame(data) return self._recommendations + @property + def upgrades_downgrades(self) -> pd.DataFrame: + if self._upgrades_downgrades is None: + result = self._fetch(self.proxy, modules=['upgradeDowngradeHistory']) + try: + data = result["quoteSummary"]["result"][0]["upgradeDowngradeHistory"]["history"] + if len(data) == 0: + raise YFinanceDataException(f"No upgrade/downgrade history found for {self._symbol}") + df = pd.DataFrame(data) + df.rename(columns={"epochGradeDate": "GradeDate", 'firm': 'Firm', 'toGrade': 'ToGrade', 'fromGrade': 'FromGrade', 'action': 'Action'}, inplace=True) + df.set_index('GradeDate', inplace=True) + df.index = pd.to_datetime(df.index, unit='s') + self._upgrades_downgrades = df + except (KeyError, IndexError): + raise YFinanceDataException(f"Failed to parse json response from Yahoo Finance: {result}") + return self._upgrades_downgrades + @property def calendar(self) -> pd.DataFrame: if self._calendar is None: diff --git a/yfinance/ticker.py b/yfinance/ticker.py index af8dd750c..c86414777 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -153,6 +153,18 @@ def calendar(self) -> _pd.DataFrame: def recommendations(self): return self.get_recommendations() + @property + def recommendations_summary(self): + return self.get_recommendations_summary() + + @property + def upgrades_downgrades(self): + return self.get_upgrades_downgrades() + + @property + def recommendations_history(self): + return self.get_upgrades_downgrades() + @property def earnings(self) -> _pd.DataFrame: return self.get_earnings() @@ -217,10 +229,6 @@ def cashflow(self) -> _pd.DataFrame: def quarterly_cashflow(self) -> _pd.DataFrame: return self.quarterly_cash_flow - @property - def recommendations_summary(self): - return self.get_recommendations_summary() - @property def analyst_price_target(self) -> _pd.DataFrame: return self.get_analyst_price_target() From 8fdf53233f6283b60ab9055341799fc7f49cf2b9 Mon Sep 17 00:00:00 2001 From: Filip Kostic Date: Sun, 10 Dec 2023 17:54:08 -0500 Subject: [PATCH 11/25] Fixed issue #1305. 
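The fix targets retrieval of the complementary `trailingPegRatio` statistic through the info dict. A minimal usage sketch (ticker choice and printed values are illustrative, not part of the patch):

```python
import yfinance as yf

# After this fix, trailingPegRatio is read from Yahoo's timeseries
# endpoint and surfaces through the info dict; it is None when Yahoo
# lacks the data to compute the ratio:
info = yf.Ticker("AAPL").info
print(info.get("trailingPegRatio"))  # float, or None if unavailable
```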
Added test case to test for trailingPegRatio statistic retrieval
---
 tests/ticker.py            | 12 ++++++++++++
 yfinance/scrapers/quote.py | 18 ++++++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/tests/ticker.py b/tests/ticker.py
index ce8741dbb..24215e3d7 100644
--- a/tests/ticker.py
+++ b/tests/ticker.py
@@ -720,6 +720,18 @@ def test_info(self):
         self.assertIn("symbol", data.keys(), f"Did not find expected key '{k}' in info dict")
         self.assertEqual(self.symbols[0], data["symbol"], "Wrong symbol value in info dict")
 
+    def test_complementary_info(self):
+        # This test is to check that we can successfully retrieve the trailing PEG ratio
+
+        # We don't expect this one to have a trailing PEG ratio
+        data1 = self.tickers[0].info
+        self.assertEqual(data1['trailingPegRatio'], None)
+
+        # This one should have a trailing PEG ratio
+        data2 = self.tickers[2].info
+        self.assertEqual(data2['trailingPegRatio'], 1.2713)
+        pass
+
     # def test_fast_info_matches_info(self):
     #     fast_info_keys = set()
     #     for ticker in self.tickers:
diff --git a/yfinance/scrapers/quote.py b/yfinance/scrapers/quote.py
index c09cc8d36..8d234f923 100644
--- a/yfinance/scrapers/quote.py
+++ b/yfinance/scrapers/quote.py
@@ -700,14 +700,12 @@ def _fetch_complementary(self, proxy):
             json_str = self._data.cache_get(url=url, proxy=proxy).text
             json_data = json.loads(json_str)
-            try:
-                key_stats = json_data["timeseries"]["result"][0]
-                if k not in key_stats:
-                    # Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
-                    v = None
+            if json_data["timeseries"]["error"] is not None:
+                raise YFinanceException(f"Failed to parse json response from Yahoo Finance: " + json_data["error"])
+            for k in keys:
+                keydict = json_data["timeseries"]["result"][0]
+                if k in keydict:
+                    self._info[k] = keydict[k][-1]["reportedValue"]["raw"]
                 else:
-                    # Select most recent (last) raw value in list:
-                    v = key_stats[k][-1]["reportedValue"]["raw"]
-            except Exception:
-                v = None
-            self._info[k] = v
+                    self._info[k] = None
+
From 2b1a26ef0c6e50db513a59585a046bcc8322d9a8 Mon Sep 17 00:00:00 2001
From: Filip Kostic
Date: Sun, 10 Dec 2023 20:51:11 -0500
Subject: [PATCH 12/25] Moved progress bar output to stderr

---
 yfinance/utils.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/yfinance/utils.py b/yfinance/utils.py
index 0371ab5ff..285cf91da 100644
--- a/yfinance/utils.py
+++ b/yfinance/utils.py
@@ -24,6 +24,7 @@
 import datetime as _datetime
 import logging
 import re as _re
+import sys
 import sys as _sys
 import threading
 from functools import lru_cache
@@ -858,9 +859,9 @@ def completed(self):
         if self.elapsed > self.iterations:
             self.elapsed = self.iterations
         self.update_iteration(1)
-        print('\r' + str(self), end='')
-        _sys.stdout.flush()
-        print()
+        print('\r' + str(self), end='', file=sys.stderr)
+        _sys.stderr.flush()
+        print("", file=sys.stderr)
 
     def animate(self, iteration=None):
         if iteration is None:
@@ -869,8 +870,8 @@ def animate(self, iteration=None):
         else:
             self.elapsed += iteration
 
-        print('\r' + str(self), end='')
-        _sys.stdout.flush()
+        print('\r' + str(self), end='', file=sys.stderr)
+        _sys.stderr.flush()
         self.update_iteration()
 
     def update_iteration(self, val=None):
From 5e0006e4b3821d6d5aa95b605fd0f0e6f3fe11ce Mon Sep 17 00:00:00 2001
From: Filip Kostic
Date: Mon, 11 Dec 2023 15:07:16 -0500
Subject: [PATCH 13/25] Removed redundant import

---
 yfinance/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/yfinance/utils.py b/yfinance/utils.py
index 285cf91da..172d0525e 100644
--- a/yfinance/utils.py
+++ b/yfinance/utils.py @@ -24,7 +24,6 @@ import datetime as _datetime import logging import re as _re -import sys import sys as _sys import threading from functools import lru_cache From c8280e40012c4a9b359bf19c93bb55602abb6d25 Mon Sep 17 00:00:00 2001 From: Filip Kostic Date: Tue, 12 Dec 2023 17:45:26 -0500 Subject: [PATCH 14/25] Update utils.py --- yfinance/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yfinance/utils.py b/yfinance/utils.py index 172d0525e..53eac44a9 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -858,9 +858,9 @@ def completed(self): if self.elapsed > self.iterations: self.elapsed = self.iterations self.update_iteration(1) - print('\r' + str(self), end='', file=sys.stderr) + print('\r' + str(self), end='', file=_sys.stderr) _sys.stderr.flush() - print("", file=sys.stderr) + print("", file=_sys.stderr) def animate(self, iteration=None): if iteration is None: @@ -869,7 +869,7 @@ def animate(self, iteration=None): else: self.elapsed += iteration - print('\r' + str(self), end='', file=sys.stderr) + print('\r' + str(self), end='', file=_sys.stderr) _sys.stderr.flush() self.update_iteration() From 9648e69b7e071cfa77132828f01c79dda7963b43 Mon Sep 17 00:00:00 2001 From: "Julia L. Wang" Date: Tue, 12 Dec 2023 18:26:04 -0500 Subject: [PATCH 15/25] Updated scipy and readme --- README.md | 8 ++++---- setup.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0de313e11..d089deb45 100644 --- a/README.md +++ b/README.md @@ -181,10 +181,10 @@ data = yf.download("SPY AAPL", period="1mo") ### Smarter scraping -Install the `nospam` packages for smarter scraping using `pip` (see [Installation](#installation)). +Install the `nospam` packages for smarter scraping using `pip` (see [Installation](#installation)). These packages help cache calls such that Yahoo is not spammed with requests. -To use a custom `requests` session (for example to cache calls to the API or customize the `User-agent` header), pass a `session=` argument to -the Ticker constructor. +To use a custom `requests` session, pass a `session=` argument to +the Ticker constructor. This allows for caching calls to the API as well as a custom way to modify requests via the `User-agent` header. ```python import requests_cache @@ -195,7 +195,7 @@ ticker = yf.Ticker('msft', session=session) ticker.actions ``` -Combine a `requests_cache` with rate-limiting to avoid triggering Yahoo's rate-limiter/blocker that can corrupt data. +Combine `requests_cache` with rate-limiting to avoid triggering Yahoo's rate-limiter/blocker that can corrupt data. ```python from requests import Session from requests_cache import CacheMixin, SQLiteCache diff --git a/setup.py b/setup.py index 4e41e27b9..c1c867e51 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ 'beautifulsoup4>=4.11.1', 'html5lib>=1.1'], extras_require={ 'nospam': ['requests_cache>=1.1.1', 'requests_ratelimiter>=0.4.2'], - 'repair': ['scipy>=1.10.1'], + 'repair': ['scipy>=1.6.3'], }, # Note: Pandas.read_html() needs html5lib & beautifulsoup4 entry_points={ From 469037be8034bb03d3f64a4a420ea2d9c4d17a0f Mon Sep 17 00:00:00 2001 From: Value Raider Date: Wed, 13 Dec 2023 19:08:06 +0000 Subject: [PATCH 16/25] Tweaks to formatting and links. 
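One concrete effect of the reword: the README now documents the combined optional-dependency install. A sketch of the commands it describes (extras names taken from the README diff below, shown here only as illustration):

``` {.sourceCode .bash}
$ pip install yfinance[nospam]          # caching + rate-limiting only
$ pip install yfinance[repair]          # price repair only
$ pip install yfinance[nospam,repair]   # both, as the reworded README shows
```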
--- README.md | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index d089deb45..e233bb137 100644 --- a/README.md +++ b/README.md @@ -50,21 +50,15 @@ Install `yfinance` using `pip`: $ pip install yfinance --upgrade --no-cache-dir ``` -To install `yfinance` using `conda`, see -[this](https://anaconda.org/ranaroussi/yfinance). +[With Conda](https://anaconda.org/ranaroussi/yfinance). -Test new features by installing betas, provide feedback in [corresponding Discussion](https://github.com/ranaroussi/yfinance/discussions): -``` {.sourceCode .bash} -$ pip install yfinance --upgrade --no-cache-dir --pre -``` +To install with optional dependencies, replace `optional` with: `nospam` for [caching-requests](#smarter-scraping), `repair` for [price repair](https://github.com/ranaroussi/yfinance/wiki/Price-repair), or `nospam,repair` for both: -To install with extra dependencies, replace `extra` with `nospam` for custom requests or `repair` for price repairing functionality: ``` {.sourceCode .bash} -$ pip install yfinance[extra] +$ pip install yfinance[optional] ``` -To view the full list of [required dependencies](https://github.com/ranaroussi/yfinance/blob/main/requirements.txt) and extra packages, see [this](https://github.com/ranaroussi/yfinance/blob/f08fe83290136d103d46d67524f5b6e7b6b827ff/setup.py#L62). - +[Required dependencies](./requirements.txt) , [all dependencies](./setup.py#L62). --- @@ -257,26 +251,6 @@ yf.set_tz_cache_location("custom/cache/location") --- - - - ## Developers: want to contribute? `yfinance` relies on community to investigate bugs and contribute code. Developer guide: https://github.com/ranaroussi/yfinance/discussions/1084 From dc957eeb0ede9efee641192a6a9a60c3480d0f71 Mon Sep 17 00:00:00 2001 From: "Julia L. Wang" Date: Sun, 10 Dec 2023 12:39:50 -0500 Subject: [PATCH 17/25] Implementation of holders data --- README.md | 3 + tests/ticker.py | 27 ++++ yfinance/base.py | 24 ++++ yfinance/scrapers/holders.py | 232 +++++++++++++++++++++++++++++------ yfinance/ticker.py | 12 ++ 5 files changed, 262 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 6e356998a..862afad5c 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,9 @@ msft.quarterly_cashflow msft.major_holders msft.institutional_holders msft.mutualfund_holders +msft.insider_transactions +msft.insider_purchases +msft.insider_roster_holders # Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default. # Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument. 
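The README snippet above introduces the three new holders accessors. A short usage sketch (ticker choice and printed columns are illustrative; the column names come from the parsers added in `yfinance/scrapers/holders.py` below):

```python
import yfinance as yf

msft = yf.Ticker("MSFT")

# Each property triggers one quoteSummary fetch, then is served from the
# cache on the Ticker (the new tests below assert the caching behaviour):
transactions = msft.insider_transactions   # e.g. 'Start Date', 'Insider', 'Position', 'Transaction', 'Shares', 'Value'
purchases = msft.insider_purchases         # buy/sell/net share summary for the reported period
roster = msft.insider_roster_holders       # e.g. 'Name', 'Position', 'Most Recent Transaction'

print(roster.head())
```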
diff --git a/tests/ticker.py b/tests/ticker.py index 8f41cca65..e1678ab63 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -24,6 +24,9 @@ ("major_holders", pd.DataFrame), ("institutional_holders", pd.DataFrame), ("mutualfund_holders", pd.DataFrame), + ("insider_transactions", pd.DataFrame), + ("insider_purchases", pd.DataFrame), + ("insider_roster_holders", pd.DataFrame), ("splits", pd.Series), ("actions", pd.DataFrame), ("shares", pd.DataFrame), @@ -338,6 +341,30 @@ def test_mutualfund_holders(self): data_cached = self.ticker.mutualfund_holders self.assertIs(data, data_cached, "data not cached") + def test_insider_transactions(self): + data = self.ticker.insider_transactions + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.insider_transactions + self.assertIs(data, data_cached, "data not cached") + + def test_insider_purchases(self): + data = self.ticker.insider_purchases + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.insider_purchases + self.assertIs(data, data_cached, "data not cached") + + def test_insider_roster_holders(self): + data = self.ticker.insider_roster_holders + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.insider_roster_holders + self.assertIs(data, data_cached, "data not cached") + class TestTickerMiscFinancials(unittest.TestCase): session = None diff --git a/yfinance/base.py b/yfinance/base.py index 66cc7aaf9..0e0c2034b 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -1747,6 +1747,30 @@ def get_mutualfund_holders(self, proxy=None, as_dict=False): if as_dict: return data.to_dict() return data + + def get_insider_purchases(self, proxy=None, as_dict=False): + self._holders.proxy = proxy or self.proxy + data = self._holders.insider_purchases + if data is not None: + if as_dict: + return data.to_dict() + return data + + def get_insider_transactions(self, proxy=None, as_dict=False): + self._holders.proxy = proxy or self.proxy + data = self._holders.insider_transactions + if data is not None: + if as_dict: + return data.to_dict() + return data + + def get_insider_roster_holders(self, proxy=None, as_dict=False): + self._holders.proxy = proxy or self.proxy + data = self._holders.insider_roster + if data is not None: + if as_dict: + return data.to_dict() + return data def get_info(self, proxy=None) -> dict: self._quote.proxy = proxy or self.proxy diff --git a/yfinance/scrapers/holders.py b/yfinance/scrapers/holders.py index 1376d41a1..90db8bae9 100644 --- a/yfinance/scrapers/holders.py +++ b/yfinance/scrapers/holders.py @@ -1,8 +1,12 @@ -from io import StringIO +# from io import StringIO import pandas as pd from yfinance.data import YfData +from yfinance.const import _BASE_URL_ +from yfinance.exceptions import YFinanceDataException + +_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary/" class Holders: @@ -14,57 +18,213 @@ def __init__(self, data: YfData, symbol: str, proxy=None): self.proxy = proxy self._major = None + self._major_direct_holders = None self._institutional = None self._mutualfund = None + self._insider_transactions = None + self._insider_purchases = None + self._insider_roster = None + @property def major(self) -> pd.DataFrame: if self._major is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._major 
@property def institutional(self) -> pd.DataFrame: if self._institutional is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._institutional @property def mutualfund(self) -> pd.DataFrame: if self._mutualfund is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._mutualfund - def _scrape(self, proxy): - ticker_url = f"{self._SCRAPE_URL_}/{self._symbol}" + @property + def insider_transactions(self) -> pd.DataFrame: + if self._insider_transactions is None: + # self._scrape_insider_transactions(self.proxy) + self._fetch_and_parse() + return self._insider_transactions + + @property + def insider_purchases(self) -> pd.DataFrame: + if self._insider_purchases is None: + # self._scrape_insider_transactions(self.proxy) + self._fetch_and_parse() + return self._insider_purchases + + @property + def insider_roster(self) -> pd.DataFrame: + if self._insider_roster is None: + # self._scrape_insider_ros(self.proxy) + self._fetch_and_parse() + return self._insider_roster + + def _fetch(self, proxy): + modules = ','.join( + ["institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity"]) + params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "symbol": self._symbol, "formatted": "false"} + result = self._data.get_raw_json(_QUOTE_SUMMARY_URL_, user_agent_headers=self._data.user_agent_headers, params=params_dict, proxy=proxy) + return result + + def _fetch_and_parse(self): + result = self._fetch(self.proxy) try: - resp = self._data.cache_get(ticker_url + '/holders', proxy=proxy) - holders = pd.read_html(StringIO(resp.text)) - except Exception: - holders = [] - - if len(holders) >= 3: - self._major = holders[0] - self._institutional = holders[1] - self._mutualfund = holders[2] - elif len(holders) >= 2: - self._major = holders[0] - self._institutional = holders[1] - elif len(holders) >= 1: - self._major = holders[0] - - if self._institutional is not None: - if 'Date Reported' in self._institutional: - self._institutional['Date Reported'] = pd.to_datetime( - self._institutional['Date Reported']) - if '% Out' in self._institutional: - self._institutional['% Out'] = self._institutional[ - '% Out'].str.replace('%', '').astype(float) / 100 - - if self._mutualfund is not None: - if 'Date Reported' in self._mutualfund: - self._mutualfund['Date Reported'] = pd.to_datetime( - self._mutualfund['Date Reported']) - if '% Out' in self._mutualfund: - self._mutualfund['% Out'] = self._mutualfund[ - '% Out'].str.replace('%', '').astype(float) / 100 + data = result["quoteSummary"]["result"][0] + # parse "institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity" + self._parse_institution_ownership(data["institutionOwnership"]) + self._parse_fund_ownership(data["fundOwnership"]) + # self._parse_major_direct_holders(data["majorDirectHolders"]) # need more data to investigate + self._parse_major_holders_breakdown(data["majorHoldersBreakdown"]) + self._parse_insider_transactions(data["insiderTransactions"]) + self._parse_insider_holders(data["insiderHolders"]) + self._parse_net_share_purchase_activity(data["netSharePurchaseActivity"]) + except (KeyError, IndexError): + raise YFinanceDataException("Failed to parse holders json data.") + + @staticmethod + def _parse_raw_values(data): + if isinstance(data, dict) and "raw" in data: + 
return data["raw"] + return data + + def _parse_institution_ownership(self, data): + holders = data["ownershipList"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True) # "pctHeld": "% Out" + self._institutional = df + + def _parse_fund_ownership(self, data): + holders = data["ownershipList"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True) + self._mutualfund = df + + def _parse_major_direct_holders(self, data): + holders = data["holders"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "positionDirect": "Shares", "valueDirect": "Value"}, inplace=True) + self._major_direct_holders = df + + def _parse_major_holders_breakdown(self, data): + if "maxAge" in data: + del data["maxAge"] + df = pd.DataFrame.from_dict(data, orient="index") + if not df.empty: + df.columns.name = "Breakdown" + df.rename(columns={df.columns[0]: 'Value'}, inplace=True) + self._major = df + + def _parse_insider_transactions(self, data): + holders = data["transactions"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["startDate"] = pd.to_datetime(df["startDate"], unit="s") + df.rename(columns={ + "startDate": "Start Date", + "filerName": "Insider", + "filerRelation": "Position", + "filerUrl": "URL", + "moneyText": "Transaction", + "transactionText": "Text", + "shares": "Shares", + "value": "Value", + "ownership": "Ownership" # ownership flag, direct or institutional + }, inplace=True) + self._insider_transactions = df + + def _parse_insider_holders(self, data): + holders = data["holders"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["positionDirectDate"] = pd.to_datetime(df["positionDirectDate"], unit="s") + df["latestTransDate"] = pd.to_datetime(df["latestTransDate"], unit="s") + + df.rename(columns={ + "name": "Name", + "relation": "Position", + "url": "URL", + "transactionDescription": "Most Recent Transaction", + "latestTransDate": "Latest Transaction Date", + "positionDirectDate": "Position Direct Date", + "positionDirect": "Shares Owned Directly", + "positionIndirectDate": "Position Indirect Date", + "positionIndirect": "Shares Owned Indirectly" + }, inplace=True) + + df["Name"] = df["Name"].astype(str) + df["Position"] = df["Position"].astype(str) + df["URL"] = df["URL"].astype(str) + df["Most Recent Transaction"] = df["Most Recent Transaction"].astype(str) + + self._insider_roster = df + + def _parse_net_share_purchase_activity(self, data): + df = pd.DataFrame( + { + "Insider Purchases Last " + data.get("period", ""): [ 
+                "Purchases",
+                "Sales",
+                "Net Shares Purchased (Sold)",
+                "Total Insider Shares Held",
+                "% Net Shares Purchased (Sold)",
+                "% Buy Shares",
+                "% Sell Shares"
+            ],
+            "Shares": [
+                data.get('buyInfoShares'),
+                data.get('sellInfoShares'),
+                data.get('netInfoShares'),
+                data.get('totalInsiderShares'),
+                data.get('netPercentInsiderShares'),
+                data.get('buyPercentInsiderShares'),
+                data.get('sellPercentInsiderShares')
+            ],
+            "Trans": [
+                data.get('buyInfoCount'),
+                data.get('sellInfoCount'),
+                data.get('netInfoCount'),
+                pd.NA,
+                pd.NA,
+                pd.NA,
+                pd.NA
+            ]
+        }
+        ).convert_dtypes()
+        self._insider_purchases = df
+
+        
\ No newline at end of file
diff --git a/yfinance/ticker.py b/yfinance/ticker.py
index af8dd750c..580481ccd 100644
--- a/yfinance/ticker.py
+++ b/yfinance/ticker.py
@@ -117,6 +117,18 @@ def institutional_holders(self) -> _pd.DataFrame:
     def mutualfund_holders(self) -> _pd.DataFrame:
         return self.get_mutualfund_holders()
 
+    @property
+    def insider_purchases(self) -> _pd.DataFrame:
+        return self.get_insider_purchases()
+
+    @property
+    def insider_transactions(self) -> _pd.DataFrame:
+        return self.get_insider_transactions()
+
+    @property
+    def insider_roster_holders(self) -> _pd.DataFrame:
+        return self.get_insider_roster_holders()
+
     @property
     def dividends(self) -> _pd.Series:
         return self.get_dividends()
From 122269cf53c08a7ae20e257a8233e1549ae2f795 Mon Sep 17 00:00:00 2001
From: Filip Kostic
Date: Wed, 13 Dec 2023 19:45:47 -0500
Subject: [PATCH 18/25] Fixed fstring error

---
 yfinance/scrapers/quote.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yfinance/scrapers/quote.py b/yfinance/scrapers/quote.py
index 8d234f923..9616f2666 100644
--- a/yfinance/scrapers/quote.py
+++ b/yfinance/scrapers/quote.py
@@ -701,7 +701,7 @@ def _fetch_complementary(self, proxy):
             json_str = self._data.cache_get(url=url, proxy=proxy).text
             json_data = json.loads(json_str)
             if json_data["timeseries"]["error"] is not None:
-                raise YFinanceException(f"Failed to parse json response from Yahoo Finance: " + json_data["error"])
+                raise YFinanceException(f"Failed to parse json response from Yahoo Finance: {json_data['timeseries']['error']}")
             for k in keys:
                 keydict = json_data["timeseries"]["result"][0]
                 if k in keydict:
                     self._info[k] = keydict[k][-1]["reportedValue"]["raw"]
                 else:
From 24f53e935d09800adf0cc3038544ff995c52c41d Mon Sep 17 00:00:00 2001
From: Unit
Date: Sat, 16 Dec 2023 13:35:04 +0100
Subject: [PATCH 19/25] added calendar events

added events from calendarEvents module
returning data is dict
test upgraded and passed
---
 tests/ticker.py            | 24 +++++++++++++++++-------
 yfinance/base.py           |  7 ++-----
 yfinance/scrapers/quote.py | 28 +++++++++++++++++++++++++---
 yfinance/ticker.py         |  5 ++++-
 4 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/tests/ticker.py b/tests/ticker.py
index 59ef8c247..8b8e882d4 100644
--- a/tests/ticker.py
+++ b/tests/ticker.py
@@ -711,13 +711,23 @@ def test_recommendations_history(self):  # alias for upgrades_downgrades
     #     data_cached = self.ticker.revenue_forecasts
     #     self.assertIs(data, data_cached, "data not cached")
 
-    # def test_calendar(self):
-    #     data = self.ticker.calendar
-    #     self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
-    #     self.assertFalse(data.empty, "data is empty")
-
-    #     data_cached = self.ticker.calendar
-    #     self.assertIs(data, data_cached, "data not cached")
+    def test_calendar(self):
+        data = self.ticker.calendar
+        self.assertIsInstance(data, dict, "data has wrong type")
+        self.assertTrue(len(data) > 0, "data is empty")
+        self.assertIn("Earnings Date", data.keys(), "data missing expected key")
self.assertIn("Earnings Average", data.keys(), "data missing expected key") + self.assertIn("Earnings Low", data.keys(), "data missing expected key") + self.assertIn("Earnings High", data.keys(), "data missing expected key") + self.assertIn("Revenue Average", data.keys(), "data missing expected key") + self.assertIn("Revenue Low", data.keys(), "data missing expected key") + self.assertIn("Revenue High", data.keys(), "data missing expected key") + # dividend date is not available for tested ticker GOOGL + if self.ticker.ticker != "GOOGL": + self.assertIn("Dividend Date", data.keys(), "data missing expected key") + # ex-dividend date is not always available + data_cached = self.ticker.calendar + self.assertIs(data, data_cached, "data not cached") # def test_shares(self): # data = self.ticker.shares diff --git a/yfinance/base.py b/yfinance/base.py index ad6884818..f463578aa 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -1733,12 +1733,9 @@ def get_upgrades_downgrades(self, proxy=None, as_dict=False): return data.to_dict() return data - def get_calendar(self, proxy=None, as_dict=False): + def get_calendar(self, proxy=None) -> dict: self._quote.proxy = proxy or self.proxy - data = self._quote.calendar - if as_dict: - return data.to_dict() - return data + return self._quote.calendar def get_major_holders(self, proxy=None, as_dict=False): self._holders.proxy = proxy or self.proxy diff --git a/yfinance/scrapers/quote.py b/yfinance/scrapers/quote.py index b338cd718..9d7a67b14 100644 --- a/yfinance/scrapers/quote.py +++ b/yfinance/scrapers/quote.py @@ -610,9 +610,9 @@ def upgrades_downgrades(self) -> pd.DataFrame: return self._upgrades_downgrades @property - def calendar(self) -> pd.DataFrame: + def calendar(self) -> dict: if self._calendar is None: - raise YFNotImplementedError('calendar') + self._fetch_calendar() return self._calendar @staticmethod @@ -626,7 +626,7 @@ def _fetch(self, proxy, modules: list): modules = ','.join([m for m in modules if m in quote_summary_valid_modules]) if len(modules) == 0: raise YFinanceException("No valid modules provided, see available modules using `valid_modules`") - params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "symbol": self._symbol} + params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "formatted": "false", "symbol": self._symbol} result = self._data.get_raw_json(_QUOTE_SUMMARY_URL_ + f"/{self._symbol}", user_agent_headers=self._data.user_agent_headers, params=params_dict, proxy=proxy) return result @@ -727,3 +727,25 @@ def _fetch_complementary(self, proxy): else: self.info[k] = None + def _fetch_calendar(self): + # secFilings return too old data, so not requesting it for now + result = self._fetch(self.proxy, modules=['calendarEvents']) + try: + self._calendar = dict() + _events = result["quoteSummary"]["result"][0]["calendarEvents"] + if 'dividendDate' in _events: + self._calendar['Dividend Date'] = datetime.datetime.fromtimestamp(_events['dividendDate']).date() + if 'exDividendDate' in _events: + self._calendar['Ex-Dividend Date'] = datetime.datetime.fromtimestamp(_events['exDividendDate']).date() + # splits = _events.get('splitDate') # need to check later, i will add code for this if found data + earnings = _events.get('earnings') + if earnings is not None: + self._calendar['Earnings Date'] = [datetime.datetime.fromtimestamp(d).date() for d in earnings.get('earningsDate', [])] + self._calendar['Earnings High'] = earnings.get('earningsHigh', None) + self._calendar['Earnings Low'] = 
earnings.get('earningsLow', None) + self._calendar['Earnings Average'] = earnings.get('earningsAverage', None) + self._calendar['Revenue High'] = earnings.get('revenueHigh', None) + self._calendar['Revenue Low'] = earnings.get('revenueLow', None) + self._calendar['Revenue Average'] = earnings.get('revenueAverage', None) + except (KeyError, IndexError): + raise YFinanceDataException(f"Failed to parse json response from Yahoo Finance: {result}") diff --git a/yfinance/ticker.py b/yfinance/ticker.py index b556f0f1f..f205d245b 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -158,7 +158,10 @@ def fast_info(self): return self.get_fast_info() @property - def calendar(self) -> _pd.DataFrame: + def calendar(self) -> dict: + """ + Returns a dictionary of events, earnings, and dividends for the ticker + """ return self.get_calendar() @property From 9021fe52b4cab59ba6233c36c1e847098fe6af83 Mon Sep 17 00:00:00 2001 From: Value Raider Date: Sun, 17 Dec 2023 18:33:05 +0000 Subject: [PATCH 20/25] Fix _get_ticker_tz() args, were being swapped. Improve its unit test --- tests/ticker.py | 2 +- yfinance/base.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ticker.py b/tests/ticker.py index 8b8e882d4..45f8eca29 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -86,7 +86,7 @@ def test_getTz(self): # Test: dat = yf.Ticker(tkr, session=self.session) - tz = dat._get_ticker_tz(proxy=None, timeout=None) + tz = dat._get_ticker_tz(proxy=None, timeout=5) self.assertIsNotNone(tz) diff --git a/yfinance/base.py b/yfinance/base.py index f463578aa..2741c1108 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -1650,7 +1650,7 @@ def map_signals_to_ranges(f, f_up, f_down): return df2 - def _get_ticker_tz(self,timeout, proxy=None): + def _get_ticker_tz(self, proxy, timeout): proxy = proxy or self.proxy if self._tz is not None: return self._tz @@ -1675,7 +1675,7 @@ def _get_ticker_tz(self,timeout, proxy=None): return tz @utils.log_indent_decorator - def _fetch_ticker_tz(self, timeout, proxy=None): + def _fetch_ticker_tz(self, proxy, timeout): # Query Yahoo for fast price data just to get returned timezone proxy = proxy or self.proxy logger = utils.get_yf_logger() From db670aefd73ead89cfd7f317da1fc6137b904b53 Mon Sep 17 00:00:00 2001 From: Value Raider Date: Wed, 20 Dec 2023 23:58:50 +0000 Subject: [PATCH 21/25] Fix invalid date entering cache DB 'peewee.DateTimeField' is not ISO-compliant. If user enforces strict ISO-compliance, then translation between DateTimeField and sqlite breaks. Fix is to manually implement translation. --- yfinance/cache.py | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/yfinance/cache.py b/yfinance/cache.py index d1acf2810..d625a2ab2 100644 --- a/yfinance/cache.py +++ b/yfinance/cache.py @@ -197,17 +197,6 @@ def get_tz_cache(): return _TzCacheManager.get_tz_cache() -def set_tz_cache_location(cache_dir: str): - """ - Sets the path to create the "py-yfinance" cache folder in. - Useful if the default folder returned by "appdir.user_cache_dir()" is not writable. - Must be called before cache is used (that is, before fetching tickers). 
- :param cache_dir: Path to use for caches - :return: None - """ - _TzDBManager.set_location(cache_dir) - - # -------------- # Cookie cache @@ -300,9 +289,21 @@ def get_location(cls): Cookie_db_proxy = _peewee.Proxy() +class ISODateTimeField(_peewee.DateTimeField): + # Ensure Python datetime is read & written correctly for sqlite, + # because user discovered peewee allowed an invalid datetime + # to get written. + def db_value(self, value): + if value and isinstance(value, _datetime.datetime): + return value.isoformat() + return super().db_value(value) + def python_value(self, value): + if value and isinstance(value, str) and 'T' in value: + return _datetime.datetime.fromisoformat(value) + return super().python_value(value) class _CookieSchema(_peewee.Model): strategy = _peewee.CharField(primary_key=True) - fetch_date = _peewee.DateTimeField(default=_datetime.datetime.now) + fetch_date = ISODateTimeField(default=_datetime.datetime.now) # Which cookie type depends on strategy cookie_bytes = _peewee.BlobField() @@ -398,3 +399,19 @@ def store(self, strategy, cookie): def get_cookie_cache(): return _CookieCacheManager.get_cookie_cache() + + +def set_cache_location(cache_dir: str): + """ + Sets the path to create the "py-yfinance" cache folder in. + Useful if the default folder returned by "appdir.user_cache_dir()" is not writable. + Must be called before cache is used (that is, before fetching tickers). + :param cache_dir: Path to use for caches + :return: None + """ + _TzDBManager.set_location(cache_dir) + _CookieDBManager.set_location(cache_dir) + +def set_tz_cache_location(cache_dir: str): + set_cache_location(cache_dir) + From d44eff40653b0f58a8913f1424d33dc091f9ad46 Mon Sep 17 00:00:00 2001 From: Value Raider Date: Fri, 22 Dec 2023 20:29:04 +0000 Subject: [PATCH 22/25] Fix 'Unalignable' error in reconstruct_intervals --- yfinance/base.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 2741c1108..f1c526c7e 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -694,7 +694,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): f_tag = df_block_calib['Adj Close'] == tag if f_tag.any(): div_adjusts = df_block_calib['Adj Close'] / df_block_calib['Close'] - # The loop below assumes each 1d repair is isoloated, i.e. surrounded by + # The loop below assumes each 1d repair is isolated, i.e. surrounded by # good data. Which is case most of time. # But in case are repairing a chunk of bad 1d data, back/forward-fill the # good div-adjustments - not perfect, but a good backup. 
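For context on the error this patch title names: pandas raises "Unalignable boolean Series provided as indexer" when a boolean Series mask has a different index than the object being indexed. A standalone reproduction (plain pandas, not yfinance code) of both the failure and the reindex-and-fill style of fix applied in the hunk that follows:

```python
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0], index=pd.date_range("2023-01-02", periods=3))
# Mask built on a shorter index, analogous to f_close_bad vs df_new.index:
mask = pd.Series([True, False], index=s.index[:2])

try:
    s[mask]
except pd.errors.IndexingError as e:
    print(e)  # "Unalignable boolean Series provided as indexer ..."

# The fix: align the mask to the target index, treating missing rows as False:
mask_aligned = mask.reindex(s.index, fill_value=False)
print(s[mask_aligned])  # selects only the first row
```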
@@ -706,26 +706,30 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): if df_new.loc[dt, "Dividends"] != 0: if idx < n - 1: # Easy, take div-adjustment from next-day - div_adjusts[idx] = div_adjusts.iloc[idx + 1] + div_adjusts.iloc[idx] = div_adjusts.iloc[idx + 1] else: # Take previous-day div-adjustment and reverse todays adjustment div_adj = 1.0 - df_new_calib["Dividends"].iloc[idx] / df_new_calib['Close'].iloc[ idx - 1] - div_adjusts[idx] = div_adjusts.iloc[idx - 1] / div_adj + div_adjusts.iloc[idx] = div_adjusts.iloc[idx - 1] / div_adj else: if idx > 0: # Easy, take div-adjustment from previous-day - div_adjusts[idx] = div_adjusts.iloc[idx - 1] + div_adjusts.iloc[idx] = div_adjusts.iloc[idx - 1] else: # Must take next-day div-adjustment - div_adjusts[idx] = div_adjusts.iloc[idx + 1] + div_adjusts.iloc[idx] = div_adjusts.iloc[idx + 1] if df_new_calib["Dividends"].iloc[idx + 1] != 0: - div_adjusts[idx] *= 1.0 - df_new_calib["Dividends"].iloc[idx + 1] / \ + div_adjusts.iloc[idx] *= 1.0 - df_new_calib["Dividends"].iloc[idx + 1] / \ df_new_calib['Close'].iloc[idx] f_close_bad = df_block_calib['Close'] == tag + div_adjusts = div_adjusts.reindex(df_block.index, fill_value=np.nan).ffill().bfill() df_new['Adj Close'] = df_block['Close'] * div_adjusts if f_close_bad.any(): - df_new.loc[f_close_bad, 'Adj Close'] = df_new['Close'][f_close_bad] * div_adjusts[f_close_bad] + f_close_bad_new = f_close_bad.reindex(df_new.index, fill_value=False) + div_adjusts_new = div_adjusts.reindex(df_new.index, fill_value=np.nan).ffill().bfill() + div_adjusts_new_np = f_close_bad_new.to_numpy() + df_new.loc[div_adjusts_new_np, 'Adj Close'] = df_new['Close'][div_adjusts_new_np] * div_adjusts_new[div_adjusts_new_np] # Check whether 'df_fine' has different split-adjustment. 
# If different, then adjust to match 'df' From c1ad2589da6e523630f5e18696c6499956c223f2 Mon Sep 17 00:00:00 2001 From: Tejasweee Date: Sun, 31 Dec 2023 09:29:19 +0545 Subject: [PATCH 23/25] make nan as float --- yfinance/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yfinance/base.py b/yfinance/base.py index 2741c1108..1d588b82d 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -2167,7 +2167,7 @@ def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]: # Convert types for cn in ["EPS Estimate", "Reported EPS", "Surprise(%)"]: - dates.loc[dates[cn] == '-', cn] = "NaN" + dates.loc[dates[cn] == '-', cn] = float("nan") dates[cn] = dates[cn].astype(float) # Convert % to range 0->1: From 112b297c4159f47732eea5fbb8979d2b2fd1fbc7 Mon Sep 17 00:00:00 2001 From: Value Raider Date: Sat, 30 Dec 2023 17:02:22 +0000 Subject: [PATCH 24/25] Set sensible min versions for optional 'nospam' reqs Set sensible min versions for optional 'nospam' reqs: - requests_cache >= 1.0 , first defined DO_NOT_CACHE --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c1c867e51..88050d963 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ 'frozendict>=2.3.4', 'peewee>=3.16.2', 'beautifulsoup4>=4.11.1', 'html5lib>=1.1'], extras_require={ - 'nospam': ['requests_cache>=1.1.1', 'requests_ratelimiter>=0.4.2'], + 'nospam': ['requests_cache>=1.0', 'requests_ratelimiter>=0.3.1'], 'repair': ['scipy>=1.6.3'], }, # Note: Pandas.read_html() needs html5lib & beautifulsoup4 From c94cbb64d41700a36ef6d8bd3f930c8efddce809 Mon Sep 17 00:00:00 2001 From: puntonim Date: Sat, 30 Dec 2023 17:02:02 +0100 Subject: [PATCH 25/25] Ticker.history() to raise HTTP request excs if raise_errors args is True --- yfinance/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yfinance/base.py b/yfinance/base.py index 1d588b82d..d01c4e69c 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -218,7 +218,8 @@ def history(self, period="1mo", interval="1d", data = data.json() except Exception: - pass + if raise_errors: + raise # Store the meta data that gets retrieved simultaneously try:
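Finally, a sketch of the behaviour PATCH 25 enables: with `raise_errors=True`, HTTP/JSON failures inside `history()` now propagate to the caller instead of being silently swallowed (the exception type depends on the failure, so a broad catch is shown; ticker choice is illustrative):

```python
import yfinance as yf

dat = yf.Ticker("IBM")
try:
    df = dat.history(period="1mo", raise_errors=True)
except Exception as e:
    # Before this patch, request/parse errors never reached this branch
    # because the bare 'except Exception: pass' discarded them:
    print(f"history() raised as requested: {e}")
```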