Merge pull request #1302 from ranaroussi/dev
dev -> main
ValueRaider committed Jan 14, 2023
2 parents eacfbc4 + 5d9a91d commit 3ee4674
Showing 8 changed files with 114 additions and 30 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -84,6 +84,7 @@ msft.capital_gains

# show share count
msft.shares
msft.get_shares_full()

# show financials:
# - income statement
@@ -213,8 +214,7 @@ data = yf.download( # or pdr.get_data_yahoo(...
interval = "5d",

# Whether to ignore timezone when aligning ticker data from
-# different timezones. Default is True. False may be useful for
-# minute/hourly data.
+# different timezones. Default is False.
ignore_tz = False,

# group by ticker (to access via data['SPY'])
7 changes: 7 additions & 0 deletions tests/ticker.py
@@ -65,6 +65,7 @@ def test_badTicker(self):
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
@@ -100,6 +101,7 @@ def test_goodTicker(self):
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
@@ -653,6 +655,11 @@ def test_shares(self):
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")

def test_shares_full(self):
data = self.ticker.get_shares_full()
self.assertIsInstance(data, pd.Series, "data has wrong type")
self.assertFalse(data.empty, "data is empty")

def test_info(self):
data = self.ticker.info
self.assertIsInstance(data, dict, "data has wrong type")
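
The new share-count tests can be run on their own; for example, assuming pytest is installed, a name filter along these lines should select test_shares and test_shares_full:

python -m pytest tests/ticker.py -k shares
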
58 changes: 56 additions & 2 deletions yfinance/base.py
@@ -40,6 +40,7 @@
from .scrapers.fundamentals import Fundamentals
from .scrapers.holders import Holders
from .scrapers.quote import Quote
import json as _json

_BASE_URL_ = 'https://query2.finance.yahoo.com'
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
@@ -1118,6 +1119,59 @@ def get_shares(self, proxy=None, as_dict=False):
return data.to_dict()
return data

def get_shares_full(self, start=None, end=None, proxy=None):
# Process dates
tz = self._get_ticker_tz(debug_mode=False, proxy=None, timeout=10)
dt_now = _pd.Timestamp.utcnow().tz_convert(tz)
if start is not None:
start_ts = utils._parse_user_dt(start, tz)
start = _pd.Timestamp.fromtimestamp(start_ts).tz_localize("UTC").tz_convert(tz)
start_d = start.date()
if end is not None:
end_ts = utils._parse_user_dt(end, tz)
end = _pd.Timestamp.fromtimestamp(end_ts).tz_localize("UTC").tz_convert(tz)
end_d = end.date()
if end is None:
end = dt_now
if start is None:
start = end - _pd.Timedelta(days=548) # 18 months
if start >= end:
print("ERROR: start date must be before end")
return None
start = start.floor("D")
end = end.ceil("D")

# Fetch
ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format(self.ticker)
shares_url = ts_url_base + "&period1={}&period2={}".format(int(start.timestamp()), int(end.timestamp()))
try:
json_str = self._data.cache_get(shares_url).text
json_data = _json.loads(json_str)
except:
print(f"{self.ticker}: Yahoo web request for share count failed")
return None
try:
fail = json_data["finance"]["error"]["code"] == "Bad Request"
except:
fail = False
if fail:
print(f"{self.ticker}: Yahoo web request for share count failed")
return None

shares_data = json_data["timeseries"]["result"]
if not "shares_out" in shares_data[0]:
print(f"{self.ticker}: Yahoo did not return share count in date range {start} -> {end}")
return None
try:
df = _pd.Series(shares_data[0]["shares_out"], index=_pd.to_datetime(shares_data[0]["timestamp"], unit="s"))
except Exception as e:
print(f"{self.ticker}: Failed to parse shares count data: "+str(e))
return None

df.index = df.index.tz_localize(tz)
df = df.sort_index()
return df

def get_isin(self, proxy=None) -> Optional[str]:
# *** experimental ***
if self._isin is not None:
@@ -1254,8 +1308,8 @@ def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]:
dates[cn] = _pd.to_datetime(dates[cn], format="%b %d, %Y, %I %p")
# - instead of attempting decoding of ambiguous timezone abbreviation, just use 'info':
self._quote.proxy = proxy
-dates[cn] = dates[cn].dt.tz_localize(
-tz=self._quote.info["exchangeTimezoneName"])
+tz = self._get_ticker_tz(debug_mode=False, proxy=proxy, timeout=30)
+dates[cn] = dates[cn].dt.tz_localize(tz)

dates = dates.set_index("Earnings Date")

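
The new Ticker.get_shares_full() returns a timezone-aware pandas Series of share counts (or None if the Yahoo request fails), defaulting to the last 18 months. A minimal usage sketch, with an illustrative ticker and start date:

import yfinance as yf

dat = yf.Ticker("MSFT")
shares = dat.get_shares_full(start="2022-01-01")
if shares is not None:
    # Series indexed by tz-aware timestamps in the exchange's timezone
    print(shares.tail())
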
50 changes: 37 additions & 13 deletions yfinance/data.py
@@ -46,13 +46,33 @@ def wrapped(*args, **kwargs):
return wrapped


-def decrypt_cryptojs_aes(data):
+def decrypt_cryptojs_aes_stores(data):
encrypted_stores = data['context']['dispatcher']['stores']
-_cs = data["_cs"]
-_cr = data["_cr"]

-_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
-password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()
if "_cs" in data and "_cr" in data:
_cs = data["_cs"]
_cr = data["_cr"]
_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()
else:
# Currently assume one extra key in dict, which is password. Print error if
# more extra keys detected.
new_keys = [k for k in data.keys() if k not in ["context", "plugins"]]
l = len(new_keys)
if l == 0:
return None
elif l == 1 and isinstance(data[new_keys[0]], str):
password_key = new_keys[0]
else:
msg = "Yahoo has again changed data format, yfinance now unsure which key(s) is for decryption:"
k = new_keys[0]
k_str = k if len(k) < 32 else k[:32-3]+"..."
msg += f" '{k_str}'->{type(data[k])}"
for i in range(1, len(new_keys)):
msg += f" , '{k_str}'->{type(data[k])}"
raise Exception(msg)
password_key = new_keys[0]
password = data[password_key]

encrypted_stores = b64decode(encrypted_stores)
assert encrypted_stores[0:8] == b"Salted__"
@@ -98,7 +118,10 @@ def EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="m
key, iv = key_iv[:keySize], key_iv[keySize:final_length]
return key, iv

-key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
+try:
+key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
+except:
+raise Exception("yfinance failed to decrypt Yahoo data response")

if usePycryptodome:
cipher = AES.new(key, AES.MODE_CBC, iv=iv)
@@ -176,15 +199,16 @@ def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:

data = json.loads(json_str)

if "_cs" in data and "_cr" in data:
data = decrypt_cryptojs_aes(data)

if "context" in data and "dispatcher" in data["context"]:
# Keep old code, just in case
data = data['context']['dispatcher']['stores']
stores = decrypt_cryptojs_aes_stores(data)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")

# return data
-new_data = json.dumps(data).replace('{}', 'null')
+new_data = json.dumps(stores).replace('{}', 'null')
new_data = re.sub(
r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)

4 changes: 2 additions & 2 deletions yfinance/multi.py
@@ -29,7 +29,7 @@
from . import shared


-def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
+def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=False,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=10):
@@ -68,7 +68,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
How many threads to use for mass downloading. Default is True
ignore_tz: bool
When combining from different timezones, ignore that part of datetime.
-Default is True
+Default is False
proxy: str
Optional. Proxy server URL scheme. Default is None
rounding: bool
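
With ignore_tz now defaulting to False, yf.download() keeps the timezone part of the index when combining tickers from different exchanges; passing ignore_tz=True restores the old behaviour. A small sketch, with illustrative tickers:

import yfinance as yf

# New default: timezone information is preserved when the frames are combined
data = yf.download(["SPY", "BHP.AX"], period="5d", interval="1h")

# Old default behaviour: drop the timezone part when aligning
data_naive = yf.download(["SPY", "BHP.AX"], period="5d", interval="1h", ignore_tz=True)
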
2 changes: 1 addition & 1 deletion yfinance/scrapers/fundamentals.py
@@ -195,7 +195,7 @@ def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.Da
url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys])
# Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt:
start_dt = datetime.datetime(2016, 12, 31)
-end = (datetime.datetime.now() + datetime.timedelta(days=366))
+end = pd.Timestamp.utcnow().ceil("D")
url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp()))

# Step 3: fetch and reshape data
8 changes: 5 additions & 3 deletions yfinance/scrapers/quote.py
@@ -194,9 +194,11 @@ def _scrape_complementary(self, proxy):
for k in keys:
url += "&type=" + k
# Request 6 months of data
url += "&period1={}".format(
int((datetime.datetime.now() - datetime.timedelta(days=365 // 2)).timestamp()))
url += "&period2={}".format(int((datetime.datetime.now() + datetime.timedelta(days=1)).timestamp()))
start = pd.Timestamp.utcnow().floor("D") - datetime.timedelta(days=365 // 2)
start = int(start.timestamp())
end = pd.Timestamp.utcnow().ceil("D")
end = int(end.timestamp())
url += f"&period1={start}&period2={end}"

json_str = self._data.cache_get(url=url, proxy=proxy).text
json_data = json.loads(json_str)
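
The quote scraper now builds its period1/period2 query window from pandas timestamps instead of datetime.datetime.now(). The same window arithmetic shown standalone, as a sketch of the calculation only:

import datetime
import pandas as pd

# Roughly six months back from the start of today (UTC) through the end of today
start = pd.Timestamp.utcnow().floor("D") - datetime.timedelta(days=365 // 2)
end = pd.Timestamp.utcnow().ceil("D")
period1, period2 = int(start.timestamp()), int(end.timestamp())
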
11 changes: 4 additions & 7 deletions yfinance/utils.py
@@ -607,7 +607,7 @@ def _reindex_events(df, new_index, data_col_name):
if interval.endswith('m') or interval.endswith('h') or interval == "1d":
# Update: is possible with daily data when dividend very recent
f_missing = ~df_sub.index.isin(df.index)
-df_sub_missing = df_sub[f_missing]
+df_sub_missing = df_sub[f_missing].copy()
keys = {"Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close",
"Close"}.intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
@@ -743,8 +743,10 @@ class _TzCache:
"""Simple sqlite file cache of ticker->timezone"""

def __init__(self):
-self._tz_db = None
self._setup_cache_folder()
+# Must init db here, where is thread-safe
+self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
+self._migrate_cache_tkr_tz()

def _setup_cache_folder(self):
if not _os.path.isdir(self._db_dir):
@@ -776,11 +778,6 @@ def _db_dir(self):

@property
def tz_db(self):
-# lazy init
-if self._tz_db is None:
-self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
-self._migrate_cache_tkr_tz()
-
return self._tz_db

def _migrate_cache_tkr_tz(self):
