Merge pull request #1302 from ranaroussi/dev
dev -> main
ValueRaider committed Jan 14, 2023
2 parents eacfbc4 + 5d9a91d commit 3ee4674
Showing 8 changed files with 114 additions and 30 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -84,6 +84,7 @@ msft.capital_gains

# show share count
msft.shares
msft.get_shares_full()

# show financials:
# - income statement
@@ -213,8 +214,7 @@ data = yf.download( # or pdr.get_data_yahoo(...
interval = "5d",

# Whether to ignore timezone when aligning ticker data from
-# different timezones. Default is True. False may be useful for
-# minute/hourly data.
+# different timezones. Default is False.
ignore_tz = False,

# group by ticker (to access via data['SPY'])
7 changes: 7 additions & 0 deletions tests/ticker.py
@@ -65,6 +65,7 @@ def test_badTicker(self):
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
@@ -100,6 +101,7 @@ def test_goodTicker(self):
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
@@ -653,6 +655,11 @@ def test_shares(self):
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")

def test_shares_full(self):
data = self.ticker.get_shares_full()
self.assertIsInstance(data, pd.Series, "data has wrong type")
self.assertFalse(data.empty, "data is empty")

def test_info(self):
data = self.ticker.info
self.assertIsInstance(data, dict, "data has wrong type")
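
The new share-count tests can be run on their own; for example, assuming pytest is installed, a name filter along these lines should select test_shares and test_shares_full:

python -m pytest tests/ticker.py -k shares
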
58 changes: 56 additions & 2 deletions yfinance/base.py
@@ -40,6 +40,7 @@
from .scrapers.fundamentals import Fundamentals
from .scrapers.holders import Holders
from .scrapers.quote import Quote
import json as _json

_BASE_URL_ = 'https://query2.finance.yahoo.com'
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
@@ -1118,6 +1119,59 @@ def get_shares(self, proxy=None, as_dict=False):
return data.to_dict()
return data

def get_shares_full(self, start=None, end=None, proxy=None):
# Process dates
tz = self._get_ticker_tz(debug_mode=False, proxy=None, timeout=10)
dt_now = _pd.Timestamp.utcnow().tz_convert(tz)
if start is not None:
start_ts = utils._parse_user_dt(start, tz)
start = _pd.Timestamp.fromtimestamp(start_ts).tz_localize("UTC").tz_convert(tz)
start_d = start.date()
if end is not None:
end_ts = utils._parse_user_dt(end, tz)
end = _pd.Timestamp.fromtimestamp(end_ts).tz_localize("UTC").tz_convert(tz)
end_d = end.date()
if end is None:
end = dt_now
if start is None:
start = end - _pd.Timedelta(days=548) # 18 months
if start >= end:
print("ERROR: start date must be before end")
return None
start = start.floor("D")
end = end.ceil("D")

# Fetch
ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format(self.ticker)
shares_url = ts_url_base + "&period1={}&period2={}".format(int(start.timestamp()), int(end.timestamp()))
try:
json_str = self._data.cache_get(shares_url).text
json_data = _json.loads(json_str)
except:
print(f"{self.ticker}: Yahoo web request for share count failed")
return None
try:
fail = json_data["finance"]["error"]["code"] == "Bad Request"
except:
fail = False
if fail:
print(f"{self.ticker}: Yahoo web request for share count failed")
return None

shares_data = json_data["timeseries"]["result"]
if not "shares_out" in shares_data[0]:
print(f"{self.ticker}: Yahoo did not return share count in date range {start} -> {end}")
return None
try:
df = _pd.Series(shares_data[0]["shares_out"], index=_pd.to_datetime(shares_data[0]["timestamp"], unit="s"))
except Exception as e:
print(f"{self.ticker}: Failed to parse shares count data: "+str(e))
return None

df.index = df.index.tz_localize(tz)
df = df.sort_index()
return df

def get_isin(self, proxy=None) -> Optional[str]:
# *** experimental ***
if self._isin is not None:
@@ -1254,8 +1308,8 @@ def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]:
dates[cn] = _pd.to_datetime(dates[cn], format="%b %d, %Y, %I %p")
# - instead of attempting decoding of ambiguous timezone abbreviation, just use 'info':
self._quote.proxy = proxy
-dates[cn] = dates[cn].dt.tz_localize(
-tz=self._quote.info["exchangeTimezoneName"])
+tz = self._get_ticker_tz(debug_mode=False, proxy=proxy, timeout=30)
+dates[cn] = dates[cn].dt.tz_localize(tz)

dates = dates.set_index("Earnings Date")

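
The new Ticker.get_shares_full() returns a timezone-aware pandas Series of share counts (or None if the Yahoo request fails), defaulting to the last 18 months. A minimal usage sketch, with an illustrative ticker and start date:

import yfinance as yf

dat = yf.Ticker("MSFT")
shares = dat.get_shares_full(start="2022-01-01")
if shares is not None:
    # Series indexed by tz-aware timestamps in the exchange's timezone
    print(shares.tail())
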
50 changes: 37 additions & 13 deletions yfinance/data.py
@@ -46,13 +46,33 @@ def wrapped(*args, **kwargs):
return wrapped


-def decrypt_cryptojs_aes(data):
+def decrypt_cryptojs_aes_stores(data):
encrypted_stores = data['context']['dispatcher']['stores']
-_cs = data["_cs"]
-_cr = data["_cr"]

-_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
-password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()
if "_cs" in data and "_cr" in data:
_cs = data["_cs"]
_cr = data["_cr"]
_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()
else:
# Currently assume one extra key in dict, which is password. Print error if
# more extra keys detected.
new_keys = [k for k in data.keys() if k not in ["context", "plugins"]]
l = len(new_keys)
if l == 0:
return None
elif l == 1 and isinstance(data[new_keys[0]], str):
password_key = new_keys[0]
else:
msg = "Yahoo has again changed data format, yfinance now unsure which key(s) is for decryption:"
k = new_keys[0]
k_str = k if len(k) < 32 else k[:32-3]+"..."
msg += f" '{k_str}'->{type(data[k])}"
for i in range(1, len(new_keys)):
msg += f" , '{k_str}'->{type(data[k])}"
raise Exception(msg)
password_key = new_keys[0]
password = data[password_key]

encrypted_stores = b64decode(encrypted_stores)
assert encrypted_stores[0:8] == b"Salted__"
@@ -98,7 +118,10 @@ def EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="m
key, iv = key_iv[:keySize], key_iv[keySize:final_length]
return key, iv

-key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
+try:
+key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
+except:
+raise Exception("yfinance failed to decrypt Yahoo data response")

if usePycryptodome:
cipher = AES.new(key, AES.MODE_CBC, iv=iv)
@@ -176,15 +199,16 @@ def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:

data = json.loads(json_str)

if "_cs" in data and "_cr" in data:
data = decrypt_cryptojs_aes(data)

if "context" in data and "dispatcher" in data["context"]:
# Keep old code, just in case
data = data['context']['dispatcher']['stores']
stores = decrypt_cryptojs_aes_stores(data)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")

# return data
-new_data = json.dumps(data).replace('{}', 'null')
+new_data = json.dumps(stores).replace('{}', 'null')
new_data = re.sub(
r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)

4 changes: 2 additions & 2 deletions yfinance/multi.py
@@ -29,7 +29,7 @@
from . import shared


-def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
+def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=False,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=10):
@@ -68,7 +68,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
How many threads to use for mass downloading. Default is True
ignore_tz: bool
When combining from different timezones, ignore that part of datetime.
-Default is True
+Default is False
proxy: str
Optional. Proxy server URL scheme. Default is None
rounding: bool
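
With ignore_tz now defaulting to False, yf.download() keeps the timezone part of the index when combining tickers from different exchanges; passing ignore_tz=True restores the old behaviour. A small sketch, with illustrative tickers:

import yfinance as yf

# New default: timezone information is preserved when the frames are combined
data = yf.download(["SPY", "BHP.AX"], period="5d", interval="1h")

# Old default behaviour: drop the timezone part when aligning
data_naive = yf.download(["SPY", "BHP.AX"], period="5d", interval="1h", ignore_tz=True)
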
2 changes: 1 addition & 1 deletion yfinance/scrapers/fundamentals.py
@@ -195,7 +195,7 @@ def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.Da
url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys])
# Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt:
start_dt = datetime.datetime(2016, 12, 31)
-end = (datetime.datetime.now() + datetime.timedelta(days=366))
+end = pd.Timestamp.utcnow().ceil("D")
url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp()))

# Step 3: fetch and reshape data
8 changes: 5 additions & 3 deletions yfinance/scrapers/quote.py
@@ -194,9 +194,11 @@ def _scrape_complementary(self, proxy):
for k in keys:
url += "&type=" + k
# Request 6 months of data
url += "&period1={}".format(
int((datetime.datetime.now() - datetime.timedelta(days=365 // 2)).timestamp()))
url += "&period2={}".format(int((datetime.datetime.now() + datetime.timedelta(days=1)).timestamp()))
start = pd.Timestamp.utcnow().floor("D") - datetime.timedelta(days=365 // 2)
start = int(start.timestamp())
end = pd.Timestamp.utcnow().ceil("D")
end = int(end.timestamp())
url += f"&period1={start}&period2={end}"

json_str = self._data.cache_get(url=url, proxy=proxy).text
json_data = json.loads(json_str)
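
The quote scraper now builds its period1/period2 query window from pandas timestamps instead of datetime.datetime.now(). The same window arithmetic shown standalone, as a sketch of the calculation only:

import datetime
import pandas as pd

# Roughly six months back from the start of today (UTC) through the end of today
start = pd.Timestamp.utcnow().floor("D") - datetime.timedelta(days=365 // 2)
end = pd.Timestamp.utcnow().ceil("D")
period1, period2 = int(start.timestamp()), int(end.timestamp())
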
11 changes: 4 additions & 7 deletions yfinance/utils.py
@@ -607,7 +607,7 @@ def _reindex_events(df, new_index, data_col_name):
if interval.endswith('m') or interval.endswith('h') or interval == "1d":
# Update: is possible with daily data when dividend very recent
f_missing = ~df_sub.index.isin(df.index)
-df_sub_missing = df_sub[f_missing]
+df_sub_missing = df_sub[f_missing].copy()
keys = {"Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close",
"Close"}.intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
@@ -743,8 +743,10 @@ class _TzCache:
"""Simple sqlite file cache of ticker->timezone"""

def __init__(self):
-self._tz_db = None
self._setup_cache_folder()
+# Must init db here, where is thread-safe
+self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
+self._migrate_cache_tkr_tz()

def _setup_cache_folder(self):
if not _os.path.isdir(self._db_dir):
@@ -776,11 +778,6 @@ def _db_dir(self):

@property
def tz_db(self):
-# lazy init
-if self._tz_db is None:
-self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
-self._migrate_cache_tkr_tz()
-
return self._tz_db

def _migrate_cache_tkr_tz(self):
