From b438f29a718224c9bc80a3247b9a2a05337426a5 Mon Sep 17 00:00:00 2001 From: Alexey Vasilyev Date: Sat, 14 Jan 2023 08:06:35 +0100 Subject: [PATCH 1/3] Fix decryption --- yfinance/data.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/yfinance/data.py b/yfinance/data.py index ed4657697..3f456d698 100644 --- a/yfinance/data.py +++ b/yfinance/data.py @@ -48,11 +48,8 @@ def wrapped(*args, **kwargs): def decrypt_cryptojs_aes(data): encrypted_stores = data['context']['dispatcher']['stores'] - _cs = data["_cs"] - _cr = data["_cr"] - - _cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"]) - password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex() + password_key = next(key for key in data.keys() if key not in ["context", "plugins"]) + password = data[password_key] encrypted_stores = b64decode(encrypted_stores) assert encrypted_stores[0:8] == b"Salted__" @@ -176,12 +173,11 @@ def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict: data = json.loads(json_str) - if "_cs" in data and "_cr" in data: - data = decrypt_cryptojs_aes(data) - if "context" in data and "dispatcher" in data["context"]: - # Keep old code, just in case - data = data['context']['dispatcher']['stores'] + stores = data['context']['dispatcher']['stores'] + if isinstance(stores, str): + stores = decrypt_cryptojs_aes(data) + data = stores # return data new_data = json.dumps(data).replace('{}', 'null') From 4ca96424034fb2340e3ca78dee78cbc888571ec4 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sat, 14 Jan 2023 14:20:40 +0000 Subject: [PATCH 2/3] Ensure 'requests_cache' responses processed ; Improve naming --- yfinance/data.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/yfinance/data.py b/yfinance/data.py index 3f456d698..542b523cc 100644 --- a/yfinance/data.py +++ b/yfinance/data.py @@ -46,10 +46,20 @@ def wrapped(*args, **kwargs): return wrapped -def decrypt_cryptojs_aes(data): +def decrypt_cryptojs_aes_stores(data): encrypted_stores = data['context']['dispatcher']['stores'] - password_key = next(key for key in data.keys() if key not in ["context", "plugins"]) - password = data[password_key] + + if "_cs" in data and "_cr" in data: + _cs = data["_cs"] + _cr = data["_cr"] + _cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"]) + password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex() + else: + try: + password_key = next(key for key in data.keys() if key not in ["context", "plugins"]) + except: + return None + password = data[password_key] encrypted_stores = b64decode(encrypted_stores) assert encrypted_stores[0:8] == b"Salted__" @@ -173,14 +183,16 @@ def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict: data = json.loads(json_str) - if "context" in data and "dispatcher" in data["context"]: - stores = data['context']['dispatcher']['stores'] - if isinstance(stores, str): - stores = decrypt_cryptojs_aes(data) - data = stores + stores = decrypt_cryptojs_aes_stores(data) + if stores is None: + # Maybe Yahoo returned old format, not encrypted + if "context" in data and "dispatcher" in data["context"]: + stores = data['context']['dispatcher']['stores'] + if stores is None: + raise Exception(f"{self.ticker}: Failed to extract data stores from web request") # return data - new_data = json.dumps(data).replace('{}', 'null') + new_data = json.dumps(stores).replace('{}', 'null') new_data = re.sub( r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data) From cd2c1ada1484c402c04bb2852a91d6002941f6d7 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sat, 14 Jan 2023 15:41:33 +0000 Subject: [PATCH 3/3] Improve decrypt key deduction --- yfinance/data.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/yfinance/data.py b/yfinance/data.py index 542b523cc..dda032df3 100644 --- a/yfinance/data.py +++ b/yfinance/data.py @@ -55,10 +55,23 @@ def decrypt_cryptojs_aes_stores(data): _cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"]) password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex() else: - try: - password_key = next(key for key in data.keys() if key not in ["context", "plugins"]) - except: + # Currently assume one extra key in dict, which is password. Print error if + # more extra keys detected. + new_keys = [k for k in data.keys() if k not in ["context", "plugins"]] + l = len(new_keys) + if l == 0: return None + elif l == 1 and isinstance(data[new_keys[0]], str): + password_key = new_keys[0] + else: + msg = "Yahoo has again changed data format, yfinance now unsure which key(s) is for decryption:" + k = new_keys[0] + k_str = k if len(k) < 32 else k[:32-3]+"..." + msg += f" '{k_str}'->{type(data[k])}" + for i in range(1, len(new_keys)): + msg += f" , '{k_str}'->{type(data[k])}" + raise Exception(msg) + password_key = new_keys[0] password = data[password_key] encrypted_stores = b64decode(encrypted_stores) @@ -105,7 +118,10 @@ def EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="m key, iv = key_iv[:keySize], key_iv[keySize:final_length] return key, iv - key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") + try: + key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") + except: + raise Exception("yfinance failed to decrypt Yahoo data response") if usePycryptodome: cipher = AES.new(key, AES.MODE_CBC, iv=iv)