Skip to content

Commit

Permalink
Improve crumb fetch (@bot-unit), fix its reuse
Browse files Browse the repository at this point in the history
  • Loading branch information
ValueRaider committed Oct 30, 2023
1 parent 227b2d2 commit b7a78e8
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 20 deletions.
86 changes: 67 additions & 19 deletions yfinance/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging

import requests as requests
from bs4 import BeautifulSoup
import re
import random
import time
Expand Down Expand Up @@ -50,13 +51,12 @@ def __init__(self, ticker: str, session=None):
self.ticker = ticker
self._session = session or requests

self._cookie, self._crumb = None, None

def _get_cookie(self, proxy=None, timeout=30):
if self._cookie is not None:
return self._cookie
if utils.cookie is not None:
return utils.cookie

utils.get_yf_logger().debug(f"Fetching cookie ...")

# response = self.get('https://fc.yahoo.com')
# To avoid infinite recursion, do NOT use self.get()
response = self._session.get(
url='https://fc.yahoo.com',
Expand All @@ -66,39 +66,87 @@ def _get_cookie(self, proxy=None, timeout=30):

if not response.cookies:
raise Exception("Failed to obtain Yahoo auth cookie.")
self._cookie = list(response.cookies)[0]
return self._cookie

utils.cookie = list(response.cookies)[0]
utils.get_yf_logger().debug(f"cookie = '{utils.cookie}'")
return utils.cookie

def _get_crumb(self, proxy=None, timeout=30):
    """Return the Yahoo crumb token, fetching it at most once per process.

    The crumb is cached in the module-level ``utils.crumb`` so that every
    instance reuses the same value instead of fetching its own.

    Args:
        proxy: Passed as ``proxies`` to the session request and forwarded
            to ``_get_cookie``.
        timeout: Request timeout in seconds.

    Returns:
        The crumb string.

    Raises:
        Exception: If the response is empty or is an HTML page rather than
            a crumb.
    """
    if utils.crumb is not None:
        return utils.crumb

    utils.get_yf_logger().debug("Fetching crumb ...")

    cookie = self._get_cookie(proxy=proxy, timeout=timeout)
    # Call the session directly: self.get() requests a crumb itself, so
    # going through it here would recurse.
    crumb_response = self._session.get(
        url="https://query1.finance.yahoo.com/v1/test/getcrumb",
        headers=self.user_agent_headers,
        cookies={cookie.name: cookie.value},
        proxies=proxy,
        timeout=timeout)

    # Validate BEFORE caching: a bad response stored in utils.crumb would be
    # returned by the early-exit above on every subsequent call.
    crumb = crumb_response.text
    if not crumb or '<html>' in crumb:
        raise Exception("Failed to fetch crumb")

    utils.crumb = crumb
    utils.get_yf_logger().debug(f"crumb = '{utils.crumb}'")
    return utils.crumb

def _get_crumb_botunit(self, proxy=None, timeout=30):
    """Return the Yahoo crumb token, obtained via the consent-form flow.

    Loads the Yahoo consent page, submits the consent form using the
    ``csrfToken`` and ``sessionId`` found in it, copies the consent, and
    then requests the crumb. The result is cached in the module-level
    ``utils.crumb`` and reused on later calls.

    Args:
        proxy: Passed as ``proxies`` to every request made here.
        timeout: Timeout in seconds applied to every request made here.

    Returns:
        The crumb string.

    Raises:
        Exception: If the consent page lacks the expected form fields, or
            the crumb response is empty or is an HTML page.
    """
    # Credit goes to @bot-unit #1729

    if utils.crumb is not None:
        return utils.crumb

    utils.get_yf_logger().debug("Fetching crumb ...")

    # ToDo: might have to force fetch crumb direct from `requests`,
    # to avoid using cached crumb from `requests_cache`

    # Shared by all requests below so proxy/timeout are honoured throughout.
    request_kwargs = {
        'headers': self.user_agent_headers,
        'proxies': proxy,
        'timeout': timeout,
    }

    response = self._session.get('https://guce.yahoo.com/consent', **request_kwargs)
    soup = BeautifulSoup(response.content, 'html.parser')
    csrf_token_input = soup.find('input', attrs={'name': 'csrfToken'})
    session_id_input = soup.find('input', attrs={'name': 'sessionId'})
    if csrf_token_input is None or session_id_input is None:
        # soup.find() returns None when the field is absent; fail with a
        # clear message instead of a TypeError on subscripting None.
        raise Exception("Failed to fetch crumb: consent form is missing csrfToken/sessionId")
    csrf_token = csrf_token_input['value']
    session_id = session_id_input['value']

    data = {
        'agree': ['agree', 'agree'],
        'consentUUID': 'default',
        'sessionId': session_id,
        'csrfToken': csrf_token,
        'originalDoneUrl': 'https://finance.yahoo.com/',
        'namespace': 'yahoo',
    }
    self._session.post(
        f'https://consent.yahoo.com/v2/collectConsent?sessionId={session_id}',
        data=data,
        **request_kwargs)
    self._session.get(
        f'https://guce.yahoo.com/copyConsent?sessionId={session_id}',
        **request_kwargs)
    crumb_response = self._session.get(
        'https://query2.finance.yahoo.com/v1/test/getcrumb',
        **request_kwargs)

    # Validate BEFORE caching: a bad response stored in utils.crumb would be
    # returned by the early-exit above on every subsequent call.
    crumb = crumb_response.text
    if not crumb or '<html>' in crumb:
        raise Exception("Failed to fetch crumb")

    utils.crumb = crumb
    utils.get_yf_logger().debug(f"crumb = '{utils.crumb}'")
    return utils.crumb

def get(self, url, user_agent_headers=None, params=None, cookies=None, proxy=None, timeout=30):
utils.get_yf_logger().debug(f'get(): {url}')
proxy = self._get_proxy(proxy)

# Add cookie & crumb
if cookies is None:
cookie = self._get_cookie()
cookies = {cookie.name: cookie.value}
# if cookies is None:
# cookie = self._get_cookie()
# cookies = {cookie.name: cookie.value}
# Update: don't need cookie
if params is None:
params = {}
if 'crumb' not in params:
params['crumb'] = self._get_crumb()
# params['crumb'] = self._get_crumb()
params['crumb'] = self._get_crumb_botunit()

response = self._session.get(
# response = requests.get(
url=url,
params=params,
cookies=cookies,
Expand Down
2 changes: 1 addition & 1 deletion yfinance/scrapers/quote.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ def __init__(self, data: TickerData, proxy=None):
def info(self) -> dict:
if self._info is None:
self._fetch(self.proxy)
self._fetch_complementary(self.proxy)
# self._fetch_complementary(self.proxy) # Failing, don't know why. Help!

return self._info

Expand Down
5 changes: 5 additions & 0 deletions yfinance/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}


# Quick hack to globally-cache cookie & crumb.
# Both are module-level so they are shared by every caller rather than held
# per instance. None means "not fetched yet": the fetch helpers in
# yfinance/data.py (_get_cookie, _get_crumb, _get_crumb_botunit) return the
# stored value when it is not None, and otherwise fetch it and store it here.
cookie = None
crumb = None


# From https://stackoverflow.com/a/59128615
def attributes(obj):
disallowed_names = {
Expand Down

0 comments on commit b7a78e8

Please sign in to comment.