Merge pull request #1927 from ranaroussi/dev
sync dev -> main
ValueRaider authored May 19, 2024
2 parents 4bc546c + fe00fd5 commit e65ca40
Showing 22 changed files with 272 additions and 199 deletions.
25 changes: 5 additions & 20 deletions README.md
@@ -124,7 +124,7 @@ msft.recommendations
msft.recommendations_summary
msft.upgrades_downgrades

-# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default.
+# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default.
# Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument.
msft.earnings_dates

@@ -191,7 +191,7 @@ data = yf.download("SPY AAPL", period="1mo")

### Smarter scraping

-Install the `nospam` packages for smarter scraping using `pip` (see [Installation](#installation)). These packages help cache calls such that Yahoo is not spammed with requests.
+Install the `nospam` packages for smarter scraping using `pip` (see [Installation](#installation)). These packages help cache calls such that Yahoo is not spammed with requests.

To use a custom `requests` session, pass a `session=` argument to
the Ticker constructor. This allows caching calls to the API as well as modifying requests, e.g. via the `User-agent` header.
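For example, a cached session via `requests_cache` (a sketch; the cache name and `User-agent` string are illustrative):

```python
import requests_cache

import yfinance as yf

# cache HTTP responses on disk and identify this client to Yahoo
session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'

ticker = yf.Ticker('MSFT', session=session)
# subsequent calls on this Ticker go through the cached session
ticker.actions
```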
@@ -236,31 +236,16 @@ yfinance?](https://stackoverflow.com/questions/63107801)
- How to download single or multiple tickers into a single
dataframe with single level column names and a ticker column

-### `pandas_datareader` override
-
-If your code uses `pandas_datareader` and you want to download data
-faster, you can "hijack" `pandas_datareader.data.get_data_yahoo()`
-method to use **yfinance** while making sure the returned data is in the
-same format as **pandas\_datareader**'s `get_data_yahoo()`.
-
-```python
-from pandas_datareader import data as pdr
-
-import yfinance as yf
-yf.pdr_override() # <== that's all it takes :-)
-
-# download dataframe
-data = pdr.get_data_yahoo("SPY", start="2017-01-01", end="2017-04-30")
-```

### Persistent cache store

To reduce Yahoo requests, yfinance stores some data locally: timezones to localize dates, and a cookie. The cache location is:

- Windows = C:/Users/\<USER\>/AppData/Local/py-yfinance
- Linux = /home/\<USER\>/.cache/py-yfinance
- MacOS = /Users/\<USER\>/Library/Caches/py-yfinance

You can direct cache to use a different location with `set_tz_cache_location()`:

```python
import yfinance as yf
yf.set_tz_cache_location("custom/cache/location")
```

@@ -287,7 +272,7 @@ intended for research and educational purposes. You should refer to Yahoo!'s ter
([here](https://policies.yahoo.com/us/en/yahoo/terms/product-atos/apiforydn/index.htm),
[here](https://legal.yahoo.com/us/en/yahoo/terms/otos/index.html), and
[here](https://policies.yahoo.com/us/en/yahoo/terms/index.htm)) for
-detailes on your rights to use the actual data downloaded.
+details on your rights to use the actual data downloaded.

---

4 changes: 2 additions & 2 deletions meta.yaml
@@ -21,7 +21,7 @@ requirements:
- requests >=2.31
- multitasking >=0.0.7
- lxml >=4.9.1
-    - appdirs >=1.4.4
+    - platformdirs >=2.0.0
- pytz >=2022.5
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
@@ -37,7 +37,7 @@ requirements:
- requests >=2.31
- multitasking >=0.0.7
- lxml >=4.9.1
-    - appdirs >=1.4.4
+    - platformdirs >=2.0.0
- pytz >=2022.5
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
2 changes: 1 addition & 1 deletion requirements.txt
@@ -3,7 +3,7 @@ numpy>=1.16.5
requests>=2.31
multitasking>=0.0.7
lxml>=4.9.1
-appdirs>=1.4.4
+platformdirs>=2.0.0
pytz>=2022.5
frozendict>=2.3.4
beautifulsoup4>=4.11.1
2 changes: 1 addition & 1 deletion setup.py
@@ -61,7 +61,7 @@
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']),
install_requires=['pandas>=1.3.0', 'numpy>=1.16.5',
'requests>=2.31', 'multitasking>=0.0.7',
-                  'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
+                  'lxml>=4.9.1', 'platformdirs>=2.0.0', 'pytz>=2022.5',
'frozendict>=2.3.4', 'peewee>=3.16.2',
'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
extras_require={
1 change: 0 additions & 1 deletion tests/__init__.py
@@ -1 +0,0 @@
-#!/usr/bin/env python
2 changes: 1 addition & 1 deletion tests/context.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

-import appdirs as _ad
+import platformdirs as _ad
import datetime as _dt
import sys
import os
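The `appdirs` → `platformdirs` swap in the files above is near drop-in: `platformdirs` is the maintained fork and keeps the same directory-lookup API. A sketch of the kind of call that keeps working after the migration (the arguments here are illustrative, not taken from this diff):

```python
import platformdirs

# resolves the per-platform cache root,
# e.g. /home/<USER>/.cache/py-yfinance on Linux
print(platformdirs.user_cache_dir("py-yfinance", appauthor=False))
```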
46 changes: 23 additions & 23 deletions tests/data/AV-L-1wk-bad-stock-split-fixed.csv
@@ -1,27 +1,27 @@
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
-2021-12-13 00:00:00+00:00,393.999975585938,406.6,391.4,402.899916992188,291.232287597656,62714764.4736842,0,0
-2021-12-20 00:00:00+00:00,393.999975585938,412.199990234375,392.502983398438,409.899997558594,296.292243652344,46596651.3157895,0,0
-2021-12-27 00:00:00+00:00,409.899997558594,416.550971679688,408.387001953125,410.4,296.653642578125,10818482.8947368,0,0
-2022-01-03 00:00:00+00:00,410.4,432.199995117188,410.4,432.099985351563,312.339265136719,44427327.6315789,0,0
-2022-01-10 00:00:00+00:00,431.3,439.199982910156,429.099970703125,436.099912109375,315.230618896484,29091400,0,0
-2022-01-17 00:00:00+00:00,437.999912109375,445.199965820313,426.999997558594,431.999975585938,312.267017822266,43787351.3157895,0,0
-2022-01-24 00:00:00+00:00,430.099975585938,440.999973144531,420.999968261719,433.499982910156,313.351237792969,58487296.0526316,0,0
-2022-01-31 00:00:00+00:00,436.199968261719,443.049987792969,432.099985351563,435.199916992188,314.580045166016,43335806.5789474,0,0
-2022-02-07 00:00:00+00:00,437.899995117188,448.799992675781,436.051994628906,444.39998046875,321.230207519531,39644061.8421053,0,0
-2022-02-14 00:00:00+00:00,437.699975585938,441.999978027344,426.699968261719,432.199995117188,312.411558837891,49972693.4210526,0,0
-2022-02-21 00:00:00+00:00,435.499992675781,438.476999511719,408.29998046875,423.399970703125,306.050571289063,65719596.0526316,0,0
-2022-02-28 00:00:00+00:00,415.099995117188,427.999909667969,386.199932861328,386.799945068359,279.594578857422,94057936.8421053,4.1875,0
-2022-03-07 00:00:00+00:00,374.999952392578,417.299978027344,361.101981201172,409.599968261719,298.389248046875,71269101.3157895,0,0
-2022-03-14 00:00:00+00:00,413.099985351563,426.699968261719,408.899992675781,422.399965820313,307.713929443359,55431927.6315789,0,0
-2022-03-21 00:00:00+00:00,422.699995117188,442.7,422.399965820313,437.799985351563,318.932696533203,39896352.6315789,0,0
-2022-03-28 00:00:00+01:00,442.49998046875,460.999978027344,440.097983398438,444.6,323.886403808594,56413515.7894737,0,0
-2022-04-04 00:00:00+01:00,439.699985351563,445.399985351563,421.999973144531,425.799973144531,310.190817871094,49415836.8421053,19.342106,0
-2022-04-11 00:00:00+01:00,425.39998046875,435.599909667969,420.799995117188,434.299968261719,327.211427001953,29875081.5789474,0,0
-2022-04-18 00:00:00+01:00,434.299968261719,447.799987792969,433.599992675781,437.799985351563,329.848419189453,49288272.3684211,0,0
-2022-04-25 00:00:00+01:00,430.699987792969,438.799990234375,423.999982910156,433.299916992188,326.457967529297,44656776.3157895,0,0
-2022-05-02 00:00:00+01:00,433.299916992188,450.999975585938,414.499982910156,414.899975585938,312.595018310547,29538167.1052632,0,0
-2022-05-09 00:00:00+01:00,413.199995117188,417.449992675781,368.282923583984,408.199970703125,307.547099609375,73989611.8421053,0,0
-2022-05-16 00:00:00+01:00,384,423.600006103516,384,412.100006103516,310.485473632813,81938261,101.69,0.76
+2021-12-13 00:00:00+00:00,518.421020507813,535,515,530.131469726563,383.200378417969,47663221,0,0
+2021-12-20 00:00:00+00:00,518.421020507813,542.368408203125,516.451293945313,539.342102050781,389.858215332031,35413455,0,0
+2021-12-27 00:00:00+00:00,539.342102050781,548.093383789063,537.351318359375,540,390.333740234375,8222047,0,0
+2022-01-03 00:00:00+00:00,540,568.684204101563,540,568.552612304688,410.972717285156,33764769,0,0
+2022-01-10 00:00:00+00:00,567.5,577.894714355469,564.605224609375,573.815673828125,414.777130126953,22109464,0,0
+2022-01-17 00:00:00+00:00,576.315673828125,585.789428710938,561.842102050781,568.421020507813,410.877655029297,33278387,0,0
+2022-01-24 00:00:00+00:00,565.921020507813,580.263122558594,553.947326660156,570.394714355469,412.304260253906,44450345,0,0
+2022-01-31 00:00:00+00:00,573.947326660156,582.960510253906,568.552612304688,572.631469726563,413.921112060547,32935213,0,0
+2022-02-07 00:00:00+00:00,576.184204101563,590.526306152344,573.752624511719,584.73681640625,422.671325683594,30129487,0,0
+2022-02-14 00:00:00+00:00,575.921020507813,581.578918457031,561.447326660156,568.684204101563,411.067840576172,37979247,0,0
+2022-02-21 00:00:00+00:00,573.026306152344,576.943420410156,537.23681640625,557.105224609375,402.698120117188,49946893,0,0
+2022-02-28 00:00:00+00:00,546.184204101563,563.157775878906,508.157806396484,508.947296142578,367.887603759766,71484032,4.1875,0
+2022-03-07 00:00:00+00:00,493.420989990234,549.078918457031,475.134185791016,538.947326660156,392.617431640625,54164517,0,0
+2022-03-14 00:00:00+00:00,543.552612304688,561.447326660156,538.026306152344,555.789428710938,404.886749267578,42128265,0,0
+2022-03-21 00:00:00+00:00,556.184204101563,582.5,555.789428710938,576.052612304688,419.648284912109,30321228,0,0
+2022-03-28 00:00:00+01:00,582.23681640625,606.578918457031,579.076293945313,585,426.166320800781,42874272,0,0
+2022-04-04 00:00:00+01:00,578.552612304688,586.052612304688,555.263122558594,560.263122558594,408.145812988281,37556036,19.342106,0
+2022-04-11 00:00:00+01:00,559.73681640625,573.157775878906,553.684204101563,571.447326660156,430.541351318359,22705062,0,0
+2022-04-18 00:00:00+01:00,571.447326660156,589.210510253906,570.526306152344,576.052612304688,434.011077880859,37459087,0,0
+2022-04-25 00:00:00+01:00,566.710510253906,577.368408203125,557.894714355469,570.131469726563,429.549957275391,33939150,0,0
+2022-05-02 00:00:00+01:00,570.131469726563,593.421020507813,545.394714355469,545.921020507813,411.309234619141,22449007,0,0
+2022-05-09 00:00:00+01:00,543.684204101563,549.276306152344,484.582794189453,537.105224609375,404.667236328125,56232105,0,0
+2022-05-16 00:00:00+01:00,505.263157894737,557.368429083573,505.263157894737,542.236850136205,408.533517937911,62273078.36,101.69,0.76
2022-05-23 00:00:00+01:00,416.100006103516,442.399993896484,341.915008544922,440.899993896484,409.764678955078,45432941,0,0
2022-05-30 00:00:00+01:00,442.700012207031,444.200012207031,426.600006103516,428.700012207031,398.426239013672,37906659,0,0
2022-06-06 00:00:00+01:00,425.299987792969,434.010009765625,405.200012207031,405.399993896484,376.771606445313,40648810,0,0
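The rewritten rows above reflect repairing the bad 0.76 stock split dated 2022-05-16: pre-split prices are divided by the ratio and volumes multiplied by it. A quick sanity check of the first row, using values copied from the CSV:

```python
ratio = 0.76  # bad stock split applied on 2022-05-16

close_old = 402.899916992188   # 2021-12-13 Close, old row
volume_old = 62714764.4736842  # 2021-12-13 Volume, old row

print(close_old / ratio)   # ~530.131469..., matches the new Close
print(volume_old * ratio)  # ~47663221, matches the new Volume
```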
25 changes: 9 additions & 16 deletions tests/prices.py → tests/test_prices.py
@@ -43,14 +43,14 @@ def test_download(self):

df_tkrs = df.columns.levels[1]
self.assertEqual(sorted(tkrs), sorted(df_tkrs))

def test_download_with_invalid_ticker(self):
#Checks if using an invalid symbol gives the same output as not using an invalid symbol in combination with a valid symbol (AAPL)
#Checks to make sure that invalid symbol handling for the date column is the same as the base case (no invalid symbols)

invalid_tkrs = ["AAPL", "ATVI"] #AAPL exists and ATVI does not exist
valid_tkrs = ["AAPL", "INTC"] #AAPL and INTC both exist

data_invalid_sym = yf.download(invalid_tkrs, start='2023-11-16', end='2023-11-17')
data_valid_sym = yf.download(valid_tkrs, start='2023-11-16', end='2023-11-17')

@@ -62,7 +62,7 @@ def test_duplicatingHourly(self):
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(proxy=None, timeout=None)

-dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
+dt_utc = _pd.Timestamp.utcnow()
dt = dt_utc.astimezone(_tz.timezone(tz))
start_d = dt.date() - _dt.timedelta(days=7)
df = dat.history(start=start_d, interval="1h")
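The `dt_utc` change in this hunk swaps a two-step pytz localization for pandas' already-aware UTC timestamp. A minimal sketch of the equivalence, using plain imports instead of the test module's `_dt`/`_pd`/`_tz` aliases:

```python
import datetime as dt

import pandas as pd
import pytz

# old style: naive utcnow() made timezone-aware via pytz
old = pytz.timezone("UTC").localize(dt.datetime.utcnow())
# new style: pandas returns a UTC-aware Timestamp directly
new = pd.Timestamp.utcnow()

# both convert to an exchange timezone the same way
print(old.astimezone(pytz.timezone("America/New_York")))
print(new.astimezone(pytz.timezone("America/New_York")))
```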
@@ -82,7 +82,7 @@ def test_duplicatingDaily(self):
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(proxy=None, timeout=None)

-dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
+dt_utc = _pd.Timestamp.utcnow()
dt = dt_utc.astimezone(_tz.timezone(tz))
if dt.time() < _dt.time(17, 0):
continue
@@ -359,13 +359,6 @@ def test_monthlyWithEvents2(self):
dfd_divs = dfd[dfd['Dividends'] != 0]
self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])

-dfm = yf.Ticker("F").history(period="50mo", interval="1mo")
-dfd = yf.Ticker("F").history(period="50mo", interval="1d")
-dfd = dfd[dfd.index > dfm.index[0]]
-dfm_divs = dfm[dfm['Dividends'] != 0]
-dfd_divs = dfd[dfd['Dividends'] != 0]
-self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])

def test_tz_dst_ambiguous(self):
# Reproduce issue #1100
try:
@@ -791,7 +784,7 @@ def test_repair_zeroes_hourly(self):
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()

-correct_df = hist.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
+correct_df = hist.history(period="5d", interval="1h", auto_adjust=False, repair=True)

df_bad = correct_df.copy()
bad_idx = correct_df.index[10]
@@ -820,7 +813,7 @@ def test_repair_zeroes_hourly(self):
self.assertTrue("Repaired?" in repaired_df.columns)
self.assertFalse(repaired_df["Repaired?"].isna().any())

-def test_repair_bad_stock_split(self):
+def test_repair_bad_stock_splits(self):
# Stocks that split in 2022 but no problems in Yahoo data,
# so repair should change nothing
good_tkrs = ['AMZN', 'DXCM', 'FTNT', 'GOOG', 'GME', 'PANW', 'SHOP', 'TSLA']
@@ -836,7 +829,7 @@ def test_repair_bad_stock_split(self):
_dp = os.path.dirname(__file__)
df_good = dat.history(start='2020-01-01', end=_dt.date.today(), interval=interval, auto_adjust=False)

-repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange)
+repaired_df = hist._fix_bad_stock_splits(df_good, interval, tz_exchange)

# Expect no change from repair
df_good = df_good.sort_index()
@@ -867,7 +860,7 @@ def test_repair_bad_stock_split(self):
df_bad = _pd.read_csv(fp, index_col="Date")
df_bad.index = _pd.to_datetime(df_bad.index, utc=True)

-repaired_df = hist._fix_bad_stock_split(df_bad, "1d", tz_exchange)
+repaired_df = hist._fix_bad_stock_splits(df_bad, "1d", tz_exchange)

fp = os.path.join(_dp, "data", tkr.replace('.','-')+'-'+interval+"-bad-stock-split-fixed.csv")
correct_df = _pd.read_csv(fp, index_col="Date")
@@ -902,7 +895,7 @@ def test_repair_bad_stock_split(self):
_dp = os.path.dirname(__file__)
df_good = hist.history(start='2020-11-30', end='2021-04-01', interval=interval, auto_adjust=False)

-repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange)
+repaired_df = hist._fix_bad_stock_splits(df_good, interval, tz_exchange)

# Expect no change from repair
df_good = df_good.sort_index()
