Skip to content

Commit

Permalink
Fix tests; fine-tune split repair; fix UTC warning
Browse files Browse the repository at this point in the history
  • Loading branch information
ValueRaider committed May 19, 2024
1 parent da1c466 commit 25b6175
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 55 deletions.
Empty file added tests/__init__.py
Empty file.
46 changes: 23 additions & 23 deletions tests/data/AV-L-1wk-bad-stock-split-fixed.csv
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
2021-12-13 00:00:00+00:00,393.999975585938,406.6,391.4,402.899916992188,291.232287597656,62714764.4736842,0,0
2021-12-20 00:00:00+00:00,393.999975585938,412.199990234375,392.502983398438,409.899997558594,296.292243652344,46596651.3157895,0,0
2021-12-27 00:00:00+00:00,409.899997558594,416.550971679688,408.387001953125,410.4,296.653642578125,10818482.8947368,0,0
2022-01-03 00:00:00+00:00,410.4,432.199995117188,410.4,432.099985351563,312.339265136719,44427327.6315789,0,0
2022-01-10 00:00:00+00:00,431.3,439.199982910156,429.099970703125,436.099912109375,315.230618896484,29091400,0,0
2022-01-17 00:00:00+00:00,437.999912109375,445.199965820313,426.999997558594,431.999975585938,312.267017822266,43787351.3157895,0,0
2022-01-24 00:00:00+00:00,430.099975585938,440.999973144531,420.999968261719,433.499982910156,313.351237792969,58487296.0526316,0,0
2022-01-31 00:00:00+00:00,436.199968261719,443.049987792969,432.099985351563,435.199916992188,314.580045166016,43335806.5789474,0,0
2022-02-07 00:00:00+00:00,437.899995117188,448.799992675781,436.051994628906,444.39998046875,321.230207519531,39644061.8421053,0,0
2022-02-14 00:00:00+00:00,437.699975585938,441.999978027344,426.699968261719,432.199995117188,312.411558837891,49972693.4210526,0,0
2022-02-21 00:00:00+00:00,435.499992675781,438.476999511719,408.29998046875,423.399970703125,306.050571289063,65719596.0526316,0,0
2022-02-28 00:00:00+00:00,415.099995117188,427.999909667969,386.199932861328,386.799945068359,279.594578857422,94057936.8421053,4.1875,0
2022-03-07 00:00:00+00:00,374.999952392578,417.299978027344,361.101981201172,409.599968261719,298.389248046875,71269101.3157895,0,0
2022-03-14 00:00:00+00:00,413.099985351563,426.699968261719,408.899992675781,422.399965820313,307.713929443359,55431927.6315789,0,0
2022-03-21 00:00:00+00:00,422.699995117188,442.7,422.399965820313,437.799985351563,318.932696533203,39896352.6315789,0,0
2022-03-28 00:00:00+01:00,442.49998046875,460.999978027344,440.097983398438,444.6,323.886403808594,56413515.7894737,0,0
2022-04-04 00:00:00+01:00,439.699985351563,445.399985351563,421.999973144531,425.799973144531,310.190817871094,49415836.8421053,19.342106,0
2022-04-11 00:00:00+01:00,425.39998046875,435.599909667969,420.799995117188,434.299968261719,327.211427001953,29875081.5789474,0,0
2022-04-18 00:00:00+01:00,434.299968261719,447.799987792969,433.599992675781,437.799985351563,329.848419189453,49288272.3684211,0,0
2022-04-25 00:00:00+01:00,430.699987792969,438.799990234375,423.999982910156,433.299916992188,326.457967529297,44656776.3157895,0,0
2022-05-02 00:00:00+01:00,433.299916992188,450.999975585938,414.499982910156,414.899975585938,312.595018310547,29538167.1052632,0,0
2022-05-09 00:00:00+01:00,413.199995117188,417.449992675781,368.282923583984,408.199970703125,307.547099609375,73989611.8421053,0,0
2022-05-16 00:00:00+01:00,384,423.600006103516,384,412.100006103516,310.485473632813,81938261,101.69,0.76
2021-12-13 00:00:00+00:00,518.421020507813,535,515,530.131469726563,383.200378417969,47663221,0,0
2021-12-20 00:00:00+00:00,518.421020507813,542.368408203125,516.451293945313,539.342102050781,389.858215332031,35413455,0,0
2021-12-27 00:00:00+00:00,539.342102050781,548.093383789063,537.351318359375,540,390.333740234375,8222047,0,0
2022-01-03 00:00:00+00:00,540,568.684204101563,540,568.552612304688,410.972717285156,33764769,0,0
2022-01-10 00:00:00+00:00,567.5,577.894714355469,564.605224609375,573.815673828125,414.777130126953,22109464,0,0
2022-01-17 00:00:00+00:00,576.315673828125,585.789428710938,561.842102050781,568.421020507813,410.877655029297,33278387,0,0
2022-01-24 00:00:00+00:00,565.921020507813,580.263122558594,553.947326660156,570.394714355469,412.304260253906,44450345,0,0
2022-01-31 00:00:00+00:00,573.947326660156,582.960510253906,568.552612304688,572.631469726563,413.921112060547,32935213,0,0
2022-02-07 00:00:00+00:00,576.184204101563,590.526306152344,573.752624511719,584.73681640625,422.671325683594,30129487,0,0
2022-02-14 00:00:00+00:00,575.921020507813,581.578918457031,561.447326660156,568.684204101563,411.067840576172,37979247,0,0
2022-02-21 00:00:00+00:00,573.026306152344,576.943420410156,537.23681640625,557.105224609375,402.698120117188,49946893,0,0
2022-02-28 00:00:00+00:00,546.184204101563,563.157775878906,508.157806396484,508.947296142578,367.887603759766,71484032,4.1875,0
2022-03-07 00:00:00+00:00,493.420989990234,549.078918457031,475.134185791016,538.947326660156,392.617431640625,54164517,0,0
2022-03-14 00:00:00+00:00,543.552612304688,561.447326660156,538.026306152344,555.789428710938,404.886749267578,42128265,0,0
2022-03-21 00:00:00+00:00,556.184204101563,582.5,555.789428710938,576.052612304688,419.648284912109,30321228,0,0
2022-03-28 00:00:00+01:00,582.23681640625,606.578918457031,579.076293945313,585,426.166320800781,42874272,0,0
2022-04-04 00:00:00+01:00,578.552612304688,586.052612304688,555.263122558594,560.263122558594,408.145812988281,37556036,19.342106,0
2022-04-11 00:00:00+01:00,559.73681640625,573.157775878906,553.684204101563,571.447326660156,430.541351318359,22705062,0,0
2022-04-18 00:00:00+01:00,571.447326660156,589.210510253906,570.526306152344,576.052612304688,434.011077880859,37459087,0,0
2022-04-25 00:00:00+01:00,566.710510253906,577.368408203125,557.894714355469,570.131469726563,429.549957275391,33939150,0,0
2022-05-02 00:00:00+01:00,570.131469726563,593.421020507813,545.394714355469,545.921020507813,411.309234619141,22449007,0,0
2022-05-09 00:00:00+01:00,543.684204101563,549.276306152344,484.582794189453,537.105224609375,404.667236328125,56232105,0,0
2022-05-16 00:00:00+01:00,505.263157894737,557.368429083573,505.263157894737,542.236850136205,408.533517937911,62273078.36,101.69,0.76
2022-05-23 00:00:00+01:00,416.100006103516,442.399993896484,341.915008544922,440.899993896484,409.764678955078,45432941,0,0
2022-05-30 00:00:00+01:00,442.700012207031,444.200012207031,426.600006103516,428.700012207031,398.426239013672,37906659,0,0
2022-06-06 00:00:00+01:00,425.299987792969,434.010009765625,405.200012207031,405.399993896484,376.771606445313,40648810,0,0
Expand Down
17 changes: 5 additions & 12 deletions tests/test_prices.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,13 +359,6 @@ def test_monthlyWithEvents2(self):
dfd_divs = dfd[dfd['Dividends'] != 0]
self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])

dfm = yf.Ticker("F").history(period="50mo", interval="1mo")
dfd = yf.Ticker("F").history(period="50mo", interval="1d")
dfd = dfd[dfd.index > dfm.index[0]]
dfm_divs = dfm[dfm['Dividends'] != 0]
dfd_divs = dfd[dfd['Dividends'] != 0]
self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])

def test_tz_dst_ambiguous(self):
# Reproduce issue #1100
try:
Expand Down Expand Up @@ -791,7 +784,7 @@ def test_repair_zeroes_hourly(self):
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()

correct_df = hist.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
correct_df = hist.history(period="5d", interval="1h", auto_adjust=False, repair=True)

df_bad = correct_df.copy()
bad_idx = correct_df.index[10]
Expand Down Expand Up @@ -820,7 +813,7 @@ def test_repair_zeroes_hourly(self):
self.assertTrue("Repaired?" in repaired_df.columns)
self.assertFalse(repaired_df["Repaired?"].isna().any())

def test_repair_bad_stock_split(self):
def test_repair_bad_stock_splits(self):
# Stocks that split in 2022 but no problems in Yahoo data,
# so repair should change nothing
good_tkrs = ['AMZN', 'DXCM', 'FTNT', 'GOOG', 'GME', 'PANW', 'SHOP', 'TSLA']
Expand All @@ -836,7 +829,7 @@ def test_repair_bad_stock_split(self):
_dp = os.path.dirname(__file__)
df_good = dat.history(start='2020-01-01', end=_dt.date.today(), interval=interval, auto_adjust=False)

repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange)
repaired_df = hist._fix_bad_stock_splits(df_good, interval, tz_exchange)

# Expect no change from repair
df_good = df_good.sort_index()
Expand Down Expand Up @@ -867,7 +860,7 @@ def test_repair_bad_stock_split(self):
df_bad = _pd.read_csv(fp, index_col="Date")
df_bad.index = _pd.to_datetime(df_bad.index, utc=True)

repaired_df = hist._fix_bad_stock_split(df_bad, "1d", tz_exchange)
repaired_df = hist._fix_bad_stock_splits(df_bad, "1d", tz_exchange)

fp = os.path.join(_dp, "data", tkr.replace('.','-')+'-'+interval+"-bad-stock-split-fixed.csv")
correct_df = _pd.read_csv(fp, index_col="Date")
Expand Down Expand Up @@ -902,7 +895,7 @@ def test_repair_bad_stock_split(self):
_dp = os.path.dirname(__file__)
df_good = hist.history(start='2020-11-30', end='2021-04-01', interval=interval, auto_adjust=False)

repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange)
repaired_df = hist._fix_bad_stock_splits(df_good, interval, tz_exchange)

# Expect no change from repair
df_good = df_good.sort_index()
Expand Down
24 changes: 12 additions & 12 deletions tests/test_ticker.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,13 @@ def test_badTicker(self):
tkr = "DJI" # typo of "^DJI"
dat = yf.Ticker(tkr, session=self.session)

dat.history(period="1wk")
dat.history(period="5d")
dat.history(start="2022-01-01")
dat.history(start="2022-01-01", end="2022-03-01")
yf.download([tkr], period="1wk", threads=False, ignore_tz=False)
yf.download([tkr], period="1wk", threads=True, ignore_tz=False)
yf.download([tkr], period="1wk", threads=False, ignore_tz=True)
yf.download([tkr], period="1wk", threads=True, ignore_tz=True)
yf.download([tkr], period="5d", threads=False, ignore_tz=False)
yf.download([tkr], period="5d", threads=True, ignore_tz=False)
yf.download([tkr], period="5d", threads=False, ignore_tz=True)
yf.download([tkr], period="5d", threads=True, ignore_tz=True)

for k in dat.fast_info:
dat.fast_info[k]
Expand Down Expand Up @@ -144,7 +144,7 @@ def test_prices_missing(self):
# META call option, 2024 April 26th @ strike of 180000
tkr = 'META240426C00180000'
dat = yf.Ticker(tkr, session=self.session)
with self.assertRaises(YFPricesMissingError):
with self.assertRaises(YFChartError):
dat.history(period="5d", interval="1m", raise_errors=True)

def test_ticker_missing(self):
Expand All @@ -162,13 +162,13 @@ def test_goodTicker(self):
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)

dat.history(period="1wk")
dat.history(period="5d")
dat.history(start="2022-01-01")
dat.history(start="2022-01-01", end="2022-03-01")
yf.download([tkr], period="1wk", threads=False, ignore_tz=False)
yf.download([tkr], period="1wk", threads=True, ignore_tz=False)
yf.download([tkr], period="1wk", threads=False, ignore_tz=True)
yf.download([tkr], period="1wk", threads=True, ignore_tz=True)
yf.download([tkr], period="5d", threads=False, ignore_tz=False)
yf.download([tkr], period="5d", threads=True, ignore_tz=False)
yf.download([tkr], period="5d", threads=False, ignore_tz=True)
yf.download([tkr], period="5d", threads=True, ignore_tz=True)

for k in dat.fast_info:
dat.fast_info[k]
Expand All @@ -182,7 +182,7 @@ def test_goodTicker_withProxy(self):

dat._fetch_ticker_tz(proxy=None, timeout=5)
dat._get_ticker_tz(proxy=None, timeout=5)
dat.history(period="1wk")
dat.history(period="5d")

for attribute_name, attribute_type in ticker_attributes:
assert_attribute_type(self, dat, attribute_name, attribute_type)
Expand Down
22 changes: 19 additions & 3 deletions yfinance/scrapers/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -1204,14 +1204,25 @@ def _fix_bad_stock_splits(self, df, interval, tz_exchange):
logger.debug('price-repair-split: No splits in data')
return df

logger.debug(f'price-repair-split: Splits: {str(df['Stock Splits'][split_f].to_dict())}')

if not 'Repaired?' in df.columns:
df['Repaired?'] = False
for split_idx in np.where(split_f)[0]:
split_dt = df.index[split_idx]
split = df.loc[split_dt, 'Stock Splits']
if split_dt == df.index[0]:
continue

cutoff_idx = min(df.shape[0], split_idx+1) # add one row after to detect big change
# Add on a week:
if interval in ['1wk', '1mo', '3mo']:
split_idx += 1
else:
split_idx += 5
cutoff_idx = min(df.shape[0], split_idx) # add one row after to detect big change
df_pre_split = df.iloc[0:cutoff_idx+1]
logger.debug(f'price-repair-split: split_idx={split_idx} split_dt={split_dt}')
logger.debug(f'price-repair-split: df dt range: {df_pre_split.index[0].date()} -> {df_pre_split.index[-1].date()}')

df_pre_split_repaired = self._fix_prices_sudden_change(df_pre_split, interval, tz_exchange, split, correct_volume=True)
# Merge back in:
Expand Down Expand Up @@ -1240,7 +1251,7 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
# start_min = 1 year before oldest split
f = df['Stock Splits'].to_numpy() != 0.0
start_min = (df.index[f].min() - _dateutil.relativedelta.relativedelta(years=1)).date()
logger.debug(f'price-repair-split: start_min={start_min}')
logger.debug(f'price-repair-split: start_min={start_min} change={change}')

OHLC = ['Open', 'High', 'Low', 'Close']

Expand Down Expand Up @@ -1438,8 +1449,13 @@ def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_v
# if logger.isEnabledFor(logging.DEBUG):
# df_debug['i'] = list(range(0, df_debug.shape[0]))
# df_debug['i_rev'] = df_debug.shape[0]-1 - df_debug['i']
# if correct_columns_individually:
# f_change = df_debug[[c+'_f_down' for c in debug_cols]].any(axis=1) | df_debug[[c+'_f_up' for c in debug_cols]].any(axis=1)
# else:
# f_change = df_debug['f_down'] | df_debug['f_up']
# f_change = f_change | np.roll(f_change, -1) | np.roll(f_change, 1) | np.roll(f_change, -2) | np.roll(f_change, 2)
# with pd.option_context('display.max_rows', None, 'display.max_columns', 10, 'display.width', 1000): # more options can be specified also
# logger.debug(f"price-repair-split: my workings:" + '\n' + str(df_debug))
# logger.debug(f"price-repair-split: my workings:" + '\n' + str(df_debug[f_change]))

def map_signals_to_ranges(f, f_up, f_down):
# Ensure 0th element is False, because True is nonsense
Expand Down
6 changes: 3 additions & 3 deletions yfinance/scrapers/quote.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def toJSON(self, indent=4):

def _get_1y_prices(self, fullDaysOnly=False):
if self._prices_1y is None:
self._prices_1y = self._tkr.history(period="380d", auto_adjust=False, keepna=True, proxy=self.proxy)
self._prices_1y = self._tkr.history(period="1y", auto_adjust=False, keepna=True, proxy=self.proxy)
self._md = self._tkr.get_history_metadata(proxy=self.proxy)
try:
ctp = self._md["currentTradingPeriod"]
Expand All @@ -207,12 +207,12 @@ def _get_1y_prices(self, fullDaysOnly=False):

def _get_1wk_1h_prepost_prices(self):
if self._prices_1wk_1h_prepost is None:
self._prices_1wk_1h_prepost = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=True, proxy=self.proxy)
self._prices_1wk_1h_prepost = self._tkr.history(period="5d", interval="1h", auto_adjust=False, prepost=True, proxy=self.proxy)
return self._prices_1wk_1h_prepost

def _get_1wk_1h_reg_prices(self):
if self._prices_1wk_1h_reg is None:
self._prices_1wk_1h_reg = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=False, proxy=self.proxy)
self._prices_1wk_1h_reg = self._tkr.history(period="5d", interval="1h", auto_adjust=False, prepost=False, proxy=self.proxy)
return self._prices_1wk_1h_reg

def _get_exchange_metadata(self):
Expand Down
3 changes: 1 addition & 2 deletions yfinance/ticker.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ def _download_options(self, date=None):
r = self._data.get(url=url, proxy=self.proxy).json()
if len(r.get('optionChain', {}).get('result', [])) > 0:
for exp in r['optionChain']['result'][0]['expirationDates']:
self._expirations[_datetime.datetime.utcfromtimestamp(
exp).strftime('%Y-%m-%d')] = exp
self._expirations[_pd.Timestamp(exp, unit='s').strftime('%Y-%m-%d')] = exp

self._underlying = r['optionChain']['result'][0].get('quote', {})

Expand Down

0 comments on commit 25b6175

Please sign in to comment.