Skip to content

Commit

Permalink
Remove 'repair_intervals'
Browse files Browse the repository at this point in the history
  • Loading branch information
ValueRaider committed Jan 21, 2023
1 parent 1636839 commit 2b0ae5a
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 114 deletions.
38 changes: 0 additions & 38 deletions tests/prices.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,44 +261,6 @@ def test_dst_fix(self):
print("Weekly data not aligned to Monday")
raise

def test_correct_early_close(self):
    """Verify repair_intervals=True merges Yahoo's unaligned early-close rows.

    Stockholm exchange closed early on 2022-12-23 @ 13:02.
    For hourly intervals, Yahoo returns:
    - 13:00 filled with NaNs
    - 13:02 containing the data that belongs to 13:00
    Test that the repair fixes this without affecting other intervals.
    """
    tkr = "AEC.ST"
    start = "2022-12-01"
    end = "2023-01-01"
    data_cols = ["Open", "High", "Low", "Close", "Volume", "Dividends", "Stock Splits"]

    dat = yf.Ticker(tkr, session=self.session)
    df_old = dat.history(start=start, end=end, interval="1h", keepna=True)
    df_repair = dat.history(start=start, end=end, interval="1h", keepna=True, repair_intervals=True)

    tz = df_old.index.tz
    expected_intervals_fixed = [tz.localize(_dt.datetime(2022, 12, 23, 13, 0))]
    expected_intervals_lost = [tz.localize(_dt.datetime(2022, 12, 23, 13, 2))]

    # Test no unexpected intervals lost.
    # NB: _np.equal() returns an element-wise array (ValueError inside
    # assertTrue for multi-element arrays); use array_equal for a scalar.
    dts_lost = df_old.index[~df_old.index.isin(df_repair.index)]
    self.assertTrue(_np.array_equal(dts_lost.to_numpy(), _np.array(expected_intervals_lost)),
                    "Unexpected intervals were lost")

    # Test only the expected interval changed
    dts_shared = df_old.index[df_old.index.isin(df_repair.index)]
    f_changed = (df_old.loc[dts_shared, data_cols].to_numpy() != df_repair.loc[dts_shared, data_cols].to_numpy()).any(axis=1)
    self.assertTrue(f_changed.any(), "Expected data to change")
    dts_changed = dts_shared[f_changed]
    self.assertEqual(len(dts_changed), len(expected_intervals_fixed), "Different number of intervals changed")
    self.assertTrue(_np.array_equal(dts_changed.to_numpy(), _np.array(expected_intervals_fixed)),
                    "Unexpected intervals were changed")

    # Test the repaired interval now holds valid (non-NaN) data
    f_na = df_repair.loc[expected_intervals_fixed, data_cols].isna().any(axis=1)
    self.assertFalse(f_na.any(), "Repaired interval still contains NaNs")

def test_weekly_2rows_fix(self):
tkr = "AMZN"
start = _dt.date.today() - _dt.timedelta(days=14)
Expand Down
22 changes: 4 additions & 18 deletions yfinance/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,7 @@ def stats(self, proxy=None):

def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False, actions=True,
auto_adjust=True, back_adjust=False,
repair=None, # deprecated
repair_prices=False, repair_intervals=False,
auto_adjust=True, back_adjust=False, repair=None,
keepna=False,
proxy=None, rounding=False, timeout=10,
debug=True, raise_errors=False) -> pd.DataFrame:
Expand All @@ -113,12 +111,9 @@ def history(self, period="1mo", interval="1d",
Adjust all OHLC automatically? Default is True
back_adjust: bool
Back-adjusted data to mimic true historical prices
repair_prices: bool
repair: bool
Detect currency unit 100x mixups and attempt repair
Default is False
repair_intervals: bool
Detect unaligned intraday intervals (e.g. early-close timestamps) and attempt repair by merging
Default is False
keepna: bool
Keep NaN rows returned by Yahoo?
Default is False
Expand All @@ -139,11 +134,6 @@ def history(self, period="1mo", interval="1d",
exceptions instead of printing to console.
"""

# Handle deprecated arguments
if repair is not None:
print("WARNING: 'repair' is deprecated and will be removed in future version. Use 'repair_prices' instead")
repair_prices = repair

if start or period is None or period.lower() == "max":
# Check can get TZ. Fail => probably delisted
tz = self._get_ticker_tz(debug, proxy, timeout)
Expand Down Expand Up @@ -302,9 +292,6 @@ def history(self, period="1mo", interval="1d",
quotes = utils.fix_Yahoo_dst_issue(quotes, params["interval"])
quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)

if repair_intervals:
quotes = utils.fix_Yahoo_including_unaligned_intervals(quotes, params["interval"])

# actions
dividends, splits, capital_gains = utils.parse_actions(data["chart"]["result"][0])
if not expect_capital_gains:
Expand Down Expand Up @@ -368,8 +355,7 @@ def history(self, period="1mo", interval="1d",
else:
df["Capital Gains"] = 0.0


if repair_prices:
if repair:
# Do this before auto/back adjust
df = self._fix_zeroes(df, interval, tz_exchange, prepost)
df = self._fix_unit_mixups(df, interval, tz_exchange, prepost)
Expand Down Expand Up @@ -553,7 +539,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1):
fetch_start = g[0]
fetch_end = g[-1] + td_range

df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair_prices=False, keepna=True)
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair=False, keepna=True)
if df_fine is None or df_fine.empty:
print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval")
continue
Expand Down
58 changes: 0 additions & 58 deletions yfinance/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,64 +472,6 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
return quotes


def fix_Yahoo_including_unaligned_intervals(quotes, interval):
    """Merge intraday rows whose timestamps fall inside the preceding interval.

    Yahoo sometimes returns an extra row at an unaligned timestamp,
    e.g. 13:02 alongside the 13:00 row with a 1h interval (seen on early
    exchange closes). Each such row is merged into the preceding row via
    merge_two_prices_intervals() and then dropped.

    quotes: DataFrame of prices indexed by ascending interval-start timestamps.
    interval: Yahoo interval string, e.g. "1h", "30m", "1d".
    Returns quotes, possibly with merged rows dropped.
    """
    # Only intraday data can contain unaligned timestamps.
    # NB: test the LAST character so multi-digit intervals ("15m", "30m",
    # "90m") are included — interval[1] would misclassify them — and so
    # "1mo" is excluded without a separate check.
    if interval[-1] not in ("m", "h"):
        return quotes

    n = quotes.shape[0]
    if n < 2:
        # Nothing to merge
        return quotes

    itd = _interval_to_timedelta(interval)
    td0 = _pd.Timedelta(0)
    iend = quotes.index + itd  # end timestamp of each interval

    # steps[i] < 0  =>  row i starts before row i-1's interval ends => overlap
    steps = _np.full(n, td0)
    steps[1:] = quotes.index[1:] - iend[0:n - 1]
    f_overlap = steps < td0
    if f_overlap.any():
        # Process overlaps one-at-a-time because some may be false positives.
        # Recalculate the subsequent step after resolving each overlap.
        overlaps_exist = True
        dts_to_drop = []
        dts_merged = []
        while overlaps_exist:
            i = _np.where(f_overlap)[0][0]
            dt1 = quotes.index[i - 1]  # aligned interval, kept
            dt2 = quotes.index[i]      # unaligned row, merged then dropped

            quotes.loc[dt1] = merge_two_prices_intervals(quotes.iloc[i - 1], quotes.iloc[i])
            dts_merged.append((dt2, dt1))
            dts_to_drop.append(dt2)

            # Clear record of i:
            f_overlap[i] = False
            steps[i] = td0
            # Recalc the following row's step relative to dt1's interval end
            # (guard: the overlapping row may be the last one):
            if i + 1 < n:
                steps[i + 1] = quotes.index[i + 1] - iend[i - 1]
                f_overlap[i + 1] = steps[i + 1] < td0
            overlaps_exist = f_overlap[i + 1:].any()

        print(f"Removed {len(dts_merged)} unaligned intervals by merging:")
        for dt_src, dt_dst in dts_merged:
            print(f"- {dt_src.date()}: {dt_src.time()} -> {dt_dst.time()}")
        quotes = quotes.drop(dts_to_drop)
    return quotes


def merge_two_prices_intervals(i1, i2):
TypeCheckSeries(i1, "i1")
TypeCheckSeries(i2, "i2")
Expand Down

0 comments on commit 2b0ae5a

Please sign in to comment.