From b49fd797fc29f234d5540cf90c03a275e01021fe Mon Sep 17 00:00:00 2001
From: ValueRaider
Date: Wed, 11 Jan 2023 17:41:34 +0000
Subject: [PATCH 01/10] Fix & improve price repair

Fix repair calibration & volume=0 repair ; Extend repair to sub-hour ;
Avoid attempting repair of mostly-NaN days
---
 yfinance/base.py | 169 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 117 insertions(+), 52 deletions(-)

diff --git a/yfinance/base.py b/yfinance/base.py
index 34deef9b0..68353e800 100644
--- a/yfinance/base.py
+++ b/yfinance/base.py
@@ -354,6 +354,21 @@ def history(self, period="1mo", interval="1d",
             else:
                 df["Capital Gains"] = 0.0
 
+        # # Drop any rows that are too close in time to previous row
+        # td = utils._interval_to_timedelta(interval)
+        # steps = _np.full(df.shape[0], td)
+        # steps[1:] = df.index[1:] - df.index[0:df.shape[0]-1]
+        # df["step"] = steps ; print(df) ; raise Exception("here")
+        # if td >= pd.Timedelta("1d"):
+        #     # Allow for DST
+        #     f_drop = steps < (td-pd.Timedelta('1h'))
+        # else:
+        #     f_drop = steps < td
+        # if f_drop.any():
+        #     print(df)
+        #     raise Exception("Dropping too-close rows @", df.index[f_drop])
+        #     df = df[~f_drop].copy()
+
         if repair:
             # Do this before auto/back adjust
             df = self._fix_zeroes(df, interval, tz_exchange)
@@ -407,25 +422,27 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1):
 
         # Reconstruct values in df using finer-grained price data. Delimiter marks what to reconstruct
 
+        debug = False
+        # debug = True
+
         price_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df]
         data_cols = price_cols + ["Volume"]
 
         # If interval is weekly then can construct with daily. But if smaller intervals then
         # restricted to recent times:
-        # - daily = hourly restricted to last 730 days
-        sub_interval = None
-        td_range = None
-        if interval == "1wk":
-            # Correct by fetching week of daily data
-            sub_interval = "1d"
-            td_range = _datetime.timedelta(days=7)
-        elif interval == "1d":
-            # Correct by fetching day of hourly data
-            sub_interval = "1h"
-            td_range = _datetime.timedelta(days=1)
-        elif interval == "1h":
-            sub_interval = "30m"
-            td_range = _datetime.timedelta(hours=1)
+        intervals = ["1wk", "1d", "1h", "30m", "15m", "5m", "2m", "1m"]
+        itds = {i:utils._interval_to_timedelta(i) for i in intervals}
+        nexts = {intervals[i]:intervals[i+1] for i in range(len(intervals)-1)}
+        min_lookbacks = {"1wk":None, "1d":None, "1h":_datetime.timedelta(days=730)}
+        for i in ["30m", "15m", "5m", "2m"]:
+            min_lookbacks[i] = _datetime.timedelta(days=60)
+        min_lookbacks["1m"] = _datetime.timedelta(days=30)
+        # Hopefully never have to use max_lengths, because complicates fetch logic
+        # max_lengths = {i:None for i in intervals}
+        # max_lengths["1m"] = _datetime.timedelta(days=7)
+        if interval in nexts:
+            sub_interval = nexts[interval]
+            td_range = itds[interval]
         else:
             print("WARNING: Have not implemented repair for '{}' interval. 
Contact developers".format(interval)) raise Exception("why here") @@ -437,15 +454,13 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): f_repair_rows = f_repair.any(axis=1) # Ignore old intervals for which Yahoo won't return finer data: - if sub_interval == "1h": - f_recent = _datetime.date.today() - df.index.date < _datetime.timedelta(days=730) + m = min_lookbacks[sub_interval] + if m is not None: + f_recent = _datetime.date.today() - df.index.date < m f_repair_rows = f_repair_rows & f_recent - elif sub_interval in ["30m", "15m"]: - f_recent = _datetime.date.today() - df.index.date < _datetime.timedelta(days=60) - f_repair_rows = f_repair_rows & f_recent - if not f_repair_rows.any(): - print("data too old to fix") - return df + if not f_repair_rows.any(): + print("data too old to fix") + return df dts_to_repair = df.index[f_repair_rows] indices_to_repair = _np.where(f_repair_rows)[0] @@ -454,7 +469,9 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): return df df_v2 = df.copy() - df_noNa = df[~df[price_cols].isna().any(axis=1)] + f_good = ~(df[price_cols].isna().any(axis=1)) + f_good = f_good & (df[price_cols].to_numpy()!=tag).all(axis=1) + df_good = df[f_good] # Group nearby NaN-intervals together to reduce number of Yahoo fetches dts_groups = [[dts_to_repair[0]]] @@ -471,7 +488,6 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): grp_td_threshold = _datetime.timedelta(days=7) else: grp_td_threshold = _datetime.timedelta(days=2) - # grp_td_threshold = _datetime.timedelta(days=7) for i in range(1, len(dts_to_repair)): ind = indices_to_repair[i] dt = dts_to_repair[i] @@ -488,25 +504,35 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): for i in range(len(dts_groups)): g = dts_groups[i] g0 = g[0] - i0 = df_noNa.index.get_loc(g0) + i0 = df_good.index.get_indexer([g0], method="nearest")[0] if i0 > 0: - dts_groups[i].insert(0, df_noNa.index[i0-1]) + i0 -= 1 gl = g[-1] - il = df_noNa.index.get_loc(gl) - if il < len(df_noNa)-1: - dts_groups[i].append(df_noNa.index[il+1]) + il = df_good.index.get_indexer([gl], method="nearest")[0] + if il < len(df_good)-1: + il += 1 + good_dts = df_good.index[i0:il+1] + dts_groups[i] += good_dts.to_list() + dts_groups[i].sort() n_fixed = 0 for g in dts_groups: df_block = df[df.index.isin(g)] + if debug: + print("- df_block:") + print(df_block) start_dt = g[0] start_d = start_dt.date() if sub_interval == "1h" and (_datetime.date.today() - start_d) > _datetime.timedelta(days=729): # Don't bother requesting more price data, Yahoo will reject + if debug: + print(f"- Don't bother requesting {sub_interval} price data, Yahoo will reject") continue elif sub_interval in ["30m", "15m"] and (_datetime.date.today() - start_d) > _datetime.timedelta(days=59): # Don't bother requesting more price data, Yahoo will reject + if debug: + print(f"- Don't bother requesting {sub_interval} price data, Yahoo will reject") continue td_1d = _datetime.timedelta(days=1) @@ -528,7 +554,6 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): df_fine["ctr"] = 0 if interval == "1wk": - # df_fine["Week Start"] = df_fine.index.tz_localize(None).to_period("W-SUN").start_time weekdays = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"] week_end_day = weekdays[(df_block.index[0].weekday()+7-1)%7] df_fine["Week Start"] = df_fine.index.tz_localize(None).to_period("W-"+week_end_day).start_time @@ -557,31 +582,36 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): new_index = _np.append([df_fine.index[0]], 
df_fine.index[df_fine["intervalID"].diff()>0]) df_new.index = new_index + if debug: + print("- df_new:") + print(df_new) + # Calibrate! Check whether 'df_fine' has different split-adjustment. # If different, then adjust to match 'df' - df_block_calib = df_block[price_cols] - common_index = df_block_calib.index[df_block_calib.index.isin(df_new.index)] + common_index = _np.intersect1d(df_block.index, df_new.index) if len(common_index) == 0: # Can't calibrate so don't attempt repair + if debug: + print("Can't calibrate so don't attempt repair") continue - df_new_calib = df_new[df_new.index.isin(common_index)][price_cols] - df_block_calib = df_block_calib[df_block_calib.index.isin(common_index)] - calib_filter = (df_block_calib != tag).to_numpy() + df_new_calib = df_new[df_new.index.isin(common_index)][price_cols].to_numpy() + df_block_calib = df_block[df_block.index.isin(common_index)][price_cols].to_numpy() + calib_filter = (df_block_calib != tag) if not calib_filter.any(): # Can't calibrate so don't attempt repair + if debug: + print("Can't calibrate so don't attempt repair") continue - # Avoid divide-by-zero warnings printing: - df_new_calib = df_new_calib.to_numpy() - df_block_calib = df_block_calib.to_numpy() + # Avoid divide-by-zero warnings: for j in range(len(price_cols)): - c = price_cols[j] f = ~calib_filter[:,j] if f.any(): df_block_calib[f,j] = 1 df_new_calib[f,j] = 1 - ratios = (df_block_calib / df_new_calib)[calib_filter] + ratios = df_block_calib[calib_filter] / df_new_calib[calib_filter] ratio = _np.mean(ratios) - # + if debug: + print(f"- price calibration ratio = {ratio}") ratio_rcp = round(1.0 / ratio, 1) ratio = round(ratio, 1) if ratio == 1 and ratio_rcp == 1: @@ -600,13 +630,14 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): df_new["Volume"] *= ratio_rcp # Repair! - bad_dts = df_block.index[(df_block[price_cols]==tag).any(axis=1)] + bad_dts = df_block.index[(df_block[price_cols+["Volume"]]==tag).any(axis=1)] for idx in bad_dts: if not idx in df_new.index: # Yahoo didn't return finer-grain data for this interval, # so probably no trading happened. - # print("no fine data") + if debug: + print(f"Yahoo didn't return finer-grain data for interval {idx}") continue df_new_row = df_new.loc[idx] @@ -635,6 +666,9 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): df_v2.loc[idx, "Volume"] = df_new_row["Volume"] n_fixed += 1 + if debug: + print("df_v2:") ; print(df_v2) + return df_v2 def _fix_unit_mixups(self, df, interval, tz_exchange): @@ -659,7 +693,7 @@ def _fix_unit_mixups(self, df, interval, tz_exchange): # adding it to dependencies. from scipy import ndimage as _ndimage - data_cols = ["High", "Open", "Low", "Close"] # Order important, separate High from Low + data_cols = ["High", "Open", "Low", "Close", "Adj Close"] # Order important, separate High from Low data_cols = [c for c in data_cols if c in df2.columns] f_zeroes = (df2[data_cols]==0).any(axis=1) if f_zeroes.any(): @@ -707,6 +741,11 @@ def _fix_unit_mixups(self, df, interval, tz_exchange): if fi[j]: df2.loc[idx, c] = df.loc[idx, c] * 0.01 # + c = "Adj Close" + j = data_cols.index(c) + if fi[j]: + df2.loc[idx, c] = df.loc[idx, c] * 0.01 + # c = "High" j = data_cols.index(c) if fi[j]: @@ -749,6 +788,12 @@ def _fix_zeroes(self, df, interval, tz_exchange): if df.shape[0] == 0: return df + debug = False + # debug = True + + intraday = interval[-1] in ("m", 'h') + + df = df.sort_index() # important! 
df2 = df.copy() if df2.index.tz is None: @@ -757,16 +802,29 @@ def _fix_zeroes(self, df, interval, tz_exchange): df2.index = df2.index.tz_convert(tz_exchange) price_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df2.columns] - f_zero_or_nan = (df2[price_cols] == 0.0).values | df2[price_cols].isna().values + f_zero_or_nan = (df2[price_cols] == 0.0) | df2[price_cols].isna() + df2_reserve = None + if intraday: + # Ignore days with >50% intervals containing NaNs + df_nans = pd.DataFrame(f_zero_or_nan.any(axis=1), columns=["nan"]) + df_nans["_date"] = df_nans.index.date + grp = df_nans.groupby("_date") + nan_pct = grp.sum() / grp.count() + dts = nan_pct.index[nan_pct["nan"]>0.5] + f_zero_or_nan_ignore = _np.isin(f_zero_or_nan.index.date, dts) + df2_reserve = df2[f_zero_or_nan_ignore] + df2 = df2[~f_zero_or_nan_ignore] + f_zero_or_nan = (df2[price_cols] == 0.0) | df2[price_cols].isna() # Check whether worth attempting repair + f_zero_or_nan = f_zero_or_nan.to_numpy() if f_zero_or_nan.any(axis=1).sum() == 0: + if debug: + print("no bad data to repair") return df if f_zero_or_nan.sum() == len(price_cols)*len(df2): # Need some good data to calibrate - return df - # - avoid repair if many zeroes/NaNs - pct_zero_or_nan = f_zero_or_nan.sum() / (len(price_cols)*len(df2)) - if f_zero_or_nan.any(axis=1).sum()>2 and pct_zero_or_nan > 0.05: + if debug: + print("no good data to calibrate") return df data_cols = price_cols + ["Volume"] @@ -777,15 +835,22 @@ def _fix_zeroes(self, df, interval, tz_exchange): c = price_cols[i] df2.loc[f_zero_or_nan[:,i], c] = tag # If volume=0 or NaN for bad prices, then tag volume for repair - df2.loc[f_zero_or_nan.any(axis=1) & (df2["Volume"]==0), "Volume"] = tag - df2.loc[f_zero_or_nan.any(axis=1) & (df2["Volume"].isna()), "Volume"] = tag + f_vol_zero_or_nan = (df2["Volume"].to_numpy()==0) | (df2["Volume"].isna().to_numpy()) + df2.loc[f_zero_or_nan.any(axis=1) & f_vol_zero_or_nan, "Volume"] = tag + # If volume=0 or NaN but price moved in interval, then tag volume for repair + f_change = df2["High"].to_numpy() != df2["Low"].to_numpy() + df2.loc[f_change & f_vol_zero_or_nan, "Volume"] = tag n_before = (df2[data_cols].to_numpy()==tag).sum() df2 = self._reconstruct_intervals_batch(df2, interval, tag=tag) n_after = (df2[data_cols].to_numpy()==tag).sum() n_fixed = n_before - n_after if n_fixed > 0: - print("{}: fixed {} price=0.0 errors in {} price data".format(self.ticker, n_fixed, interval)) + print(f"{self.ticker}: fixed {n_fixed}/{n_before} value=0 errors in {interval} price data") + + if df2_reserve is not None: + df2 = _pd.concat([df2, df2_reserve]) + df2 = df2.sort_index() # Restore original values where repair failed (i.e. 
remove tag values) f = df2[data_cols].values==tag From 7460dbea17b939acd9b747d0942bcb14e169445d Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sun, 15 Jan 2023 20:58:34 +0000 Subject: [PATCH 02/10] If reconstructing 1d interval with 1h, always request prepost --- yfinance/base.py | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 68353e800..aa71d27d0 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -354,25 +354,11 @@ def history(self, period="1mo", interval="1d", else: df["Capital Gains"] = 0.0 - # # Drop any rows that are too close in time to previous row - # td = utils._interval_to_timedelta(interval) - # steps = _np.full(df.shape[0], td) - # steps[1:] = df.index[1:] - df.index[0:df.shape[0]-1] - # df["step"] = steps ; print(df) ; raise Exception("here") - # if td >= pd.Timedelta("1d"): - # # Allow for DST - # f_drop = steps < (td-pd.Timedelta('1h')) - # else: - # f_drop = steps < td - # if f_drop.any(): - # print(df) - # raise Exception("Dropping too-close rows @", df.index[f_drop]) - # df = df[~f_drop].copy() if repair: # Do this before auto/back adjust - df = self._fix_zeroes(df, interval, tz_exchange) - df = self._fix_unit_mixups(df, interval, tz_exchange) + df = self._fix_zeroes(df, interval, tz_exchange, prepost) + df = self._fix_unit_mixups(df, interval, tz_exchange, prepost) # Auto/back adjust try: @@ -416,7 +402,7 @@ def history(self, period="1mo", interval="1d", # ------------------------ - def _reconstruct_intervals_batch(self, df, interval, tag=-1): + def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): if not isinstance(df, _pd.DataFrame): raise Exception("'df' must be a Pandas DataFrame not", type(df)) @@ -425,6 +411,10 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): debug = False # debug = True + if interval[1:] in ['d', 'wk', 'mo']: + # Interday data always includes pre & post + prepost = True + price_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df] data_cols = price_cols + ["Volume"] @@ -546,7 +536,6 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): fetch_start = g[0] fetch_end = g[-1] + td_range - prepost = interval == "1d" df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair=False, keepna=True) if df_fine is None or df_fine.empty: print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval") @@ -671,7 +660,7 @@ def _reconstruct_intervals_batch(self, df, interval, tag=-1): return df_v2 - def _fix_unit_mixups(self, df, interval, tz_exchange): + def _fix_unit_mixups(self, df, interval, tz_exchange, prepost): # Sometimes Yahoo returns few prices in cents/pence instead of $/£ # I.e. 100x bigger # Easy to detect and fix, just look for outliers = ~100x local median @@ -718,7 +707,7 @@ def _fix_unit_mixups(self, df, interval, tz_exchange): df2.loc[fi, c] = tag n_before = (df2[data_cols].to_numpy()==tag).sum() - df2 = self._reconstruct_intervals_batch(df2, interval, tag=tag) + df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag=tag) n_after = (df2[data_cols].to_numpy()==tag).sum() if n_after > 0: @@ -780,7 +769,7 @@ def _fix_unit_mixups(self, df, interval, tz_exchange): return df2 - def _fix_zeroes(self, df, interval, tz_exchange): + def _fix_zeroes(self, df, interval, tz_exchange, prepost): # Sometimes Yahoo returns prices=0 or NaN when trades occurred. 
# But most times when prices=0 or NaN returned is because no trades. # Impossible to distinguish, so only attempt repair if few or rare. @@ -842,7 +831,7 @@ def _fix_zeroes(self, df, interval, tz_exchange): df2.loc[f_change & f_vol_zero_or_nan, "Volume"] = tag n_before = (df2[data_cols].to_numpy()==tag).sum() - df2 = self._reconstruct_intervals_batch(df2, interval, tag=tag) + df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag=tag) n_after = (df2[data_cols].to_numpy()==tag).sum() n_fixed = n_before - n_after if n_fixed > 0: From 197d2968e3abf83713628ca7078404fe20eb7d1b Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Thu, 19 Jan 2023 22:19:16 +0000 Subject: [PATCH 03/10] Add 'repair_intervals', rename 'repair'->'repair_prices' --- README.md | 5 +- tests/prices.py | 48 ++++++++++++++++-- yfinance/base.py | 22 ++++++-- yfinance/utils.py | 127 +++++++++++++++++++++++++++++++++++++++------- 4 files changed, 174 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index c3ff752c3..27c10996a 100644 --- a/README.md +++ b/README.md @@ -225,8 +225,9 @@ data = yf.download( # or pdr.get_data_yahoo(... # (optional, default is False) auto_adjust = True, - # attempt repair of missing data or currency mixups e.g. $/cents - repair = False, + # attempt repair of Yahoo data issues + repair_prices = False, + repair_intervals = False, # download pre/post regular market hours data # (optional, default is False) diff --git a/tests/prices.py b/tests/prices.py index e0d722578..9eb983aec 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -261,6 +261,44 @@ def test_dst_fix(self): print("Weekly data not aligned to Monday") raise + def test_correct_early_close(self): + # Stockholm exchange closed early on 2022-12-23 @ 13:02. + # For hourly intervals, Yahoo returns: + # - 13:00 filled with NaNs + # - 13:02 contains data for 13:00 + # Test that 'repair' fixes this without affecting other intervals. 
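For reference, the flag added above is exercised below exactly as this test does; a minimal usage sketch (ticker and dates are the test's own, and note that patch 06 later removes 'repair_intervals' again):

    import yfinance as yf

    dat = yf.Ticker("AEC.ST")
    # Hourly bars across the early close; repair_intervals merges the stray
    # 13:02 row into its enclosing 13:00 interval.
    df = dat.history(start="2022-12-01", end="2023-01-01", interval="1h",
                     keepna=True, repair_intervals=True)
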
+ tkr = "AEC.ST" + d = "2022-12-23" + start = "2022-12-01" + end = "2023-01-01" + data_cols = ["Open","High","Low","Close","Volume","Dividends","Stock Splits"] + + dat = yf.Ticker(tkr, session=self.session) + df_old = dat.history(start=start, end=end, interval="1h", keepna=True) + df_repair = dat.history(start=start, end=end, interval="1h", keepna=True, repair_intervals=True) + + tz = df_old.index.tz + expected_intervals_fixed = [] + expected_intervals_fixed.append(tz.localize(_dt.datetime(2022,12,23,13,0))) + expected_intervals_lost = [] + expected_intervals_lost.append(tz.localize(_dt.datetime(2022,12,23,13,2))) + + # Test no unexpected intervals lost + dts_lost = df_old.index[~df_old.index.isin(df_repair.index)] + self.assertTrue(_np.equal(dts_lost.to_numpy(), expected_intervals_lost)) + + # Test only the expected interval changed + dts_shared = df_old.index[df_old.index.isin(df_repair.index)] + f_changed = (df_old.loc[dts_shared, data_cols].to_numpy() != df_repair.loc[dts_shared, data_cols].to_numpy()).any(axis=1) + self.assertTrue(f_changed.any(), "Expected data to change") + dts_changed = dts_shared[f_changed] + self.assertEqual(len(dts_changed), len(expected_intervals_fixed), "Different number of intervals changed") + self.assertTrue(_np.equal(dts_shared[f_changed], expected_intervals_fixed), "Unexpected intervals were changed") + + # Test the expected interval is valid data + f_na = df_repair.loc[expected_intervals_fixed, data_cols].isna().any(axis=1) + self.assertFalse(f_na.any(), "Repaired interval still contains NaNs") + def test_weekly_2rows_fix(self): tkr = "AMZN" start = _dt.date.today() - _dt.timedelta(days=14) @@ -298,7 +336,7 @@ def test_repair_100x_weekly(self): # Run test - df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange) + df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False) # First test - no errors left for c in data_cols: @@ -353,7 +391,7 @@ def test_repair_100x_weekly_preSplit(self): df.index = df.index.tz_localize(tz_exchange) df_bad.index = df_bad.index.tz_localize(tz_exchange) - df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange) + df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False) # First test - no errors left for c in data_cols: @@ -403,7 +441,7 @@ def test_repair_100x_daily(self): df.index = df.index.tz_localize(tz_exchange) df_bad.index = df_bad.index.tz_localize(tz_exchange) - df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange) + df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange, prepost=False) # First test - no errors left for c in data_cols: @@ -438,7 +476,7 @@ def test_repair_zeroes_daily(self): df_bad.index.name = "Date" df_bad.index = df_bad.index.tz_localize(tz_exchange) - repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange) + repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False) correct_df = df_bad.copy() correct_df.loc["2022-11-01", "Open"] = 102.080002 @@ -467,7 +505,7 @@ def test_repair_zeroes_hourly(self): df_bad.index.name = "Date" df_bad.index = df_bad.index.tz_localize(tz_exchange) - repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange) + repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False) correct_df = df_bad.copy() idx = _pd.Timestamp(2022,11,25, 12,30).tz_localize(tz_exchange) diff --git a/yfinance/base.py b/yfinance/base.py index aa71d27d0..ca26637c5 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -86,7 +86,10 @@ def stats(self, proxy=None): def history(self, period="1mo", interval="1d", 
                 start=None, end=None, prepost=False, actions=True,
-                auto_adjust=True, back_adjust=False, repair=False, keepna=False,
+                auto_adjust=True, back_adjust=False,
+                repair=None, # deprecated
+                repair_prices=False, repair_intervals=False,
+                keepna=False,
                 proxy=None, rounding=False, timeout=10,
                 debug=True, raise_errors=False) -> pd.DataFrame:
         """
@@ -110,9 +113,12 @@ def history(self, period="1mo", interval="1d",
             Adjust all OHLC automatically? Default is True
         back_adjust: bool
             Back-adjusted data to mimic true historical prices
-        repair: bool
+        repair_prices: bool
             Detect currency unit 100x mixups and attempt repair
             Default is False
+        repair_intervals: bool
+            Detect and repair unaligned intraday intervals, e.g. caused by an early market close
+            Default is False
         keepna: bool
             Keep NaN rows returned by Yahoo?
             Default is False
@@ -133,6 +139,11 @@ def history(self, period="1mo", interval="1d",
             exceptions instead of printing to console.
         """
 
+        # Handle deprecated arguments
+        if repair is not None:
+            print("WARNING: 'repair' is deprecated and will be removed in a future version. Use 'repair_prices' instead")
+            repair_prices = repair
+
         if start or period is None or period.lower() == "max":
             # Check can get TZ. Fail => probably delisted
             tz = self._get_ticker_tz(debug, proxy, timeout)
@@ -291,6 +302,9 @@ def history(self, period="1mo", interval="1d",
             quotes = utils.fix_Yahoo_dst_issue(quotes, params["interval"])
             quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
 
+            if repair_intervals:
+                quotes = utils.fix_Yahoo_including_unaligned_intervals(quotes, params["interval"])
+
             # actions
             dividends, splits, capital_gains = utils.parse_actions(data["chart"]["result"][0])
             if not expect_capital_gains:
@@ -355,7 +369,7 @@ def history(self, period="1mo", interval="1d",
         else:
             df["Capital Gains"] = 0.0
 
-        if repair:
+        if repair_prices:
             # Do this before auto/back adjust
             df = self._fix_zeroes(df, interval, tz_exchange, prepost)
             df = self._fix_unit_mixups(df, interval, tz_exchange, prepost)
@@ -536,7 +550,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1):
 
             fetch_start = g[0]
             fetch_end = g[-1] + td_range
-            df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair=False, keepna=True)
+            df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair_prices=False, keepna=True)
             if df_fine is None or df_fine.empty:
                 print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval")
                 continue
diff --git a/yfinance/utils.py b/yfinance/utils.py
index 48b043435..7922f19a2 100644
--- a/yfinance/utils.py
+++ b/yfinance/utils.py
@@ -49,6 +49,11 @@
     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
 
 
+def TypeCheckSeries(var, varName):
+    if not isinstance(var, _pd.Series) or isinstance(var, _pd.DataFrame):
+        raise TypeError(f"'{varName}' must be _pd.Series not {type(var)}")
+
+
 def is_isin(string):
     return bool(_re.match("^([A-Z]{2})([A-Z0-9]{9})([0-9]{1})$", string))
 
@@ -307,7 +312,7 @@ def _parse_user_dt(dt, exchange_tz):
 
 def _interval_to_timedelta(interval):
     if interval == "1mo":
-        return _dateutil.relativedelta(months=1)
+        return _dateutil.relativedelta.relativedelta(months=1)
     elif interval == "1wk":
         return _pd.Timedelta(days=7, unit='d')
     else:
@@ -459,26 +464,114 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
 
     if last_rows_same_interval:
         # Last two rows are within same interval
-        idx1 = quotes.index[n - 1]
-        idx2 = 
quotes.index[n - 2] - if _np.isnan(quotes.loc[idx2, "Open"]): - quotes.loc[idx2, "Open"] = quotes["Open"][n - 1] - # Note: nanmax() & nanmin() ignores NaNs - quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]]) - quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]]) - quotes.loc[idx2, "Close"] = quotes["Close"][n - 1] - if "Adj High" in quotes.columns: - quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"][n - 1], quotes["Adj High"][n - 2]]) - if "Adj Low" in quotes.columns: - quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"][n - 1], quotes["Adj Low"][n - 2]]) - if "Adj Close" in quotes.columns: - quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"][n - 1] - quotes.loc[idx2, "Volume"] += quotes["Volume"][n - 1] - quotes = quotes.drop(quotes.index[n - 1]) + ia = quotes.index[n - 2] + ib = quotes.index[n - 1] + quotes.loc[ia] = merge_two_prices_intervals(quotes.loc[ia], quotes.loc[ib]) + quotes = quotes.drop(ib) + + return quotes + + +def fix_Yahoo_including_unaligned_intervals(quotes, interval): + if interval[1] not in ['m', 'h']: + # Only correct intraday + return quotes + # Merge adjacent rows if in same interval + # e.g. 13:02pm with 13:00pm 1h interval + n = quotes.shape[0] + itd = _interval_to_timedelta(interval) + td0 = _pd.Timedelta(0) + iend = quotes.index + itd + if interval[1:] in ["d", "wk", "mo"]: + # # Allow for DST + # iend -= _pd.Timedelta('2h') + return quotes + steps = _np.full(n, td0) + steps[1:] = quotes.index[1:] - iend[0:n-1] + f_overlap = steps < td0 + if f_overlap.any(): + # Process overlaps one-at-time because some may be false positives. + # Recalculate subsequent step after removing an overlap. + overlaps_exist = True + n_merged = 0 + dts_to_drop = [] + while overlaps_exist: + indices = _np.where(f_overlap)[0] + i = indices[0] + dt1 = quotes.index[i-1] + dt2 = quotes.index[i] + dt3 = quotes.index[i+1] + + dropped_dt = dt2 + quotes.loc[dt1] = merge_two_prices_intervals(quotes.iloc[i-1], quotes.iloc[i]) + + # Remove record of i: + dts_to_drop.append(dt2) + f_overlap[i] = False + steps[i] = td0 + # Recalc step of following dt: + steps[i+1] = quotes.index[i+1] - iend[i-1] + + f_overlap[i+1] = steps[i+1] < td0 + overlaps_exist = f_overlap[i+1:].any() + # Useful debug code: + # for d in [str(dt.date()) for dt in dts_to_drop]: + # print(quotes.loc[d]) + print("Dropping unaligned intervals:", dts_to_drop) + quotes = quotes.drop(dts_to_drop) return quotes +def merge_two_prices_intervals(i1, i2): + TypeCheckSeries(i1, "i1") + TypeCheckSeries(i2, "i2") + + price_cols = ["Open", "High", "Low", "Close"] + na1 = i1[price_cols].isna().all() + na2 = i2[price_cols].isna().all() + if na1 and na2: + return i1 + elif na1: + return i2 + elif na2: + return i1 + + # First check if two intervals are almost identical. 
If yes, keep 2nd + ratio = _np.mean(i2[price_cols+["Volume"]] / i1[price_cols+["Volume"]]) + if ratio > 0.99 and ratio < 1.01: + return i2 + + m = i1.copy() + + if _np.isnan(m["Open"]): + m["Open"] = i2["Open"] + if "Adj Open" in m.index: + m["Adj Open"] = i2["Adj Open"] + + # Note: nanmax() & nanmin() ignores NaNs + m["High"] = _np.nanmax([i2["High"], i1["High"]]) + m["Low"] = _np.nanmin([i2["Low"], i1["Low"]]) + if not _np.isnan(i2["Close"]): + m["Close"] = i2["Close"] + + if "Adj High" in m.index: + m["Adj High"] = _np.nanmax([i2["Adj High"], i1["Adj High"]]) + if "Adj Low" in m.index: + m["Adj Low"] = _np.nanmin([i2["Adj Low"], i1["Adj Low"]]) + if "Adj Close" in m.index: + m["Adj Close"] = i2["Adj Close"] + + if _np.isnan(m["Volume"]): + m["Volume"] = i2["Volume"] + elif _np.isnan(i2["Volume"]): + pass + else: + m["Volume"] += i2["Volume"] + + return m + + def safe_merge_dfs(df_main, df_sub, interval): # Carefully merge 'df_sub' onto 'df_main' # If naive merge fails, try again with reindexing df_sub: From 65b97d024b211a40472e14170dbe84a40b2301ee Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 20 Jan 2023 00:13:02 +0000 Subject: [PATCH 04/10] Improve reporting --- yfinance/base.py | 2 +- yfinance/utils.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index ca26637c5..0dc32722f 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -463,7 +463,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): f_recent = _datetime.date.today() - df.index.date < m f_repair_rows = f_repair_rows & f_recent if not f_repair_rows.any(): - print("data too old to fix") + # print("data too old to repair") return df dts_to_repair = df.index[f_repair_rows] diff --git a/yfinance/utils.py b/yfinance/utils.py index 7922f19a2..4b4ab4f58 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -496,6 +496,7 @@ def fix_Yahoo_including_unaligned_intervals(quotes, interval): overlaps_exist = True n_merged = 0 dts_to_drop = [] + dts_merged = [] while overlaps_exist: indices = _np.where(f_overlap)[0] i = indices[0] @@ -505,6 +506,7 @@ def fix_Yahoo_including_unaligned_intervals(quotes, interval): dropped_dt = dt2 quotes.loc[dt1] = merge_two_prices_intervals(quotes.iloc[i-1], quotes.iloc[i]) + dts_merged.append((dt2, dt1)) # Remove record of i: dts_to_drop.append(dt2) @@ -515,10 +517,15 @@ def fix_Yahoo_including_unaligned_intervals(quotes, interval): f_overlap[i+1] = steps[i+1] < td0 overlaps_exist = f_overlap[i+1:].any() + # Useful debug code: # for d in [str(dt.date()) for dt in dts_to_drop]: # print(quotes.loc[d]) - print("Dropping unaligned intervals:", dts_to_drop) + # + # print("Dropped unaligned intervals:", dts_to_drop) + print(f"Removed {len(dts_merged)} unaligned intervals by merging:") + for i in range(len(dts_merged)): + print(f"- {dts_merged[i][0].date()}: {dts_merged[i][0].time()} -> {dts_merged[i][1].time()}") quotes = quotes.drop(dts_to_drop) return quotes From 1636839b67cf49784f0918706388338c9fe381a7 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 20 Jan 2023 00:13:28 +0000 Subject: [PATCH 05/10] Handle request to reconstruct 1m --- yfinance/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yfinance/base.py b/yfinance/base.py index 0dc32722f..a83cb5aa2 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -419,6 +419,9 @@ def history(self, period="1mo", interval="1d", def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): if not isinstance(df, _pd.DataFrame): raise 
Exception("'df' must be a Pandas DataFrame not", type(df)) + if interval == "1m": + # Can't go smaller than 1m so can't reconstruct + return df # Reconstruct values in df using finer-grained price data. Delimiter marks what to reconstruct From 2b0ae5a6c1993816511ec8da7bd62385f73a2dfb Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Fri, 20 Jan 2023 17:29:01 +0000 Subject: [PATCH 06/10] Remove 'repair_intervals' --- tests/prices.py | 38 ------------------------------- yfinance/base.py | 22 ++++-------------- yfinance/utils.py | 58 ----------------------------------------------- 3 files changed, 4 insertions(+), 114 deletions(-) diff --git a/tests/prices.py b/tests/prices.py index 9eb983aec..52ac94f33 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -261,44 +261,6 @@ def test_dst_fix(self): print("Weekly data not aligned to Monday") raise - def test_correct_early_close(self): - # Stockholm exchange closed early on 2022-12-23 @ 13:02. - # For hourly intervals, Yahoo returns: - # - 13:00 filled with NaNs - # - 13:02 contains data for 13:00 - # Test that 'repair' fixes this without affecting other intervals. - tkr = "AEC.ST" - d = "2022-12-23" - start = "2022-12-01" - end = "2023-01-01" - data_cols = ["Open","High","Low","Close","Volume","Dividends","Stock Splits"] - - dat = yf.Ticker(tkr, session=self.session) - df_old = dat.history(start=start, end=end, interval="1h", keepna=True) - df_repair = dat.history(start=start, end=end, interval="1h", keepna=True, repair_intervals=True) - - tz = df_old.index.tz - expected_intervals_fixed = [] - expected_intervals_fixed.append(tz.localize(_dt.datetime(2022,12,23,13,0))) - expected_intervals_lost = [] - expected_intervals_lost.append(tz.localize(_dt.datetime(2022,12,23,13,2))) - - # Test no unexpected intervals lost - dts_lost = df_old.index[~df_old.index.isin(df_repair.index)] - self.assertTrue(_np.equal(dts_lost.to_numpy(), expected_intervals_lost)) - - # Test only the expected interval changed - dts_shared = df_old.index[df_old.index.isin(df_repair.index)] - f_changed = (df_old.loc[dts_shared, data_cols].to_numpy() != df_repair.loc[dts_shared, data_cols].to_numpy()).any(axis=1) - self.assertTrue(f_changed.any(), "Expected data to change") - dts_changed = dts_shared[f_changed] - self.assertEqual(len(dts_changed), len(expected_intervals_fixed), "Different number of intervals changed") - self.assertTrue(_np.equal(dts_shared[f_changed], expected_intervals_fixed), "Unexpected intervals were changed") - - # Test the expected interval is valid data - f_na = df_repair.loc[expected_intervals_fixed, data_cols].isna().any(axis=1) - self.assertFalse(f_na.any(), "Repaired interval still contains NaNs") - def test_weekly_2rows_fix(self): tkr = "AMZN" start = _dt.date.today() - _dt.timedelta(days=14) diff --git a/yfinance/base.py b/yfinance/base.py index a83cb5aa2..5ab863377 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -86,9 +86,7 @@ def stats(self, proxy=None): def history(self, period="1mo", interval="1d", start=None, end=None, prepost=False, actions=True, - auto_adjust=True, back_adjust=False, - repair=None, # deprecated - repair_prices=False, repair_intervals=False, + auto_adjust=True, back_adjust=False, repair=None, keepna=False, proxy=None, rounding=False, timeout=10, debug=True, raise_errors=False) -> pd.DataFrame: @@ -113,12 +111,9 @@ def history(self, period="1mo", interval="1d", Adjust all OHLC automatically? 
Default is True
         back_adjust: bool
             Back-adjusted data to mimic true historical prices
-        repair_prices: bool
+        repair: bool
             Detect currency unit 100x mixups and attempt repair
             Default is False
-        repair_intervals: bool
-            Detect and repair unaligned intraday intervals, e.g. caused by an early market close
-            Default is False
         keepna: bool
             Keep NaN rows returned by Yahoo?
             Default is False
@@ -139,11 +134,6 @@ def history(self, period="1mo", interval="1d",
             exceptions instead of printing to console.
         """
 
-        # Handle deprecated arguments
-        if repair is not None:
-            print("WARNING: 'repair' is deprecated and will be removed in a future version. Use 'repair_prices' instead")
-            repair_prices = repair
-
         if start or period is None or period.lower() == "max":
             # Check can get TZ. Fail => probably delisted
             tz = self._get_ticker_tz(debug, proxy, timeout)
@@ -302,9 +292,6 @@ def history(self, period="1mo", interval="1d",
             quotes = utils.fix_Yahoo_dst_issue(quotes, params["interval"])
             quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
 
-            if repair_intervals:
-                quotes = utils.fix_Yahoo_including_unaligned_intervals(quotes, params["interval"])
-
             # actions
             dividends, splits, capital_gains = utils.parse_actions(data["chart"]["result"][0])
             if not expect_capital_gains:
@@ -368,8 +355,7 @@ def history(self, period="1mo", interval="1d",
         else:
             df["Capital Gains"] = 0.0
 
-
-        if repair_prices:
+        if repair:
             # Do this before auto/back adjust
             df = self._fix_zeroes(df, interval, tz_exchange, prepost)
             df = self._fix_unit_mixups(df, interval, tz_exchange, prepost)
@@ -553,7 +539,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1):
 
             fetch_start = g[0]
             fetch_end = g[-1] + td_range
-            df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair_prices=False, keepna=True)
+            df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair=False, keepna=True)
             if df_fine is None or df_fine.empty:
                 print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval")
                 continue
diff --git a/yfinance/utils.py b/yfinance/utils.py
index 4b4ab4f58..1de147a1a 100644
--- a/yfinance/utils.py
+++ b/yfinance/utils.py
@@ -472,64 +472,6 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
     return quotes
 
 
-def fix_Yahoo_including_unaligned_intervals(quotes, interval):
-    if interval[1] not in ['m', 'h']:
-        # Only correct intraday
-        return quotes
-
-    # Merge adjacent rows if in same interval
-    # e.g. 13:02pm with 13:00pm 1h interval
-    n = quotes.shape[0]
-    itd = _interval_to_timedelta(interval)
-    td0 = _pd.Timedelta(0)
-    iend = quotes.index + itd
-    if interval[1:] in ["d", "wk", "mo"]:
-        # # Allow for DST
-        # iend -= _pd.Timedelta('2h')
-        return quotes
-    steps = _np.full(n, td0)
-    steps[1:] = quotes.index[1:] - iend[0:n-1]
-    f_overlap = steps < td0
-    if f_overlap.any():
-        # Process overlaps one-at-time because some may be false positives.
-        # Recalculate subsequent step after removing an overlap. 
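Before the helper disappears below, its overlap test is worth noting; a standalone sketch of the same step arithmetic (timestamps are hypothetical):

    import numpy as np
    import pandas as pd

    idx = pd.DatetimeIndex(["2022-12-23 11:00", "2022-12-23 12:00",
                            "2022-12-23 13:00", "2022-12-23 13:02"])
    itd = pd.Timedelta("1h")
    steps = np.full(len(idx), pd.Timedelta(0))
    steps[1:] = idx[1:] - (idx[:-1] + itd)   # gap to the previous interval's end
    f_overlap = steps < pd.Timedelta(0)      # True only for 13:02, inside the 13:00 bar
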
- overlaps_exist = True - n_merged = 0 - dts_to_drop = [] - dts_merged = [] - while overlaps_exist: - indices = _np.where(f_overlap)[0] - i = indices[0] - dt1 = quotes.index[i-1] - dt2 = quotes.index[i] - dt3 = quotes.index[i+1] - - dropped_dt = dt2 - quotes.loc[dt1] = merge_two_prices_intervals(quotes.iloc[i-1], quotes.iloc[i]) - dts_merged.append((dt2, dt1)) - - # Remove record of i: - dts_to_drop.append(dt2) - f_overlap[i] = False - steps[i] = td0 - # Recalc step of following dt: - steps[i+1] = quotes.index[i+1] - iend[i-1] - - f_overlap[i+1] = steps[i+1] < td0 - overlaps_exist = f_overlap[i+1:].any() - - # Useful debug code: - # for d in [str(dt.date()) for dt in dts_to_drop]: - # print(quotes.loc[d]) - # - # print("Dropped unaligned intervals:", dts_to_drop) - print(f"Removed {len(dts_merged)} unaligned intervals by merging:") - for i in range(len(dts_merged)): - print(f"- {dts_merged[i][0].date()}: {dts_merged[i][0].time()} -> {dts_merged[i][1].time()}") - quotes = quotes.drop(dts_to_drop) - return quotes - - def merge_two_prices_intervals(i1, i2): TypeCheckSeries(i1, "i1") TypeCheckSeries(i2, "i2") From eb6d830e2a9e3badad3a26aac084a6b933cfb185 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sat, 21 Jan 2023 23:00:30 +0000 Subject: [PATCH 07/10] Fix repair volume=0 ; Tidy code --- README.md | 3 +-- yfinance/base.py | 27 ++++++++++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 27c10996a..1e5d9a4ca 100644 --- a/README.md +++ b/README.md @@ -226,8 +226,7 @@ data = yf.download( # or pdr.get_data_yahoo(... auto_adjust = True, # attempt repair of Yahoo data issues - repair_prices = False, - repair_intervals = False, + repair = False, # download pre/post regular market hours data # (optional, default is False) diff --git a/yfinance/base.py b/yfinance/base.py index 5ab863377..086fbf33a 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -86,8 +86,7 @@ def stats(self, proxy=None): def history(self, period="1mo", interval="1d", start=None, end=None, prepost=False, actions=True, - auto_adjust=True, back_adjust=False, repair=None, - keepna=False, + auto_adjust=True, back_adjust=False, repair=False, keepna=False, proxy=None, rounding=False, timeout=10, debug=True, raise_errors=False) -> pd.DataFrame: """ @@ -794,26 +793,31 @@ def _fix_zeroes(self, df, interval, tz_exchange, prepost): df2.index = df2.index.tz_convert(tz_exchange) price_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df2.columns] - f_zero_or_nan = (df2[price_cols] == 0.0) | df2[price_cols].isna() + f_prices_bad = (df2[price_cols] == 0.0) | df2[price_cols].isna() df2_reserve = None if intraday: # Ignore days with >50% intervals containing NaNs - df_nans = pd.DataFrame(f_zero_or_nan.any(axis=1), columns=["nan"]) + df_nans = pd.DataFrame(f_prices_bad.any(axis=1), columns=["nan"]) df_nans["_date"] = df_nans.index.date grp = df_nans.groupby("_date") nan_pct = grp.sum() / grp.count() dts = nan_pct.index[nan_pct["nan"]>0.5] - f_zero_or_nan_ignore = _np.isin(f_zero_or_nan.index.date, dts) + f_zero_or_nan_ignore = _np.isin(f_prices_bad.index.date, dts) df2_reserve = df2[f_zero_or_nan_ignore] df2 = df2[~f_zero_or_nan_ignore] - f_zero_or_nan = (df2[price_cols] == 0.0) | df2[price_cols].isna() + f_prices_bad = (df2[price_cols] == 0.0) | df2[price_cols].isna() + + f_high_low_good = (~df2["High"].isna()) & (~df2["Low"].isna()) + f_vol_bad = (df2["Volume"]==0).to_numpy() & f_high_low_good & (df2["High"]!=df2["Low"]).to_numpy() + # Check whether worth 
attempting repair - f_zero_or_nan = f_zero_or_nan.to_numpy() - if f_zero_or_nan.any(axis=1).sum() == 0: + f_prices_bad = f_prices_bad.to_numpy() + f_bad_rows = f_prices_bad.any(axis=1) | f_vol_bad + if not f_bad_rows.any(): if debug: print("no bad data to repair") return df - if f_zero_or_nan.sum() == len(price_cols)*len(df2): + if f_prices_bad.sum() == len(price_cols)*len(df2): # Need some good data to calibrate if debug: print("no good data to calibrate") @@ -825,10 +829,11 @@ def _fix_zeroes(self, df, interval, tz_exchange, prepost): tag = -1.0 for i in range(len(price_cols)): c = price_cols[i] - df2.loc[f_zero_or_nan[:,i], c] = tag + df2.loc[f_prices_bad[:,i], c] = tag + df2.loc[f_vol_bad, "Volume"] = tag # If volume=0 or NaN for bad prices, then tag volume for repair f_vol_zero_or_nan = (df2["Volume"].to_numpy()==0) | (df2["Volume"].isna().to_numpy()) - df2.loc[f_zero_or_nan.any(axis=1) & f_vol_zero_or_nan, "Volume"] = tag + df2.loc[f_prices_bad.any(axis=1) & f_vol_zero_or_nan, "Volume"] = tag # If volume=0 or NaN but price moved in interval, then tag volume for repair f_change = df2["High"].to_numpy() != df2["Low"].to_numpy() df2.loc[f_change & f_vol_zero_or_nan, "Volume"] = tag From 39c1ecc7a29717c6b0bf43e0705c14d68ddcbdf3 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Wed, 25 Jan 2023 14:37:43 +0000 Subject: [PATCH 08/10] Improve price repair - reduce spam, improve data reliability Extend 'reconstruct groups' to reduce Yahoo spam ; Extend fetch range to avoid first/last day irregularities ; Improve handling of 'max fetch days' Yahoo limit --- yfinance/base.py | 83 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 21 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 086fbf33a..4795b8ff1 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -23,6 +23,7 @@ import time as _time import datetime as _datetime +import dateutil as _dateutil from typing import Optional import pandas as _pd @@ -416,6 +417,9 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): if interval[1:] in ['d', 'wk', 'mo']: # Interday data always includes pre & post prepost = True + intraday = False + else: + intraday = True price_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df] data_cols = price_cols + ["Volume"] @@ -447,8 +451,14 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): # Ignore old intervals for which Yahoo won't return finer data: m = min_lookbacks[sub_interval] - if m is not None: - f_recent = _datetime.date.today() - df.index.date < m + if m is None: + min_dt = None + else: + min_dt = _pd.Timestamp.utcnow() - m + if debug: + print(f"- min_dt={min_dt} interval={interval} sub_interval={sub_interval}") + if min_dt is not None: + f_recent = df.index >= min_dt f_repair_rows = f_repair_rows & f_recent if not f_repair_rows.any(): # print("data too old to repair") @@ -470,39 +480,50 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): last_dt = dts_to_repair[0] last_ind = indices_to_repair[0] td = utils._interval_to_timedelta(interval) - if interval == "1mo": - grp_td_threshold = _datetime.timedelta(days=28) - elif interval == "1wk": - grp_td_threshold = _datetime.timedelta(days=28) - elif interval == "1d": - grp_td_threshold = _datetime.timedelta(days=14) - elif interval == "1h": - grp_td_threshold = _datetime.timedelta(days=7) + # Note on setting max size: have to allow space for adding good data + if sub_interval == "1mo": + grp_max_size = 
_dateutil.relativedelta.relativedelta(years=2) + elif sub_interval == "1wk": + grp_max_size = _dateutil.relativedelta.relativedelta(years=2) + elif sub_interval == "1d": + grp_max_size = _dateutil.relativedelta.relativedelta(years=2) + elif sub_interval == "1h": + grp_max_size = _dateutil.relativedelta.relativedelta(years=1) + elif sub_interval == "1m": + grp_max_size = _datetime.timedelta(days=5) # allow 2 days for buffer below else: - grp_td_threshold = _datetime.timedelta(days=2) + grp_max_size = _datetime.timedelta(days=30) + if debug: + print("- grp_max_size =", grp_max_size) for i in range(1, len(dts_to_repair)): ind = indices_to_repair[i] dt = dts_to_repair[i] - if (dt-dts_groups[-1][-1]) < grp_td_threshold: - dts_groups[-1].append(dt) - elif ind - last_ind <= 3: + if dt.date() < dts_groups[-1][0].date()+grp_max_size: dts_groups[-1].append(dt) else: dts_groups.append([dt]) last_dt = dt last_ind = ind + if debug: + print("Repair groups:") + for g in dts_groups: + print(f"- {g[0]} -> {g[-1]}") + # Add some good data to each group, so can calibrate later: for i in range(len(dts_groups)): g = dts_groups[i] g0 = g[0] i0 = df_good.index.get_indexer([g0], method="nearest")[0] if i0 > 0: - i0 -= 1 + if (min_dt is None or df_good.index[i0-1] >= min_dt) and \ + ((not intraday) or df_good.index[i0-1].date()==g0.date()): + i0 -= 1 gl = g[-1] il = df_good.index.get_indexer([gl], method="nearest")[0] if il < len(df_good)-1: - il += 1 + if (not intraday) or df_good.index[il+1].date()==gl.date(): + il += 1 good_dts = df_good.index[i0:il+1] dts_groups[i] += good_dts.to_list() dts_groups[i].sort() @@ -538,7 +559,13 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): fetch_start = g[0] fetch_end = g[-1] + td_range - df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair=False, keepna=True) + # The first and last day returned by Yahoo can be slightly wrong, so add buffer: + fetch_start -= td_1d + fetch_end += td_1d + if intraday: + df_fine = self.history(start=fetch_start.date(), end=fetch_end.date()+td_1d, interval=sub_interval, auto_adjust=False, actions=False, prepost=prepost, repair=False, keepna=True) + else: + df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, actions=False, prepost=prepost, repair=False, keepna=True) if df_fine is None or df_fine.empty: print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval") continue @@ -602,7 +629,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): ratios = df_block_calib[calib_filter] / df_new_calib[calib_filter] ratio = _np.mean(ratios) if debug: - print(f"- price calibration ratio = {ratio}") + print(f"- price calibration ratio (raw) = {ratio}") ratio_rcp = round(1.0 / ratio, 1) ratio = round(ratio, 1) if ratio == 1 and ratio_rcp == 1: @@ -623,12 +650,20 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): # Repair! bad_dts = df_block.index[(df_block[price_cols+["Volume"]]==tag).any(axis=1)] + if debug: + no_fine_data_dts = [] + for idx in bad_dts: + if not idx in df_new.index: + # Yahoo didn't return finer-grain data for this interval, + # so probably no trading happened. 
+ no_fine_data_dts.append(idx) + if len(no_fine_data_dts) > 0: + print(f"Yahoo didn't return finer-grain data for these intervals:") + print(no_fine_data_dts) for idx in bad_dts: if not idx in df_new.index: # Yahoo didn't return finer-grain data for this interval, # so probably no trading happened. - if debug: - print(f"Yahoo didn't return finer-grain data for interval {idx}") continue df_new_row = df_new.loc[idx] @@ -839,11 +874,17 @@ def _fix_zeroes(self, df, interval, tz_exchange, prepost): df2.loc[f_change & f_vol_zero_or_nan, "Volume"] = tag n_before = (df2[data_cols].to_numpy()==tag).sum() + dts_tagged = df2.index[(df2[data_cols].to_numpy()==tag).any(axis=1)] df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag=tag) n_after = (df2[data_cols].to_numpy()==tag).sum() + dts_not_repaired = df2.index[(df2[data_cols].to_numpy()==tag).any(axis=1)] n_fixed = n_before - n_after if n_fixed > 0: - print(f"{self.ticker}: fixed {n_fixed}/{n_before} value=0 errors in {interval} price data") + msg = f"{self.ticker}: fixed {n_fixed}/{n_before} value=0 errors in {interval} price data" + if n_fixed < 4: + dts_repaired = sorted(list(set(dts_tagged).difference(dts_not_repaired))) + msg += f": {dts_repaired}" + print(msg) if df2_reserve is not None: df2 = _pd.concat([df2, df2_reserve]) From aad46baf286c9556301fb0644273281decc33dfa Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sat, 28 Jan 2023 23:14:28 +0000 Subject: [PATCH 09/10] price repair: Fix 'min_dt', add 'silent' mode --- yfinance/base.py | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index 4795b8ff1..da6de912a 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -111,8 +111,9 @@ def history(self, period="1mo", interval="1d", Adjust all OHLC automatically? Default is True back_adjust: bool Back-adjusted data to mimic true historical prices - repair: bool - Detect currency unit 100x mixups and attempt repair + repair: bool or "silent" + Detect currency unit 100x mixups and attempt repair. + If True, fix & print summary. If "silent", just fix. Default is False keepna: bool Keep NaN rows returned by Yahoo? 
@@ -355,10 +356,10 @@ def history(self, period="1mo", interval="1d", else: df["Capital Gains"] = 0.0 - if repair: + if repair==True or repair=="silent": # Do this before auto/back adjust - df = self._fix_zeroes(df, interval, tz_exchange, prepost) - df = self._fix_unit_mixups(df, interval, tz_exchange, prepost) + df = self._fix_zeroes(df, interval, tz_exchange, prepost, silent=(repair=="silent")) + df = self._fix_unit_mixups(df, interval, tz_exchange, prepost, silent=(repair=="silent")) # Auto/back adjust try: @@ -402,7 +403,7 @@ def history(self, period="1mo", interval="1d", # ------------------------ - def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): + def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1, silent=False): if not isinstance(df, _pd.DataFrame): raise Exception("'df' must be a Pandas DataFrame not", type(df)) if interval == "1m": @@ -433,9 +434,6 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): for i in ["30m", "15m", "5m", "2m"]: min_lookbacks[i] = _datetime.timedelta(days=60) min_lookbacks["1m"] = _datetime.timedelta(days=30) - # Hopefully never have to use max_lengths, because complicates fetch logic - # max_lengths = {i:None for i in intervals} - # max_lengths["1m"] = _datetime.timedelta(days=7) if interval in nexts: sub_interval = nexts[interval] td_range = itds[interval] @@ -454,7 +452,9 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): if m is None: min_dt = None else: + m -= _datetime.timedelta(days=1) # allow space for 1-day padding min_dt = _pd.Timestamp.utcnow() - m + min_dt = min_dt.tz_convert(df.index.tz).ceil("D") if debug: print(f"- min_dt={min_dt} interval={interval} sub_interval={sub_interval}") if min_dt is not None: @@ -563,11 +563,15 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): fetch_start -= td_1d fetch_end += td_1d if intraday: - df_fine = self.history(start=fetch_start.date(), end=fetch_end.date()+td_1d, interval=sub_interval, auto_adjust=False, actions=False, prepost=prepost, repair=False, keepna=True) - else: - df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, actions=False, prepost=prepost, repair=False, keepna=True) + fetch_start = fetch_start.date() + fetch_end = fetch_end.date()+td_1d + if debug: + print(f"- fetching {sub_interval} prepost={prepost} {fetch_start}->{fetch_end}") + r = "silent" if silent else True + df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, actions=False, prepost=prepost, repair=r, keepna=True) if df_fine is None or df_fine.empty: - print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval") + if not silent: + print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval") continue df_fine["ctr"] = 0 @@ -697,7 +701,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1): return df_v2 - def _fix_unit_mixups(self, df, interval, tz_exchange, prepost): + def _fix_unit_mixups(self, df, interval, tz_exchange, prepost, silent=False): # Sometimes Yahoo returns few prices in cents/pence instead of $/£ # I.e. 
100x bigger # Easy to detect and fix, just look for outliers = ~100x local median @@ -744,7 +748,7 @@ def _fix_unit_mixups(self, df, interval, tz_exchange, prepost): df2.loc[fi, c] = tag n_before = (df2[data_cols].to_numpy()==tag).sum() - df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag=tag) + df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag, silent) n_after = (df2[data_cols].to_numpy()==tag).sum() if n_after > 0: @@ -786,7 +790,7 @@ def _fix_unit_mixups(self, df, interval, tz_exchange, prepost): n_fixed = n_before - n_after_crude n_fixed_crudely = n_after - n_after_crude - if n_fixed > 0: + if not silent and n_fixed > 0: report_msg = f"{self.ticker}: fixed {n_fixed}/{n_before} currency unit mixups " if n_fixed_crudely > 0: report_msg += f"({n_fixed_crudely} crudely) " @@ -806,7 +810,7 @@ def _fix_unit_mixups(self, df, interval, tz_exchange, prepost): return df2 - def _fix_zeroes(self, df, interval, tz_exchange, prepost): + def _fix_zeroes(self, df, interval, tz_exchange, prepost, silent=False): # Sometimes Yahoo returns prices=0 or NaN when trades occurred. # But most times when prices=0 or NaN returned is because no trades. # Impossible to distinguish, so only attempt repair if few or rare. @@ -875,11 +879,11 @@ def _fix_zeroes(self, df, interval, tz_exchange, prepost): n_before = (df2[data_cols].to_numpy()==tag).sum() dts_tagged = df2.index[(df2[data_cols].to_numpy()==tag).any(axis=1)] - df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag=tag) + df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag, silent) n_after = (df2[data_cols].to_numpy()==tag).sum() dts_not_repaired = df2.index[(df2[data_cols].to_numpy()==tag).any(axis=1)] n_fixed = n_before - n_after - if n_fixed > 0: + if not silent and n_fixed > 0: msg = f"{self.ticker}: fixed {n_fixed}/{n_before} value=0 errors in {interval} price data" if n_fixed < 4: dts_repaired = sorted(list(set(dts_tagged).difference(dts_not_repaired))) From a4f11b0243c393bfe578b61e0a428508dd00c277 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sun, 29 Jan 2023 12:45:30 +0000 Subject: [PATCH 10/10] Fix price repair tests, remove unrelated changes --- tests/prices.py | 67 ++++++++++++++++++++++++++++-------------- yfinance/base.py | 5 +++- yfinance/utils.py | 74 ++++++++++------------------------------------- 3 files changed, 65 insertions(+), 81 deletions(-) diff --git a/tests/prices.py b/tests/prices.py index 52ac94f33..ccbd425ac 100644 --- a/tests/prices.py +++ b/tests/prices.py @@ -270,6 +270,38 @@ def test_weekly_2rows_fix(self): df = dat.history(start=start, interval="1wk") self.assertTrue((df.index.weekday == 0).all()) +class TestPriceRepair(unittest.TestCase): + session = None + + @classmethod + def setUpClass(cls): + cls.session = requests_cache.CachedSession(backend='memory') + + @classmethod + def tearDownClass(cls): + if cls.session is not None: + cls.session.close() + + def test_reconstruct_2m(self): + # 2m repair requires 1m data. + # Yahoo restricts 1m fetches to 7 days max within last 30 days. + # Need to test that '_reconstruct_intervals_batch()' can handle this. 
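The 1m limits this test exercises can be made concrete; a sketch under the limits stated in the comment above (`yahoo_1m_windows` is our name, not part of the patch):

    import datetime as dt

    def yahoo_1m_windows(start, end, max_span=dt.timedelta(days=7)):
        # Split [start, end) into spans of at most 7 days, the most Yahoo
        # serves per 1m request; 'end' must lie within the last 30 days.
        windows = []
        while start < end:
            stop = min(start + max_span, end)
            windows.append((start, stop))
            start = stop
        return windows
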
+ + tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"] + + dt_now = _pd.Timestamp.utcnow() + td_7d = _dt.timedelta(days=7) + td_60d = _dt.timedelta(days=60) + + # Round time for 'requests_cache' reuse + dt_now = dt_now.ceil("1h") + + for tkr in tkrs: + dat = yf.Ticker(tkr, session=self.session) + end_dt = dt_now + start_dt = end_dt - td_60d + df = dat.history(start=start_dt, end=end_dt, interval="2m", repair=True) + def test_repair_100x_weekly(self): # Setup: tkr = "PNL.L" @@ -452,38 +484,29 @@ def test_repair_zeroes_hourly(self): dat = yf.Ticker(tkr, session=self.session) tz_exchange = dat.info["exchangeTimezoneName"] - df_bad = _pd.DataFrame(data={"Open": [29.68, 29.49, 29.545, _np.nan, 29.485], - "High": [29.68, 29.625, 29.58, _np.nan, 29.49], - "Low": [29.46, 29.4, 29.45, _np.nan, 29.31], - "Close": [29.485, 29.545, 29.485, _np.nan, 29.325], - "Adj Close": [29.485, 29.545, 29.485, _np.nan, 29.325], - "Volume": [3258528, 2140195, 1621010, 0, 0]}, - index=_pd.to_datetime([_dt.datetime(2022,11,25, 9,30), - _dt.datetime(2022,11,25, 10,30), - _dt.datetime(2022,11,25, 11,30), - _dt.datetime(2022,11,25, 12,30), - _dt.datetime(2022,11,25, 13,00)])) - df_bad = df_bad.sort_index() - df_bad.index.name = "Date" - df_bad.index = df_bad.index.tz_localize(tz_exchange) + correct_df = dat.history(period="1wk", interval="1h", auto_adjust=False, repair=True) + + df_bad = correct_df.copy() + bad_idx = correct_df.index[10] + df_bad.loc[bad_idx, "Open"] = _np.nan + df_bad.loc[bad_idx, "High"] = _np.nan + df_bad.loc[bad_idx, "Low"] = _np.nan + df_bad.loc[bad_idx, "Close"] = _np.nan + df_bad.loc[bad_idx, "Adj Close"] = _np.nan + df_bad.loc[bad_idx, "Volume"] = 0 repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False) - correct_df = df_bad.copy() - idx = _pd.Timestamp(2022,11,25, 12,30).tz_localize(tz_exchange) - correct_df.loc[idx, "Open"] = 29.485001 - correct_df.loc[idx, "High"] = 29.49 - correct_df.loc[idx, "Low"] = 29.43 - correct_df.loc[idx, "Close"] = 29.455 - correct_df.loc[idx, "Adj Close"] = 29.455 - correct_df.loc[idx, "Volume"] = 609164 for c in ["Open", "Low", "High", "Close"]: try: self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=1e-7).all()) except: print("COLUMN", c) + print("- repaired_df") print(repaired_df) + print("- correct_df[c]:") print(correct_df[c]) + print("- diff:") print(repaired_df[c] - correct_df[c]) raise diff --git a/yfinance/base.py b/yfinance/base.py index db403f67a..afcc0faa7 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -852,13 +852,16 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1, silent=Fal f_recent = df.index >= min_dt f_repair_rows = f_repair_rows & f_recent if not f_repair_rows.any(): - # print("data too old to repair") + if debug: + print("data too old to repair") return df dts_to_repair = df.index[f_repair_rows] indices_to_repair = _np.where(f_repair_rows)[0] if len(dts_to_repair) == 0: + if debug: + print("dts_to_repair[] is empty") return df df_v2 = df.copy() diff --git a/yfinance/utils.py b/yfinance/utils.py index 7c99bdedd..927609d85 100644 --- a/yfinance/utils.py +++ b/yfinance/utils.py @@ -49,11 +49,6 @@ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} -def TypeCheckSeries(var, varName): - if not isinstance(var, _pd.Series) or isinstance(var, _pd.DataFrame): - raise TypeError(f"'{varName}' must be _pd.Series not {type(var)}") - - # From https://stackoverflow.com/a/59128615 from types import FunctionType 
from inspect import getmembers @@ -485,63 +480,26 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange): if last_rows_same_interval: # Last two rows are within same interval - ia = quotes.index[n - 2] - ib = quotes.index[n - 1] - quotes.loc[ia] = merge_two_prices_intervals(quotes.loc[ia], quotes.loc[ib]) - quotes = quotes.drop(ib) + idx1 = quotes.index[n - 1] + idx2 = quotes.index[n - 2] + if _np.isnan(quotes.loc[idx2, "Open"]): + quotes.loc[idx2, "Open"] = quotes["Open"][n - 1] + # Note: nanmax() & nanmin() ignores NaNs + quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]]) + quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]]) + quotes.loc[idx2, "Close"] = quotes["Close"][n - 1] + if "Adj High" in quotes.columns: + quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"][n - 1], quotes["Adj High"][n - 2]]) + if "Adj Low" in quotes.columns: + quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"][n - 1], quotes["Adj Low"][n - 2]]) + if "Adj Close" in quotes.columns: + quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"][n - 1] + quotes.loc[idx2, "Volume"] += quotes["Volume"][n - 1] + quotes = quotes.drop(quotes.index[n - 1]) return quotes -def merge_two_prices_intervals(i1, i2): - TypeCheckSeries(i1, "i1") - TypeCheckSeries(i2, "i2") - - price_cols = ["Open", "High", "Low", "Close"] - na1 = i1[price_cols].isna().all() - na2 = i2[price_cols].isna().all() - if na1 and na2: - return i1 - elif na1: - return i2 - elif na2: - return i1 - - # First check if two intervals are almost identical. If yes, keep 2nd - ratio = _np.mean(i2[price_cols+["Volume"]] / i1[price_cols+["Volume"]]) - if ratio > 0.99 and ratio < 1.01: - return i2 - - m = i1.copy() - - if _np.isnan(m["Open"]): - m["Open"] = i2["Open"] - if "Adj Open" in m.index: - m["Adj Open"] = i2["Adj Open"] - - # Note: nanmax() & nanmin() ignores NaNs - m["High"] = _np.nanmax([i2["High"], i1["High"]]) - m["Low"] = _np.nanmin([i2["Low"], i1["Low"]]) - if not _np.isnan(i2["Close"]): - m["Close"] = i2["Close"] - - if "Adj High" in m.index: - m["Adj High"] = _np.nanmax([i2["Adj High"], i1["Adj High"]]) - if "Adj Low" in m.index: - m["Adj Low"] = _np.nanmin([i2["Adj Low"], i1["Adj Low"]]) - if "Adj Close" in m.index: - m["Adj Close"] = i2["Adj Close"] - - if _np.isnan(m["Volume"]): - m["Volume"] = i2["Volume"] - elif _np.isnan(i2["Volume"]): - pass - else: - m["Volume"] += i2["Volume"] - - return m - - def safe_merge_dfs(df_main, df_sub, interval): # Carefully merge 'df_sub' onto 'df_main' # If naive merge fails, try again with reindexing df_sub: