From ec2020735d72ff73e0a6a607689281aad173c702 Mon Sep 17 00:00:00 2001
From: Kalyan Gokhale <4734245+KalyanGokhale@users.noreply.github.com>
Date: Wed, 20 Jun 2018 16:03:07 +0530
Subject: [PATCH] REGR: Fixes first_valid_index when DataFrame or Series has
 duplicate row index (GH21441) (#21497)

---
 doc/source/whatsnew/v0.23.2.txt       |  3 ++-
 pandas/core/generic.py                | 23 +++++++++++------------
 pandas/tests/frame/test_timeseries.py | 15 ++++++++++++++-
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index 5454dc9eca360..5b3e607956f7a 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -17,7 +17,8 @@ Fixed Regressions
 ~~~~~~~~~~~~~~~~~
 
 - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`)
--
+- Fixed regression in :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index`, which raised when the row index had duplicate values (:issue:`21441`)
+- 
 
 .. _whatsnew_0232.performance:
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 555108a5d9349..1780e359164e2 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8968,18 +8968,17 @@ def _find_valid_index(self, how):
             is_valid = is_valid.any(1)  # reduce axis 1
 
         if how == 'first':
-            # First valid value case
-            i = is_valid.idxmax()
-            if not is_valid[i]:
-                return None
-            return i
-
-        elif how == 'last':
-            # Last valid value case
-            i = is_valid.values[::-1].argmax()
-            if not is_valid.iat[len(self) - i - 1]:
-                return None
-            return self.index[len(self) - i - 1]
+            idxpos = is_valid.values[::].argmax()
+
+        if how == 'last':
+            idxpos = len(self) - 1 - is_valid.values[::-1].argmax()
+
+        chk_notna = is_valid.iat[idxpos]
+        idx = self.index[idxpos]
+
+        if not chk_notna:
+            return None
+        return idx
 
     @Appender(_shared_docs['valid_index'] % {'position': 'first',
                                              'klass': 'NDFrame'})
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
index 90fbc6e628369..fb9bd74d9876d 100644
--- a/pandas/tests/frame/test_timeseries.py
+++ b/pandas/tests/frame/test_timeseries.py
@@ -506,7 +506,15 @@ def test_asfreq_fillvalue(self):
         actual_series = ts.asfreq(freq='1S', fill_value=9.0)
         assert_series_equal(expected_series, actual_series)
 
-    def test_first_last_valid(self):
+    @pytest.mark.parametrize("data,idx,expected_first,expected_last", [
+        ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2),
+        ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2),
+        ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'),
+        ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2),
+        ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2),
+        ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)])
+    def test_first_last_valid(self, data, idx,
+                              expected_first, expected_last):
         N = len(self.frame.index)
         mat = randn(N)
         mat[:5] = nan
@@ -539,6 +547,11 @@ def test_first_last_valid(self):
         assert frame.first_valid_index().freq == frame.index.freq
         assert frame.last_valid_index().freq == frame.index.freq
 
+        # GH 21441
+        df = DataFrame(data, index=idx)
+        assert expected_first == df.first_valid_index()
+        assert expected_last == df.last_valid_index()
+
     def test_first_subset(self):
         ts = tm.makeTimeDataFrame(freq='12h')
         result = ts.first('10d')