Skip to content

Commit

Permalink
REGR: Fixes first_valid_index when DataFrame or Series has duplicate …
Browse files Browse the repository at this point in the history
…row index (GH21441) (#21497)

(cherry picked from commit ec20207)
  • Loading branch information
KalyanGokhale authored and jorisvandenbossche committed Jul 2, 2018
1 parent 030a058 commit d44fddb
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 14 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.23.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ Fixed Regressions
~~~~~~~~~~~~~~~~~

- Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`)
-
- Fixed regression in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index`, which raised for a row index having duplicate values (:issue:`21441`)
-

.. _whatsnew_0232.performance:

Expand Down
23 changes: 11 additions & 12 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8969,18 +8969,17 @@ def _find_valid_index(self, how):
is_valid = is_valid.any(1) # reduce axis 1

if how == 'first':
# First valid value case
i = is_valid.idxmax()
if not is_valid[i]:
return None
return i

elif how == 'last':
# Last valid value case
i = is_valid.values[::-1].argmax()
if not is_valid.iat[len(self) - i - 1]:
return None
return self.index[len(self) - i - 1]
idxpos = is_valid.values[::].argmax()

if how == 'last':
idxpos = len(self) - 1 - is_valid.values[::-1].argmax()

chk_notna = is_valid.iat[idxpos]
idx = self.index[idxpos]

if not chk_notna:
return None
return idx

@Appender(_shared_docs['valid_index'] % {'position': 'first',
'klass': 'NDFrame'})
Expand Down
15 changes: 14 additions & 1 deletion pandas/tests/frame/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,15 @@ def test_asfreq_fillvalue(self):
actual_series = ts.asfreq(freq='1S', fill_value=9.0)
assert_series_equal(expected_series, actual_series)

def test_first_last_valid(self):
@pytest.mark.parametrize("data,idx,expected_first,expected_last", [
({'A': [1, 2, 3]}, [1, 1, 2], 1, 2),
({'A': [1, 2, 3]}, [1, 2, 2], 1, 2),
({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'),
({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2),
({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2),
({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)])
def test_first_last_valid(self, data, idx,
expected_first, expected_last):
N = len(self.frame.index)
mat = randn(N)
mat[:5] = nan
Expand Down Expand Up @@ -539,6 +547,11 @@ def test_first_last_valid(self):
assert frame.first_valid_index().freq == frame.index.freq
assert frame.last_valid_index().freq == frame.index.freq

# GH 21441
df = DataFrame(data, index=idx)
assert expected_first == df.first_valid_index()
assert expected_last == df.last_valid_index()

def test_first_subset(self):
ts = tm.makeTimeDataFrame(freq='12h')
result = ts.first('10d')
Expand Down

0 comments on commit d44fddb

Please sign in to comment.