diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f3984ba54ba6e..fcc2f296f9230 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2841,6 +2841,56 @@ def skew(self, axis=0, skipna=True, level=None):
         return Series(result, index=axis_labels)
     _add_stat_doc(skew, 'unbiased skewness', 'skew')
 
+    def idxmin(self, axis=0, skipna=True):
+        """
+        Return index of first occurrence of minimum over requested axis.
+        NA/null values are excluded.
+
+        Parameters
+        ----------
+        axis : {0, 1}
+            0 for row-wise, 1 for column-wise
+        skipna : boolean, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be first index.
+
+        Returns
+        -------
+        idxmin : Series
+        """
+        values = self.values.copy()
+        if skipna and not issubclass(values.dtype.type, np.integer):
+            # Mask only NaN (not +/-inf) so real infinities still compete
+            np.putmask(values, np.isnan(values), np.inf)
+        argmin_index = self._get_axis(axis)
+        return Series([argmin_index[i] for i in values.argmin(axis)],
+                      index=self._get_agg_axis(axis))
+
+    def idxmax(self, axis=0, skipna=True):
+        """
+        Return index of first occurrence of maximum over requested axis.
+        NA/null values are excluded.
+
+        Parameters
+        ----------
+        axis : {0, 1}
+            0 for row-wise, 1 for column-wise
+        skipna : boolean, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be first index.
+
+        Returns
+        -------
+        idxmax : Series
+        """
+        values = self.values.copy()
+        if skipna and not issubclass(values.dtype.type, np.integer):
+            # Mask only NaN (not +/-inf) so real infinities still compete
+            np.putmask(values, np.isnan(values), -np.inf)
+        argmax_index = self._get_axis(axis)
+        return Series([argmax_index[i] for i in values.argmax(axis)],
+                      index=self._get_agg_axis(axis))
+
     def _agg_by_level(self, name, axis=0, level=0, skipna=True):
         method = getattr(type(self), name)
         applyf = lambda x: method(x, axis=axis, skipna=skipna)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 0e5708c064e34..31694fe3bbb36 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -755,6 +755,46 @@ def skew(self, skipna=True, level=None):
         return (np.sqrt((count**2-count))*C) / ((count-2)*np.sqrt(B)**3)
     _add_stat_doc(skew, 'unbiased skewness', 'skew')
 
+    def idxmin(self, axis=None, out=None, skipna=True):
+        """
+        Index of first occurrence of minimum of values.
+
+        Parameters
+        ----------
+        skipna : boolean, default True
+            Exclude NA/null values
+
+        Returns
+        -------
+        idxmin : Index of minimum of values
+        """
+        arr = self.values.copy()
+        if skipna:
+            if not issubclass(arr.dtype.type, np.integer):
+                # NA becomes +inf, so it is chosen only if every value is NA
+                np.putmask(arr, isnull(arr), np.inf)
+        return self.index[arr.argmin()]
+
+    def idxmax(self, axis=None, out=None, skipna=True):
+        """
+        Index of first occurrence of maximum of values.
+
+        Parameters
+        ----------
+        skipna : boolean, default True
+            Exclude NA/null values
+
+        Returns
+        -------
+        idxmax : Index of maximum of values
+        """
+        arr = self.values.copy()
+        if skipna:
+            if not issubclass(arr.dtype.type, np.integer):
+                # NA becomes -inf, so it is chosen only if every value is NA
+                np.putmask(arr, isnull(arr), -np.inf)
+        return self.index[arr.argmax()]
+
     def _ndarray_statistic(self, funcname, dtype=None, skipna=True):
         arr = self.values
         retVal = getattr(arr, funcname)(dtype=dtype)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 2df81d1429f3f..eb25ed949a50e 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -3432,6 +3432,70 @@ def test_dot(self):
                              columns=['one', 'two'])
         assert_frame_equal(result, expected)
 
+    def test_idxmin(self):
+        def validate(f, s, axis, skipna):
+            def get_result(f, i, v, axis, skipna):
+                if axis == 0:
+                    return (f[i][v], f[i].min(skipna=skipna))
+                else:
+                    return (f[v][i], f.ix[i].min(skipna=skipna))
+            for i, v in s.iteritems():
+                (r1, r2) = get_result(f, i, v, axis, skipna)
+                if np.isnan(r1) or np.isinf(r1):
+                    self.assert_(np.isnan(r2) or np.isinf(r2))
+                elif np.isnan(r2) or np.isinf(r2):
+                    self.assert_(np.isnan(r1) or np.isinf(r1))
+                else:
+                    self.assertEqual(r1, r2)
+
+        frame = self.frame
+        frame.ix[5:10] = np.nan
+        frame.ix[15:20, -2:] = np.nan
+        for skipna in [True, False]:
+            for axis in [0, 1]:
+                validate(frame,
+                         frame.idxmin(axis=axis, skipna=skipna),
+                         axis,
+                         skipna)
+                validate(self.intframe,
+                         self.intframe.idxmin(axis=axis, skipna=skipna),
+                         axis,
+                         skipna)
+
+        self.assertRaises(Exception, frame.idxmin, axis=2)
+
+    def test_idxmax(self):
+        def validate(f, s, axis, skipna):
+            def get_result(f, i, v, axis, skipna):
+                if axis == 0:
+                    return (f[i][v], f[i].max(skipna=skipna))
+                else:
+                    return (f[v][i], f.ix[i].max(skipna=skipna))
+            for i, v in s.iteritems():
+                (r1, r2) = get_result(f, i, v, axis, skipna)
+                if np.isnan(r1) or np.isinf(r1):
+                    self.assert_(np.isnan(r2) or np.isinf(r2))
+                elif np.isnan(r2) or np.isinf(r2):
+                    self.assert_(np.isnan(r1) or np.isinf(r1))
+                else:
+                    self.assertEqual(r1, r2)
+
+        frame = self.frame
+        frame.ix[5:10] = np.nan
+        frame.ix[15:20, -2:] = np.nan
+        for skipna in [True, False]:
+            for axis in [0, 1]:
+                validate(frame,
+                         frame.idxmax(axis=axis, skipna=skipna),
+                         axis,
+                         skipna)
+                validate(self.intframe,
+                         self.intframe.idxmax(axis=axis, skipna=skipna),
+                         axis,
+                         skipna)
+
+        self.assertRaises(Exception, frame.idxmax, axis=2)
+
 class TestDataFrameJoin(unittest.TestCase):
 
     def setUp(self):
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index 36181abf49aa1..a6603d84d6a70 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -678,6 +678,48 @@ def check_comparators(other):
         check_comparators(5)
         check_comparators(self.ts + 1)
 
+    def test_idxmin(self):
+        # test idxmin
+        # _check_stat_op approach can not be used here because of isnull check.
+
+        # add some NaNs
+        self.series[5:15] = np.NaN
+
+        # skipna or no
+        self.assertEqual(self.series[self.series.idxmin()], self.series.min())
+        self.assert_(isnull(self.series[self.series.idxmin(skipna=False)]))
+
+        # no NaNs
+        nona = self.series.dropna()
+        self.assertEqual(nona[nona.idxmin()], nona.min())
+        self.assertEqual(nona.index.values.tolist().index(nona.idxmin()),
+                         nona.values.argmin())
+
+        # all NaNs
+        allna = self.series * nan
+        self.assertEqual(allna.idxmin(), allna.index[0])
+
+    def test_idxmax(self):
+        # test idxmax
+        # _check_stat_op approach can not be used here because of isnull check.
+
+        # add some NaNs
+        self.series[5:15] = np.NaN
+
+        # skipna or no
+        self.assertEqual(self.series[self.series.idxmax()], self.series.max())
+        self.assert_(isnull(self.series[self.series.idxmax(skipna=False)]))
+
+        # no NaNs
+        nona = self.series.dropna()
+        self.assertEqual(nona[nona.idxmax()], nona.max())
+        self.assertEqual(nona.index.values.tolist().index(nona.idxmax()),
+                         nona.values.argmax())
+
+        # all NaNs
+        allna = self.series * nan
+        self.assertEqual(allna.idxmax(), allna.index[0])
+
     def test_operators_date(self):
         result = self.objSeries + timedelta(1)
         result = self.objSeries - timedelta(1)