Skip to content

Commit

Permalink
ENH: idxmin/idxmax methods, close PR #286, had to do by hand
Browse files Browse the repository at this point in the history
  • Loading branch information
lodagro authored and wesm committed Nov 14, 2011
1 parent 6ee3eae commit 9558105
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 0 deletions.
48 changes: 48 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2841,6 +2841,54 @@ def skew(self, axis=0, skipna=True, level=None):
return Series(result, index=axis_labels)
_add_stat_doc(skew, 'unbiased skewness', 'skew')

def idxmin(self, axis=0, skipna=True):
    """
    Return index of first occurrence of minimum over requested axis.

    Parameters
    ----------
    axis : {0, 1}
        0 for row-wise, 1 for column-wise
    skipna : boolean, default True
        Exclude NA/null values. If an entire row/column is NA, the result
        will be the first index label.

    Returns
    -------
    idxmin : Series
    """
    values = self.values
    if skipna and not issubclass(values.dtype.type, np.integer):
        # Mask on a copy so the frame's own data is never modified.
        values = values.copy()
        # Push every non-finite entry (NaN, +/-inf) to +inf so argmin only
        # lands on it when nothing finite is available.  The mask must be
        # inverted with ``~``: unary ``-`` on a boolean array is rejected
        # by current NumPy.
        np.putmask(values, ~np.isfinite(values), np.inf)
    labels = self._get_axis(axis)
    return Series([labels[i] for i in values.argmin(axis)],
                  index=self._get_agg_axis(axis))

def idxmax(self, axis=0, skipna=True):
    """
    Return index of first occurrence of maximum over requested axis.

    Parameters
    ----------
    axis : {0, 1}
        0 for row-wise, 1 for column-wise
    skipna : boolean, default True
        Exclude NA/null values. If an entire row/column is NA, the result
        will be the first index label.

    Returns
    -------
    idxmax : Series
    """
    values = self.values
    if skipna and not issubclass(values.dtype.type, np.integer):
        # Mask on a copy so the frame's own data is never modified.
        values = values.copy()
        # Push every non-finite entry (NaN, +/-inf) to -inf so argmax only
        # lands on it when nothing finite is available.  The mask must be
        # inverted with ``~``: unary ``-`` on a boolean array is rejected
        # by current NumPy.
        np.putmask(values, ~np.isfinite(values), -np.inf)
    labels = self._get_axis(axis)
    return Series([labels[i] for i in values.argmax(axis)],
                  index=self._get_agg_axis(axis))

def _agg_by_level(self, name, axis=0, level=0, skipna=True):
method = getattr(type(self), name)
applyf = lambda x: method(x, axis=axis, skipna=skipna)
Expand Down
38 changes: 38 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,44 @@ def skew(self, skipna=True, level=None):
return (np.sqrt((count**2-count))*C) / ((count-2)*np.sqrt(B)**3)
_add_stat_doc(skew, 'unbiased skewness', 'skew')

def idxmin(self, axis=None, out=None, skipna=True):
    """
    Index of first occurrence of minimum of values.

    Parameters
    ----------
    axis : not used by this method
    out : not used by this method
    skipna : boolean, default True
        Exclude NA/null values

    Returns
    -------
    idxmin : Index of minimum of values
    """
    arr = self.values
    if skipna and not issubclass(arr.dtype.type, np.integer):
        # Only copy when we are about to overwrite entries; the Series'
        # own data must stay untouched.
        arr = arr.copy()
        # NA entries become +inf so argmin prefers any real value over them.
        np.putmask(arr, isnull(arr), np.inf)
    return self.index[arr.argmin()]

def idxmax(self, axis=None, out=None, skipna=True):
    """
    Index of first occurrence of maximum of values.

    Parameters
    ----------
    axis : not used by this method
    out : not used by this method
    skipna : boolean, default True
        Exclude NA/null values

    Returns
    -------
    idxmax : Index of maximum of values
    """
    arr = self.values
    if skipna and not issubclass(arr.dtype.type, np.integer):
        # Only copy when we are about to overwrite entries; the Series'
        # own data must stay untouched.
        arr = arr.copy()
        # NA entries become -inf so argmax prefers any real value over them.
        np.putmask(arr, isnull(arr), -np.inf)
    return self.index[arr.argmax()]

def _ndarray_statistic(self, funcname, dtype=None, skipna=True):
arr = self.values
retVal = getattr(arr, funcname)(dtype=dtype)
Expand Down
64 changes: 64 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3432,6 +3432,70 @@ def test_dot(self):
columns=['one', 'two'])
assert_frame_equal(result, expected)

def test_idxmin(self):
    """Check DataFrame.idxmin against min() on both axes, with and without skipna."""
    def not_finite(x):
        return np.isnan(x) or np.isinf(x)

    def validate(f, s, axis, skipna):
        # s maps each aggregated label to the label idxmin selected for it
        for label, where in s.iteritems():
            if axis == 0:
                picked = f[label][where]
                expected = f[label].min(skipna=skipna)
            else:
                picked = f[where][label]
                expected = f.ix[label].min(skipna=skipna)

            if not_finite(picked) or not_finite(expected):
                # a non-finite value on one side must be matched by a
                # non-finite value on the other
                self.assert_(not_finite(picked) and not_finite(expected))
            else:
                self.assertEqual(picked, expected)

    frame = self.frame
    frame.ix[5:10] = np.nan
    frame.ix[15:20, -2:] = np.nan

    for skipna in (True, False):
        for axis in (0, 1):
            for f in (frame, self.intframe):
                validate(f, f.idxmin(axis=axis, skipna=skipna), axis, skipna)

    # only axes 0 and 1 exist on a DataFrame
    self.assertRaises(Exception, frame.idxmin, axis=2)

def test_idxmax(self):
    """Check DataFrame.idxmax against max() on both axes, with and without skipna."""
    def not_finite(x):
        return np.isnan(x) or np.isinf(x)

    def validate(f, s, axis, skipna):
        # s maps each aggregated label to the label idxmax selected for it
        for label, where in s.iteritems():
            if axis == 0:
                picked = f[label][where]
                expected = f[label].max(skipna=skipna)
            else:
                picked = f[where][label]
                expected = f.ix[label].max(skipna=skipna)

            if not_finite(picked) or not_finite(expected):
                # a non-finite value on one side must be matched by a
                # non-finite value on the other
                self.assert_(not_finite(picked) and not_finite(expected))
            else:
                self.assertEqual(picked, expected)

    frame = self.frame
    frame.ix[5:10] = np.nan
    frame.ix[15:20, -2:] = np.nan

    for skipna in (True, False):
        for axis in (0, 1):
            for f in (frame, self.intframe):
                validate(f, f.idxmax(axis=axis, skipna=skipna), axis, skipna)

    # only axes 0 and 1 exist on a DataFrame
    self.assertRaises(Exception, frame.idxmax, axis=2)

class TestDataFrameJoin(unittest.TestCase):

def setUp(self):
Expand Down
42 changes: 42 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,48 @@ def check_comparators(other):
check_comparators(5)
check_comparators(self.ts + 1)

def test_idxmin(self):
    """Series.idxmin with NaNs present, with no NaNs, and with only NaNs."""
    # The _check_stat_op approach cannot be used here because of the
    # isnull check.

    # add some NaNs (np.nan is the canonical spelling; the np.NaN alias
    # is gone in NumPy 2.0)
    self.series[5:15] = np.nan

    # skipna or no
    self.assertEqual(self.series[self.series.idxmin()], self.series.min())
    self.assert_(isnull(self.series[self.series.idxmin(skipna=False)]))

    # no NaNs: the label must sit at the position ndarray.argmin reports
    nona = self.series.dropna()
    self.assertEqual(nona[nona.idxmin()], nona.min())
    self.assertEqual(nona.index.values.tolist().index(nona.idxmin()),
                     nona.values.argmin())

    # all NaNs: falls back to the first label
    allna = self.series * np.nan
    self.assertEqual(allna.idxmin(), allna.index[0])

def test_idxmax(self):
    """Series.idxmax with NaNs present, with no NaNs, and with only NaNs."""
    # The _check_stat_op approach cannot be used here because of the
    # isnull check.

    # add some NaNs (np.nan is the canonical spelling; the np.NaN alias
    # is gone in NumPy 2.0)
    self.series[5:15] = np.nan

    # skipna or no
    self.assertEqual(self.series[self.series.idxmax()], self.series.max())
    self.assert_(isnull(self.series[self.series.idxmax(skipna=False)]))

    # no NaNs: the label must sit at the position ndarray.argmax reports
    nona = self.series.dropna()
    self.assertEqual(nona[nona.idxmax()], nona.max())
    self.assertEqual(nona.index.values.tolist().index(nona.idxmax()),
                     nona.values.argmax())

    # all NaNs: falls back to the first label
    allna = self.series * np.nan
    self.assertEqual(allna.idxmax(), allna.index[0])

def test_operators_date(self):
result = self.objSeries + timedelta(1)
result = self.objSeries - timedelta(1)
Expand Down

0 comments on commit 9558105

Please sign in to comment.