Skip to content

Commit

Permalink
ENH: work on rank function, finish testing, ascending param, #875
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 13, 2012
1 parent 24d7b02 commit 73c71df
Show file tree
Hide file tree
Showing 6 changed files with 350 additions and 69 deletions.
8 changes: 5 additions & 3 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,16 +119,18 @@ def value_counts(values, sort=True, ascending=False):

return result

def rank(values, axis=0, method='average', na_option='keep'):
def rank(values, axis=0, method='average', na_option='keep',
ascending=True):
"""
"""
if values.ndim == 1:
f, values = _get_data_algo(values, _rank1d_functions)
ranks = f(values, ties_method=method)
ranks = f(values, ties_method=method, ascending=ascending)
elif values.ndim == 2:
f, values = _get_data_algo(values, _rank2d_functions)
ranks = f(values, axis=axis, ties_method=method)
ranks = f(values, axis=axis, ties_method=method,
ascending=ascending)
return ranks


Expand Down
10 changes: 7 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3731,7 +3731,7 @@ def clip_lower(self, threshold):
return self.apply(lambda x: x.clip_lower(threshold))

def rank(self, axis=0, numeric_only=None, method='average',
na_option='keep'):
na_option='keep', ascending=True):
"""
Compute numerical data ranks (1 through n) along axis. Equal values are
assigned a rank that is the average of the ranks of those values
Expand All @@ -3749,6 +3749,8 @@ def rank(self, axis=0, numeric_only=None, method='average',
first: ranks assigned in order they appear in the array
na_option : {'keep'}
keep: leave NA values where they are
ascending : boolean, default True
If False, rank from high (1) to low (N)
Returns
-------
Expand All @@ -3758,7 +3760,8 @@ def rank(self, axis=0, numeric_only=None, method='average',

if numeric_only is None:
try:
ranks = rank(self.values, axis=axis, method=method)
ranks = rank(self.values, axis=axis, method=method,
ascending=ascending)
return DataFrame(ranks, index=self.index, columns=self.columns)
except TypeError:
numeric_only = True
Expand All @@ -3767,7 +3770,8 @@ def rank(self, axis=0, numeric_only=None, method='average',
data = self._get_numeric_data()
else:
data = self
ranks = rank(data.values, axis=axis, method=method)
ranks = rank(data.values, axis=axis, method=method,
ascending=ascending)
return DataFrame(ranks, index=data.index, columns=data.columns)

#----------------------------------------------------------------------
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1576,7 +1576,7 @@ def argsort(self, axis=0, kind='quicksort', order=None):
return Series(np.argsort(values, kind=kind), index=self.index,
name=self.name)

def rank(self, method='average', na_option='keep'):
def rank(self, method='average', na_option='keep', ascending=True):
"""
Compute data ranks (1 through n). Equal values are assigned a rank that
is the average of the ranks of those values
Expand All @@ -1590,13 +1590,16 @@ def rank(self, method='average', na_option='keep'):
first: ranks assigned in order they appear in the array
na_option : {'keep'}
keep: leave NA values where they are
ascending : boolean, default True
If False, rank from high (1) to low (N)
Returns
-------
ranks : Series
"""
from pandas.core.algorithms import rank
ranks = rank(self.values, method=method, na_option=na_option)
ranks = rank(self.values, method=method, na_option=na_option,
ascending=ascending)
return Series(ranks, index=self.index, name=self.name)

def order(self, na_last=True, ascending=True, kind='mergesort'):
Expand Down
15 changes: 15 additions & 0 deletions pandas/src/sandbox.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,18 @@ cdef extern from "math.h":
double fabs(double)

cdef float64_t FP_ERR = 1e-13

cimport util

cdef:
int TIEBREAK_AVERAGE = 0
int TIEBREAK_MIN = 1
int TIEBREAK_MAX = 2
int TIEBREAK_FIRST = 3

tiebreakers = {
'average' : TIEBREAK_AVERAGE,
'min' : TIEBREAK_MIN,
'max' : TIEBREAK_MAX,
'first' : TIEBREAK_FIRST
}
Loading

0 comments on commit 73c71df

Please sign in to comment.