Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Added the level arguments to the series and frame math operations #313

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
try:
import pandas._tseries as lib
except Exception, e: # pragma: no cover
if 'No module named' in e.message:
if 'No module named' in str(e):
raise ImportError('C extensions not built: if you installed already '
'verify that you are not importing from the source '
'directory')
Expand Down
85 changes: 73 additions & 12 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2425,7 +2425,7 @@ def _count_level(self, level, axis=0, numeric_only=False):

return DataFrame(result, index=index, columns=columns)

def sum(self, axis=0, numeric_only=False, skipna=True):
def sum(self, axis=0, numeric_only=False, skipna=True, level=None):
"""
Return sum over requested axis

Expand All @@ -2438,6 +2438,8 @@ def sum(self, axis=0, numeric_only=False, skipna=True):
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : integer, default None
Choose a level to groupby before applying operation

Examples
--------
Expand All @@ -2456,6 +2458,10 @@ def sum(self, axis=0, numeric_only=False, skipna=True):
-------
sum : Series
"""
if not level is None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can write `level is not None` instead of `not level is None`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the preferred syntax? I know they're the same.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see `x is not None` more often than `not x is None`, but they're indeed equivalent.

sumfunc = lambda x: x.sum(skipna=skipna)
return self.groupby(level=level).aggregate(sumfunc)

y, axis_labels = self._get_agg_data(axis, numeric_only=numeric_only)

if len(axis_labels) == 0:
Expand All @@ -2479,7 +2485,7 @@ def sum(self, axis=0, numeric_only=False, skipna=True):

return Series(the_sum, index=axis_labels)

def min(self, axis=0, skipna=True):
def min(self, axis=0, skipna=True, level=None):
"""
Return minimum over requested axis. NA/null values are excluded

Expand All @@ -2490,6 +2496,8 @@ def min(self, axis=0, skipna=True):
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : integer, default None
Choose a level to groupby before applying operation

Returns
-------
Expand All @@ -2498,9 +2506,14 @@ def min(self, axis=0, skipna=True):
values = self.values.copy()
if skipna and not issubclass(values.dtype.type, np.integer):
np.putmask(values, -np.isfinite(values), np.inf)

if not level is None:
minfunc = lambda x: x.min(skipna=skipna)
return self.groupby(level=level).aggregate(minfunc)

return Series(values.min(axis), index=self._get_agg_axis(axis))

def max(self, axis=0, skipna=True):
def max(self, axis=0, skipna=True, level=None):
"""
Return maximum over requested axis. NA/null values are excluded

Expand All @@ -2511,6 +2524,8 @@ def max(self, axis=0, skipna=True):
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : integer, default None
Choose a level to groupby before applying operation

Returns
-------
Expand All @@ -2519,9 +2534,14 @@ def max(self, axis=0, skipna=True):
values = self.values.copy()
if skipna and not issubclass(values.dtype.type, np.integer):
np.putmask(values, -np.isfinite(values), -np.inf)

if not level is None:
maxfunc = lambda x: x.max(skipna=skipna)
return self.groupby(level=level).aggregate(maxfunc)

return Series(values.max(axis), index=self._get_agg_axis(axis))

def prod(self, axis=0, skipna=True):
def prod(self, axis=0, skipna=True, level=None):
"""
Return product over requested axis. NA/null values are treated as 1

Expand All @@ -2532,23 +2552,28 @@ def prod(self, axis=0, skipna=True):
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : integer, default None
Choose a level to groupby before applying operation

Returns
-------
product : Series
"""
if not level is None:
prodfunc = lambda x: x.prod(skipna=skipna)
return self.groupby(level=level).aggregate(prodfunc)

y = np.array(self.values, subok=True)
if skipna:
if not issubclass(y.dtype.type, np.integer):
y[np.isnan(y)] = 1
result = y.prod(axis)
count = self.count(axis)
result[count == 0] = nan
return Series(result, index=self._get_agg_axis(axis))

product = prod
return Series(result, index=self._get_agg_axis(axis))

def mean(self, axis=0, skipna=True):
def mean(self, axis=0, skipna=True, level=None):
"""
Return mean over requested axis. NA/null values are excluded

Expand All @@ -2559,11 +2584,17 @@ def mean(self, axis=0, skipna=True):
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : integer, default None
Choose a level to groupby before applying operation

Returns
-------
mean : Series
"""
if not level is None:
meanfunc = lambda x: x.mean(skipna=skipna)
return self.groupby(level=level).aggregate(meanfunc)

summed = self.sum(axis, numeric_only=True, skipna=skipna)
count = self.count(axis, numeric_only=True).astype(float)
return summed / count
Expand Down Expand Up @@ -2599,7 +2630,7 @@ def f(arr):

return self.apply(f, axis=axis)

def median(self, axis=0, skipna=True):
def median(self, axis=0, skipna=True, level=None):
"""
Return median over requested axis, NA/null are excluded

Expand All @@ -2610,11 +2641,17 @@ def median(self, axis=0, skipna=True):
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : integer, default None
Choose a level to groupby before applying operation

Returns
-------
Series or TimeSeries
"""
if not level is None:
medianfunc = lambda x: x.median(skipna=skipna)
return self.groupby(level=level).aggregate(medianfunc)

if axis == 0:
med = [self[col].median(skipna=skipna) for col in self.columns]
return Series(med, index=self.columns)
Expand All @@ -2624,7 +2661,7 @@ def median(self, axis=0, skipna=True):
else:
raise Exception('Must have 0<= axis <= 1')

def mad(self, axis=0, skipna=True):
def mad(self, axis=0, skipna=True, level=None):
"""
Return mean absolute deviation over requested axis

Expand All @@ -2635,18 +2672,24 @@ def mad(self, axis=0, skipna=True):
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : integer, default None
Choose a level to groupby before applying operation

Returns
-------
mad : Series
"""
if not level is None:
madfunc = lambda x: x.mad(skipna=skipna)
return self.groupby(level=level).aggregate(madfunc)

if axis == 0:
demeaned = self - self.mean(axis=0)
else:
demeaned = self.sub(self.mean(axis=1), axis=0)
return np.abs(demeaned).mean(axis=axis, skipna=skipna)

def var(self, axis=0, skipna=True):
def var(self, axis=0, skipna=True, level=None):
"""
Return unbiased variance over requested axis

Expand All @@ -2657,11 +2700,17 @@ def var(self, axis=0, skipna=True):
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : integer, default None
Choose a level to groupby before applying operation

Returns
-------
var : Series
"""
if not level is None:
varfunc = lambda x: x.var(skipna=skipna)
return self.groupby(level=level).aggregate(varfunc)

y, axis_labels = self._get_agg_data(axis, numeric_only=True)

mask = np.isnan(y)
Expand All @@ -2677,7 +2726,7 @@ def var(self, axis=0, skipna=True):

return Series(theVar, index=axis_labels)

def std(self, axis=0, skipna=True):
def std(self, axis=0, skipna=True, level=None):
"""
Return unbiased std deviation over requested axis

Expand All @@ -2688,14 +2737,20 @@ def std(self, axis=0, skipna=True):
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : integer, default None
Choose a level to groupby before applying operation

Returns
-------
std : Series
"""
if not level is None:
stdfunc = lambda x: x.std(skipna=skipna)
return self.groupby(level=level).aggregate(stdfunc)

return np.sqrt(self.var(axis=axis, skipna=skipna))

def skew(self, axis=0, skipna=True):
def skew(self, axis=0, skipna=True, level=None):
"""
Return unbiased skewness over requested axis

Expand All @@ -2706,11 +2761,17 @@ def skew(self, axis=0, skipna=True):
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA
level : integer, default None
Choose a level to groupby before applying operation

Returns
-------
skew : Series
"""
if not level is None:
skewfunc = lambda x: x.skew(skipna=skipna)
return self.groupby(level=level).aggregate(skewfunc)

y, axis_labels = self._get_agg_data(axis, numeric_only=True)

mask = np.isnan(y)
Expand Down
Loading