Skip to content

Commit

Permalink
CLN: Separate transform tests (pandas-dev#36146)
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach authored and Kevin D Smith committed Nov 2, 2020
1 parent 23e6e72 commit eb0103b
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 78 deletions.
49 changes: 1 addition & 48 deletions pandas/tests/frame/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from collections import OrderedDict
from datetime import datetime
from itertools import chain
import operator
import warnings

import numpy as np
Expand All @@ -14,6 +13,7 @@
import pandas._testing as tm
from pandas.core.apply import frame_apply
from pandas.core.base import SpecificationError
from pandas.tests.frame.common import zip_frames


@pytest.fixture
Expand Down Expand Up @@ -1058,25 +1058,6 @@ def test_consistency_for_boxed(self, box, int_frame_const_col):
tm.assert_frame_equal(result, expected)


def zip_frames(frames, axis=1):
    """
    Interleave the columns (or rows) of several frames into one frame.

    The labels are taken from the first frame; every other frame is
    assumed to carry the same index/columns.

    Parameters
    ----------
    frames : list of DataFrame
    axis : int, default 1
        With ``1`` the result holds, for each column label in turn, that
        column from every frame. Any other value does the same row by row.

    Returns
    -------
    new_frame : DataFrame
    """
    template = frames[0]
    if axis != 1:
        # one Series per (row label, frame) pair, stacked as rows
        rows = [frame.loc[label, :] for label in template.index for frame in frames]
        return pd.DataFrame(rows)

    # one Series per (column label, frame) pair, glued side by side
    cols = [frame.loc[:, label] for label in template.columns for frame in frames]
    return pd.concat(cols, axis=1)


class TestDataFrameAggregate:
def test_agg_transform(self, axis, float_frame):
other_axis = 1 if axis in {0, "index"} else 0
Expand All @@ -1087,16 +1068,10 @@ def test_agg_transform(self, axis, float_frame):
f_sqrt = np.sqrt(float_frame)

# ufunc
result = float_frame.transform(np.sqrt, axis=axis)
expected = f_sqrt.copy()
tm.assert_frame_equal(result, expected)

result = float_frame.apply(np.sqrt, axis=axis)
tm.assert_frame_equal(result, expected)

result = float_frame.transform(np.sqrt, axis=axis)
tm.assert_frame_equal(result, expected)

# list-like
result = float_frame.apply([np.sqrt], axis=axis)
expected = f_sqrt.copy()
Expand All @@ -1110,9 +1085,6 @@ def test_agg_transform(self, axis, float_frame):
)
tm.assert_frame_equal(result, expected)

result = float_frame.transform([np.sqrt], axis=axis)
tm.assert_frame_equal(result, expected)

# multiple items in list
# these are in the order as if we are applying both
# functions per series and then concatting
Expand All @@ -1128,38 +1100,19 @@ def test_agg_transform(self, axis, float_frame):
)
tm.assert_frame_equal(result, expected)

result = float_frame.transform([np.abs, "sqrt"], axis=axis)
tm.assert_frame_equal(result, expected)

def test_transform_and_agg_err(self, axis, float_frame):
    # cannot both transform and agg

    # a list made only of reducers is rejected by transform
    reducers_msg = "transforms cannot produce aggregated results"
    with pytest.raises(ValueError, match=reducers_msg):
        float_frame.transform(["max", "min"], axis=axis)

    # mixing a reducer with a transformer is rejected by agg and transform
    mixed_msg = "cannot combine transform and aggregation operations"
    with pytest.raises(ValueError, match=mixed_msg), np.errstate(all="ignore"):
        float_frame.agg(["max", "sqrt"], axis=axis)

    with pytest.raises(ValueError, match=mixed_msg), np.errstate(all="ignore"):
        float_frame.transform(["max", "sqrt"], axis=axis)

    df = pd.DataFrame({"A": range(5), "B": 5})

    # NOTE(review): this helper is only defined, never invoked, so the
    # dict-of-lists agg below is not actually exercised -- confirm intent.
    def f():
        with np.errstate(all="ignore"):
            df.agg({"A": ["abs", "sum"], "B": ["mean", "max"]}, axis=axis)

@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"])
def test_transform_method_name(self, method):
    # GH 19760
    # transform given a method name must match calling that method directly
    frame = pd.DataFrame({"A": [-1, 2]})
    call_by_name = operator.methodcaller(method)
    tm.assert_frame_equal(frame.transform(method), call_by_name(frame))

def test_demo(self):
# demonstration tests
df = pd.DataFrame({"A": range(5), "B": 5})
Expand Down
72 changes: 72 additions & 0 deletions pandas/tests/frame/apply/test_frame_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import operator

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.tests.frame.common import zip_frames


def test_agg_transform(axis, float_frame):
    """
    Check ``DataFrame.transform`` along ``axis`` for three kinds of input:
    a single ufunc, a one-element list and a multi-element list.

    ``axis`` and ``float_frame`` are pytest fixtures.
    """
    other_axis = 1 if axis in {0, "index"} else 0

    # NumPy floating-point warnings are silenced for the whole comparison
    with np.errstate(all="ignore"):
        f_abs = np.abs(float_frame)
        f_sqrt = np.sqrt(float_frame)

        # ufunc
        result = float_frame.transform(np.sqrt, axis=axis)
        expected = f_sqrt.copy()
        tm.assert_frame_equal(result, expected)

        # list-like
        # the function name becomes an extra level on the transformed axis
        expected = f_sqrt.copy()
        if axis in {0, "index"}:
            expected.columns = pd.MultiIndex.from_product(
                [float_frame.columns, ["sqrt"]]
            )
        else:
            expected.index = pd.MultiIndex.from_product([float_frame.index, ["sqrt"]])
        result = float_frame.transform([np.sqrt], axis=axis)
        tm.assert_frame_equal(result, expected)

        # multiple items in list
        # these are in the order as if we are applying both
        # functions per series and then concatting
        expected = zip_frames([f_abs, f_sqrt], axis=other_axis)
        if axis in {0, "index"}:
            expected.columns = pd.MultiIndex.from_product(
                [float_frame.columns, ["absolute", "sqrt"]]
            )
        else:
            expected.index = pd.MultiIndex.from_product(
                [float_frame.index, ["absolute", "sqrt"]]
            )
        result = float_frame.transform([np.abs, "sqrt"], axis=axis)
        tm.assert_frame_equal(result, expected)


def test_transform_and_agg_err(axis, float_frame):
    # cannot both transform and agg

    # only reducers in the list
    reducers_msg = "transforms cannot produce aggregated results"
    with pytest.raises(ValueError, match=reducers_msg):
        float_frame.transform(["max", "min"], axis=axis)

    # a reducer mixed with a transformer
    mixed_msg = "cannot combine transform and aggregation operations"
    with pytest.raises(ValueError, match=mixed_msg), np.errstate(all="ignore"):
        float_frame.transform(["max", "sqrt"], axis=axis)


@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"])
def test_transform_method_name(method):
    # GH 19760
    # passing a method name to transform must equal calling that method
    frame = pd.DataFrame({"A": [-1, 2]})
    via_transform = frame.transform(method)
    via_method = getattr(frame, method)()
    tm.assert_frame_equal(via_transform, via_method)
24 changes: 24 additions & 0 deletions pandas/tests/frame/common.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from typing import List

from pandas import DataFrame, concat


def _check_mixed_float(df, dtype=None):
# float16 are most likely to be upcasted to float32
dtypes = dict(A="float32", B="float32", C="float16", D="float64")
Expand Down Expand Up @@ -29,3 +34,22 @@ def _check_mixed_int(df, dtype=None):
assert df.dtypes["C"] == dtypes["C"]
if dtypes.get("D"):
assert df.dtypes["D"] == dtypes["D"]


def zip_frames(frames: List[DataFrame], axis: int = 1) -> DataFrame:
    """
    Interleave the columns (or rows) of several frames into one frame.

    The labels are taken from the first frame; every other frame is
    assumed to carry the same index/columns.

    Parameters
    ----------
    frames : list of DataFrame
    axis : int, default 1
        With ``1`` the result holds, for each column label in turn, that
        column from every frame. Any other value does the same row by row.

    Returns
    -------
    new_frame : DataFrame
    """
    template = frames[0]
    if axis != 1:
        # one Series per (row label, frame) pair, stacked as rows
        rows = [frame.loc[label, :] for label in template.index for frame in frames]
        return DataFrame(rows)

    # one Series per (column label, frame) pair, glued side by side
    cols = [frame.loc[:, label] for label in template.columns for frame in frames]
    return concat(cols, axis=1)
31 changes: 1 addition & 30 deletions pandas/tests/series/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,25 +209,16 @@ def test_transform(self, string_series):
f_abs = np.abs(string_series)

# ufunc
result = string_series.transform(np.sqrt)
expected = f_sqrt.copy()
tm.assert_series_equal(result, expected)

result = string_series.apply(np.sqrt)
tm.assert_series_equal(result, expected)

# list-like
result = string_series.transform([np.sqrt])
result = string_series.apply([np.sqrt])
expected = f_sqrt.to_frame().copy()
expected.columns = ["sqrt"]
tm.assert_frame_equal(result, expected)

result = string_series.transform([np.sqrt])
tm.assert_frame_equal(result, expected)

result = string_series.transform(["sqrt"])
tm.assert_frame_equal(result, expected)

# multiple items in list
# these are in the order as if we are applying both functions per
# series and then concatting
Expand All @@ -236,10 +227,6 @@ def test_transform(self, string_series):
result = string_series.apply([np.sqrt, np.abs])
tm.assert_frame_equal(result, expected)

result = string_series.transform(["sqrt", "abs"])
expected.columns = ["sqrt", "abs"]
tm.assert_frame_equal(result, expected)

# dict, provide renaming
expected = pd.concat([f_sqrt, f_abs], axis=1)
expected.columns = ["foo", "bar"]
Expand All @@ -250,19 +237,11 @@ def test_transform(self, string_series):

def test_transform_and_agg_error(self, string_series):
# we are trying to transform with an aggregator
msg = "transforms cannot produce aggregated results"
with pytest.raises(ValueError, match=msg):
string_series.transform(["min", "max"])

msg = "cannot combine transform and aggregation"
with pytest.raises(ValueError, match=msg):
with np.errstate(all="ignore"):
string_series.agg(["sqrt", "max"])

with pytest.raises(ValueError, match=msg):
with np.errstate(all="ignore"):
string_series.transform(["sqrt", "max"])

msg = "cannot perform both aggregation and transformation"
with pytest.raises(ValueError, match=msg):
with np.errstate(all="ignore"):
Expand Down Expand Up @@ -463,14 +442,6 @@ def test_agg_cython_table_raises(self, series, func, expected):
# e.g. Series('a b'.split()).cumprod() will raise
series.agg(func)

def test_transform_none_to_type(self):
    # GH34377
    # casting a column that only holds None to int must raise
    frame = pd.DataFrame({"a": [None]})
    converters = {"a": int}

    expected_msg = "DataFrame constructor called with incompatible data and dtype"
    with pytest.raises(TypeError, match=expected_msg):
        frame.transform(converters)


class TestSeriesMap:
def test_map(self, datetime_series):
Expand Down
59 changes: 59 additions & 0 deletions pandas/tests/series/apply/test_series_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


def test_transform(string_series):
    """
    Check ``Series.transform`` for three kinds of input: a single ufunc,
    a one-element list (callable or name) and a multi-element list.

    ``string_series`` is a pytest fixture.
    """
    # transforming functions

    # NumPy floating-point warnings are silenced for the whole comparison
    with np.errstate(all="ignore"):
        f_sqrt = np.sqrt(string_series)
        f_abs = np.abs(string_series)

        # ufunc
        result = string_series.transform(np.sqrt)
        expected = f_sqrt.copy()
        tm.assert_series_equal(result, expected)

        # list-like
        # a list input yields a frame whose column is the function name
        result = string_series.transform([np.sqrt])
        expected = f_sqrt.to_frame().copy()
        expected.columns = ["sqrt"]
        tm.assert_frame_equal(result, expected)

        # same result when the function is given by name
        result = string_series.transform(["sqrt"])
        tm.assert_frame_equal(result, expected)

        # multiple items in list
        # these are in the order as if we are applying both functions per
        # series and then concatting
        expected = pd.concat([f_sqrt, f_abs], axis=1)
        expected.columns = ["sqrt", "abs"]
        result = string_series.transform(["sqrt", "abs"])
        tm.assert_frame_equal(result, expected)


def test_transform_and_agg_error(string_series):
    # we are trying to transform with an aggregator

    # only reducers in the list
    reducers_msg = "transforms cannot produce aggregated results"
    with pytest.raises(ValueError, match=reducers_msg):
        string_series.transform(["min", "max"])

    # a transformer mixed with a reducer
    mixed_msg = "cannot combine transform and aggregation operations"
    with pytest.raises(ValueError, match=mixed_msg), np.errstate(all="ignore"):
        string_series.transform(["sqrt", "max"])


def test_transform_none_to_type():
    # GH34377
    # casting a column that only holds None to int must raise
    frame = pd.DataFrame({"a": [None]})
    converters = {"a": int}

    expected_msg = "DataFrame constructor called with incompatible data and dtype"
    with pytest.raises(TypeError, match=expected_msg):
        frame.transform(converters)

0 comments on commit eb0103b

Please sign in to comment.