Skip to content

Commit

Permalink
FEAT-modin-project#1201: pivot implementation via unstack (modin-project#1645)
Browse files Browse the repository at this point in the history

Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
  • Loading branch information
dchigarev authored Aug 31, 2020
1 parent 3ddd5c0 commit 363da6d
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 16 deletions.
2 changes: 1 addition & 1 deletion docs/supported_apis/dataframe_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ default to pandas.
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``pipe`` | `pipe`_ | Y | |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``pivot`` | `pivot`_ | D | |
| ``pivot`` | `pivot`_ | Y | |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``pivot_table`` | `pivot_table`_ | D | |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
Expand Down
4 changes: 4 additions & 0 deletions modin/backends/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,10 @@ def groupby_agg(self, by, axis, agg_func, groupby_args, agg_args):
def unstack(self, level, fill_value):
    """Interface stub for the unstack operation; this definition has an empty body.

    NOTE(review): the line preceding this ``def`` is outside the visible
    excerpt; presumably it carries ``@abc.abstractmethod`` like the
    neighbouring stubs - confirm.

    Parameters
    ----------
    level : object
        NOTE(review): presumably the index level(s) to unstack, as in
        ``pandas.DataFrame.unstack`` - confirm against implementations.
    fill_value : object
        NOTE(review): presumably the replacement for missing values
        produced by unstacking - confirm against implementations.
    """
    pass

@abc.abstractmethod
def pivot(self, index, columns, values):
    """Pivot operation declared abstract; this definition has an empty body.

    Parameters
    ----------
    index : object
        NOTE(review): presumably the column label(s) or values used for the
        result's index, as in ``pandas.DataFrame.pivot`` - confirm.
    columns : object
        NOTE(review): presumably the column label(s) whose values become the
        result's columns - confirm.
    values : object
        NOTE(review): presumably the column label(s) used to populate the
        result - confirm.
    """
    pass

@abc.abstractmethod
def get_dummies(self, columns, **kwargs):
"""Convert categorical variables to dummy variables for certain columns.
Expand Down
49 changes: 49 additions & 0 deletions modin/backends/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2253,6 +2253,55 @@ def compute_groupby(df):

# END Manual Partitioning methods

def pivot(self, index, columns, values):
    """
    Pivot the data by moving key columns into the row index and unstacking.

    The key columns (`index` labels plus `columns` labels) are turned into
    row-index levels with ``set_index``; the levels named by `columns` are
    then unstacked to become column headers.

    Parameters
    ----------
    index : label, list of labels, list-like of values, or None
        Column label(s) to use for the result's index. A list-like whose
        items are not all labels of this frame is treated as a custom index
        and assigned to the row axis. If None or empty, no column is moved
        into the index for this argument.
    columns : label or list of labels
        Column label(s) whose values become the result's column headers.
    values : label, list of labels, or None
        Column label(s) to keep as data. If None or empty, every column that
        is not a key column is kept.

    Returns
    -------
    object
        Whatever ``unstack`` returns for the re-indexed query compiler, with
        the outer column level dropped when a single data column was pivoted.
    """
    from pandas.core.reshape.pivot import _convert_by

    def _as_key_list(by):
        # Normalize one pivot argument to a list of keys.
        if isinstance(by, pandas.Index):
            by = list(by)
        by = _convert_by(by)
        # A flat sequence of scalars/tuples that are not all column labels is
        # wrapped once more, so it is carried as a single list-like item.
        if (
            by
            and (not is_list_like(by[0]) or isinstance(by[0], tuple))
            and not all(key in self.columns for key in by)
        ):
            by = [by]
        return by

    index, columns, values = map(_as_key_list, (index, columns, values))

    # A single list-like (non-tuple) item means the caller passed index
    # values rather than column labels.
    is_custom_index = (
        len(index) == 1
        and is_list_like(index[0])
        and not isinstance(index[0], tuple)
    )

    if is_custom_index or not index:
        to_reindex = columns
    else:
        to_reindex = index + columns

    # Restrict to the key and value columns only when values were requested.
    # NOTE(review): with no `values`, `obj` is `self`, so the index
    # assignment below is applied to this query compiler itself - confirm
    # that this side effect is intended.
    obj = self.getitem_column_array(to_reindex + values) if values else self

    if is_custom_index:
        obj.index = index

    reindexed = self.__constructor__(
        obj._modin_frame._apply_full_axis(
            1,
            # With exactly one key column the existing row index is kept
            # (append=True); otherwise the key columns replace it.
            lambda df: df.set_index(to_reindex, append=(len(to_reindex) == 1)),
            new_columns=obj.columns.drop(to_reindex),
        )
    )

    unstacked = reindexed.unstack(level=columns, fill_value=None)
    # A single data column that produced multi-level column labels: drop the
    # outermost level (presumably the data column's own label - confirm).
    if len(reindexed.columns) == 1 and unstacked.columns.nlevels > 1:
        unstacked.columns = unstacked.columns.droplevel(0)

    return unstacked

# Get_dummies
def get_dummies(self, columns, **kwargs):
"""Convert categorical variables to dummy variables for certain columns.
Expand Down
23 changes: 21 additions & 2 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1714,8 +1714,27 @@ def unstack(self, level=-1, fill_value=None):
)

def pivot(self, index=None, columns=None, values=None):
return self._default_to_pandas(
pandas.DataFrame.pivot, index=index, columns=columns, values=values
"""
Return reshaped DataFrame organized by given index / column values.
Reshape data (produce a "pivot" table) based on column values. Uses
unique values from specified `index` / `columns` to form axes of the
resulting DataFrame.
Parameters
----------
index : str or object, optional
Column to use to make new frame's index. If None, uses
existing index.
columns : str or object
Column to use to make new frame's columns.
values : str, object or a list of the previous, optional
Column(s) to use for populating new frame's values. If not
specified, all remaining columns will be used and the result will
have hierarchically indexed columns.
"""
return self.__constructor__(
query_compiler=self._query_compiler.pivot(
index=index, columns=columns, values=values
)
)

def pivot_table(
Expand Down
26 changes: 16 additions & 10 deletions modin/pandas/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2493,17 +2493,23 @@ def test_pct_change(self):
with pytest.warns(UserWarning):
pd.DataFrame(data).pct_change()

def test_pivot(self):
df = pd.DataFrame(
{
"foo": ["one", "one", "one", "two", "two", "two"],
"bar": ["A", "B", "C", "A", "B", "C"],
"baz": [1, 2, 3, 4, 5, 6],
"zoo": ["x", "y", "z", "q", "w", "t"],
}
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize(
"index", [lambda df: df.columns[0], lambda df: df[df.columns[0]].values, None]
)
@pytest.mark.parametrize("columns", [lambda df: df.columns[len(df.columns) // 2]])
@pytest.mark.parametrize(
"values", [lambda df: df.columns[-1], lambda df: df.columns[-2:], None]
)
def test_pivot(self, data, index, columns, values):
eval_general(
*create_test_dfs(data),
lambda df, *args, **kwargs: df.pivot(*args, **kwargs),
index=index,
columns=columns,
values=values,
check_exception_type=None,
)
with pytest.warns(UserWarning):
df.pivot(index="foo", columns="bar", values="baz")

def test_pivot_table(self):
df = pd.DataFrame(
Expand Down
6 changes: 3 additions & 3 deletions modin/pandas/test/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,9 @@ def test_pivot():
"zoo": ["x", "y", "z", "q", "w", "t"],
}
)
with pytest.warns(UserWarning):
df = pd.pivot(test_df, index="foo", columns="bar", values="baz")
assert isinstance(df, pd.DataFrame)

df = pd.pivot(test_df, index="foo", columns="bar", values="baz")
assert isinstance(df, pd.DataFrame)

with pytest.raises(ValueError):
pd.pivot(test_df["bar"], index="foo", columns="bar", values="baz")
Expand Down

0 comments on commit 363da6d

Please sign in to comment.