Skip to content

Commit

Permalink
Implemented max and min dataframes functions (#4)
Browse files Browse the repository at this point in the history
* changes to min/max dataframes functions

* max/min now return a pandas Series - fixed tests to check equality of pandas Series objects

* added error checking for axis in min and max

* updated error checking for axis in min/max
  • Loading branch information
osalpekar authored and kunalgosar committed Mar 16, 2018
1 parent b1c638c commit 68470e7
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 23 deletions.
53 changes: 34 additions & 19 deletions python/ray/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
If None, infer
copy (boolean): Copy data from inputs.
Only affects DataFrame / 2d ndarray input
_col_partitions ([ObjectID]): The list of ObjectIDs that contain
col_partitions ([ObjectID]): The list of ObjectIDs that contain
the column dataframe partitions.
rows ([ObjectID]): The list of ObjectIDs that contain the row
row_partitions ([ObjectID]): The list of ObjectIDs that contain the row
dataframe partitions.
"""
# Check type of data and use appropriate constructor
Expand Down Expand Up @@ -1873,14 +1873,22 @@ def max(self, axis=None, skipna=None, level=None, numeric_only=None,
Returns:
The max of the DataFrame.
"""
return # Fix this
if axis == 1:
return self._map_partitions(
lambda df: df.max(axis=axis, skipna=skipna, level=level,
numeric_only=numeric_only, **kwargs))
else:
return self.T.max(axis=1, skipna=None, level=None,
numeric_only=None, **kwargs)
# TODO: doesn't work for multi-level indices
axis = self._row_index._get_axis_name(axis) if axis is not None \
else 'index'

max_series = pd.concat(ray.get(
_map_partitions(lambda df: df.max(axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs),
self._row_partitions if axis == 1 or axis == 'rows'
else self._col_partitions)))
max_series.index = self.columns
#max_series is a pandas.Series object
#return Series(max_series)
return max_series

def mean(self, axis=None, skipna=None, level=None, numeric_only=None,
**kwargs):
Expand Down Expand Up @@ -1924,15 +1932,22 @@ def min(self, axis=None, skipna=None, level=None, numeric_only=None,
Returns:
The min of the DataFrame.
"""
return
# Fix this
if axis == 1:
return self._map_partitions(
lambda df: df.min(axis=axis, skipna=skipna, level=level,
numeric_only=numeric_only, **kwargs))
else:
return self.T.min(axis=1, skipna=skipna, level=level,
numeric_only=numeric_only, **kwargs)
# TODO: doesn't work for multi-level indices
axis = self._row_index._get_axis_name(axis) if axis is not None \
else 'index'

min_series = pd.concat(ray.get(
_map_partitions(lambda df: df.min(axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs),
self._row_partitions if axis == 1 or axis == 'rows'
else self._col_partitions)))
min_series.index = self.columns
#min_series is a pandas.Series object
#return Series(min_series)
return min_series

def mod(self, other, axis='columns', level=None, fill_value=None):
raise NotImplementedError(
Expand Down
10 changes: 6 additions & 4 deletions python/ray/dataframe/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ def ray_df_equals_pandas(ray_df, pandas_df):
return to_pandas(ray_df).sort_index().equals(pandas_df.sort_index())


# Helper for the max/min tests: returns True when the two Series are equal
# (per `.equals`) after both have been sorted by index.
# Despite the parameter name, callers pass the result of `ray_df.max()` /
# `ray_df.min()` as `ray_df` and the matching pandas result as `pandas_df`;
# the first argument is compared as-is, with no `to_pandas` conversion.
@pytest.fixture
def ray_series_equals_pandas(ray_df, pandas_df):
return ray_df.sort_index().equals(pandas_df.sort_index())

@pytest.fixture
def ray_df_equals(ray_df1, ray_df2):
return to_pandas(ray_df1).sort_index().equals(
Expand Down Expand Up @@ -1737,8 +1741,7 @@ def test_mask():

@pytest.fixture
def test_max(ray_df, pandas_df):
return
assert(ray_df_equals_pandas(ray_df.max(), pandas_df.max()))
assert(ray_series_equals_pandas(ray_df.max(), pandas_df.max()))


def test_mean():
Expand Down Expand Up @@ -1778,8 +1781,7 @@ def test_merge():

@pytest.fixture
def test_min(ray_df, pandas_df):
return
assert(ray_df_equals_pandas(ray_df.min(), pandas_df.min()))
assert(ray_series_equals_pandas(ray_df.min(), pandas_df.min()))


def test_mod():
Expand Down

0 comments on commit 68470e7

Please sign in to comment.