Skip to content

Commit

Permalink
Merge branch 'master' into dev/yigoshev-value_counts
Browse files Browse the repository at this point in the history
  • Loading branch information
YarShev committed Jun 5, 2020
2 parents b31375a + 2d74813 commit 8b2c4aa
Show file tree
Hide file tree
Showing 8 changed files with 392 additions and 5 deletions.
2 changes: 1 addition & 1 deletion docs/supported_apis/series_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ the related section on `Defaulting to pandas`_.
+-----------------------------+---------------------------------+
| ``dropna`` | Y |
+-----------------------------+---------------------------------+
| ``dt`` | D |
| ``dt`` | Y |
+-----------------------------+---------------------------------+
| ``dtype`` | Y |
+-----------------------------+---------------------------------+
Expand Down
2 changes: 1 addition & 1 deletion docs/supported_apis/utilities_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ default to pandas.
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.eval`_ | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.unique`_ | D | |
| `pd.unique`_ | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pd.value_counts`` | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
Expand Down
99 changes: 99 additions & 0 deletions modin/backends/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,61 @@ def str_op_builder(df, *args, **kwargs):
return str_op_builder


def _dt_prop_map(property_name):
    """
    Create a function that reads a property of the `dt` accessor of a series.

    Parameters
    ----------
    property_name : str
        Name of the `Series.dt` property to read.

    Returns
    -------
    callable
        A function to be applied in the partitions. It accepts a
        single-column frame (extra positional and keyword arguments are
        accepted and ignored) and returns a ``pandas.DataFrame``.

    Notes
    -----
    This applies non-callable properties of `Series.dt`.
    """

    def dt_op_builder(df, *args, **kwargs):
        # Squeeze the column axis only: a bare `squeeze()` would also collapse
        # a one-row partition into a scalar, which has no `.dt` accessor.
        prop_val = getattr(df.squeeze(axis=1).dt, property_name)
        if isinstance(prop_val, pandas.Series):
            return prop_val.to_frame()
        # Non-Series results (e.g. `tz`, `freq`) are wrapped in a one-cell frame.
        return pandas.DataFrame([prop_val])

    return dt_op_builder


def _dt_func_map(func_name):
    """
    Create a function that calls a method of the `dt` accessor of a series.

    Parameters
    ----------
    func_name : str
        Name of the `Series.dt` method to call.

    Returns
    -------
    callable
        A function to be applied in the partitions. It accepts a
        single-column frame, forwards any extra positional and keyword
        arguments to the `dt` method, and returns the result wrapped in a
        ``pandas.DataFrame``.

    Notes
    -----
    This applies callable methods of `Series.dt`.
    """

    def dt_op_builder(df, *args, **kwargs):
        # Squeeze the column axis only: a bare `squeeze()` would also collapse
        # a one-row partition into a scalar, which has no `.dt` accessor.
        dt_accessor = df.squeeze(axis=1).dt
        # Call the method on the accessor instance itself rather than through
        # the unbound `pandas.Series.dt` class attribute.
        result = getattr(dt_accessor, func_name)(*args, **kwargs)
        return pandas.DataFrame(result)

    return dt_op_builder


def copy_df_for_func(func):
"""Create a function that copies the dataframe, likely because `func` is inplace.
Expand Down Expand Up @@ -506,6 +561,50 @@ def unique(self):
)
return self.__constructor__(new_modin_frame)

# Dt map partitions operations
#
# Each `dt_<name>` attribute below is produced by passing a builder created
# with `_dt_prop_map` / `_dt_func_map` to `MapFunction.register`.
# NOTE(review): `MapFunction.register` is defined outside this view; it
# presumably applies the builder to every partition -- confirm.

# Non-callable `Series.dt` properties (builders from `_dt_prop_map`).
dt_date = MapFunction.register(_dt_prop_map("date"))
dt_time = MapFunction.register(_dt_prop_map("time"))
dt_timetz = MapFunction.register(_dt_prop_map("timetz"))
dt_year = MapFunction.register(_dt_prop_map("year"))
dt_month = MapFunction.register(_dt_prop_map("month"))
dt_day = MapFunction.register(_dt_prop_map("day"))
dt_hour = MapFunction.register(_dt_prop_map("hour"))
dt_minute = MapFunction.register(_dt_prop_map("minute"))
dt_second = MapFunction.register(_dt_prop_map("second"))
dt_microsecond = MapFunction.register(_dt_prop_map("microsecond"))
dt_nanosecond = MapFunction.register(_dt_prop_map("nanosecond"))
dt_week = MapFunction.register(_dt_prop_map("week"))
dt_weekofyear = MapFunction.register(_dt_prop_map("weekofyear"))
dt_dayofweek = MapFunction.register(_dt_prop_map("dayofweek"))
dt_weekday = MapFunction.register(_dt_prop_map("weekday"))
dt_dayofyear = MapFunction.register(_dt_prop_map("dayofyear"))
dt_quarter = MapFunction.register(_dt_prop_map("quarter"))
dt_is_month_start = MapFunction.register(_dt_prop_map("is_month_start"))
dt_is_month_end = MapFunction.register(_dt_prop_map("is_month_end"))
dt_is_quarter_start = MapFunction.register(_dt_prop_map("is_quarter_start"))
dt_is_quarter_end = MapFunction.register(_dt_prop_map("is_quarter_end"))
dt_is_year_start = MapFunction.register(_dt_prop_map("is_year_start"))
dt_is_year_end = MapFunction.register(_dt_prop_map("is_year_end"))
dt_is_leap_year = MapFunction.register(_dt_prop_map("is_leap_year"))
dt_daysinmonth = MapFunction.register(_dt_prop_map("daysinmonth"))
dt_days_in_month = MapFunction.register(_dt_prop_map("days_in_month"))
# `tz` and `freq` are not Series-valued in the usual case; the
# `_dt_prop_map` builder wraps any non-Series value in a one-cell DataFrame.
dt_tz = MapFunction.register(_dt_prop_map("tz"))
dt_freq = MapFunction.register(_dt_prop_map("freq"))
# Callable `Series.dt` methods (builders from `_dt_func_map`); the builder
# forwards the positional/keyword arguments it receives to the `dt` method.
dt_to_period = MapFunction.register(_dt_func_map("to_period"))
dt_to_pydatetime = MapFunction.register(_dt_func_map("to_pydatetime"))
dt_tz_localize = MapFunction.register(_dt_func_map("tz_localize"))
dt_tz_convert = MapFunction.register(_dt_func_map("tz_convert"))
dt_normalize = MapFunction.register(_dt_func_map("normalize"))
dt_strftime = MapFunction.register(_dt_func_map("strftime"))
dt_round = MapFunction.register(_dt_func_map("round"))
dt_floor = MapFunction.register(_dt_func_map("floor"))
dt_ceil = MapFunction.register(_dt_func_map("ceil"))
dt_month_name = MapFunction.register(_dt_func_map("month_name"))
dt_day_name = MapFunction.register(_dt_func_map("day_name"))

# END Dt map partitions operations

def astype(self, col_dtypes, **kwargs):
"""Converts columns dtypes to given dtypes.
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

from pandas import (
eval,
unique,
cut,
to_numeric,
factorize,
Expand Down Expand Up @@ -131,6 +130,7 @@
notnull,
notna,
pivot,
unique,
value_counts,
)
from .plotting import Plotting as plotting
Expand Down Expand Up @@ -283,7 +283,6 @@ def import_pandas(*args):
"json_normalize",
"concat",
"eval",
"unique",
"cut",
"to_numeric",
"factorize",
Expand Down Expand Up @@ -362,6 +361,7 @@ def import_pandas(*args):
"notnull",
"notna",
"pivot",
"unique",
"value_counts",
"datetime",
"NamedAgg",
Expand Down
15 changes: 15 additions & 0 deletions modin/pandas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,21 @@ def pivot(data, index=None, columns=None, values=None):
return data.pivot(index=index, columns=columns, values=values)


def unique(values):
    """
    Return the distinct values of the input data.

    The input is wrapped in a ``Series`` and that object's ``unique`` method
    produces the result. Uniques are returned in order of appearance; the
    computation is hash table-based and therefore does NOT sort.

    Parameters
    ----------
    values
        Data handed to the ``Series`` constructor.

    Returns
    -------
    ndarray
        The unique values returned as a NumPy array.
    """
    as_series = Series(values)
    return as_series.unique()


def value_counts(
values, sort=True, ascending=False, normalize=False, bins=None, dropna=True,
):
Expand Down
159 changes: 158 additions & 1 deletion modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1338,7 +1338,7 @@ def cat(self):

@property
def dt(self):
    """Return the ``DatetimeProperties`` accessor built around this series."""
    # A single return: an earlier `_default_to_pandas(pandas.Series.dt)`
    # return placed ahead of this one would make the accessor unreachable.
    return DatetimeProperties(self)

@property
def dtype(self):
Expand Down Expand Up @@ -1426,6 +1426,163 @@ def _to_pandas(self):
return series


class DatetimeProperties(object):
    """
    Accessor object exposing datetime-like properties and methods of a series.

    Every member calls the matching ``dt_*`` method of the series' query
    compiler. Most results are wrapped in a new ``Series``; ``tz`` and
    ``freq`` are converted with ``to_pandas().squeeze()`` and
    ``to_pydatetime`` is converted with ``to_numpy()``.
    """

    def __init__(self, series):
        # Keep both the series and its query compiler; the members below
        # only go through the query compiler.
        self._query_compiler = series._query_compiler
        self._series = series

    def _wrap(self, query_compiler):
        """Build a ``Series`` backed by the given query compiler."""
        return Series(query_compiler=query_compiler)

    # ----- properties returning a Series -----

    @property
    def date(self):
        """Result of ``dt_date`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_date())

    @property
    def time(self):
        """Result of ``dt_time`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_time())

    @property
    def timetz(self):
        """Result of ``dt_timetz`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_timetz())

    @property
    def year(self):
        """Result of ``dt_year`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_year())

    @property
    def month(self):
        """Result of ``dt_month`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_month())

    @property
    def day(self):
        """Result of ``dt_day`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_day())

    @property
    def hour(self):
        """Result of ``dt_hour`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_hour())

    @property
    def minute(self):
        """Result of ``dt_minute`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_minute())

    @property
    def second(self):
        """Result of ``dt_second`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_second())

    @property
    def microsecond(self):
        """Result of ``dt_microsecond`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_microsecond())

    @property
    def nanosecond(self):
        """Result of ``dt_nanosecond`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_nanosecond())

    @property
    def week(self):
        """Result of ``dt_week`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_week())

    @property
    def weekofyear(self):
        """Result of ``dt_weekofyear`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_weekofyear())

    @property
    def dayofweek(self):
        """Result of ``dt_dayofweek`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_dayofweek())

    @property
    def weekday(self):
        """Result of ``dt_weekday`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_weekday())

    @property
    def dayofyear(self):
        """Result of ``dt_dayofyear`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_dayofyear())

    @property
    def quarter(self):
        """Result of ``dt_quarter`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_quarter())

    @property
    def is_month_start(self):
        """Result of ``dt_is_month_start`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_is_month_start())

    @property
    def is_month_end(self):
        """Result of ``dt_is_month_end`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_is_month_end())

    @property
    def is_quarter_start(self):
        """Result of ``dt_is_quarter_start`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_is_quarter_start())

    @property
    def is_quarter_end(self):
        """Result of ``dt_is_quarter_end`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_is_quarter_end())

    @property
    def is_year_start(self):
        """Result of ``dt_is_year_start`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_is_year_start())

    @property
    def is_year_end(self):
        """Result of ``dt_is_year_end`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_is_year_end())

    @property
    def is_leap_year(self):
        """Result of ``dt_is_leap_year`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_is_leap_year())

    @property
    def daysinmonth(self):
        """Result of ``dt_daysinmonth`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_daysinmonth())

    @property
    def days_in_month(self):
        """Result of ``dt_days_in_month`` as a ``Series``."""
        return self._wrap(self._query_compiler.dt_days_in_month())

    # ----- properties converted via to_pandas().squeeze() -----

    @property
    def tz(self):
        """Result of ``dt_tz`` after ``to_pandas().squeeze()``."""
        tz_frame = self._query_compiler.dt_tz().to_pandas()
        return tz_frame.squeeze()

    @property
    def freq(self):
        """Result of ``dt_freq`` after ``to_pandas().squeeze()``."""
        freq_frame = self._query_compiler.dt_freq().to_pandas()
        return freq_frame.squeeze()

    # ----- methods -----

    def to_period(self, *args, **kwargs):
        """Forward the arguments to ``dt_to_period``; return a ``Series``."""
        return self._wrap(self._query_compiler.dt_to_period(*args, **kwargs))

    def to_pydatetime(self):
        """Call ``dt_to_pydatetime`` and return the ``to_numpy()`` of the resulting ``Series``."""
        as_series = self._wrap(self._query_compiler.dt_to_pydatetime())
        return as_series.to_numpy()

    def tz_localize(self, *args, **kwargs):
        """Forward the arguments to ``dt_tz_localize``; return a ``Series``."""
        return self._wrap(self._query_compiler.dt_tz_localize(*args, **kwargs))

    def tz_convert(self, *args, **kwargs):
        """Forward the arguments to ``dt_tz_convert``; return a ``Series``."""
        return self._wrap(self._query_compiler.dt_tz_convert(*args, **kwargs))

    def normalize(self, *args, **kwargs):
        """Forward the arguments to ``dt_normalize``; return a ``Series``."""
        return self._wrap(self._query_compiler.dt_normalize(*args, **kwargs))

    def strftime(self, *args, **kwargs):
        """Forward the arguments to ``dt_strftime``; return a ``Series``."""
        return self._wrap(self._query_compiler.dt_strftime(*args, **kwargs))

    def round(self, *args, **kwargs):
        """Forward the arguments to ``dt_round``; return a ``Series``."""
        return self._wrap(self._query_compiler.dt_round(*args, **kwargs))

    def floor(self, *args, **kwargs):
        """Forward the arguments to ``dt_floor``; return a ``Series``."""
        return self._wrap(self._query_compiler.dt_floor(*args, **kwargs))

    def ceil(self, *args, **kwargs):
        """Forward the arguments to ``dt_ceil``; return a ``Series``."""
        return self._wrap(self._query_compiler.dt_ceil(*args, **kwargs))

    def month_name(self, *args, **kwargs):
        """Forward the arguments to ``dt_month_name``; return a ``Series``."""
        return self._wrap(self._query_compiler.dt_month_name(*args, **kwargs))

    def day_name(self, *args, **kwargs):
        """Forward the arguments to ``dt_day_name``; return a ``Series``."""
        return self._wrap(self._query_compiler.dt_day_name(*args, **kwargs))


class StringMethods(object):
def __init__(self, series):
# Check if dtypes is objects
Expand Down
Loading

0 comments on commit 8b2c4aa

Please sign in to comment.