Skip to content

Commit

Permalink
Add unique implementation as free function (#1537)
Browse files Browse the repository at this point in the history
Signed-off-by: Yaroslav Igoshev <yaroslav.igoshev@intel.com>
  • Loading branch information
YarShev committed Jun 4, 2020
1 parent 63cf839 commit 2d74813
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 3 deletions.
2 changes: 1 addition & 1 deletion docs/supported_apis/utilities_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ default to pandas.
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.eval`_ | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| `pd.unique`_ | D | |
| `pd.unique`_ | Y | |
+---------------------------+---------------------------------+----------------------------------------------------+
| ``pd.value_counts`` | D | |
+---------------------------+---------------------------------+----------------------------------------------------+
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

from pandas import (
eval,
unique,
value_counts,
cut,
to_numeric,
Expand Down Expand Up @@ -132,6 +131,7 @@
notnull,
notna,
pivot,
unique,
)
from .plotting import Plotting as plotting
from .. import __execution_engine__ as execution_engine
Expand Down Expand Up @@ -283,7 +283,6 @@ def import_pandas(*args):
"json_normalize",
"concat",
"eval",
"unique",
"value_counts",
"cut",
"to_numeric",
Expand Down Expand Up @@ -363,6 +362,7 @@ def import_pandas(*args):
"notnull",
"notna",
"pivot",
"unique",
"datetime",
"NamedAgg",
"DEFAULT_NPARTITIONS",
Expand Down
16 changes: 16 additions & 0 deletions modin/pandas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from modin.error_message import ErrorMessage
from .base import BasePandasDataset
from .dataframe import DataFrame
from .series import Series
from .utils import to_pandas


Expand Down Expand Up @@ -217,3 +218,18 @@ def pivot(data, index=None, columns=None, values=None):
if not isinstance(data, DataFrame):
raise ValueError("can not pivot with instance of type {}".format(type(data)))
return data.pivot(index=index, columns=columns, values=values)


def unique(values):
    """
    Compute the distinct values found in the input.

    The input is wrapped in a ``Series`` and the work is delegated to
    ``Series.unique``. Uniques come back in order of appearance; the
    lookup is hash table-based and therefore does NOT sort.

    Parameters
    ----------
    values : list, Series, Index or similar
        Data to deduplicate; passed unchanged to the ``Series`` constructor.

    Returns
    -------
    ndarray
        The unique values returned as a NumPy array.
    """
    wrapped = Series(values)
    return wrapped.unique()
59 changes: 59 additions & 0 deletions modin/pandas/test/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import pytest
import modin.pandas as pd
import numpy as np
from numpy.testing import assert_array_equal

from .utils import test_data_values, test_data_keys, df_equals

Expand Down Expand Up @@ -260,6 +261,64 @@ def test_pivot_table():
)


def test_unique():
    """Check that ``pd.unique`` matches ``pandas.unique`` on a range of inputs."""

    def eastern_pair(lib):
        # Two identical tz-aware timestamps built with the given library.
        return [
            lib.Timestamp("20160101", tz="US/Eastern"),
            lib.Timestamp("20160101", tz="US/Eastern"),
        ]

    # Each builder takes a library module (modin.pandas or pandas) and returns
    # the input for ``unique``, so both libraries see equivalent data built
    # from their own types.
    input_builders = [
        # Plain Python list.
        lambda lib: [2, 1, 3, 3],
        # Integer Series with repeated values.
        lambda lib: lib.Series([2] + [1] * 5),
        # Series of tz-naive timestamps.
        lambda lib: lib.Series(
            [lib.Timestamp("20160101"), lib.Timestamp("20160101")]
        ),
        # Series of tz-aware timestamps.
        lambda lib: lib.Series(eastern_pair(lib)),
        # Index of tz-aware timestamps.
        lambda lib: lib.Index(eastern_pair(lib)),
        # Categorical-backed Series.
        lambda lib: lib.Series(lib.Categorical(list("baabc"))),
    ]

    for build in input_builders:
        modin_result = pd.unique(build(pd))
        pandas_result = pandas.unique(build(pandas))
        assert_array_equal(modin_result, pandas_result)


def test_to_datetime():
# DataFrame input for to_datetime
modin_df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
Expand Down

0 comments on commit 2d74813

Please sign in to comment.