diff --git a/docs/supported_apis/utilities_supported.rst b/docs/supported_apis/utilities_supported.rst
index 419b9ec8006..894a8a97f6b 100644
--- a/docs/supported_apis/utilities_supported.rst
+++ b/docs/supported_apis/utilities_supported.rst
@@ -19,7 +19,7 @@ default to pandas.
 +---------------------------+---------------------------------+----------------------------------------------------+
 | `pd.eval`_                | Y                               |                                                    |
 +---------------------------+---------------------------------+----------------------------------------------------+
-| `pd.unique`_              | D                               |                                                    |
+| `pd.unique`_              | Y                               |                                                    |
 +---------------------------+---------------------------------+----------------------------------------------------+
 | ``pd.value_counts``       | D                               |                                                    |
 +---------------------------+---------------------------------+----------------------------------------------------+
diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
index ab61e2a5821..6e79f2c7548 100644
--- a/modin/pandas/__init__.py
+++ b/modin/pandas/__init__.py
@@ -27,7 +27,6 @@
 
 from pandas import (
     eval,
-    unique,
     value_counts,
     cut,
     to_numeric,
@@ -132,6 +131,7 @@
     notnull,
     notna,
     pivot,
+    unique,
 )
 from .plotting import Plotting as plotting
 from .. import __execution_engine__ as execution_engine
@@ -283,7 +283,6 @@ def import_pandas(*args):
     "json_normalize",
     "concat",
     "eval",
-    "unique",
     "value_counts",
     "cut",
     "to_numeric",
@@ -363,6 +362,7 @@ def import_pandas(*args):
     "notnull",
     "notna",
     "pivot",
+    "unique",
     "datetime",
     "NamedAgg",
     "DEFAULT_NPARTITIONS",
diff --git a/modin/pandas/general.py b/modin/pandas/general.py
index 636d98f80c2..8d1f29f0b89 100644
--- a/modin/pandas/general.py
+++ b/modin/pandas/general.py
@@ -16,6 +16,7 @@
 from modin.error_message import ErrorMessage
 from .base import BasePandasDataset
 from .dataframe import DataFrame
+from .series import Series
 from .utils import to_pandas
 
 
@@ -217,3 +218,23 @@ def pivot(data, index=None, columns=None, values=None):
     if not isinstance(data, DataFrame):
         raise ValueError("can not pivot with instance of type {}".format(type(data)))
     return data.pivot(index=index, columns=columns, values=values)
+
+
+def unique(values):
+    """
+    Return unique values of input data.
+
+    Uniques are returned in order of appearance. Hash table-based unique,
+    therefore does NOT sort.
+
+    Parameters
+    ----------
+    values : list, Series, Index or other 1-D array-like
+        Input data; it is wrapped in a ``Series`` before deduplication.
+
+    Returns
+    -------
+    ndarray
+        The unique values returned as a NumPy array.
+    """
+    return Series(values).unique()
diff --git a/modin/pandas/test/test_general.py b/modin/pandas/test/test_general.py
index c5d93e88a92..d7418792f30 100644
--- a/modin/pandas/test/test_general.py
+++ b/modin/pandas/test/test_general.py
@@ -15,6 +15,7 @@
 import pytest
 import modin.pandas as pd
 import numpy as np
+from numpy.testing import assert_array_equal
 
 from .utils import test_data_values, test_data_keys, df_equals
 
@@ -260,6 +261,64 @@ def test_pivot_table():
     )
 
 
+def test_unique():
+    modin_result = pd.unique([2, 1, 3, 3])
+    pandas_result = pandas.unique([2, 1, 3, 3])
+    assert_array_equal(modin_result, pandas_result)
+
+    modin_result = pd.unique(pd.Series([2] + [1] * 5))
+    pandas_result = pandas.unique(pandas.Series([2] + [1] * 5))
+    assert_array_equal(modin_result, pandas_result)
+
+    modin_result = pd.unique(
+        pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])
+    )
+    pandas_result = pandas.unique(
+        pandas.Series([pandas.Timestamp("20160101"), pandas.Timestamp("20160101")])
+    )
+    assert_array_equal(modin_result, pandas_result)
+
+    modin_result = pd.unique(
+        pd.Series(
+            [
+                pd.Timestamp("20160101", tz="US/Eastern"),
+                pd.Timestamp("20160101", tz="US/Eastern"),
+            ]
+        )
+    )
+    pandas_result = pandas.unique(
+        pandas.Series(
+            [
+                pandas.Timestamp("20160101", tz="US/Eastern"),
+                pandas.Timestamp("20160101", tz="US/Eastern"),
+            ]
+        )
+    )
+    assert_array_equal(modin_result, pandas_result)
+
+    modin_result = pd.unique(
+        pd.Index(
+            [
+                pd.Timestamp("20160101", tz="US/Eastern"),
+                pd.Timestamp("20160101", tz="US/Eastern"),
+            ]
+        )
+    )
+    pandas_result = pandas.unique(
+        pandas.Index(
+            [
+                pandas.Timestamp("20160101", tz="US/Eastern"),
+                pandas.Timestamp("20160101", tz="US/Eastern"),
+            ]
+        )
+    )
+    assert_array_equal(modin_result, pandas_result)
+
+    modin_result = pd.unique(pd.Series(pd.Categorical(list("baabc"))))
+    pandas_result = pandas.unique(pandas.Series(pandas.Categorical(list("baabc"))))
+    assert_array_equal(modin_result, pandas_result)
+
+
 def test_to_datetime():
     # DataFrame input for to_datetime
     modin_df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})