Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

in tests, change pd.arrays.SparseArray to SparseArray #30765

Merged
1 commit merged on Jan 8, 2020
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
94 changes: 42 additions & 52 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@
import pandas._testing as tm
from pandas.api.extensions import register_extension_dtype
from pandas.api.types import is_scalar
from pandas.arrays import (
BooleanArray,
DatetimeArray,
IntegerArray,
IntervalArray,
SparseArray,
StringArray,
TimedeltaArray,
)
from pandas.core.arrays import PandasArray, integer_array, period_array
from pandas.tests.extension.decimal import DecimalArray, DecimalDtype, to_decimal

Expand All @@ -19,18 +28,14 @@
"data, dtype, expected",
[
# Basic NumPy defaults.
([1, 2], None, pd.arrays.IntegerArray._from_sequence([1, 2])),
([1, 2], None, IntegerArray._from_sequence([1, 2])),
([1, 2], object, PandasArray(np.array([1, 2], dtype=object))),
(
[1, 2],
np.dtype("float32"),
PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))),
),
(
np.array([1, 2], dtype="int64"),
None,
pd.arrays.IntegerArray._from_sequence([1, 2]),
),
(np.array([1, 2], dtype="int64"), None, IntegerArray._from_sequence([1, 2]),),
# String alias passes through to NumPy
([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))),
# Period alias
Expand All @@ -49,55 +54,51 @@
(
[1, 2],
np.dtype("datetime64[ns]"),
pd.arrays.DatetimeArray._from_sequence(
np.array([1, 2], dtype="datetime64[ns]")
),
DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")),
),
(
np.array([1, 2], dtype="datetime64[ns]"),
None,
pd.arrays.DatetimeArray._from_sequence(
np.array([1, 2], dtype="datetime64[ns]")
),
DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")),
),
(
pd.DatetimeIndex(["2000", "2001"]),
np.dtype("datetime64[ns]"),
pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
DatetimeArray._from_sequence(["2000", "2001"]),
),
(
pd.DatetimeIndex(["2000", "2001"]),
None,
pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
DatetimeArray._from_sequence(["2000", "2001"]),
),
(
["2000", "2001"],
np.dtype("datetime64[ns]"),
pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
DatetimeArray._from_sequence(["2000", "2001"]),
),
# Datetime (tz-aware)
(
["2000", "2001"],
pd.DatetimeTZDtype(tz="CET"),
pd.arrays.DatetimeArray._from_sequence(
DatetimeArray._from_sequence(
["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET")
),
),
# Timedelta
(
["1H", "2H"],
np.dtype("timedelta64[ns]"),
pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
TimedeltaArray._from_sequence(["1H", "2H"]),
),
(
pd.TimedeltaIndex(["1H", "2H"]),
np.dtype("timedelta64[ns]"),
pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
TimedeltaArray._from_sequence(["1H", "2H"]),
),
(
pd.TimedeltaIndex(["1H", "2H"]),
None,
pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
TimedeltaArray._from_sequence(["1H", "2H"]),
),
# Category
(["a", "b"], "category", pd.Categorical(["a", "b"])),
Expand All @@ -110,27 +111,19 @@
(
[pd.Interval(1, 2), pd.Interval(3, 4)],
"interval",
pd.arrays.IntervalArray.from_tuples([(1, 2), (3, 4)]),
IntervalArray.from_tuples([(1, 2), (3, 4)]),
),
# Sparse
([0, 1], "Sparse[int64]", pd.arrays.SparseArray([0, 1], dtype="int64")),
([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")),
# IntegerNA
([1, None], "Int16", integer_array([1, None], dtype="Int16")),
(pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
# String
(["a", None], "string", pd.arrays.StringArray._from_sequence(["a", None])),
(
["a", None],
pd.StringDtype(),
pd.arrays.StringArray._from_sequence(["a", None]),
),
(["a", None], "string", StringArray._from_sequence(["a", None])),
(["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None]),),
# Boolean
([True, None], "boolean", pd.arrays.BooleanArray._from_sequence([True, None])),
(
[True, None],
pd.BooleanDtype(),
pd.arrays.BooleanArray._from_sequence([True, None]),
),
([True, None], "boolean", BooleanArray._from_sequence([True, None])),
([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None]),),
# Index
(pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
# Series[EA] returns the EA
Expand Down Expand Up @@ -181,31 +174,28 @@ def test_array_copy():
period_array(["2000", "2001"], freq="D"),
),
# interval
(
[pd.Interval(0, 1), pd.Interval(1, 2)],
pd.arrays.IntervalArray.from_breaks([0, 1, 2]),
),
([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2]),),
# datetime
(
[pd.Timestamp("2000"), pd.Timestamp("2001")],
pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
DatetimeArray._from_sequence(["2000", "2001"]),
),
(
[datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
pd.arrays.DatetimeArray._from_sequence(["2000", "2001"]),
DatetimeArray._from_sequence(["2000", "2001"]),
),
(
np.array([1, 2], dtype="M8[ns]"),
pd.arrays.DatetimeArray(np.array([1, 2], dtype="M8[ns]")),
DatetimeArray(np.array([1, 2], dtype="M8[ns]")),
),
(
np.array([1, 2], dtype="M8[us]"),
pd.arrays.DatetimeArray(np.array([1000, 2000], dtype="M8[ns]")),
DatetimeArray(np.array([1000, 2000], dtype="M8[ns]")),
),
# datetimetz
(
[pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
pd.arrays.DatetimeArray._from_sequence(
DatetimeArray._from_sequence(
["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET")
),
),
Expand All @@ -214,30 +204,30 @@ def test_array_copy():
datetime.datetime(2000, 1, 1, tzinfo=cet),
datetime.datetime(2001, 1, 1, tzinfo=cet),
],
pd.arrays.DatetimeArray._from_sequence(["2000", "2001"], tz=cet),
DatetimeArray._from_sequence(["2000", "2001"], tz=cet),
),
# timedelta
(
[pd.Timedelta("1H"), pd.Timedelta("2H")],
pd.arrays.TimedeltaArray._from_sequence(["1H", "2H"]),
TimedeltaArray._from_sequence(["1H", "2H"]),
),
(
np.array([1, 2], dtype="m8[ns]"),
pd.arrays.TimedeltaArray(np.array([1, 2], dtype="m8[ns]")),
TimedeltaArray(np.array([1, 2], dtype="m8[ns]")),
),
(
np.array([1, 2], dtype="m8[us]"),
pd.arrays.TimedeltaArray(np.array([1000, 2000], dtype="m8[ns]")),
TimedeltaArray(np.array([1000, 2000], dtype="m8[ns]")),
),
# integer
([1, 2], pd.arrays.IntegerArray._from_sequence([1, 2])),
([1, None], pd.arrays.IntegerArray._from_sequence([1, None])),
([1, 2], IntegerArray._from_sequence([1, 2])),
([1, None], IntegerArray._from_sequence([1, None])),
# string
(["a", "b"], pd.arrays.StringArray._from_sequence(["a", "b"])),
(["a", None], pd.arrays.StringArray._from_sequence(["a", None])),
(["a", "b"], StringArray._from_sequence(["a", "b"])),
(["a", None], StringArray._from_sequence(["a", None])),
# Boolean
([True, False], pd.arrays.BooleanArray._from_sequence([True, False])),
([True, None], pd.arrays.BooleanArray._from_sequence([True, None])),
([True, False], BooleanArray._from_sequence([True, False])),
([True, None], BooleanArray._from_sequence([True, None])),
],
)
def test_array_inference(data, expected):
Expand Down
9 changes: 5 additions & 4 deletions pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import pandas as pd
import pandas._testing as tm
from pandas.arrays import SparseArray
from pandas.conftest import (
ALL_EA_INT_DTYPES,
ALL_INT_DTYPES,
Expand Down Expand Up @@ -182,7 +183,7 @@ def test_is_object():
"check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
)
def test_is_sparse(check_scipy):
assert com.is_sparse(pd.arrays.SparseArray([1, 2, 3]))
assert com.is_sparse(SparseArray([1, 2, 3]))

assert not com.is_sparse(np.array([1, 2, 3]))

Expand All @@ -198,7 +199,7 @@ def test_is_scipy_sparse():

assert com.is_scipy_sparse(bsr_matrix([1, 2, 3]))

assert not com.is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3]))
assert not com.is_scipy_sparse(SparseArray([1, 2, 3]))


def test_is_categorical():
Expand Down Expand Up @@ -576,7 +577,7 @@ def test_is_extension_type(check_scipy):
cat = pd.Categorical([1, 2, 3])
assert com.is_extension_type(cat)
assert com.is_extension_type(pd.Series(cat))
assert com.is_extension_type(pd.arrays.SparseArray([1, 2, 3]))
assert com.is_extension_type(SparseArray([1, 2, 3]))
assert com.is_extension_type(pd.DatetimeIndex(["2000"], tz="US/Eastern"))

dtype = DatetimeTZDtype("ns", tz="US/Eastern")
Expand Down Expand Up @@ -605,7 +606,7 @@ def test_is_extension_array_dtype(check_scipy):
cat = pd.Categorical([1, 2, 3])
assert com.is_extension_array_dtype(cat)
assert com.is_extension_array_dtype(pd.Series(cat))
assert com.is_extension_array_dtype(pd.arrays.SparseArray([1, 2, 3]))
assert com.is_extension_array_dtype(SparseArray([1, 2, 3]))
assert com.is_extension_array_dtype(pd.DatetimeIndex(["2000"], tz="US/Eastern"))

dtype = DatetimeTZDtype("ns", tz="US/Eastern")
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import pandas as pd
from pandas import Categorical, CategoricalIndex, IntervalIndex, Series, date_range
import pandas._testing as tm
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.arrays.sparse import SparseArray, SparseDtype


class Base:
Expand Down Expand Up @@ -914,7 +914,7 @@ def test_registry_find(dtype, expected):
(pd.Series([1, 2]), False),
(np.array([True, False]), True),
(pd.Series([True, False]), True),
(pd.arrays.SparseArray([True, False]), True),
(SparseArray([True, False]), True),
(SparseDtype(bool), True),
],
)
Expand All @@ -924,7 +924,7 @@ def test_is_bool_dtype(dtype, expected):


def test_is_bool_dtype_sparse():
result = is_bool_dtype(pd.Series(pd.arrays.SparseArray([True, False])))
result = is_bool_dtype(pd.Series(SparseArray([True, False])))
assert result is True


Expand Down
9 changes: 5 additions & 4 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
notna,
)
import pandas._testing as tm
from pandas.arrays import SparseArray
import pandas.core.common as com
from pandas.core.indexing import IndexingError

Expand Down Expand Up @@ -1776,7 +1777,7 @@ def test_getitem_ix_float_duplicates(self):

def test_getitem_sparse_column(self):
# https://github.com/pandas-dev/pandas/issues/23559
data = pd.arrays.SparseArray([0, 1])
data = SparseArray([0, 1])
df = pd.DataFrame({"A": data})
expected = pd.Series(data, name="A")
result = df["A"]
Expand All @@ -1791,17 +1792,17 @@ def test_getitem_sparse_column(self):
def test_setitem_with_sparse_value(self):
# GH8131
df = pd.DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
sp_array = pd.arrays.SparseArray([0, 0, 1])
sp_array = SparseArray([0, 0, 1])
df["new_column"] = sp_array
tm.assert_series_equal(
df["new_column"], pd.Series(sp_array, name="new_column"), check_names=False
)

def test_setitem_with_unaligned_sparse_value(self):
df = pd.DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
sp_series = pd.Series(pd.arrays.SparseArray([0, 0, 1]), index=[2, 1, 0])
sp_series = pd.Series(SparseArray([0, 0, 1]), index=[2, 1, 0])
df["new_column"] = sp_series
exp = pd.Series(pd.arrays.SparseArray([1, 0, 0]), name="new_column")
exp = pd.Series(SparseArray([1, 0, 0]), name="new_column")
tm.assert_series_equal(df["new_column"], exp)

def test_setitem_with_unaligned_tz_aware_datetime_column(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
isna,
)
import pandas._testing as tm
from pandas.arrays import IntervalArray, PeriodArray
from pandas.arrays import IntervalArray, PeriodArray, SparseArray
from pandas.core.construction import create_series_with_explicit_dtype

MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"]
Expand Down Expand Up @@ -2414,7 +2414,7 @@ class List(list):
"extension_arr",
[
Categorical(list("aabbc")),
pd.arrays.SparseArray([1, np.nan, np.nan, np.nan]),
SparseArray([1, np.nan, np.nan, np.nan]),
IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
PeriodArray(pd.period_range(start="1/1/2017", end="1/1/2018", freq="M")),
],
Expand Down
Loading