Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: CategoricalBlock; combine Block.replace methods #40527

Merged
merged 1 commit into from
Mar 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
)
from pandas.core.internals.blocks import ( # io.pytables, io.packers
Block,
CategoricalBlock,
DatetimeBlock,
DatetimeTZBlock,
ExtensionBlock,
Expand All @@ -28,7 +27,6 @@

__all__ = [
"Block",
"CategoricalBlock",
"NumericBlock",
"DatetimeBlock",
"DatetimeTZBlock",
Expand All @@ -48,3 +46,18 @@
"create_block_manager_from_arrays",
"create_block_manager_from_blocks",
]


def __getattr__(name: str):
    """
    Module-level attribute fallback for names not found by normal lookup.

    Only the deprecated name ``CategoricalBlock`` is handled: requesting it
    emits a ``FutureWarning`` and returns ``ExtensionBlock`` in its place.

    Parameters
    ----------
    name : str
        The attribute name that was requested on the module.

    Returns
    -------
    type
        ``ExtensionBlock`` when ``name`` is ``"CategoricalBlock"``.

    Raises
    ------
    AttributeError
        For every other name.
    """
    import warnings

    if name != "CategoricalBlock":
        missing_msg = f"module 'pandas.core.internals' has no attribute '{name}'"
        raise AttributeError(missing_msg)

    deprecation_msg = (
        "CategoricalBlock is deprecated and will be removed in a future version. "
        "Use ExtensionBlock instead."
    )
    # stacklevel=2 attributes the warning to the code that accessed the
    # attribute rather than to this hook.
    warnings.warn(deprecation_msg, FutureWarning, stacklevel=2)
    return ExtensionBlock
64 changes: 22 additions & 42 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,7 @@ def copy(self, deep: bool = True):
# ---------------------------------------------------------------------
# Replace

@final
def replace(
self,
to_replace,
Expand All @@ -687,15 +688,30 @@ def replace(
"""
inplace = validate_bool_kwarg(inplace, "inplace")

# Note: the checks we do in NDFrame.replace ensure we never get
# here with listlike to_replace or value, as those cases
# go through _replace_list

values = self.values

if isinstance(values, Categorical):
# TODO: avoid special-casing
blk = self if inplace else self.copy()
blk.values.replace(to_replace, value, inplace=True)
return [blk]

regex = should_use_regex(regex, to_replace)

if regex:
return self._replace_regex(to_replace, value, inplace=inplace)

if not self._can_hold_element(to_replace):
# We cannot hold `to_replace`, so we know immediately that
# replacing it is a no-op.
# Note: If to_replace were a list, NDFrame.replace would call
# replace_list instead of replace.
return [self] if inplace else [self.copy()]

values = self.values

mask = missing.mask_missing(values, to_replace)
if not mask.any():
# Note: we get here with test_replace_extension_other incorrectly
Expand All @@ -720,7 +736,7 @@ def replace(
else:
# split so that we only upcast where necessary
return self.split_and_operate(
type(self).replace, to_replace, value, inplace=inplace, regex=regex
type(self).replace, to_replace, value, inplace=True, regex=regex
)

@final
Expand Down Expand Up @@ -1223,7 +1239,7 @@ def take_nd(
Take values according to indexer and return them as a block.

"""
# algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
# algos.take_nd dispatches for DatetimeTZBlock
# so need to preserve types
# sparse is treated like an ndarray, but needs .get_values() shaping

Expand Down Expand Up @@ -1422,7 +1438,7 @@ class ExtensionBlock(Block):
Notes
-----
This holds all 3rd-party extension array types. It's also the immediate
parent class for our internal extension types' blocks, CategoricalBlock.
parent class for our internal extension types' blocks.

ExtensionArrays are limited to 1-D.
"""
Expand Down Expand Up @@ -1579,7 +1595,6 @@ def take_nd(

def _can_hold_element(self, element: Any) -> bool:
# TODO: We may need to think about pushing this onto the array.
# We're doing the same as CategoricalBlock here.
return True

def _slice(self, slicer):
Expand Down Expand Up @@ -2019,41 +2034,6 @@ def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]:
def _can_hold_element(self, element: Any) -> bool:
return True

def replace(
    self,
    to_replace,
    value,
    inplace: bool = False,
    regex: bool = False,
) -> List[Block]:
    """
    Replace ``to_replace`` with ``value``, choosing the regex or plain path.

    ``should_use_regex`` decides whether the replacement is regex-based.
    If so, the work is delegated to ``self._replace_regex``; otherwise it
    goes to the parent class's ``replace`` with ``regex=False``.

    Parameters
    ----------
    to_replace : object
        The value (or pattern) to look for.
    value : object
        The replacement value.
    inplace : bool, default False
        Forwarded unchanged to whichever implementation handles the call.
    regex : bool, default False
        Requested regex mode; the effective mode comes from
        ``should_use_regex(regex, to_replace)``.

    Returns
    -------
    List[Block]
    """
    # Note: the checks we do in NDFrame.replace ensure we never get
    #  here with listlike to_replace or value, as those cases
    #  go through _replace_list

    use_regex = should_use_regex(regex, to_replace)

    if not use_regex:
        return super().replace(to_replace, value, inplace=inplace, regex=False)

    return self._replace_regex(to_replace, value, inplace=inplace)


class CategoricalBlock(ExtensionBlock):
    """
    ExtensionBlock subclass whose ``replace`` delegates to the values' own
    ``replace`` method.
    """

    __slots__ = ()

    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        regex: bool = False,
    ) -> List[Block]:
        """
        Replace ``to_replace`` with ``value`` via ``self.values.replace``.

        Parameters
        ----------
        to_replace : object
            Value to be replaced.
        value : object
            Replacement value.
        inplace : bool, default False
            If True, operate on this block; otherwise operate on a copy.
        regex : bool, default False
            Accepted for signature compatibility; not used by this method.

        Returns
        -------
        List[Block]
            A single-element list holding the block that was modified.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        if inplace:
            target = self
        else:
            target = self.copy()

        # The target's values are always mutated in place: when inplace is
        # False the target is already a copy.
        target.values.replace(to_replace, value, inplace=True)
        return [target]


# -----------------------------------------------------------------
# Constructor Helpers
Expand Down Expand Up @@ -2116,7 +2096,7 @@ def get_block_type(values, dtype: Optional[Dtype] = None):
# Need this first(ish) so that Sparse[datetime] is sparse
cls = ExtensionBlock
elif isinstance(dtype, CategoricalDtype):
cls = CategoricalBlock
cls = ExtensionBlock
elif vtype is Timestamp:
cls = DatetimeTZBlock
elif vtype is Interval or vtype is Period:
Expand Down
8 changes: 0 additions & 8 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
)
from pandas.core.internals.blocks import (
Block,
CategoricalBlock,
DatetimeTZBlock,
ExtensionBlock,
ObjectValuesExtensionBlock,
Expand Down Expand Up @@ -1867,13 +1866,6 @@ def _form_blocks(
object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_)
blocks.extend(object_blocks)

if len(items_dict["CategoricalBlock"]) > 0:
cat_blocks = [
new_block(array, klass=CategoricalBlock, placement=i, ndim=2)
for i, array in items_dict["CategoricalBlock"]
]
blocks.extend(cat_blocks)

if len(items_dict["ExtensionBlock"]):
external_blocks = [
new_block(array, klass=ExtensionBlock, placement=i, ndim=2)
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/internals/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ def test_namespace():
]
expected = [
"Block",
"CategoricalBlock",
"NumericBlock",
"DatetimeBlock",
"DatetimeTZBlock",
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ def test_read_expands_user_home_dir(
),
],
)
@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning")
def test_read_fspath_all(self, reader, module, path, datapath):
pytest.importorskip(module)
path = datapath(*path)
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

@filter_sparse
@pytest.mark.single
@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning")
class TestFeather:
def check_error_on_write(self, df, exc, err_msg):
# check that we are raising the exception
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,7 @@ def test_write_column_index_nonstring(self, pa):
self.check_error_on_write(df, engine, ValueError, msg)


@pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:FutureWarning")
class TestParquetPyArrow(Base):
def test_basic(self, pa, df_full):

Expand Down