Skip to content

Commit

Permalink
CLN: remove Block._try_coerce_arg (pandas-dev#29139)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and proost committed Dec 19, 2019
1 parent 81d726f commit e48d606
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 131 deletions.
22 changes: 12 additions & 10 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ cnp.import_array()
cimport pandas._libs.util as util

from pandas._libs.tslibs.conversion cimport maybe_datetimelike_to_i8
from pandas._libs.tslibs.nattype cimport c_NaT as NaT

from pandas._libs.hashtable cimport HashTable

Expand Down Expand Up @@ -547,30 +548,31 @@ cpdef convert_scalar(ndarray arr, object value):
if util.is_array(value):
pass
elif isinstance(value, (datetime, np.datetime64, date)):
return Timestamp(value).value
return Timestamp(value).to_datetime64()
elif util.is_timedelta64_object(value):
# exclude np.timedelta64("NaT") from value != value below
pass
elif value is None or value != value:
return NPY_NAT
elif isinstance(value, str):
return Timestamp(value).value
raise ValueError("cannot set a Timestamp with a non-timestamp")
return np.datetime64("NaT", "ns")
raise ValueError("cannot set a Timestamp with a non-timestamp {typ}"
.format(typ=type(value).__name__))

elif arr.descr.type_num == NPY_TIMEDELTA:
if util.is_array(value):
pass
elif isinstance(value, timedelta) or util.is_timedelta64_object(value):
return Timedelta(value).value
value = Timedelta(value)
if value is NaT:
return np.timedelta64("NaT", "ns")
return value.to_timedelta64()
elif util.is_datetime64_object(value):
# exclude np.datetime64("NaT") which would otherwise be picked up
# by the `value != value check below
pass
elif value is None or value != value:
return NPY_NAT
elif isinstance(value, str):
return Timedelta(value).value
raise ValueError("cannot set a Timedelta with a non-timedelta")
return np.timedelta64("NaT", "ns")
raise ValueError("cannot set a Timedelta with a non-timedelta {typ}"
.format(typ=type(value).__name__))

if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and
not issubclass(arr.dtype.type, np.bool_)):
Expand Down
145 changes: 32 additions & 113 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import date, datetime, timedelta
from datetime import datetime, timedelta
import functools
import inspect
import re
Expand All @@ -7,7 +7,8 @@

import numpy as np

from pandas._libs import NaT, Timestamp, lib, tslib, writers
from pandas._libs import NaT, lib, tslib, writers
from pandas._libs.index import convert_scalar
import pandas._libs.internals as libinternals
from pandas._libs.tslibs import Timedelta, conversion
from pandas._libs.tslibs.timezones import tz_compare
Expand Down Expand Up @@ -54,7 +55,6 @@
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCDatetimeIndex,
ABCExtensionArray,
ABCPandasArray,
ABCSeries,
Expand All @@ -64,7 +64,6 @@
array_equivalent,
is_valid_nat_for_dtype,
isna,
notna,
)

import pandas.core.algorithms as algos
Expand Down Expand Up @@ -663,28 +662,6 @@ def _can_hold_element(self, element: Any) -> bool:
return issubclass(tipo.type, dtype)
return isinstance(element, dtype)

def _try_coerce_args(self, other):
""" provide coercion to our input arguments """

if np.any(notna(other)) and not self._can_hold_element(other):
# coercion issues
# let higher levels handle
raise TypeError(
"cannot convert {} to an {}".format(
type(other).__name__,
type(self).__name__.lower().replace("Block", ""),
)
)
if np.any(isna(other)) and not self._can_hold_na:
raise TypeError(
"cannot convert {} to an {}".format(
type(other).__name__,
type(self).__name__.lower().replace("Block", ""),
)
)

return other

def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
""" convert to our native types format, slicing if desired """
values = self.get_values()
Expand Down Expand Up @@ -766,7 +743,11 @@ def replace(
)

values = self.values
to_replace = self._try_coerce_args(to_replace)
if lib.is_scalar(to_replace) and isinstance(values, np.ndarray):
# The only non-DatetimeLike class that also has a non-trivial
# try_coerce_args is ObjectBlock, but that overrides replace,
# so does not get here.
to_replace = convert_scalar(values, to_replace)

mask = missing.mask_missing(values, to_replace)
if filter is not None:
Expand Down Expand Up @@ -813,7 +794,8 @@ def _replace_single(self, *args, **kwargs):
return self if kwargs["inplace"] else self.copy()

def setitem(self, indexer, value):
"""Set the value inplace, returning a a maybe different typed block.
"""
Set the value inplace, returning a a maybe different typed block.
Parameters
----------
Expand Down Expand Up @@ -841,7 +823,10 @@ def setitem(self, indexer, value):
# coerce if block dtype can store value
values = self.values
if self._can_hold_element(value):
value = self._try_coerce_args(value)
# We only get here for non-Extension Blocks, so _try_coerce_args
# is only relevant for DatetimeBlock and TimedeltaBlock
if lib.is_scalar(value):
value = convert_scalar(values, value)

else:
# current dtype cannot store value, coerce to common dtype
Expand All @@ -862,7 +847,12 @@ def setitem(self, indexer, value):
return b.setitem(indexer, value)

# value must be storeable at this moment
arr_value = np.array(value)
if is_extension_array_dtype(getattr(value, "dtype", None)):
# We need to be careful not to allow through strings that
# can be parsed to EADtypes
arr_value = value
else:
arr_value = np.array(value)

# cast the values to a type that can hold nan (if necessary)
if not self._can_hold_element(value):
Expand Down Expand Up @@ -938,7 +928,10 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False)
new = self.fill_value

if self._can_hold_element(new):
new = self._try_coerce_args(new)
# We only get here for non-Extension Blocks, so _try_coerce_args
# is only relevant for DatetimeBlock and TimedeltaBlock
if lib.is_scalar(new):
new = convert_scalar(new_values, new)

if transpose:
new_values = new_values.T
Expand Down Expand Up @@ -1176,7 +1169,10 @@ def _interpolate_with_fill(
return [self.copy()]

values = self.values if inplace else self.values.copy()
fill_value = self._try_coerce_args(fill_value)

# We only get here for non-ExtensionBlock
fill_value = convert_scalar(self.values, fill_value)

values = missing.interpolate_2d(
values,
method=method,
Expand Down Expand Up @@ -1375,7 +1371,10 @@ def func(cond, values, other):
and np.isnan(other)
):
# np.where will cast integer array to floats in this case
other = self._try_coerce_args(other)
if not self._can_hold_element(other):
raise TypeError
if lib.is_scalar(other) and isinstance(values, np.ndarray):
other = convert_scalar(values, other)

fastres = expressions.where(cond, values, other)
return fastres
Expand Down Expand Up @@ -1641,7 +1640,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False)
# use block's copy logic.
# .values may be an Index which does shallow copy by default
new_values = self.values if inplace else self.copy().values
new = self._try_coerce_args(new)

if isinstance(new, np.ndarray) and len(new) == len(mask):
new = new[mask]
Expand Down Expand Up @@ -2194,38 +2192,6 @@ def _can_hold_element(self, element: Any) -> bool:

return is_valid_nat_for_dtype(element, self.dtype)

def _try_coerce_args(self, other):
"""
Coerce other to dtype 'i8'. NaN and NaT convert to
the smallest i8, and will correctly round-trip to NaT if converted
back in _try_coerce_result. values is always ndarray-like, other
may not be
Parameters
----------
other : ndarray-like or scalar
Returns
-------
base-type other
"""
if is_valid_nat_for_dtype(other, self.dtype):
other = np.datetime64("NaT", "ns")
elif isinstance(other, (datetime, np.datetime64, date)):
other = Timestamp(other)
if other.tz is not None:
raise TypeError("cannot coerce a Timestamp with a tz on a naive Block")
other = other.asm8
elif hasattr(other, "dtype") and is_datetime64_dtype(other):
# TODO: can we get here with non-nano?
pass
else:
# coercion issues
# let higher levels handle
raise TypeError(other)

return other

def to_native_types(
self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs
):
Expand Down Expand Up @@ -2364,10 +2330,6 @@ def _slice(self, slicer):
return self.values[loc]
return self.values[slicer]

def _try_coerce_args(self, other):
# DatetimeArray handles this for us
return other

def diff(self, n: int, axis: int = 0) -> List["Block"]:
"""
1st discrete difference.
Expand Down Expand Up @@ -2505,34 +2467,6 @@ def fillna(self, value, **kwargs):
value = Timedelta(value, unit="s")
return super().fillna(value, **kwargs)

def _try_coerce_args(self, other):
"""
Coerce values and other to datetime64[ns], with null values
converted to datetime64("NaT", "ns").
Parameters
----------
other : ndarray-like or scalar
Returns
-------
base-type other
"""

if is_valid_nat_for_dtype(other, self.dtype):
other = np.timedelta64("NaT", "ns")
elif isinstance(other, (timedelta, np.timedelta64)):
other = Timedelta(other).to_timedelta64()
elif hasattr(other, "dtype") and is_timedelta64_dtype(other):
# TODO: can we get here with non-nano dtype?
pass
else:
# coercion issues
# let higher levels handle
raise TypeError(other)

return other

def should_store(self, value):
return issubclass(
value.dtype.type, np.timedelta64
Expand Down Expand Up @@ -2668,21 +2602,6 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]
def _can_hold_element(self, element: Any) -> bool:
return True

def _try_coerce_args(self, other):
""" provide coercion to our input arguments """

if isinstance(other, ABCDatetimeIndex):
# May get a DatetimeIndex here. Unbox it.
other = other.array

if isinstance(other, DatetimeArray):
# hit in pandas/tests/indexing/test_coercion.py
# ::TestWhereCoercion::test_where_series_datetime64[datetime64tz]
# when falling back to ObjectBlock.where
other = other.astype(object)

return other

def should_store(self, value):
return not (
issubclass(
Expand Down
24 changes: 16 additions & 8 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,19 +327,27 @@ def test_make_block_same_class(self):


class TestDatetimeBlock:
def test_try_coerce_arg(self):
def test_can_hold_element(self):
block = create_block("datetime", [0])

# We will check that block._can_hold_element iff arr.__setitem__ works
arr = pd.array(block.values.ravel())

# coerce None
none_coerced = block._try_coerce_args(None)
assert pd.Timestamp(none_coerced) is pd.NaT
assert block._can_hold_element(None)
arr[0] = None
assert arr[0] is pd.NaT

# coerce different types of date bojects
vals = (np.datetime64("2010-10-10"), datetime(2010, 10, 10), date(2010, 10, 10))
# coerce different types of datetime objects
vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)]
for val in vals:
coerced = block._try_coerce_args(val)
assert np.datetime64 == type(coerced)
assert pd.Timestamp("2010-10-10") == pd.Timestamp(coerced)
assert block._can_hold_element(val)
arr[0] = val

val = date(2010, 10, 10)
assert not block._can_hold_element(val)
with pytest.raises(TypeError):
arr[0] = val


class TestBlockManager:
Expand Down

0 comments on commit e48d606

Please sign in to comment.