From e48d606cd96e841b02428bf95bfe49d082aed888 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 24 Oct 2019 05:10:07 -0700 Subject: [PATCH] CLN: remove Block._try_coerce_arg (#29139) --- pandas/_libs/index.pyx | 22 ++-- pandas/core/internals/blocks.py | 145 +++++------------------ pandas/tests/internals/test_internals.py | 24 ++-- 3 files changed, 60 insertions(+), 131 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 144d555258c50a..255fd85531d143 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -18,6 +18,7 @@ cnp.import_array() cimport pandas._libs.util as util from pandas._libs.tslibs.conversion cimport maybe_datetimelike_to_i8 +from pandas._libs.tslibs.nattype cimport c_NaT as NaT from pandas._libs.hashtable cimport HashTable @@ -547,30 +548,31 @@ cpdef convert_scalar(ndarray arr, object value): if util.is_array(value): pass elif isinstance(value, (datetime, np.datetime64, date)): - return Timestamp(value).value + return Timestamp(value).to_datetime64() elif util.is_timedelta64_object(value): # exclude np.timedelta64("NaT") from value != value below pass elif value is None or value != value: - return NPY_NAT - elif isinstance(value, str): - return Timestamp(value).value - raise ValueError("cannot set a Timestamp with a non-timestamp") + return np.datetime64("NaT", "ns") + raise ValueError("cannot set a Timestamp with a non-timestamp {typ}" + .format(typ=type(value).__name__)) elif arr.descr.type_num == NPY_TIMEDELTA: if util.is_array(value): pass elif isinstance(value, timedelta) or util.is_timedelta64_object(value): - return Timedelta(value).value + value = Timedelta(value) + if value is NaT: + return np.timedelta64("NaT", "ns") + return value.to_timedelta64() elif util.is_datetime64_object(value): # exclude np.datetime64("NaT") which would otherwise be picked up # by the `value != value check below pass elif value is None or value != value: - return NPY_NAT - elif isinstance(value, str): - return Timedelta(value).value - raise ValueError("cannot set a Timedelta with a non-timedelta") + return np.timedelta64("NaT", "ns") + raise ValueError("cannot set a Timedelta with a non-timedelta {typ}" + .format(typ=type(value).__name__)) if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and not issubclass(arr.dtype.type, np.bool_)): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1495be1f26df53..51108d9a5a573f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1,4 +1,4 @@ -from datetime import date, datetime, timedelta +from datetime import datetime, timedelta import functools import inspect import re @@ -7,7 +7,8 @@ import numpy as np -from pandas._libs import NaT, Timestamp, lib, tslib, writers +from pandas._libs import NaT, lib, tslib, writers +from pandas._libs.index import convert_scalar import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion from pandas._libs.tslibs.timezones import tz_compare @@ -54,7 +55,6 @@ from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype from pandas.core.dtypes.generic import ( ABCDataFrame, - ABCDatetimeIndex, ABCExtensionArray, ABCPandasArray, ABCSeries, @@ -64,7 +64,6 @@ array_equivalent, is_valid_nat_for_dtype, isna, - notna, ) import pandas.core.algorithms as algos @@ -663,28 +662,6 @@ def _can_hold_element(self, element: Any) -> bool: return issubclass(tipo.type, dtype) return isinstance(element, dtype) - def _try_coerce_args(self, other): - """ provide coercion to our input arguments """ - - if np.any(notna(other)) and not self._can_hold_element(other): - # coercion issues - # let higher levels handle - raise TypeError( - "cannot convert {} to an {}".format( - type(other).__name__, - type(self).__name__.lower().replace("Block", ""), - ) - ) - if np.any(isna(other)) and not self._can_hold_na: - raise TypeError( - "cannot convert {} to an {}".format( - type(other).__name__, - type(self).__name__.lower().replace("Block", ""), - ) - ) - - return other - def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ values = self.get_values() @@ -766,7 +743,11 @@ def replace( ) values = self.values - to_replace = self._try_coerce_args(to_replace) + if lib.is_scalar(to_replace) and isinstance(values, np.ndarray): + # The only non-DatetimeLike class that also has a non-trivial + # try_coerce_args is ObjectBlock, but that overrides replace, + # so does not get here. + to_replace = convert_scalar(values, to_replace) mask = missing.mask_missing(values, to_replace) if filter is not None: @@ -813,7 +794,8 @@ def _replace_single(self, *args, **kwargs): return self if kwargs["inplace"] else self.copy() def setitem(self, indexer, value): - """Set the value inplace, returning a a maybe different typed block. + """ + Set the value inplace, returning a a maybe different typed block. Parameters ---------- @@ -841,7 +823,10 @@ def setitem(self, indexer, value): # coerce if block dtype can store value values = self.values if self._can_hold_element(value): - value = self._try_coerce_args(value) + # We only get here for non-Extension Blocks, so _try_coerce_args + # is only relevant for DatetimeBlock and TimedeltaBlock + if lib.is_scalar(value): + value = convert_scalar(values, value) else: # current dtype cannot store value, coerce to common dtype @@ -862,7 +847,12 @@ def setitem(self, indexer, value): return b.setitem(indexer, value) # value must be storeable at this moment - arr_value = np.array(value) + if is_extension_array_dtype(getattr(value, "dtype", None)): + # We need to be careful not to allow through strings that + # can be parsed to EADtypes + arr_value = value + else: + arr_value = np.array(value) # cast the values to a type that can hold nan (if necessary) if not self._can_hold_element(value): @@ -938,7 +928,10 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) new = self.fill_value if self._can_hold_element(new): - new = self._try_coerce_args(new) + # We only get here for non-Extension Blocks, so _try_coerce_args + # is only relevant for DatetimeBlock and TimedeltaBlock + if lib.is_scalar(new): + new = convert_scalar(new_values, new) if transpose: new_values = new_values.T @@ -1176,7 +1169,10 @@ def _interpolate_with_fill( return [self.copy()] values = self.values if inplace else self.values.copy() - fill_value = self._try_coerce_args(fill_value) + + # We only get here for non-ExtensionBlock + fill_value = convert_scalar(self.values, fill_value) + values = missing.interpolate_2d( values, method=method, @@ -1375,7 +1371,10 @@ def func(cond, values, other): and np.isnan(other) ): # np.where will cast integer array to floats in this case - other = self._try_coerce_args(other) + if not self._can_hold_element(other): + raise TypeError + if lib.is_scalar(other) and isinstance(values, np.ndarray): + other = convert_scalar(values, other) fastres = expressions.where(cond, values, other) return fastres @@ -1641,7 +1640,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) # use block's copy logic. # .values may be an Index which does shallow copy by default new_values = self.values if inplace else self.copy().values - new = self._try_coerce_args(new) if isinstance(new, np.ndarray) and len(new) == len(mask): new = new[mask] @@ -2194,38 +2192,6 @@ def _can_hold_element(self, element: Any) -> bool: return is_valid_nat_for_dtype(element, self.dtype) - def _try_coerce_args(self, other): - """ - Coerce other to dtype 'i8'. NaN and NaT convert to - the smallest i8, and will correctly round-trip to NaT if converted - back in _try_coerce_result. values is always ndarray-like, other - may not be - - Parameters - ---------- - other : ndarray-like or scalar - - Returns - ------- - base-type other - """ - if is_valid_nat_for_dtype(other, self.dtype): - other = np.datetime64("NaT", "ns") - elif isinstance(other, (datetime, np.datetime64, date)): - other = Timestamp(other) - if other.tz is not None: - raise TypeError("cannot coerce a Timestamp with a tz on a naive Block") - other = other.asm8 - elif hasattr(other, "dtype") and is_datetime64_dtype(other): - # TODO: can we get here with non-nano? - pass - else: - # coercion issues - # let higher levels handle - raise TypeError(other) - - return other - def to_native_types( self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs ): @@ -2364,10 +2330,6 @@ def _slice(self, slicer): return self.values[loc] return self.values[slicer] - def _try_coerce_args(self, other): - # DatetimeArray handles this for us - return other - def diff(self, n: int, axis: int = 0) -> List["Block"]: """ 1st discrete difference. @@ -2505,34 +2467,6 @@ def fillna(self, value, **kwargs): value = Timedelta(value, unit="s") return super().fillna(value, **kwargs) - def _try_coerce_args(self, other): - """ - Coerce values and other to datetime64[ns], with null values - converted to datetime64("NaT", "ns"). - - Parameters - ---------- - other : ndarray-like or scalar - - Returns - ------- - base-type other - """ - - if is_valid_nat_for_dtype(other, self.dtype): - other = np.timedelta64("NaT", "ns") - elif isinstance(other, (timedelta, np.timedelta64)): - other = Timedelta(other).to_timedelta64() - elif hasattr(other, "dtype") and is_timedelta64_dtype(other): - # TODO: can we get here with non-nano dtype? - pass - else: - # coercion issues - # let higher levels handle - raise TypeError(other) - - return other - def should_store(self, value): return issubclass( value.dtype.type, np.timedelta64 @@ -2668,21 +2602,6 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"] def _can_hold_element(self, element: Any) -> bool: return True - def _try_coerce_args(self, other): - """ provide coercion to our input arguments """ - - if isinstance(other, ABCDatetimeIndex): - # May get a DatetimeIndex here. Unbox it. - other = other.array - - if isinstance(other, DatetimeArray): - # hit in pandas/tests/indexing/test_coercion.py - # ::TestWhereCoercion::test_where_series_datetime64[datetime64tz] - # when falling back to ObjectBlock.where - other = other.astype(object) - - return other - def should_store(self, value): return not ( issubclass( diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 5eb9a067b11e4d..ed1a321a3d7e68 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -327,19 +327,27 @@ def test_make_block_same_class(self): class TestDatetimeBlock: - def test_try_coerce_arg(self): + def test_can_hold_element(self): block = create_block("datetime", [0]) + # We will check that block._can_hold_element iff arr.__setitem__ works + arr = pd.array(block.values.ravel()) + # coerce None - none_coerced = block._try_coerce_args(None) - assert pd.Timestamp(none_coerced) is pd.NaT + assert block._can_hold_element(None) + arr[0] = None + assert arr[0] is pd.NaT - # coerce different types of date bojects - vals = (np.datetime64("2010-10-10"), datetime(2010, 10, 10), date(2010, 10, 10)) + # coerce different types of datetime objects + vals = [np.datetime64("2010-10-10"), datetime(2010, 10, 10)] for val in vals: - coerced = block._try_coerce_args(val) - assert np.datetime64 == type(coerced) - assert pd.Timestamp("2010-10-10") == pd.Timestamp(coerced) + assert block._can_hold_element(val) + arr[0] = val + + val = date(2010, 10, 10) + assert not block._can_hold_element(val) + with pytest.raises(TypeError): + arr[0] = val class TestBlockManager: