From c721915114c5dd39cd7aec6c9dec762ca28abcac Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 25 Apr 2018 07:50:54 -0500 Subject: [PATCH] Removed default_fill_value --- pandas/compat/__init__.py | 3 -- pandas/core/algorithms.py | 5 ++- pandas/core/arrays/base.py | 3 +- pandas/core/dtypes/cast.py | 5 +-- pandas/core/frame.py | 4 +-- pandas/core/generic.py | 17 +++-------- pandas/core/internals.py | 61 +++---------------------------------- pandas/core/series.py | 9 ++---- pandas/core/sparse/frame.py | 4 +-- 9 files changed, 20 insertions(+), 91 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index ffac335dea87a..12517372fedd1 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -451,6 +451,3 @@ def is_platform_mac(): def is_platform_32bit(): return struct.calcsize("P") * 8 < 64 - - -_default_fill_value = object() diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3935d0829f1c9..d1fbaf91f3365 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -30,7 +30,6 @@ _ensure_platform_int, _ensure_object, _ensure_float64, _ensure_uint64, _ensure_int64) -from pandas.compat import _default_fill_value from pandas.compat.numpy import _np_version_under1p10 from pandas.core.dtypes.missing import isna, na_value_for_dtype @@ -1449,10 +1448,10 @@ def func(arr, indexer, out, fill_value=np.nan): return func -def take(arr, indexer, fill_value=_default_fill_value): +def take(arr, indexer, fill_value=None, allow_fill=None): indexer = np.asarray(indexer) - if fill_value is _default_fill_value: + if allow_fill is None: # NumPy style result = arr.take(indexer) else: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9364ba7a228d8..1942a848c57bf 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -10,7 +10,6 @@ import numpy as np from pandas.errors import AbstractMethodError -from pandas.compat import _default_fill_value from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution @@ -535,7 +534,7 @@ def _values_for_take(self): @Substitution(arr='') @Appender(_take_docstring) - def take(self, indexer, fill_value=_default_fill_value): + def take(self, indexer, fill_value=None, allow_fill=None): # type: (Sequence[int], Optional[Any]) -> ExtensionArray # assert fill_value is not np.nan from pandas.core.algorithms import take diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fc0a5375b5d75..8deb777ca7d73 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -7,7 +7,7 @@ from pandas._libs import tslib, lib from pandas._libs.tslib import iNaT -from pandas.compat import string_types, text_type, PY3, _default_fill_value +from pandas.compat import string_types, text_type, PY3 from .common import (_ensure_object, is_bool, is_integer, is_float, is_complex, is_datetimetz, is_categorical_dtype, is_datetimelike, @@ -255,9 +255,6 @@ def changeit(): def maybe_promote(dtype, fill_value=np.nan): - if fill_value is _default_fill_value: - fill_value = np.nan - # if we passed an array here, determine the fill value by dtype if isinstance(fill_value, np.ndarray): if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f5920417a3899..de6985ef3b4ea 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -77,7 +77,7 @@ from pandas.core.arrays import Categorical, ExtensionArray import pandas.core.algorithms as algorithms from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, - OrderedDict, raise_with_traceback, _default_fill_value) + OrderedDict, raise_with_traceback) from pandas import compat from pandas.compat import PY36 from pandas.compat.numpy import function as nv @@ -3504,7 +3504,7 @@ def _reindex_multi(self, axes, copy, fill_value): @Appender(_shared_docs['align'] % _shared_doc_kwargs) def align(self, other, join='outer', axis=None, level=None, copy=True, - fill_value=_default_fill_value, method=None, limit=None, fill_axis=0, + fill_value=None, method=None, limit=None, fill_axis=0, broadcast_axis=None): return super(DataFrame, self).align(other, join=join, axis=axis, level=level, copy=copy, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ca53ad44a733a..86342b6996abf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -50,8 +50,7 @@ from pandas import compat from pandas.compat.numpy import function as nv from pandas.compat import (map, zip, lzip, lrange, string_types, to_str, - isidentifier, set_function_name, cPickle as pkl, - _default_fill_value) + isidentifier, set_function_name, cPickle as pkl) from pandas.core.ops import _align_method_FRAME import pandas.core.nanops as nanops from pandas.util._decorators import (Appender, Substitution, @@ -3661,7 +3660,7 @@ def reindex(self, *args, **kwargs): copy = kwargs.pop('copy', True) limit = kwargs.pop('limit', None) tolerance = kwargs.pop('tolerance', None) - fill_value = kwargs.pop('fill_value', _default_fill_value) + fill_value = kwargs.pop('fill_value', np.nan) # Series.reindex doesn't use / need the axis kwarg # We pop and ignore it here, to make writing Series/Frame generic code @@ -3791,9 +3790,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, return self._reindex_with_indexers({axis: [new_index, indexer]}, fill_value=fill_value, copy=copy) - def _reindex_with_indexers(self, reindexers, - fill_value=_default_fill_value, - copy=False, + def _reindex_with_indexers(self, reindexers, fill_value=np.nan, copy=False, allow_dups=False): """allow_dups indicates an internal call here """ @@ -7212,7 +7209,7 @@ def ranker(data): @Appender(_shared_docs['align'] % _shared_doc_kwargs) def align(self, other, join='outer', axis=None, level=None, copy=True, - fill_value=_default_fill_value, method=None, limit=None, fill_axis=0, + fill_value=None, method=None, limit=None, fill_axis=0, broadcast_axis=None): from pandas import DataFrame, Series method = missing.clean_fill_method(method) @@ -7263,9 +7260,6 @@ def _align_frame(self, other, join='outer', axis=None, level=None, clidx, cridx = None, None is_series = isinstance(self, ABCSeries) - if fill_value is _default_fill_value: - # XXX: per-column? - fill_value = np.nan if axis is None or axis == 0: if not self.index.equals(other.index): @@ -7365,9 +7359,6 @@ def _align_series(self, other, join='outer', axis=None, level=None, right = other.reindex(join_index, level=level) # fill - if fill_value is _default_fill_value: - fill_value = None - fill_na = notna(fill_value) or (method is not None) if fill_na: left = left.fillna(fill_value, method=method, limit=limit, diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 0534308cb4857..e98899b2f5c1a 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1,5 +1,4 @@ import warnings - import copy from warnings import catch_warnings import inspect @@ -83,7 +82,7 @@ from pandas.util._decorators import cache_readonly from pandas.util._validators import validate_bool_kwarg from pandas import compat -from pandas.compat import range, map, zip, u, _default_fill_value +from pandas.compat import range, map, zip, u class Block(PandasObject): @@ -1889,10 +1888,6 @@ def _holder(self): # For extension blocks, the holder is values-dependent. return type(self.values) - @property - def fill_value(self): - return self.values.dtype.na_value # TODO: change to _na_value - @property def _can_hold_na(self): # The default ExtensionArray._can_hold_na is True @@ -4391,8 +4386,6 @@ def reindex_indexer(self, new_axis, indexer, axis, fill_value=None, pandas-indexer with -1's only. """ - # TODO: see if we can make fill_value be {col -> fill_value} - # maybe earlier... if indexer is None: if new_axis is self.axes[axis] and not copy: return self @@ -4415,17 +4408,9 @@ def reindex_indexer(self, new_axis, indexer, axis, fill_value=None, new_blocks = self._slice_take_blocks_ax0(indexer, fill_tuple=(fill_value,)) else: - if fill_value is None: - fill_value = _default_fill_value - - new_blocks = [] - for blk in self.blocks: - if fill_value is not _default_fill_value: - fill_tuple = (fill_value,) - else: - fill_tuple = (blk.fill_value,) - new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=fill_tuple) - for blk in self.blocks] + new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=( + fill_value if fill_value is not None else blk.fill_value,)) + for blk in self.blocks] new_axes = list(self.axes) new_axes[axis] = new_axis @@ -4451,9 +4436,6 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): if self._is_single_block: blk = self.blocks[0] - if allow_fill and fill_tuple[0] is _default_fill_value: - fill_tuple = (blk.fill_value,) - if sl_type in ('slice', 'mask'): return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: @@ -5422,25 +5404,6 @@ def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy): elif is_uniform_join_units(join_units): b = join_units[0].block.concat_same_type( [ju.block for ju in join_units], placement=placement) - elif is_uniform_reindexer(join_units): - old_block = join_units[0].block - - new_values = concatenate_join_units(join_units, concat_axis, - copy=copy) - if new_values.ndim == 2: - # XXX: categorical returns a categorical here - # EA returns a 2d ndarray - # need to harmoinze these to always be EAs? - assert new_values.shape[0] == 1 - new_values = new_values[0] - - assert isinstance(old_block._holder, ABCExtensionArray) - - b = old_block.make_block_same_class( - old_block._holder._from_sequence(new_values), - placement=placement - ) - else: b = make_block( concatenate_join_units(join_units, concat_axis, copy=copy), @@ -5471,13 +5434,6 @@ def is_uniform_join_units(join_units): len(join_units) > 1) -def is_uniform_reindexer(join_units): - # For when we know we can reindex without changing type - return ( - all(ju.block and ju.block.is_extension for ju in join_units) - ) - - def get_empty_dtype_and_na(join_units): """ Return dtype and N/A values to use when concatenating specified units. @@ -5505,15 +5461,12 @@ def get_empty_dtype_and_na(join_units): upcast_classes = defaultdict(list) null_upcast_classes = defaultdict(list) - for dtype, unit in zip(dtypes, join_units): if dtype is None: continue if is_categorical_dtype(dtype): upcast_cls = 'category' - elif is_extension_array_dtype(dtype): - upcast_cls = 'extension' elif is_datetimetz(dtype): upcast_cls = 'datetimetz' elif issubclass(dtype.type, np.bool_): @@ -5543,8 +5496,6 @@ def get_empty_dtype_and_na(join_units): # create the result if 'object' in upcast_classes: return np.dtype(np.object_), np.nan - elif 'extension' in upcast_classes: - return np.dtype(np.object_), None elif 'bool' in upcast_classes: if has_none_blocks: return np.dtype(np.object_), np.nan @@ -5804,9 +5755,7 @@ def dtype(self): if self.block is None: raise AssertionError("Block is None, no dtype") - if not self.needs_filling or self.block.is_extension: - # ExtensionDtypes by definition can hold their - # NA value. + if not self.needs_filling: return self.block.dtype else: return _get_dtype(maybe_promote(self.block.dtype, diff --git a/pandas/core/series.py b/pandas/core/series.py index 3e7e68bed9b41..f2ee225f50514 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -56,7 +56,7 @@ from pandas import compat from pandas.io.formats.terminal import get_terminal_size from pandas.compat import ( - zip, u, OrderedDict, StringIO, range, get_range_parameters, PY36, _default_fill_value) + zip, u, OrderedDict, StringIO, range, get_range_parameters, PY36) from pandas.compat.numpy import function as nv import pandas.core.ops as ops @@ -3216,10 +3216,7 @@ def _reindex_indexer(self, new_index, indexer, copy): return self.copy() return self - from pandas.core.dtypes.missing import na_value_for_dtype - fill_value = na_value_for_dtype(self.dtype, compat=False) - new_values = algorithms.take(self._values, indexer, - fill_value=fill_value) + new_values = algorithms.take_1d(self._values, indexer) return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): @@ -3230,7 +3227,7 @@ def _needs_reindex_multi(self, axes, method, level): @Appender(generic._shared_docs['align'] % _shared_doc_kwargs) def align(self, other, join='outer', axis=None, level=None, copy=True, - fill_value=_default_fill_value, method=None, limit=None, fill_axis=0, + fill_value=None, method=None, limit=None, fill_axis=0, broadcast_axis=None): return super(Series, self).align(other, join=join, axis=axis, level=level, copy=copy, diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index b282450efb037..2cefbea722098 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -6,7 +6,7 @@ # pylint: disable=E1101,E1103,W0231,E0202 import warnings -from pandas.compat import lmap, _default_fill_value +from pandas.compat import lmap from pandas import compat import numpy as np @@ -690,7 +690,7 @@ def _reindex_columns(self, columns, method, copy, level, fill_value=None, if level is not None: raise TypeError('Reindex by level not supported for sparse') - if not (isna(fill_value) or fill_value is _default_fill_value): + if notna(fill_value): raise NotImplementedError("'fill_value' argument is not supported") if limit: