From eaf486002c8f8c7ac5184835993ec78d18231b8f Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 29 Nov 2019 18:41:14 +0200 Subject: [PATCH 01/49] STY: x.__class__ to type(x) #batch-2 (#29893) --- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/generic.py | 10 +++++----- pandas/core/groupby/base.py | 2 +- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/grouper.py | 2 +- pandas/core/indexes/base.py | 12 ++++++------ pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/frozen.py | 14 +++++++------- pandas/core/indexes/interval.py | 6 +++--- pandas/core/indexes/multi.py | 8 +++----- pandas/core/indexes/range.py | 12 ++++++------ 11 files changed, 35 insertions(+), 37 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2c601b01dbae5..fb3097684f0c3 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -420,7 +420,7 @@ def __repr__(self) -> str_type: if self.categories is None: data = "None, " else: - data = self.categories._format_data(name=self.__class__.__name__) + data = self.categories._format_data(name=type(self).__name__) return tpl.format(data=data, ordered=self._ordered) @staticmethod diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 29eeb5999b88f..b13aee238efb3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -251,7 +251,7 @@ def _validate_dtype(self, dtype): if dtype.kind == "V": raise NotImplementedError( "compound dtypes are not implemented" - " in the {0} constructor".format(self.__class__.__name__) + " in the {0} constructor".format(type(self).__name__) ) return dtype @@ -1534,7 +1534,7 @@ def __nonzero__(self): raise ValueError( "The truth value of a {0} is ambiguous. 
" "Use a.empty, a.bool(), a.item(), a.any() or a.all().".format( - self.__class__.__name__ + type(self).__name__ ) ) @@ -1559,7 +1559,7 @@ def bool(self): elif is_scalar(v): raise ValueError( "bool cannot act on a non-boolean single element " - "{0}".format(self.__class__.__name__) + "{0}".format(type(self).__name__) ) self.__nonzero__() @@ -1865,7 +1865,7 @@ def _drop_labels_or_levels(self, keys, axis=0): def __hash__(self): raise TypeError( "{0!r} objects are mutable, thus they cannot be" - " hashed".format(self.__class__.__name__) + " hashed".format(type(self).__name__) ) def __iter__(self): @@ -2059,7 +2059,7 @@ def __repr__(self) -> str: # string representation based upon iterating over self # (since, by definition, `PandasContainers` are iterable) prepr = "[%s]" % ",".join(map(pprint_thing, self)) - return f"{self.__class__.__name__}({prepr})" + return f"{type(self).__name__}({prepr})" def _repr_latex_(self): """ diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 407cd8342d486..e088400b25f0f 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -41,7 +41,7 @@ def _gotitem(self, key, ndim, subset=None): except IndexError: groupby = self._groupby - self = self.__class__(subset, groupby=groupby, parent=self, **kwargs) + self = type(self)(subset, groupby=groupby, parent=self, **kwargs) self._reset_cache() if subset.ndim == 2: if is_scalar(key) and key in subset or is_list_like(key): diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 99ef281e842b1..4726cdfb05a70 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -473,7 +473,7 @@ def _transform_general(self, func, *args, **kwargs): """ Transform with a non-str `func`. 
""" - klass = self._selected_obj.__class__ + klass = type(self._selected_obj) results = [] for name, group in self: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index dc924455b141d..9b2f43d8dd484 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -211,7 +211,7 @@ def __repr__(self) -> str: if getattr(self, attr_name) is not None ) attrs = ", ".join(attrs_list) - cls_name = self.__class__.__name__ + cls_name = type(self).__name__ return f"{cls_name}({attrs})" diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 486cc0cd9032d..4a3fa26c3460e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -815,7 +815,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): else: if allow_fill and fill_value is not None: msg = "Unable to fill values because {0} cannot contain NA" - raise ValueError(msg.format(self.__class__.__name__)) + raise ValueError(msg.format(type(self).__name__)) taken = self.values.take(indices) return self._shallow_copy(taken) @@ -948,7 +948,7 @@ def __repr__(self): """ Return a string representation for this object. 
""" - klass = self.__class__.__name__ + klass_name = type(self).__name__ data = self._format_data() attrs = self._format_attrs() space = self._format_space() @@ -959,7 +959,7 @@ def __repr__(self): if data is None: data = "" - res = f"{klass}({data}{prepr})" + res = f"{klass_name}({data}{prepr})" return res @@ -1287,7 +1287,7 @@ def _set_names(self, values, level=None): for name in values: if not is_hashable(name): raise TypeError( - "{}.name must be a hashable type".format(self.__class__.__name__) + "{}.name must be a hashable type".format(type(self).__name__) ) self.name = values[0] @@ -1794,7 +1794,7 @@ def is_all_dates(self) -> bool: def __reduce__(self): d = dict(data=self._data) d.update(self._get_attributes_dict()) - return _new_Index, (self.__class__, d), None + return _new_Index, (type(self), d), None def __setstate__(self, state): """ @@ -2290,7 +2290,7 @@ def __nonzero__(self): raise ValueError( "The truth value of a {0} is ambiguous. " "Use a.empty, a.bool(), a.item(), a.any() or a.all().".format( - self.__class__.__name__ + type(self).__name__ ) ) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ab9f57ff9ac69..0d368845ea4f2 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -423,7 +423,7 @@ def __reduce__(self): d = dict(data=self._data) d.update(self._get_attributes_dict()) - return _new_DatetimeIndex, (self.__class__, d), None + return _new_DatetimeIndex, (type(self), d), None def __setstate__(self, state): """Necessary for making this object picklable""" diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 13c386187a9e5..ab9852157b9ef 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -69,13 +69,13 @@ def difference(self, other) -> "FrozenList": def __getitem__(self, n): if isinstance(n, slice): - return self.__class__(super().__getitem__(n)) + return type(self)(super().__getitem__(n)) return super().__getitem__(n) 
def __radd__(self, other): if isinstance(other, tuple): other = list(other) - return self.__class__(other + list(self)) + return type(self)(other + list(self)) def __eq__(self, other) -> bool: if isinstance(other, (tuple, FrozenList)): @@ -85,12 +85,12 @@ def __eq__(self, other) -> bool: __req__ = __eq__ def __mul__(self, other): - return self.__class__(super().__mul__(other)) + return type(self)(super().__mul__(other)) __imul__ = __mul__ def __reduce__(self): - return self.__class__, (list(self),) + return type(self), (list(self),) def __hash__(self): return hash(tuple(self)) @@ -99,7 +99,7 @@ def _disabled(self, *args, **kwargs): """This method will not function because object is immutable.""" raise TypeError( "'{cls}' does not support mutable operations.".format( - cls=self.__class__.__name__ + cls=type(self).__name__ ) ) @@ -107,7 +107,7 @@ def __str__(self) -> str: return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n")) def __repr__(self) -> str: - return f"{self.__class__.__name__}({str(self)})" + return f"{type(self).__name__}({str(self)})" __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled pop = append = extend = remove = sort = insert = _disabled @@ -132,7 +132,7 @@ def __new__(cls, data, dtype=None, copy=False): def _disabled(self, *args, **kwargs): """This method will not function because object is immutable.""" raise TypeError( - "'{cls}' does not support mutable operations.".format(cls=self.__class__) + "'{cls}' does not support mutable operations.".format(cls=type(self)) ) __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 35e8405e0f1aa..a9e119f3c5f87 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -497,7 +497,7 @@ def __array_wrap__(self, result, context=None): def __reduce__(self): d = dict(left=self.left, right=self.right) d.update(self._get_attributes_dict()) - return 
_new_IntervalIndex, (self.__class__, d), None + return _new_IntervalIndex, (type(self), d), None @Appender(_index_shared_docs["copy"]) def copy(self, deep=False, name=None): @@ -512,7 +512,7 @@ def copy(self, deep=False, name=None): @Appender(_index_shared_docs["astype"]) def astype(self, dtype, copy=True): - with rewrite_exception("IntervalArray", self.__class__.__name__): + with rewrite_exception("IntervalArray", type(self).__name__): new_values = self.values.astype(dtype, copy=copy) if is_interval_dtype(new_values): return self._shallow_copy(new_values.left, new_values.right) @@ -1205,7 +1205,7 @@ def _format_attrs(self): return attrs def _format_space(self): - space = " " * (len(self.__class__.__name__) + 1) + space = " " * (len(type(self).__name__) + 1) return "\n{space}".format(space=space) # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 048112cbf0836..d151fb7260a58 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1245,9 +1245,7 @@ def _set_names(self, names, level=None, validate=True): # All items in 'names' need to be hashable: if not is_hashable(name): raise TypeError( - "{}.name must be a hashable type".format( - self.__class__.__name__ - ) + "{}.name must be a hashable type".format(type(self).__name__) ) self._names[lev] = name @@ -1911,7 +1909,7 @@ def __reduce__(self): sortorder=self.sortorder, names=list(self.names), ) - return ibase._new_Index, (self.__class__, d), None + return ibase._new_Index, (type(self), d), None def __setstate__(self, state): """Necessary for making this object picklable""" @@ -3264,7 +3262,7 @@ def astype(self, dtype, copy=True): elif not is_object_dtype(dtype): msg = ( "Setting {cls} dtype to anything other than object is not supported" - ).format(cls=self.__class__) + ).format(cls=type(self)) raise TypeError(msg) elif copy is True: return self._shallow_copy() diff --git 
a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index f7bbbee461e8d..f300cde3b5bcc 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -179,7 +179,7 @@ def _get_data_as_items(self): def __reduce__(self): d = self._get_attributes_dict() d.update(dict(self._get_data_as_items())) - return ibase._new_Index, (self.__class__, d), None + return ibase._new_Index, (type(self), d), None # -------------------------------------------------------------------- # Rendering Methods @@ -592,27 +592,27 @@ def _union(self, other, sort): and (start_s - end_o) <= step_s and (start_o - end_s) <= step_s ): - return self.__class__(start_r, end_r + step_s, step_s) + return type(self)(start_r, end_r + step_s, step_s) if ( (step_s % 2 == 0) and (abs(start_s - start_o) <= step_s / 2) and (abs(end_s - end_o) <= step_s / 2) ): - return self.__class__(start_r, end_r + step_s / 2, step_s / 2) + return type(self)(start_r, end_r + step_s / 2, step_s / 2) elif step_o % step_s == 0: if ( (start_o - start_s) % step_s == 0 and (start_o + step_s >= start_s) and (end_o - step_s <= end_s) ): - return self.__class__(start_r, end_r + step_s, step_s) + return type(self)(start_r, end_r + step_s, step_s) elif step_s % step_o == 0: if ( (start_s - start_o) % step_o == 0 and (start_s + step_o >= start_o) and (end_s - step_o <= end_o) ): - return self.__class__(start_r, end_r + step_o, step_o) + return type(self)(start_r, end_r + step_o, step_o) return self._int64index._union(other, sort=sort) @Appender(_index_shared_docs["join"]) @@ -781,7 +781,7 @@ def _evaluate_numeric_binop(self, other): rstart = op(left.start, right) rstop = op(left.stop, right) - result = self.__class__(rstart, rstop, rstep, **attrs) + result = type(self)(rstart, rstop, rstep, **attrs) # for compat with numpy / Int64Index # even if we can represent as a RangeIndex, return From ed98ff0ea7a920356dbb1c3af9e29eab1c041823 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend 
<50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 29 Nov 2019 18:43:43 +0200 Subject: [PATCH 02/49] STY: x.__class__ to type(x) #batch-3 (#29894) --- pandas/core/indexing.py | 6 ++++-- pandas/core/internals/blocks.py | 4 ++-- pandas/core/internals/concat.py | 2 +- pandas/core/internals/managers.py | 22 ++++++++++------------ pandas/core/resample.py | 4 ++-- pandas/core/series.py | 9 +++------ pandas/core/window/rolling.py | 2 +- pandas/errors/__init__.py | 2 +- pandas/io/clipboard/__init__.py | 4 ++-- 9 files changed, 26 insertions(+), 29 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b52015b738c6e..67412ed5e5b26 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -105,7 +105,7 @@ class _NDFrameIndexer(_NDFrameIndexerBase): def __call__(self, axis=None): # we need to return a copy of ourselves - new_self = self.__class__(self.name, self.obj) + new_self = type(self)(self.name, self.obj) if axis is not None: axis = self.obj._get_axis_number(axis) @@ -228,7 +228,9 @@ def _validate_key(self, key, axis: int): raise AbstractMethodError(self) def _has_valid_tuple(self, key: Tuple): - """ check the key for valid keys across my indexer """ + """ + Check the key for valid keys across my indexer. 
+ """ for i, k in enumerate(key): if i >= self.ndim: raise IndexingError("Too many indexers") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2d6ffb7277742..e4de1c94da450 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -257,11 +257,11 @@ def make_block_same_class(self, values, placement=None, ndim=None): placement = self.mgr_locs if ndim is None: ndim = self.ndim - return make_block(values, placement=placement, ndim=ndim, klass=self.__class__) + return make_block(values, placement=placement, ndim=ndim, klass=type(self)) def __repr__(self) -> str: # don't want to print out all of the items here - name = pprint_thing(self.__class__.__name__) + name = type(self).__name__ if self._is_single_block: result = "{name}: {len} dtype: {dtype}".format( diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index f981c00fdad36..6c4ab2882d67f 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -122,7 +122,7 @@ def __init__(self, block, shape, indexers=None): def __repr__(self) -> str: return "{name}({block!r}, {indexers})".format( - name=self.__class__.__name__, block=self.block, indexers=self.indexers + name=type(self).__name__, block=self.block, indexers=self.indexers ) @cache_readonly diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 19901dc510199..0fe95a4b7f370 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -153,7 +153,7 @@ def make_empty(self, axes=None): blocks = np.array([], dtype=self.array_dtype) else: blocks = [] - return self.__class__(blocks, axes) + return type(self)(blocks, axes) def __nonzero__(self): return True @@ -316,7 +316,7 @@ def __len__(self) -> int: return len(self.items) def __repr__(self) -> str: - output = pprint_thing(self.__class__.__name__) + output = type(self).__name__ for i, ax in enumerate(self.axes): if i == 0: output += "\nItems: 
{ax}".format(ax=ax) @@ -430,7 +430,7 @@ def apply( if len(result_blocks) == 0: return self.make_empty(axes or self.axes) - bm = self.__class__( + bm = type(self)( result_blocks, axes or self.axes, do_integrity_check=do_integrity_check ) bm._consolidate_inplace() @@ -519,7 +519,7 @@ def get_axe(block, qs, axes): for b in blocks ] - return self.__class__(blocks, new_axes) + return type(self)(blocks, new_axes) # single block, i.e. ndim == {1} values = concat_compat([b.values for b in blocks]) @@ -629,7 +629,7 @@ def comp(s, regex=False): rb = new_rb result_blocks.extend(rb) - bm = self.__class__(result_blocks, self.axes) + bm = type(self)(result_blocks, self.axes) bm._consolidate_inplace() return bm @@ -724,7 +724,7 @@ def combine(self, blocks, copy=True): axes = list(self.axes) axes[0] = self.items.take(indexer) - return self.__class__(new_blocks, axes, do_integrity_check=False) + return type(self)(new_blocks, axes, do_integrity_check=False) def get_slice(self, slobj, axis=0): if axis >= self.ndim: @@ -741,7 +741,7 @@ def get_slice(self, slobj, axis=0): new_axes = list(self.axes) new_axes[axis] = new_axes[axis][slobj] - bm = self.__class__(new_blocks, new_axes, do_integrity_check=False) + bm = type(self)(new_blocks, new_axes, do_integrity_check=False) bm._consolidate_inplace() return bm @@ -917,7 +917,7 @@ def consolidate(self): if self.is_consolidated(): return self - bm = self.__class__(self.blocks, self.axes) + bm = type(self)(self.blocks, self.axes) bm._is_consolidated = False bm._consolidate_inplace() return bm @@ -1251,7 +1251,7 @@ def reindex_indexer( new_axes = list(self.axes) new_axes[axis] = new_axis - return self.__class__(new_blocks, new_axes) + return type(self)(new_blocks, new_axes) def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): """ @@ -1521,9 +1521,7 @@ def get_slice(self, slobj, axis=0): if axis >= self.ndim: raise IndexError("Requested axis not found in manager") - return self.__class__( - self._block._slice(slobj), 
self.index[slobj], fastpath=True - ) + return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True) @property def index(self): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2433e3f52b4a9..58c4a97d651d8 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -96,7 +96,7 @@ def __str__(self) -> str: if getattr(self.groupby, k, None) is not None ) return "{klass} [{attrs}]".format( - klass=self.__class__.__name__, attrs=", ".join(attrs) + klass=type(self).__name__, attrs=", ".join(attrs) ) def __getattr__(self, attr): @@ -885,7 +885,7 @@ def count(self): result = self._downsample("count") if not len(self.ax): if self._selected_obj.ndim == 1: - result = self._selected_obj.__class__( + result = type(self._selected_obj)( [], index=result.index, dtype="int64", name=self._selected_obj.name ) else: diff --git a/pandas/core/series.py b/pandas/core/series.py index 7f63b2575382a..56039605651ac 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -256,9 +256,7 @@ def __init__( elif is_extension_array_dtype(data): pass elif isinstance(data, (set, frozenset)): - raise TypeError( - "{0!r} type is unordered".format(data.__class__.__name__) - ) + raise TypeError(f"{repr(type(data).__name__)} type is unordered") elif isinstance(data, ABCSparseArray): # handle sparse passed here (and force conversion) data = data.to_dense() @@ -1535,9 +1533,8 @@ def to_string( # catch contract violations if not isinstance(result, str): raise AssertionError( - "result must be of type unicode, type" - " of result is {0!r}" - "".format(result.__class__.__name__) + "result must be of type str, type" + f" of result is {repr(type(result).__name__)}" ) if buf is None: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 7f3404100f71c..d8aa362080093 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -204,7 +204,7 @@ def _get_window(self, other=None, win_type: Optional[str] = None) -> 
int: @property def _window_type(self) -> str: - return self.__class__.__name__ + return type(self).__name__ def __repr__(self) -> str: """ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 73cc40ae0e0d3..34838af5fd6e4 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -178,6 +178,6 @@ def __str__(self) -> str: if self.methodtype == "classmethod": name = self.class_instance.__name__ else: - name = self.class_instance.__class__.__name__ + name = type(self.class_instance).__name__ msg = "This {methodtype} must be defined in the concrete class {name}" return msg.format(methodtype=self.methodtype, name=name) diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 7d3dbaf6ee021..f808b7e706afb 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -95,8 +95,8 @@ def _stringifyText(text) -> str: acceptedTypes = (str, int, float, bool) if not isinstance(text, acceptedTypes): raise PyperclipException( - f"only str, int, float, and bool values" - f"can be copied to the clipboard, not {text.__class__.__name__}" + f"only str, int, float, and bool values " + f"can be copied to the clipboard, not {type(text).__name__}" ) return str(text) From 6f03e76f9d47ecfcfdd44641de6df1fc7dd57a01 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 29 Nov 2019 18:45:11 +0200 Subject: [PATCH 03/49] x.__class__ to type(x) (#29902) --- pandas/io/formats/format.py | 2 +- pandas/io/formats/printing.py | 2 +- pandas/io/packers.py | 24 ++++++++++++------------ pandas/io/pytables.py | 2 +- pandas/io/stata.py | 5 ++--- pandas/tests/extension/base/dtype.py | 6 +++--- pandas/tests/extension/base/ops.py | 8 ++------ pandas/tests/extension/base/printing.py | 2 +- pandas/tests/frame/test_apply.py | 2 +- pandas/tests/indexes/common.py | 18 +++++++++--------- 10 files changed, 33 insertions(+), 38 deletions(-) diff --git 
a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b18f0db622b3e..f8f5d337185c4 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -352,7 +352,7 @@ def to_string(self) -> str: if len(series) == 0: return "{name}([], {footer})".format( - name=self.series.__class__.__name__, footer=footer + name=type(self.series).__name__, footer=footer ) fmt_index, have_header = self._get_formatted_index() diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index a4f1488fb6b69..8218799129952 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -321,7 +321,7 @@ def format_object_summary( if display_width is None: display_width = get_option("display.width") or 80 if name is None: - name = obj.__class__.__name__ + name = type(obj).__name__ if indent_for_name: name_len = len(name) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 253441ab25813..bb7b00571b0df 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -404,7 +404,7 @@ def encode(obj): if isinstance(obj, RangeIndex): return { "typ": "range_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "start": obj._range.start, "stop": obj._range.stop, @@ -413,7 +413,7 @@ def encode(obj): elif isinstance(obj, PeriodIndex): return { "typ": "period_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "freq": getattr(obj, "freqstr", None), "dtype": obj.dtype.name, @@ -429,7 +429,7 @@ def encode(obj): obj = obj.tz_convert("UTC") return { "typ": "datetime_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "dtype": obj.dtype.name, "data": convert(obj.asi8), @@ -444,7 +444,7 @@ def encode(obj): typ = "interval_array" return { "typ": typ, - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "left": getattr(obj, 
"left", None), "right": getattr(obj, "right", None), @@ -453,7 +453,7 @@ def encode(obj): elif isinstance(obj, MultiIndex): return { "typ": "multi_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "names": getattr(obj, "names", None), "dtype": obj.dtype.name, "data": convert(obj.values), @@ -462,7 +462,7 @@ def encode(obj): else: return { "typ": "index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "dtype": obj.dtype.name, "data": convert(obj.values), @@ -472,7 +472,7 @@ def encode(obj): elif isinstance(obj, Categorical): return { "typ": "category", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "codes": obj.codes, "categories": obj.categories, @@ -483,7 +483,7 @@ def encode(obj): elif isinstance(obj, Series): return { "typ": "series", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "index": obj.index, "dtype": obj.dtype.name, @@ -498,7 +498,7 @@ def encode(obj): # the block manager return { "typ": "block_manager", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "axes": data.axes, "blocks": [ { @@ -506,7 +506,7 @@ def encode(obj): "values": convert(b.values), "shape": b.values.shape, "dtype": b.dtype.name, - "klass": b.__class__.__name__, + "klass": type(b).__name__, "compress": compressor, } for b in data.blocks @@ -553,7 +553,7 @@ def encode(obj): elif isinstance(obj, BlockIndex): return { "typ": "block_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "blocs": obj.blocs, "blengths": obj.blengths, "length": obj.length, @@ -561,7 +561,7 @@ def encode(obj): elif isinstance(obj, IntIndex): return { "typ": "int_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "indices": obj.indices, "length": obj.length, } diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index bf7aa5970519f..fb63bdcaaa876 100644 --- 
a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3692,7 +3692,7 @@ def create_axes( # the non_index_axes info info = _get_info(self.info, i) info["names"] = list(a.names) - info["type"] = a.__class__.__name__ + info["type"] = type(a).__name__ self.non_index_axes.append((i, append_axis)) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index bd5e215730397..59bb4e3bf236a 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -856,12 +856,11 @@ def __str__(self) -> str: return self.string def __repr__(self) -> str: - # not perfect :-/ - return "{cls}({obj})".format(cls=self.__class__, obj=self) + return f"{type(self)}({self})" def __eq__(self, other) -> bool: return ( - isinstance(other, self.__class__) + isinstance(other, type(self)) and self.string == other.string and self.value == other.value ) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index a5040c8cfc2fc..d1e1717225e15 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -96,7 +96,7 @@ def test_eq(self, dtype): assert dtype != "anonther_type" def test_construct_from_string(self, dtype): - dtype_instance = dtype.__class__.construct_from_string(dtype.name) - assert isinstance(dtype_instance, dtype.__class__) + dtype_instance = type(dtype).construct_from_string(dtype.name) + assert isinstance(dtype_instance, type(dtype)) with pytest.raises(TypeError): - dtype.__class__.construct_from_string("another_type") + type(dtype).construct_from_string("another_type") diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 5e4fb6d69e52c..20d06ef2e5647 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -123,9 +123,7 @@ def test_direct_arith_with_series_returns_not_implemented(self, data): result = data.__add__(other) assert result is NotImplemented else: - raise pytest.skip( - "{} does not implement add".format(data.__class__.__name__) - ) + raise 
pytest.skip(f"{type(data).__name__} does not implement add") class BaseComparisonOpsTests(BaseOpsUtil): @@ -169,6 +167,4 @@ def test_direct_arith_with_series_returns_not_implemented(self, data): result = data.__eq__(other) assert result is NotImplemented else: - raise pytest.skip( - "{} does not implement __eq__".format(data.__class__.__name__) - ) + raise pytest.skip(f"{type(data).__name__} does not implement __eq__") diff --git a/pandas/tests/extension/base/printing.py b/pandas/tests/extension/base/printing.py index 0f10efbf32a49..5d17a4b0cbee2 100644 --- a/pandas/tests/extension/base/printing.py +++ b/pandas/tests/extension/base/printing.py @@ -18,7 +18,7 @@ def test_array_repr(self, data, size): data = type(data)._concat_same_type([data] * 5) result = repr(data) - assert data.__class__.__name__ in result + assert type(data).__name__ in result assert "Length: {}".format(len(data)) in result assert str(data.dtype) in result if size == "big": diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 3c97a87c95bd2..26a3c738750ca 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -642,7 +642,7 @@ def test_applymap_box(self): } ) - result = df.applymap(lambda x: "{0}".format(x.__class__.__name__)) + result = df.applymap(lambda x: type(x).__name__) expected = pd.DataFrame( { "a": ["Timestamp", "Timestamp"], diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index c35c4c3568f74..102949fe3f05e 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -244,7 +244,7 @@ def test_str(self): idx = self.create_index() idx.name = "foo" assert "'foo'" in str(idx) - assert idx.__class__.__name__ in str(idx) + assert type(idx).__name__ in str(idx) def test_repr_max_seq_item_setting(self): # GH10182 @@ -260,8 +260,8 @@ def test_copy_name(self, indices): if isinstance(indices, MultiIndex): return - first = indices.__class__(indices, copy=True, name="mario") - second 
= first.__class__(first, copy=False) + first = type(indices)(indices, copy=True, name="mario") + second = type(first)(first, copy=False) # Even though "copy=False", we want a new object. assert first is not second @@ -292,7 +292,7 @@ def test_ensure_copied_data(self, indices): # MultiIndex and CategoricalIndex are tested separately return - index_type = indices.__class__ + index_type = type(indices) result = index_type(indices.values, copy=True, **init_kwargs) tm.assert_index_equal(indices, result) tm.assert_numpy_array_equal( @@ -502,7 +502,7 @@ def test_difference_base(self, sort, indices): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: if isinstance(indices, (DatetimeIndex, TimedeltaIndex)): - assert result.__class__ == answer.__class__ + assert type(result) == type(answer) tm.assert_numpy_array_equal( result.sort_values().asi8, answer.sort_values().asi8 ) @@ -677,9 +677,9 @@ def test_hasnans_isnans(self, indices): values[1] = np.nan if isinstance(indices, PeriodIndex): - idx = indices.__class__(values, freq=indices.freq) + idx = type(indices)(values, freq=indices.freq) else: - idx = indices.__class__(values) + idx = type(indices)(values) expected = np.array([False] * len(idx), dtype=bool) expected[1] = True @@ -716,9 +716,9 @@ def test_fillna(self, indices): values[1] = np.nan if isinstance(indices, PeriodIndex): - idx = indices.__class__(values, freq=indices.freq) + idx = type(indices)(values, freq=indices.freq) else: - idx = indices.__class__(values) + idx = type(indices)(values) expected = np.array([False] * len(idx), dtype=bool) expected[1] = True From dbae24036cf16d1e74eb7648752acf07a2ce9817 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 29 Nov 2019 18:46:36 +0200 Subject: [PATCH 04/49] x.__class__ to type(x) (#29904) --- pandas/tests/indexes/datetimelike.py | 2 +- pandas/tests/indexes/multi/test_missing.py | 6 +++--- 
.../tests/indexes/period/test_partial_slicing.py | 2 +- pandas/tests/indexes/test_base.py | 8 ++++---- pandas/tests/reshape/test_concat.py | 8 ++++---- pandas/tests/series/test_apply.py | 16 ++++++++-------- pandas/tests/test_base.py | 2 +- pandas/tests/tseries/holiday/test_holiday.py | 2 +- pandas/tests/tseries/offsets/test_offsets.py | 4 ++-- pandas/tseries/holiday.py | 2 +- 10 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index e6e38ce9921f5..42244626749b9 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -38,7 +38,7 @@ def test_str(self): idx.name = "foo" assert not "length={}".format(len(idx)) in str(idx) assert "'foo'" in str(idx) - assert idx.__class__.__name__ in str(idx) + assert type(idx).__name__ in str(idx) if hasattr(idx, "tz"): if idx.tz is not None: diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index 15bbd2ce97c3c..31de40512c474 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -42,9 +42,9 @@ def test_fillna(idx): values[1] = np.nan if isinstance(index, PeriodIndex): - idx = index.__class__(values, freq=index.freq) + idx = type(index)(values, freq=index.freq) else: - idx = index.__class__(values) + idx = type(index)(values) expected = np.array([False] * len(idx), dtype=bool) expected[1] = True @@ -115,7 +115,7 @@ def test_hasnans_isnans(idx): values = index.values values[1] = np.nan - index = idx.__class__(values) + index = type(idx)(values) expected = np.array([False] * len(index), dtype=bool) expected[1] = True diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 50a12baf352d9..501c2a4d8edcc 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -123,7 +123,7 @@ 
def test_range_slice_outofbounds(self): for idx in [didx, pidx]: df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx) - empty = DataFrame(index=idx.__class__([], freq="D"), columns=["units"]) + empty = DataFrame(index=type(idx)([], freq="D"), columns=["units"]) empty["units"] = empty["units"].astype("int64") tm.assert_frame_equal(df["2013/09/01":"2013/09/30"], empty) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 1f99ba7ad01db..77d81a4a9566e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -752,7 +752,7 @@ def test_fancy(self): @pytest.mark.parametrize("dtype", [np.int_, np.bool_]) def test_empty_fancy(self, index, dtype): empty_arr = np.array([], dtype=dtype) - empty_index = index.__class__([]) + empty_index = type(index)([]) assert index[[]].identical(empty_index) assert index[empty_arr].identical(empty_index) @@ -762,7 +762,7 @@ def test_empty_fancy_raises(self, index): # pd.DatetimeIndex is excluded, because it overrides getitem and should # be tested separately. empty_farr = np.array([], dtype=np.float_) - empty_index = index.__class__([]) + empty_index = type(index)([]) assert index[[]].identical(empty_index) # np.ndarray only accepts ndarray of int & bool dtypes, so should Index @@ -2446,8 +2446,8 @@ def test_copy_name(self): # GH12309 index = self.create_index() - first = index.__class__(index, copy=True, name="mario") - second = first.__class__(first, copy=False) + first = type(index)(index, copy=True, name="mario") + second = type(first)(first, copy=False) # Even though "copy=False", we want a new object. 
assert first is not second diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index bb8339439d339..63f1ef7595f31 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -949,7 +949,7 @@ def test_append_preserve_index_name(self): all_indexes = indexes_can_append + indexes_cannot_append_with_other - @pytest.mark.parametrize("index", all_indexes, ids=lambda x: x.__class__.__name__) + @pytest.mark.parametrize("index", all_indexes, ids=lambda x: type(x).__name__) def test_append_same_columns_type(self, index): # GH18359 @@ -979,7 +979,7 @@ def test_append_same_columns_type(self, index): @pytest.mark.parametrize( "df_columns, series_index", combinations(indexes_can_append, r=2), - ids=lambda x: x.__class__.__name__, + ids=lambda x: type(x).__name__, ) def test_append_different_columns_types(self, df_columns, series_index): # GH18359 @@ -1004,12 +1004,12 @@ def test_append_different_columns_types(self, df_columns, series_index): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( - "index_can_append", indexes_can_append, ids=lambda x: x.__class__.__name__ + "index_can_append", indexes_can_append, ids=lambda x: type(x).__name__ ) @pytest.mark.parametrize( "index_cannot_append_with_other", indexes_cannot_append_with_other, - ids=lambda x: x.__class__.__name__, + ids=lambda x: type(x).__name__, ) def test_append_different_columns_types_raises( self, index_can_append, index_cannot_append_with_other diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index bdbfa333ef33a..eb4f3273f8713 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -92,7 +92,7 @@ def test_apply_box(self): s = pd.Series(vals) assert s.dtype == "datetime64[ns]" # boxed value must be Timestamp instance - res = s.apply(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") exp = 
pd.Series(["Timestamp_1_None", "Timestamp_2_None"]) tm.assert_series_equal(res, exp) @@ -102,7 +102,7 @@ def test_apply_box(self): ] s = pd.Series(vals) assert s.dtype == "datetime64[ns, US/Eastern]" - res = s.apply(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) tm.assert_series_equal(res, exp) @@ -110,7 +110,7 @@ def test_apply_box(self): vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] s = pd.Series(vals) assert s.dtype == "timedelta64[ns]" - res = s.apply(lambda x: "{0}_{1}".format(x.__class__.__name__, x.days)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") exp = pd.Series(["Timedelta_1", "Timedelta_2"]) tm.assert_series_equal(res, exp) @@ -118,7 +118,7 @@ def test_apply_box(self): vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] s = pd.Series(vals) assert s.dtype == "Period[M]" - res = s.apply(lambda x: "{0}_{1}".format(x.__class__.__name__, x.freqstr)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") exp = pd.Series(["Period_M", "Period_M"]) tm.assert_series_equal(res, exp) @@ -614,7 +614,7 @@ def test_map_box(self): s = pd.Series(vals) assert s.dtype == "datetime64[ns]" # boxed value must be Timestamp instance - res = s.map(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") exp = pd.Series(["Timestamp_1_None", "Timestamp_2_None"]) tm.assert_series_equal(res, exp) @@ -624,7 +624,7 @@ def test_map_box(self): ] s = pd.Series(vals) assert s.dtype == "datetime64[ns, US/Eastern]" - res = s.map(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) tm.assert_series_equal(res, exp) @@ -632,7 +632,7 @@ def test_map_box(self): vals = 
[pd.Timedelta("1 days"), pd.Timedelta("2 days")] s = pd.Series(vals) assert s.dtype == "timedelta64[ns]" - res = s.map(lambda x: "{0}_{1}".format(x.__class__.__name__, x.days)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") exp = pd.Series(["Timedelta_1", "Timedelta_2"]) tm.assert_series_equal(res, exp) @@ -640,7 +640,7 @@ def test_map_box(self): vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] s = pd.Series(vals) assert s.dtype == "Period[M]" - res = s.map(lambda x: "{0}_{1}".format(x.__class__.__name__, x.freqstr)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") exp = pd.Series(["Period_M", "Period_M"]) tm.assert_series_equal(res, exp) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index f24bb9e72aef5..e65388be2ba7d 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -400,7 +400,7 @@ def test_value_counts_unique_nunique(self): result = o.unique() if isinstance(o, Index): - assert isinstance(result, o.__class__) + assert isinstance(result, type(o)) tm.assert_index_equal(result, orig) assert result.dtype == orig.dtype elif is_datetime64tz_dtype(o): diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py index 06869fcd7a4f8..7748b965f8962 100644 --- a/pandas/tests/tseries/holiday/test_holiday.py +++ b/pandas/tests/tseries/holiday/test_holiday.py @@ -238,7 +238,7 @@ class TestCalendar(AbstractHolidayCalendar): rules = [] calendar = get_calendar("TestCalendar") - assert TestCalendar == calendar.__class__ + assert TestCalendar == type(calendar) def test_factory(): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index d70780741aa88..ae78d5a55bb5e 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -358,7 +358,7 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=Fals ts = Timestamp(dt) + 
Nano(5) if ( - offset_s.__class__.__name__ == "DateOffset" + type(offset_s).__name__ == "DateOffset" and (funcname == "apply" or normalize) and ts.nanosecond > 0 ): @@ -395,7 +395,7 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=Fals ts = Timestamp(dt, tz=tz) + Nano(5) if ( - offset_s.__class__.__name__ == "DateOffset" + type(offset_s).__name__ == "DateOffset" and (funcname == "apply" or normalize) and ts.nanosecond > 0 ): diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 9417dc4b48499..2e5477ea00e39 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -363,7 +363,7 @@ def __init__(self, name=None, rules=None): """ super().__init__() if name is None: - name = self.__class__.__name__ + name = type(self).__name__ self.name = name if rules is not None: From 878d31914197e81f9dabe427effd285f59b8db5d Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 29 Nov 2019 18:48:03 +0200 Subject: [PATCH 05/49] STY: x.__class__ to type(x) #batch-6 (#29905) --- pandas/tseries/offsets.py | 9 ++++----- pandas/util/_depr_module.py | 2 +- pandas/util/testing.py | 18 +++++++++--------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 0620f2b9aae49..9c0bceb1d5110 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -309,9 +309,8 @@ def apply_index(self, i): if type(self) is not DateOffset: raise NotImplementedError( - "DateOffset subclass {name} " - "does not have a vectorized " - "implementation".format(name=self.__class__.__name__) + f"DateOffset subclass {type(self).__name__} " + "does not have a vectorized implementation" ) kwds = self.kwds relativedelta_fast = { @@ -402,7 +401,7 @@ def rollback(self, dt): """ dt = as_timestamp(dt) if not self.onOffset(dt): - dt = dt - self.__class__(1, normalize=self.normalize, **self.kwds) + dt = dt - type(self)(1, 
normalize=self.normalize, **self.kwds) return dt def rollforward(self, dt): @@ -416,7 +415,7 @@ def rollforward(self, dt): """ dt = as_timestamp(dt) if not self.onOffset(dt): - dt = dt + self.__class__(1, normalize=self.normalize, **self.kwds) + dt = dt + type(self)(1, normalize=self.normalize, **self.kwds) return dt def onOffset(self, dt): diff --git a/pandas/util/_depr_module.py b/pandas/util/_depr_module.py index 45e7db9281837..ae3c6359d20e0 100644 --- a/pandas/util/_depr_module.py +++ b/pandas/util/_depr_module.py @@ -32,7 +32,7 @@ def __init__(self, deprmod, deprmodto=None, removals=None, moved=None): self.moved = moved # For introspection purposes. - self.self_dir = frozenset(dir(self.__class__)) + self.self_dir = frozenset(dir(type(self))) def __dir__(self): deprmodule = self._import_deprmod() diff --git a/pandas/util/testing.py b/pandas/util/testing.py index bcd12eba1651a..9adbf4cee5d74 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -713,7 +713,7 @@ def repr_class(x): return x try: - return x.__class__.__name__ + return type(x).__name__ except AttributeError: return repr(type(x)) @@ -780,17 +780,17 @@ def assert_is_valid_plot_return_object(objs): if isinstance(objs, (pd.Series, np.ndarray)): for el in objs.ravel(): msg = ( - "one of 'objs' is not a matplotlib Axes instance, type " - "encountered {name!r}" - ).format(name=el.__class__.__name__) + "one of 'objs' is not a matplotlib Axes instance, " + f"type encountered {repr(type(el).__name__)}" + ) assert isinstance(el, (plt.Axes, dict)), msg else: - assert isinstance(objs, (plt.Artist, tuple, dict)), ( - "objs is neither an ndarray of Artist instances nor a " - 'single Artist instance, tuple, or dict, "objs" is a {name!r}'.format( - name=objs.__class__.__name__ - ) + msg = ( + "objs is neither an ndarray of Artist instances nor a single " + "Artist instance, tuple, or dict, 'objs' is a " + f"{repr(type(objs).__name__)}" ) + assert isinstance(objs, (plt.Artist, tuple, dict)), msg 
def isiterable(obj): From 69ef5433a00616c24499fd7f06fc5c44b138ba32 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 29 Nov 2019 16:59:58 +0000 Subject: [PATCH 06/49] TYP: some types for pandas/core/arrays/sparse/array.py (#29898) --- pandas/core/arrays/sparse/array.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 943dea4252499..593ba7a643193 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -260,6 +260,7 @@ class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): _pandas_ftype = "sparse" _subtyp = "sparse_array" # register ABCSparseArray _deprecations = PandasObject._deprecations | frozenset(["get_values"]) + _sparse_index: SparseIndex def __init__( self, @@ -372,8 +373,8 @@ def __init__( @classmethod def _simple_new( - cls, sparse_array: np.ndarray, sparse_index: SparseIndex, dtype: SparseDtype - ) -> ABCSparseArray: + cls, sparse_array: np.ndarray, sparse_index: SparseIndex, dtype: SparseDtype, + ) -> "SparseArray": new = cls([]) new._sparse_index = sparse_index new._sparse_values = sparse_array @@ -1392,8 +1393,8 @@ def __abs__(self): # ------------------------------------------------------------------------ @classmethod - def _create_unary_method(cls, op): - def sparse_unary_method(self): + def _create_unary_method(cls, op) -> Callable[["SparseArray"], "SparseArray"]: + def sparse_unary_method(self) -> "SparseArray": fill_value = op(np.array(self.fill_value)).item() values = op(self.sp_values) dtype = SparseDtype(values.dtype, fill_value) From 29fdc6946be8a60373d089c0aded86eea5d3a567 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 29 Nov 2019 17:01:13 +0000 Subject: [PATCH 07/49] TYP: some types for pandas/core/arrays/sparse/dtype.py (#29899) --- pandas/core/arrays/sparse/dtype.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/sparse/dtype.py 
b/pandas/core/arrays/sparse/dtype.py index 3b656705f5568..0124304727ab3 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -1,7 +1,7 @@ """Sparse Dtype""" import re -from typing import Any +from typing import Any, Tuple import numpy as np @@ -223,7 +223,7 @@ def construct_from_string(cls, string): raise TypeError(msg) @staticmethod - def _parse_subtype(dtype): + def _parse_subtype(dtype: str) -> Tuple[str, bool]: """ Parse a string to get the subtype @@ -249,7 +249,7 @@ def _parse_subtype(dtype): has_fill_value = False if m: subtype = m.groupdict()["subtype"] - has_fill_value = m.groupdict()["fill_value"] or has_fill_value + has_fill_value = bool(m.groupdict()["fill_value"]) elif dtype == "Sparse": subtype = "float64" else: From c63ae14859cd74cd4a7e64e33570e49cbc12663d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 Nov 2019 18:31:41 +0100 Subject: [PATCH 08/49] DOC: remove reference to get_value (removed) in DataFrame.lookup docstring (#29925) --- doc/source/user_guide/indexing.rst | 2 +- pandas/core/frame.py | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index cf55ce0c9a6d4..0229331127441 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -374,7 +374,7 @@ For getting values with a boolean array: df1.loc['a'] > 0 df1.loc[:, df1.loc['a'] > 0] -For getting a value explicitly (equivalent to deprecated ``df.get_value('a','A')``): +For getting a value explicitly: .. 
ipython:: python diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d436385ba61ce..d2e396284c5a7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3544,13 +3544,6 @@ def lookup(self, row_labels, col_labels): ------- numpy.ndarray - Notes - ----- - Akin to:: - - result = [df.get_value(row, col) - for row, col in zip(row_labels, col_labels)] - Examples -------- values : ndarray From fc100fea90bb8ee95aaf33e4218e98b3655535d4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 29 Nov 2019 09:39:39 -0800 Subject: [PATCH 09/49] DEPR: Remove errors argument in tz_localize (#29911) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/_libs/tslibs/nattype.pyx | 12 -------- pandas/_libs/tslibs/timestamps.pyx | 28 +---------------- pandas/core/arrays/datetimes.py | 30 +------------------ .../tests/indexes/datetimes/test_timezones.py | 22 ++------------ .../tests/scalar/timestamp/test_timezones.py | 29 ++++-------------- pandas/tests/series/test_timezones.py | 15 ---------- 7 files changed, 11 insertions(+), 126 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e54397e635c77..1468718b16170 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -458,6 +458,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, - :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) - Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`) +- Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) - .. 
_whatsnew_1000.performance: diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 966f72dcd7889..76a694c64e1fb 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -720,18 +720,6 @@ default 'raise' nonexistent times. .. versionadded:: 0.24.0 - errors : 'raise', 'coerce', default None - Determine how errors should be handled. - - The behavior is as follows: - - * 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - * 'coerce' will return NaT if the timestamp can not be converted - into the specified timezone. Use ``nonexistent='NaT'`` instead. - - .. deprecated:: 0.24.0 Returns ------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 08e504ada789e..e7dc911ff0bae 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -753,8 +753,7 @@ timedelta}, default 'raise' # GH#21336, GH#21365 return Timedelta(nanoseconds=1) - def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', - errors=None): + def tz_localize(self, tz, ambiguous='raise', nonexistent='raise'): """ Convert naive Timestamp to local time zone, or remove timezone from tz-aware Timestamp. @@ -797,18 +796,6 @@ default 'raise' nonexistent times. .. versionadded:: 0.24.0 - errors : 'raise', 'coerce', default None - Determine how errors should be handled. - - The behavior is as follows: - - * 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - * 'coerce' will return NaT if the timestamp can not be converted - into the specified timezone. Use ``nonexistent='NaT'`` instead. - - .. 
deprecated:: 0.24.0 Returns ------- @@ -822,19 +809,6 @@ default 'raise' if ambiguous == 'infer': raise ValueError('Cannot infer offset with only one time.') - if errors is not None: - warnings.warn("The errors argument is deprecated and will be " - "removed in a future release. Use " - "nonexistent='NaT' or nonexistent='raise' " - "instead.", FutureWarning) - if errors == 'coerce': - nonexistent = 'NaT' - elif errors == 'raise': - nonexistent = 'raise' - else: - raise ValueError("The errors argument must be either 'coerce' " - "or 'raise'.") - nonexistent_options = ('raise', 'NaT', 'shift_forward', 'shift_backward') if nonexistent not in nonexistent_options and not isinstance( diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 07cbaa8cd1eb6..47f236c19ffe7 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -955,7 +955,7 @@ def tz_convert(self, tz): dtype = tz_to_dtype(tz) return self._simple_new(self.asi8, dtype=dtype, freq=self.freq) - def tz_localize(self, tz, ambiguous="raise", nonexistent="raise", errors=None): + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. @@ -1004,17 +1004,6 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise", errors=None): .. versionadded:: 0.24.0 - errors : {'raise', 'coerce'}, default None - The method to handle errors: - - - 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified time zone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - - 'coerce' will return NaT if the timestamp can not be converted - to the specified time zone. Use ``nonexistent='NaT'`` instead. - - .. 
deprecated:: 0.24.0 - Returns ------- Same type as self @@ -1105,23 +1094,6 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise", errors=None): 1 2015-03-29 03:30:00+02:00 dtype: datetime64[ns, 'Europe/Warsaw'] """ - if errors is not None: - warnings.warn( - "The errors argument is deprecated and will be " - "removed in a future release. Use " - "nonexistent='NaT' or nonexistent='raise' " - "instead.", - FutureWarning, - ) - if errors == "coerce": - nonexistent = "NaT" - elif errors == "raise": - nonexistent = "raise" - else: - raise ValueError( - "The errors argument must be either 'coerce' or 'raise'." - ) - nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") if nonexistent not in nonexistent_options and not isinstance( nonexistent, timedelta diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 059dbb00019d8..3f942f9b79428 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -323,13 +323,9 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): index.tz_localize(tz=tz) with pytest.raises(pytz.NonExistentTimeError): - with tm.assert_produces_warning(FutureWarning): - index.tz_localize(tz=tz, errors="raise") + index.tz_localize(tz=tz, nonexistent="raise") - with tm.assert_produces_warning( - FutureWarning, clear=FutureWarning, check_stacklevel=False - ): - result = index.tz_localize(tz=tz, errors="coerce") + result = index.tz_localize(tz=tz, nonexistent="NaT") test_times = ["2015-03-08 01:00-05:00", "NaT", "2015-03-08 03:00-04:00"] dti = to_datetime(test_times, utc=True) expected = dti.tz_convert("US/Eastern") @@ -704,20 +700,6 @@ def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, tz_type): with pytest.raises(ValueError, match=msg): dti.tz_localize(tz, nonexistent=timedelta(seconds=offset)) - @pytest.mark.filterwarnings("ignore::FutureWarning") - def 
test_dti_tz_localize_errors_deprecation(self): - # GH 22644 - tz = "Europe/Warsaw" - n = 60 - dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with pytest.raises(ValueError): - dti.tz_localize(tz, errors="foo") - # make sure errors='coerce' gets mapped correctly to nonexistent - result = dti.tz_localize(tz, errors="coerce") - expected = dti.tz_localize(tz, nonexistent="NaT") - tm.assert_index_equal(result, expected) - # ------------------------------------------------------------- # DatetimeIndex.normalize diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 250f48b7e711b..6537f6ccd8432 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -14,7 +14,6 @@ import pandas.util._test_decorators as td from pandas import NaT, Timestamp -import pandas.util.testing as tm class TestTimestampTZOperations: @@ -80,7 +79,6 @@ def test_tz_localize_ambiguous(self): ("2015-03-29 02:30", "Europe/Belgrade"), ], ) - @pytest.mark.filterwarnings("ignore::FutureWarning") def test_tz_localize_nonexistent(self, stamp, tz): # GH#13057 ts = Timestamp(stamp) @@ -88,36 +86,21 @@ def test_tz_localize_nonexistent(self, stamp, tz): ts.tz_localize(tz) # GH 22644 with pytest.raises(NonExistentTimeError): - with tm.assert_produces_warning(FutureWarning): - ts.tz_localize(tz, errors="raise") - with tm.assert_produces_warning(FutureWarning): - assert ts.tz_localize(tz, errors="coerce") is NaT + ts.tz_localize(tz, nonexistent="raise") + assert ts.tz_localize(tz, nonexistent="NaT") is NaT - def test_tz_localize_errors_ambiguous(self): + def test_tz_localize_ambiguous_raise(self): # GH#13057 ts = Timestamp("2015-11-1 01:00") with pytest.raises(AmbiguousTimeError): - with tm.assert_produces_warning(FutureWarning): - ts.tz_localize("US/Pacific", errors="coerce") + 
ts.tz_localize("US/Pacific", ambiguous="raise") - @pytest.mark.filterwarnings("ignore::FutureWarning") - def test_tz_localize_errors_invalid_arg(self): + def test_tz_localize_nonexistent_invalid_arg(self): # GH 22644 tz = "Europe/Warsaw" ts = Timestamp("2015-03-29 02:00:00") with pytest.raises(ValueError): - with tm.assert_produces_warning(FutureWarning): - ts.tz_localize(tz, errors="foo") - - def test_tz_localize_errors_coerce(self): - # GH 22644 - # make sure errors='coerce' gets mapped correctly to nonexistent - tz = "Europe/Warsaw" - ts = Timestamp("2015-03-29 02:00:00") - with tm.assert_produces_warning(FutureWarning): - result = ts.tz_localize(tz, errors="coerce") - expected = ts.tz_localize(tz, nonexistent="NaT") - assert result is expected + ts.tz_localize(tz, nonexistent="foo") @pytest.mark.parametrize( "stamp", diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index c16e2864b131f..c03101265f7e7 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -33,21 +33,6 @@ def test_series_tz_localize(self): with pytest.raises(TypeError, match="Already tz-aware"): ts.tz_localize("US/Eastern") - @pytest.mark.filterwarnings("ignore::FutureWarning") - def test_tz_localize_errors_deprecation(self): - # GH 22644 - tz = "Europe/Warsaw" - n = 60 - rng = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") - ts = Series(rng) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with pytest.raises(ValueError): - ts.dt.tz_localize(tz, errors="foo") - # make sure errors='coerce' gets mapped correctly to nonexistent - result = ts.dt.tz_localize(tz, errors="coerce") - expected = ts.dt.tz_localize(tz, nonexistent="NaT") - tm.assert_series_equal(result, expected) - def test_series_tz_localize_ambiguous_bool(self): # make sure that we are correctly accepting bool values as ambiguous From f98d2b6587b74c9a640b062d94911b199d962119 Mon Sep 17 00:00:00 2001 From: Tang 
Heyi Date: Sat, 30 Nov 2019 01:51:04 +0800 Subject: [PATCH 10/49] BUG: .count() raises if use_inf_as_na is enabled (#29888) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/dtypes/missing.py | 2 +- pandas/tests/series/test_analytics.py | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 1468718b16170..14f36a808c468 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -682,6 +682,7 @@ Other - Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`) - Fix :class:`AbstractHolidayCalendar` to return correct results for years after 2030 (now goes up to 2200) (:issue:`27790`) +- Bug in :meth:`Series.count` that raised if ``use_inf_as_na`` was enabled (:issue:`29478`) .. _whatsnew_1000.contributors: diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index cb4199272f574..205ca193636c6 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -176,7 +176,7 @@ def _isna_old(obj): raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, type): return False - elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): + elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, ABCExtensionArray)): return _isna_ndarraylike_old(obj) elif isinstance(obj, ABCGeneric): return obj._constructor(obj._data.isna(func=_isna_old)) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index e25c4456147f7..fe9306a06efc7 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -554,6 +554,10 @@ def test_count(self, datetime_series): ts.iloc[[0, 3, 5]] = np.nan tm.assert_series_equal(ts.count(level=1), right - 1) + # GH29478 + with pd.option_context("use_inf_as_na", True): + assert pd.Series([pd.Timestamp("1990/1/1")]).count() == 1 + def test_dot(self): a = 
Series(np.random.randn(4), index=["p", "q", "r", "s"]) b = DataFrame( From 774fe7b30572883c5a49d9b36e3dacaa0fa20994 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 13:07:55 -0800 Subject: [PATCH 11/49] BUG/DEPR: Timestamp/Timedelta resolution (#29910) * BUG/DEPR: Timestamp/Timedelta resolution * GH ref --- doc/source/whatsnew/v1.0.0.rst | 2 + pandas/_libs/tslibs/timedeltas.pyx | 51 +------------------ pandas/_libs/tslibs/timestamps.pyx | 10 +--- .../tests/scalar/timedelta/test_timedelta.py | 10 ++-- .../tests/scalar/timestamp/test_timestamp.py | 4 ++ 5 files changed, 15 insertions(+), 62 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 14f36a808c468..bb4a24e1b4eb5 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -457,6 +457,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) - Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, - :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) +- Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``; for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) - Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`) - Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) - @@ -516,6 +517,7 @@ Datetimelike - Bug in :func:`pandas._config.localization.get_locales` where the ``locales -a`` encodes the locales list as 
windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`) - Bug in :meth:`Series.var` failing to raise ``TypeError`` when called with ``timedelta64[ns]`` dtype (:issue:`28289`) - Bug in :meth:`DatetimeIndex.strftime` and :meth:`Series.dt.strftime` where ``NaT`` was converted to the string ``'NaT'`` instead of ``np.nan`` (:issue:`29578`) +- Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 48a2a05011ab5..726d664c1ebea 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1005,56 +1005,6 @@ cdef class _Timedelta(timedelta): else: return "D" - @property - def resolution(self): - """ - Return a string representing the lowest timedelta resolution. - - Each timedelta has a defined resolution that represents the lowest OR - most granular level of precision. Each level of resolution is - represented by a short string as defined below: - - Resolution: Return value - - * Days: 'D' - * Hours: 'H' - * Minutes: 'T' - * Seconds: 'S' - * Milliseconds: 'L' - * Microseconds: 'U' - * Nanoseconds: 'N' - - Returns - ------- - str - Timedelta resolution. - - Examples - -------- - >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') - >>> td.resolution - 'N' - - >>> td = pd.Timedelta('1 days 2 min 3 us') - >>> td.resolution - 'U' - - >>> td = pd.Timedelta('2 min 3 s') - >>> td.resolution - 'S' - - >>> td = pd.Timedelta(36, unit='us') - >>> td.resolution - 'U' - """ - # See GH#21344 - warnings.warn("Timedelta.resolution is deprecated, in a future " - "version will behave like the standard library " - "datetime.timedelta.resolution attribute. 
" - "Use Timedelta.resolution_string instead.", - FutureWarning) - return self.resolution_string - @property def nanoseconds(self): """ @@ -1602,3 +1552,4 @@ cdef _broadcast_floordiv_td64(int64_t value, object other, # resolution in ns Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1) Timedelta.max = Timedelta(np.iinfo(np.int64).max) +Timedelta.resolution = Timedelta(nanoseconds=1) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e7dc911ff0bae..fbe71a0a6d198 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -744,15 +744,6 @@ timedelta}, default 'raise' """ return bool(ccalendar.is_leapyear(self.year)) - @property - def resolution(self): - """ - Return resolution describing the smallest difference between two - times that can be represented by Timestamp object_state. - """ - # GH#21336, GH#21365 - return Timedelta(nanoseconds=1) - def tz_localize(self, tz, ambiguous='raise', nonexistent='raise'): """ Convert naive Timestamp to local time zone, or remove @@ -1036,3 +1027,4 @@ cdef int64_t _NS_LOWER_BOUND = -9223372036854775000 # Resolution is in nanoseconds Timestamp.min = Timestamp(_NS_LOWER_BOUND) Timestamp.max = Timestamp(_NS_UPPER_BOUND) +Timestamp.resolution = Timedelta(nanoseconds=1) # GH#21336, GH#21365 diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index d4881ff0e1747..5a5724401029c 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -804,9 +804,13 @@ def test_resolution_string(self): def test_resolution_deprecated(self): # GH#21344 td = Timedelta(days=4, hours=3) - with tm.assert_produces_warning(FutureWarning) as w: - td.resolution - assert "Use Timedelta.resolution_string instead" in str(w[0].message) + result = td.resolution + assert result == Timedelta(nanoseconds=1) + + # Check that the attribute is available on the class, mirroring + # 
the stdlib timedelta behavior + result = Timedelta.resolution + assert result == Timedelta(nanoseconds=1) @pytest.mark.parametrize( diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index a33afc8b3ccca..d6251ffc7940d 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -192,6 +192,10 @@ def test_resolution(self): dt = Timestamp("2100-01-01 00:00:00") assert dt.resolution == Timedelta(nanoseconds=1) + # Check that the attribute is available on the class, mirroring + # the stdlib datetime behavior + assert Timestamp.resolution == Timedelta(nanoseconds=1) + class TestTimestampConstructors: def test_constructor(self): From 7e9da33f346a3328632fba5441a9b4524f3aa17c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 14:27:06 -0800 Subject: [PATCH 12/49] DEPR: passing a dtype alias to DatetimeTZDtype(unit) (#29927) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index bb4a24e1b4eb5..b5f8e8ca4a126 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -427,6 +427,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously deprecated :meth:`MultiIndex.set_labels`, use :meth:`MultiIndex.set_codes` instead (:issue:`23752`) - Removed the previously deprecated "labels" keyword from :meth:`MultiIndex.set_codes`, :meth:`MultiIndex.copy`, :meth:`MultiIndex.drop`, use "codes" instead (:issue:`23752`) - Removed support for legacy HDF5 formats (:issue:`29787`) +- Passing a dtype alias (e.g. 
'datetime64[ns, UTC]') to :class:`DatetimeTZDtype` is no longer allowed, use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`) - :func:`read_excel` removed support for "skip_footer" argument, use "skipfooter" instead (:issue:`18836`) - :func:`read_excel` no longer allows an integer value for the parameter ``usecols``, instead pass a list of integers from 0 to ``usecols`` inclusive (:issue:`23635`) - :meth:`DataFrame.to_records` no longer supports the argument "convert_datetime64" (:issue:`18902`) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index fb3097684f0c3..9414786424245 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -676,7 +676,7 @@ def __init__(self, unit="ns", tz=None): "to DatetimeTZDtype is deprecated. Use " "'DatetimeTZDtype.construct_from_string()' instead." ) - warnings.warn(msg.format(tz=tz), FutureWarning, stacklevel=2) + raise ValueError(msg) else: raise ValueError("DatetimeTZDtype only supports ns units") diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index fc896e6a9d348..bbf44006611fb 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -187,7 +187,7 @@ def create(self): def test_alias_to_unit_raises(self): # 23990 - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(ValueError, match="Passing a dtype alias"): DatetimeTZDtype("datetime64[ns, US/Central]") def test_alias_to_unit_bad_alias_raises(self): From 8ffecdc729d6a5e1ef636034200d5ebc6cc32b06 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 29 Nov 2019 22:36:09 +0000 Subject: [PATCH 13/49] CI: Highlighting flake8 and grep errors in GitHub Actions (#29915) --- ci/code_checks.sh | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 7c6c98d910492..4e25fd0ddb5ea 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -34,17 +34,13 @@ 
function invgrep { # # This is useful for the CI, as we want to fail if one of the patterns # that we want to avoid is found by grep. - if [[ "$AZURE" == "true" ]]; then - set -o pipefail - grep -n "$@" | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Found unwanted pattern: " $3}' - else - grep "$@" - fi - return $((! $?)) + grep -n "$@" | sed "s/^/$INVGREP_PREPEND/" | sed "s/$/$INVGREP_APPEND/" ; EXIT_STATUS=${PIPESTATUS[0]} + return $((! $EXIT_STATUS)) } -if [[ "$AZURE" == "true" ]]; then - FLAKE8_FORMAT="##vso[task.logissue type=error;sourcepath=%(path)s;linenumber=%(row)s;columnnumber=%(col)s;code=%(code)s;]%(text)s" +if [[ "$GITHUB_ACTIONS" == "true" ]]; then + FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code):%(text)s" + INVGREP_PREPEND="##[error]" else FLAKE8_FORMAT="default" fi @@ -199,14 +195,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG - set -o pipefail - if [[ "$AZURE" == "true" ]]; then - # we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files - ! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}' - else - ! 
grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}' - fi + INVGREP_APPEND=" <- trailing whitespaces found" + invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" * RET=$(($RET + $?)) ; echo $MSG "DONE" + unset INVGREP_APPEND fi ### CODE ### From 7cf189d13a4681f6ebe30224dbf45e3278e1d60e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 14:36:49 -0800 Subject: [PATCH 14/49] DEPR: CategoricalBlock.where casting to object (#29913) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/internals/blocks.py | 31 ------------------- .../tests/arrays/categorical/test_indexing.py | 17 ++++------ 3 files changed, 7 insertions(+), 42 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b5f8e8ca4a126..052d58f1487fe 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -418,6 +418,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed :meth:`DataFrame.as_blocks`, :meth:`Series.as_blocks`, `DataFrame.blocks`, :meth:`Series.blocks` (:issue:`17656`) - :meth:`pandas.Series.str.cat` now defaults to aligning ``others``, using ``join='left'`` (:issue:`27611`) - :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`) +- :meth:`Series.where` with ``Categorical`` dtype (or :meth:`DataFrame.where` with ``Categorical`` column) no longer allows setting new categories (:issue:`24114`) - :func:`core.internals.blocks.make_block` no longer accepts the "fastpath" keyword(:issue:`19265`) - :meth:`Block.make_block_same_class` no longer accepts the "dtype" keyword(:issue:`19434`) - Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. 
(:issue:`23601`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e4de1c94da450..b0382755f2edb 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2887,37 +2887,6 @@ def concat_same_type(self, to_concat, placement=None): values, placement=placement or slice(0, len(values), 1), ndim=self.ndim ) - def where( - self, - other, - cond, - align=True, - errors="raise", - try_cast: bool = False, - axis: int = 0, - ) -> List["Block"]: - # TODO(CategoricalBlock.where): - # This can all be deleted in favor of ExtensionBlock.where once - # we enforce the deprecation. - object_msg = ( - "Implicitly converting categorical to object-dtype ndarray. " - "One or more of the values in 'other' are not present in this " - "categorical's categories. A future version of pandas will raise " - "a ValueError when 'other' contains different categories.\n\n" - "To preserve the current behavior, add the new categories to " - "the categorical before calling 'where', or convert the " - "categorical to a different dtype." - ) - try: - # Attempt to do preserve categorical dtype. 
- result = super().where(other, cond, align, errors, try_cast, axis) - except (TypeError, ValueError): - warnings.warn(object_msg, FutureWarning, stacklevel=6) - result = self.astype(object).where( - other, cond, align=align, errors=errors, try_cast=try_cast, axis=axis - ) - return result - def replace( self, to_replace, diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 6edd7fd00b707..f929eb24c9f19 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -206,13 +206,11 @@ def test_where_other_categorical(self): expected = pd.Series(Categorical(["a", "c", "c"], dtype=ser.dtype)) tm.assert_series_equal(result, expected) - def test_where_warns(self): + def test_where_new_category_raises(self): ser = pd.Series(Categorical(["a", "b", "c"])) - with tm.assert_produces_warning(FutureWarning): - result = ser.where([True, False, True], "d") - - expected = pd.Series(np.array(["a", "d", "c"], dtype="object")) - tm.assert_series_equal(result, expected) + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(ValueError, match=msg): + ser.where([True, False, True], "d") def test_where_ordered_differs_rasies(self): ser = pd.Series( @@ -221,11 +219,8 @@ def test_where_ordered_differs_rasies(self): other = Categorical( ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True ) - with tm.assert_produces_warning(FutureWarning): - result = ser.where([True, False, True], other) - - expected = pd.Series(np.array(["a", "c", "c"], dtype=object)) - tm.assert_series_equal(result, expected) + with pytest.raises(ValueError, match="without identical categories"): + ser.where([True, False, True], other) @pytest.mark.parametrize("index", [True, False]) From fe2117cf5c4354426baddbf29e082fc232714253 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 14:39:27 -0800 Subject: [PATCH 15/49] DEPR: categorical.take allow_fill 
default (#29912) --- doc/source/whatsnew/v1.0.0.rst | 2 ++ pandas/core/arrays/categorical.py | 36 +++++++------------ pandas/tests/arrays/categorical/test_algos.py | 13 +++++-- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 052d58f1487fe..d66d165bf7125 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -365,6 +365,7 @@ Deprecations is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). - :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) - :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`) +- :meth:`Categorical.take_nd` is deprecated, use :meth:`Categorical.take` instead (:issue:`27745`) .. _whatsnew_1000.prior_deprecations: @@ -457,6 +458,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`) - Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) - Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) +- Changed the default ``fill_value`` in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) - Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, - :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) - Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f20308be1ee09..bb4ceea420d8d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1,6 +1,5 @@ import operator from shutil import get_terminal_size -import textwrap from typing import Type, Union, cast from warnings import warn @@ -59,18 +58,6 @@ from .base import ExtensionArray, _extension_array_shared_docs, try_cast_to_ea -_take_msg = textwrap.dedent( - """\ - Interpreting negative values in 'indexer' as missing values. - In the future, this will change to meaning positional indices - from the right. - - Use 'allow_fill=True' to retain the previous behavior and silence this - warning. 
- - Use 'allow_fill=False' to accept the new behavior.""" -) - def _cat_compare_op(op): opname = f"__{op.__name__}__" @@ -1829,7 +1816,7 @@ def fillna(self, value=None, method=None, limit=None): return self._constructor(codes, dtype=self.dtype, fastpath=True) - def take_nd(self, indexer, allow_fill=None, fill_value=None): + def take(self, indexer, allow_fill: bool = False, fill_value=None): """ Take elements from the Categorical. @@ -1838,7 +1825,7 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): indexer : sequence of int The indices in `self` to take. The meaning of negative values in `indexer` depends on the value of `allow_fill`. - allow_fill : bool, default None + allow_fill : bool, default False How to handle negative values in `indexer`. * False: negative values in `indices` indicate positional indices @@ -1849,11 +1836,9 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): (the default). These values are set to `fill_value`. Any other other negative values raise a ``ValueError``. - .. versionchanged:: 0.23.0 + .. versionchanged:: 1.0.0 - Deprecated the default value of `allow_fill`. The deprecated - default is ``True``. In the future, this will change to - ``False``. + Default value changed from ``True`` to ``False``. fill_value : object The value to use for `indices` that are missing (-1), when @@ -1903,10 +1888,6 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): will raise a ``TypeError``. 
""" indexer = np.asarray(indexer, dtype=np.intp) - if allow_fill is None: - if (indexer < 0).any(): - warn(_take_msg, FutureWarning, stacklevel=2) - allow_fill = True dtype = self.dtype @@ -1927,7 +1908,14 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): result = type(self).from_codes(codes, dtype=dtype) return result - take = take_nd + def take_nd(self, indexer, allow_fill: bool = False, fill_value=None): + # GH#27745 deprecate alias that other EAs dont have + warn( + "Categorical.take_nd is deprecated, use Categorical.take instead", + FutureWarning, + stacklevel=2, + ) + return self.take(indexer, allow_fill=allow_fill, fill_value=fill_value) def __len__(self) -> int: """ diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index e076015c5f61d..dce3c4e4d5e98 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -89,10 +89,12 @@ def test_isin_empty(empty): class TestTake: # https://github.com/pandas-dev/pandas/issues/20664 - def test_take_warns(self): + def test_take_default_allow_fill(self): cat = pd.Categorical(["a", "b"]) - with tm.assert_produces_warning(FutureWarning): - cat.take([0, -1]) + with tm.assert_produces_warning(None): + result = cat.take([0, -1]) + + assert result.equals(cat) def test_take_positive_no_warning(self): cat = pd.Categorical(["a", "b"]) @@ -158,3 +160,8 @@ def test_take_fill_value_new_raises(self): xpr = r"'fill_value' \('d'\) is not in this Categorical's categories." 
with pytest.raises(TypeError, match=xpr): cat.take([0, 1, -1], fill_value="d", allow_fill=True) + + def test_take_nd_deprecated(self): + cat = pd.Categorical(["a", "b", "c"]) + with tm.assert_produces_warning(FutureWarning): + cat.take_nd([0, 1]) From 00561438b9b7deda2df99d8ae591cf72f3eb7955 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 29 Nov 2019 22:50:19 +0000 Subject: [PATCH 16/49] CI: Making benchmark errors easier to find (#29907) --- .github/workflows/ci.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b689da8e39ff0..f68080d05bea6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -80,15 +80,18 @@ jobs: git fetch upstream if git diff upstream/master --name-only | grep -q "^asv_bench/"; then asv machine --yes - ASV_OUTPUT="$(asv dev)" - if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then - echo "##vso[task.logissue type=error]Benchmarks run with errors" - echo "$ASV_OUTPUT" + asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log + if grep "failed" benchmarks.log > /dev/null ; then exit 1 - else - echo "Benchmarks run without errors" fi else echo "Benchmarks did not run, no changes detected" fi if: true + + - name: Publish benchmarks artifact + uses: actions/upload-artifact@master + with: + name: Benchmarks log + path: asv_bench/benchmarks.log + if: failure() From 3a374643e127159c065f0f83d9aca1c4f2c471f8 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 29 Nov 2019 22:59:10 +0000 Subject: [PATCH 17/49] TYP: some types for pandas/_config/config.py (#29897) --- pandas/_config/config.py | 59 ++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 814f855cceeac..8f75d0381c1a6 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -51,7 +51,7 @@ from collections import namedtuple from contextlib import 
contextmanager import re -from typing import Dict, List +from typing import Any, Dict, Iterable, List import warnings DeprecatedOption = namedtuple("DeprecatedOption", "key msg rkey removal_ver") @@ -64,7 +64,7 @@ _registered_options: Dict[str, RegisteredOption] = {} # holds the current values for registered options -_global_config: Dict[str, str] = {} +_global_config: Dict[str, Any] = {} # keys which have a special meaning _reserved_keys: List[str] = ["all"] @@ -85,7 +85,7 @@ def _get_single_key(pat, silent): if len(keys) == 0: if not silent: _warn_if_deprecated(pat) - raise OptionError("No such keys(s): {pat!r}".format(pat=pat)) + raise OptionError(f"No such keys(s): {repr(pat)}") if len(keys) > 1: raise OptionError("Pattern matched multiple keys") key = keys[0] @@ -116,8 +116,8 @@ def _set_option(*args, **kwargs): silent = kwargs.pop("silent", False) if kwargs: - msg = '_set_option() got an unexpected keyword argument "{kwarg}"' - raise TypeError(msg.format(list(kwargs.keys())[0])) + kwarg = list(kwargs.keys())[0] + raise TypeError(f'_set_option() got an unexpected keyword argument "{kwarg}"') for k, v in zip(args[::2], args[1::2]): key = _get_single_key(k, silent) @@ -412,7 +412,7 @@ def __exit__(self, *args): _set_option(pat, val, silent=True) -def register_option(key, defval, doc="", validator=None, cb=None): +def register_option(key: str, defval: object, doc="", validator=None, cb=None): """Register an option in the package-wide pandas config object Parameters @@ -441,11 +441,9 @@ def register_option(key, defval, doc="", validator=None, cb=None): key = key.lower() if key in _registered_options: - msg = "Option '{key}' has already been registered" - raise OptionError(msg.format(key=key)) + raise OptionError(f"Option '{key}' has already been registered") if key in _reserved_keys: - msg = "Option '{key}' is a reserved key" - raise OptionError(msg.format(key=key)) + raise OptionError(f"Option '{key}' is a reserved key") # the default value should be legal if 
validator: @@ -455,10 +453,12 @@ def register_option(key, defval, doc="", validator=None, cb=None): path = key.split(".") for k in path: - if not bool(re.match("^" + tokenize.Name + "$", k)): - raise ValueError("{k} is not a valid identifier".format(k=k)) + # NOTE: tokenize.Name is not a public constant + # error: Module has no attribute "Name" [attr-defined] + if not re.match("^" + tokenize.Name + "$", k): # type: ignore + raise ValueError(f"{k} is not a valid identifier") if keyword.iskeyword(k): - raise ValueError("{k} is a python keyword".format(k=k)) + raise ValueError(f"{k} is a python keyword") cursor = _global_config msg = "Path prefix to option '{option}' is already an option" @@ -522,8 +522,7 @@ def deprecate_option(key, msg=None, rkey=None, removal_ver=None): key = key.lower() if key in _deprecated_options: - msg = "Option '{key}' has already been defined as deprecated." - raise OptionError(msg.format(key=key)) + raise OptionError(f"Option '{key}' has already been defined as deprecated.") _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver) @@ -621,11 +620,11 @@ def _warn_if_deprecated(key): print(d.msg) warnings.warn(d.msg, FutureWarning) else: - msg = "'{key}' is deprecated".format(key=key) + msg = f"'{key}' is deprecated" if d.removal_ver: - msg += " and will be removed in {version}".format(version=d.removal_ver) + msg += f" and will be removed in {d.removal_ver}" if d.rkey: - msg += ", please use '{rkey}' instead.".format(rkey=d.rkey) + msg += f", please use '{d.rkey}' instead." else: msg += ", please refrain from using it." @@ -640,7 +639,7 @@ def _build_option_description(k): o = _get_registered_option(k) d = _get_deprecated_option(k) - s = "{k} ".format(k=k) + s = f"{k} " if o.doc: s += "\n".join(o.doc.strip().split("\n")) @@ -648,9 +647,7 @@ def _build_option_description(k): s += "No description available." 
if o: - s += "\n [default: {default}] [currently: {current}]".format( - default=o.defval, current=_get_option(k, True) - ) + s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]" if d: s += "\n (Deprecated" @@ -666,7 +663,7 @@ def pp_options_list(keys, width=80, _print=False): from textwrap import wrap from itertools import groupby - def pp(name, ks): + def pp(name: str, ks: Iterable[str]) -> List[str]: pfx = "- " + name + ".[" if name else "" ls = wrap( ", ".join(ks), @@ -679,7 +676,7 @@ def pp(name, ks): ls[-1] = ls[-1] + "]" return ls - ls = [] + ls: List[str] = [] singles = [x for x in sorted(keys) if x.find(".") < 0] if singles: ls += pp("", singles) @@ -731,7 +728,7 @@ def config_prefix(prefix): def wrap(func): def inner(key, *args, **kwds): - pkey = "{prefix}.{key}".format(prefix=prefix, key=key) + pkey = f"{prefix}.{key}" return func(pkey, *args, **kwds) return inner @@ -768,8 +765,7 @@ def is_type_factory(_type): def inner(x): if type(x) != _type: - msg = "Value must have type '{typ!s}'" - raise ValueError(msg.format(typ=_type)) + raise ValueError(f"Value must have type '{_type}'") return inner @@ -792,12 +788,11 @@ def is_instance_factory(_type): _type = tuple(_type) type_repr = "|".join(map(str, _type)) else: - type_repr = "'{typ}'".format(typ=_type) + type_repr = f"'{_type}'" def inner(x): if not isinstance(x, _type): - msg = "Value must be an instance of {type_repr}" - raise ValueError(msg.format(type_repr=type_repr)) + raise ValueError(f"Value must be an instance of {type_repr}") return inner @@ -813,10 +808,10 @@ def inner(x): if not any(c(x) for c in callables): uvals = [str(lval) for lval in legal_values] pp_values = "|".join(uvals) - msg = "Value must be one of {pp_values}" + msg = f"Value must be one of {pp_values}" if len(callables): msg += " or a callable" - raise ValueError(msg.format(pp_values=pp_values)) + raise ValueError(msg) return inner From 273dd40f4f8b75d12df6aa53da5de48df44e6bfd Mon Sep 17 00:00:00 2001 From: 
jbrockmendel Date: Fri, 29 Nov 2019 15:06:22 -0800 Subject: [PATCH 18/49] TYP: io.pytables annotations (#29861) --- pandas/io/pytables.py | 66 +++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fb63bdcaaa876..dae16dfdef01f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -53,7 +53,7 @@ from pandas.io.formats.printing import adjoin, pprint_thing if TYPE_CHECKING: - from tables import File # noqa:F401 + from tables import File, Node # noqa:F401 # versioning attribute @@ -244,7 +244,7 @@ def to_hdf( key, value, mode=None, - complevel=None, + complevel: Optional[int] = None, complib=None, append=None, **kwargs, @@ -459,12 +459,14 @@ class HDFStore: """ _handle: Optional["File"] + _complevel: int + _fletcher32: bool def __init__( self, path, mode=None, - complevel=None, + complevel: Optional[int] = None, complib=None, fletcher32: bool = False, **kwargs, @@ -526,7 +528,7 @@ def __getattr__(self, name: str): f"'{type(self).__name__}' object has no attribute '{name}'" ) - def __contains__(self, key: str): + def __contains__(self, key: str) -> bool: """ check for existence of this key can match the exact pathname or the pathnm w/o the leading '/' """ @@ -1267,18 +1269,22 @@ def walk(self, where="/"): yield (g._v_pathname.rstrip("/"), groups, leaves) - def get_node(self, key: str): + def get_node(self, key: str) -> Optional["Node"]: """ return the node with the key or None if it does not exist """ self._check_if_open() if not key.startswith("/"): key = "/" + key assert self._handle is not None + assert _table_mod is not None # for mypy try: - return self._handle.get_node(self.root, key) - except _table_mod.exceptions.NoSuchNodeError: # type: ignore + node = self._handle.get_node(self.root, key) + except _table_mod.exceptions.NoSuchNodeError: return None + assert isinstance(node, _table_mod.Node), type(node) + return node + def get_storer(self, key: 
str) -> Union["GenericFixed", "Table"]: """ return the storer object for a key, raise if not in the file """ group = self.get_node(key) @@ -1296,7 +1302,7 @@ def copy( propindexes: bool = True, keys=None, complib=None, - complevel=None, + complevel: Optional[int] = None, fletcher32: bool = False, overwrite=True, ): @@ -1387,7 +1393,9 @@ def info(self) -> str: return output - # private methods ###### + # ------------------------------------------------------------------------ + # private methods + def _check_if_open(self): if not self.is_open: raise ClosedFileError(f"{self._path} file is not open!") @@ -1559,7 +1567,7 @@ def _write_to_group( if isinstance(s, Table) and index: s.create_index(columns=index) - def _read_group(self, group, **kwargs): + def _read_group(self, group: "Node", **kwargs): s = self._create_storer(group) s.infer_axes() return s.read(**kwargs) @@ -1786,7 +1794,7 @@ def copy(self): new_self = copy.copy(self) return new_self - def infer(self, handler): + def infer(self, handler: "Table"): """infer this column from the table: create and return a new object""" table = handler.table new_self = self.copy() @@ -2499,9 +2507,16 @@ class Fixed: pandas_kind: str obj_type: Type[Union[DataFrame, Series]] ndim: int + parent: HDFStore + group: "Node" is_table = False - def __init__(self, parent, group, encoding=None, errors="strict", **kwargs): + def __init__( + self, parent: HDFStore, group: "Node", encoding=None, errors="strict", **kwargs + ): + assert isinstance(parent, HDFStore), type(parent) + assert _table_mod is not None # needed for mypy + assert isinstance(group, _table_mod.Node), type(group) self.parent = parent self.group = group self.encoding = _ensure_encoding(encoding) @@ -2568,11 +2583,11 @@ def _filters(self): return self.parent._filters @property - def _complevel(self): + def _complevel(self) -> int: return self.parent._complevel @property - def _fletcher32(self): + def _fletcher32(self) -> bool: return self.parent._fletcher32 @property @@ 
-2637,7 +2652,7 @@ def read( def write(self, **kwargs): raise NotImplementedError( - "cannot write on an abstract storer: sublcasses should implement" + "cannot write on an abstract storer: subclasses should implement" ) def delete( @@ -2803,7 +2818,7 @@ def write_index(self, key: str, index: Index): if isinstance(index, DatetimeIndex) and index.tz is not None: node._v_attrs.tz = _get_tz(index.tz) - def write_multi_index(self, key, index): + def write_multi_index(self, key: str, index: MultiIndex): setattr(self.attrs, f"{key}_nlevels", index.nlevels) for i, (lev, level_codes, name) in enumerate( @@ -2828,7 +2843,7 @@ def write_multi_index(self, key, index): label_key = f"{key}_label{i}" self.write_array(label_key, level_codes) - def read_multi_index(self, key, **kwargs) -> MultiIndex: + def read_multi_index(self, key: str, **kwargs) -> MultiIndex: nlevels = getattr(self.attrs, f"{key}_nlevels") levels = [] @@ -2849,7 +2864,7 @@ def read_multi_index(self, key, **kwargs) -> MultiIndex: ) def read_index_node( - self, node, start: Optional[int] = None, stop: Optional[int] = None + self, node: "Node", start: Optional[int] = None, stop: Optional[int] = None ): data = node[start:stop] # If the index was an empty array write_array_empty() will @@ -3310,7 +3325,7 @@ def values_cols(self) -> List[str]: """ return a list of my values cols """ return [i.cname for i in self.values_axes] - def _get_metadata_path(self, key) -> str: + def _get_metadata_path(self, key: str) -> str: """ return the metadata pathname for this key """ group = self.group._v_pathname return f"{group}/meta/{key}/meta" @@ -3877,10 +3892,10 @@ def process_filter(field, filt): def create_description( self, complib=None, - complevel=None, + complevel: Optional[int] = None, fletcher32: bool = False, expectedrows: Optional[int] = None, - ): + ) -> Dict[str, Any]: """ create the description of the table from the axes & values """ # provided expected rows if its passed @@ -4537,10 +4552,10 @@ def _set_tz(values, 
tz, preserve_UTC: bool = False, coerce: bool = False): return values -def _convert_index(name: str, index, encoding=None, errors="strict"): +def _convert_index(name: str, index: Index, encoding=None, errors="strict"): assert isinstance(name, str) - index_name = getattr(index, "name", None) + index_name = index.name if isinstance(index, DatetimeIndex): converted = index.asi8 @@ -4630,8 +4645,9 @@ def _convert_index(name: str, index, encoding=None, errors="strict"): ) -def _unconvert_index(data, kind, encoding=None, errors="strict"): - kind = _ensure_decoded(kind) +def _unconvert_index(data, kind: str, encoding=None, errors="strict"): + index: Union[Index, np.ndarray] + if kind == "datetime64": index = DatetimeIndex(data) elif kind == "timedelta64": From a91194e4eef4dfc78efbdb7dfa4dd6d5d90d92de Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 15:07:40 -0800 Subject: [PATCH 19/49] DEPR: DataFrame.sort_index by keyword (#29931) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/frame.py | 12 ------ pandas/tests/frame/test_sorting.py | 68 ------------------------------ 3 files changed, 1 insertion(+), 80 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index d66d165bf7125..4279f949db1aa 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -409,6 +409,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`) - Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. 
It now defaults to False (:issue:`27600`) - Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`) +- Removed the previously deprecated "by" keyword from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`) - Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`) - Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`) - A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d2e396284c5a7..5dfa7002abfca 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4764,24 +4764,12 @@ def sort_index( kind="quicksort", na_position="last", sort_remaining=True, - by=None, ): # TODO: this can be combined with Series.sort_index impl as # almost identical inplace = validate_bool_kwarg(inplace, "inplace") - # 10726 - if by is not None: - warnings.warn( - "by argument to sort_index is deprecated, " - "please use .sort_values(by=...)", - FutureWarning, - stacklevel=2, - ) - if level is not None: - raise ValueError("unable to simultaneously sort by and level") - return self.sort_values(by, axis=axis, ascending=ascending, inplace=inplace) axis = self._get_axis_number(axis) labels = self._get_axis(axis) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 6ed245b6ebb98..64294d5cdcb81 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -385,17 +385,11 @@ def test_sort_index_multicolumn(self): random.shuffle(B) frame = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) - # use .sort_values #9816 - with 
tm.assert_produces_warning(FutureWarning): - frame.sort_index(by=["A", "B"]) result = frame.sort_values(by=["A", "B"]) indexer = np.lexsort((frame["B"], frame["A"])) expected = frame.take(indexer) tm.assert_frame_equal(result, expected) - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - frame.sort_index(by=["A", "B"], ascending=False) result = frame.sort_values(by=["A", "B"], ascending=False) indexer = np.lexsort( (frame["B"].rank(ascending=False), frame["A"].rank(ascending=False)) @@ -403,9 +397,6 @@ def test_sort_index_multicolumn(self): expected = frame.take(indexer) tm.assert_frame_equal(result, expected) - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - frame.sort_index(by=["B", "A"]) result = frame.sort_values(by=["B", "A"]) indexer = np.lexsort((frame["A"], frame["B"])) expected = frame.take(indexer) @@ -452,14 +443,8 @@ def test_sort_index_different_sortorder(self): df = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by=["A", "B"], ascending=[1, 0]) - result = df.sort_values(by=["A", "B"], ascending=[1, 0]) - ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) expected = df.take(ex_indexer) - tm.assert_frame_equal(result, expected) # test with multiindex, too idf = df.set_index(["A", "B"]) @@ -472,59 +457,6 @@ def test_sort_index_different_sortorder(self): result = idf["C"].sort_index(ascending=[1, 0]) tm.assert_series_equal(result, expected["C"]) - def test_sort_index_duplicates(self): - - # with 9816, these are all translated to .sort_values - - df = DataFrame([range(5, 9), range(4)], columns=["a", "a", "b", "b"]) - - with pytest.raises(ValueError, match="not unique"): - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by="a") - with pytest.raises(ValueError, match="not unique"): - df.sort_values(by="a") - - with pytest.raises(ValueError, match="not unique"): 
- # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by=["a"]) - with pytest.raises(ValueError, match="not unique"): - df.sort_values(by=["a"]) - - with pytest.raises(ValueError, match="not unique"): - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - # multi-column 'by' is separate codepath - df.sort_index(by=["a", "b"]) - with pytest.raises(ValueError, match="not unique"): - # multi-column 'by' is separate codepath - df.sort_values(by=["a", "b"]) - - # with multi-index - # GH4370 - df = DataFrame( - np.random.randn(4, 2), columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]) - ) - with pytest.raises(ValueError, match="level"): - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by="a") - with pytest.raises(ValueError, match="level"): - df.sort_values(by="a") - - # convert tuples to a list of tuples - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by=[("a", 1)]) - expected = df.sort_values(by=[("a", 1)]) - - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by=("a", 1)) - result = df.sort_values(by=("a", 1)) - tm.assert_frame_equal(result, expected) - def test_sort_index_level(self): mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) df = DataFrame([[1, 2], [3, 4]], mi) From 9dd1b50f2cf70c0ec7de07137b08ae5866afae69 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 15:10:02 -0800 Subject: [PATCH 20/49] DEPR: remove FrozenNDarray (#29840) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/compat/pickle_compat.py | 17 +---- pandas/core/indexes/frozen.py | 80 -------------------- pandas/core/indexes/multi.py | 37 +++++++-- pandas/tests/indexes/multi/test_integrity.py | 2 +- pandas/tests/indexes/test_frozen.py | 64 +--------------- 6 files changed, 35 insertions(+), 166 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst 
b/doc/source/whatsnew/v1.0.0.rst index 4279f949db1aa..05aba65888c55 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -441,6 +441,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed previously deprecated :func:`pandas.tseries.plotting.tsplot` (:issue:`18627`) - Removed the previously deprecated ``reduce`` and ``broadcast`` arguments from :meth:`DataFrame.apply` (:issue:`18577`) - Removed the previously deprecated ``assert_raises_regex`` function in ``pandas.util.testing`` (:issue:`29174`) +- Removed the previously deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`) - Removed previously deprecated "nthreads" argument from :func:`read_feather`, use "use_threads" instead (:issue:`23053`) - Removed :meth:`Index.is_lexsorted_for_tuple` (:issue:`29305`) - Removed support for nexted renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 458c0c07c7602..aeec5e8a0400a 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -89,21 +89,8 @@ def __new__(cls) -> "DataFrame": # type: ignore _class_locations_map = { ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"), # 15477 - # - # TODO: When FrozenNDArray is removed, add - # the following lines for compat: - # - # ('pandas.core.base', 'FrozenNDArray'): - # ('numpy', 'ndarray'), - # ('pandas.core.indexes.frozen', 'FrozenNDArray'): - # ('numpy', 'ndarray'), - # - # Afterwards, remove the current entry - # for `pandas.core.base.FrozenNDArray`. 
- ("pandas.core.base", "FrozenNDArray"): ( - "pandas.core.indexes.frozen", - "FrozenNDArray", - ), + ("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"), + ("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"), ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"), # 10890 ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"), diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index ab9852157b9ef..2ea83ba889fd2 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -4,14 +4,8 @@ These are used for: - .names (FrozenList) -- .levels & .codes (FrozenNDArray) """ -import warnings - -import numpy as np - -from pandas.core.dtypes.cast import coerce_indexer_dtype from pandas.core.base import PandasObject @@ -111,77 +105,3 @@ def __repr__(self) -> str: __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled pop = append = extend = remove = sort = insert = _disabled - - -class FrozenNDArray(PandasObject, np.ndarray): - - # no __array_finalize__ for now because no metadata - def __new__(cls, data, dtype=None, copy=False): - warnings.warn( - "\nFrozenNDArray is deprecated and will be removed in a " - "future version.\nPlease use `numpy.ndarray` instead.\n", - FutureWarning, - stacklevel=2, - ) - - if copy is None: - copy = not isinstance(data, FrozenNDArray) - res = np.array(data, dtype=dtype, copy=copy).view(cls) - return res - - def _disabled(self, *args, **kwargs): - """This method will not function because object is immutable.""" - raise TypeError( - "'{cls}' does not support mutable operations.".format(cls=type(self)) - ) - - __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled - put = itemset = fill = _disabled - - def _shallow_copy(self): - return self.view() - - def values(self): - """returns *copy* of underlying array""" - arr = self.view(np.ndarray).copy() - return arr - - def __repr__(self) -> str: - """ - Return a string 
representation for this object. - """ - prepr = pprint_thing(self, escape_chars=("\t", "\r", "\n"), quote_strings=True) - return f"{type(self).__name__}({prepr}, dtype='{self.dtype}')" - - def searchsorted(self, value, side="left", sorter=None): - """ - Find indices to insert `value` so as to maintain order. - - For full documentation, see `numpy.searchsorted` - - See Also - -------- - numpy.searchsorted : Equivalent function. - """ - - # We are much more performant if the searched - # indexer is the same type as the array. - # - # This doesn't matter for int64, but DOES - # matter for smaller int dtypes. - # - # xref: https://github.com/numpy/numpy/issues/5370 - try: - value = self.dtype.type(value) - except ValueError: - pass - - return super().searchsorted(value, side=side, sorter=sorter) - - -def _ensure_frozen(array_like, categories, copy=False): - array_like = coerce_indexer_dtype(array_like, categories) - array_like = array_like.view(FrozenNDArray) - if copy: - array_like = array_like.copy() - return array_like diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d151fb7260a58..f319c1e74452c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -13,6 +13,7 @@ from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.util._decorators import Appender, cache_readonly +from pandas.core.dtypes.cast import coerce_indexer_dtype from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, @@ -40,7 +41,7 @@ _index_shared_docs, ensure_index, ) -from pandas.core.indexes.frozen import FrozenList, _ensure_frozen +from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.sorting import ( get_group_index, @@ -821,7 +822,7 @@ def _set_codes( if level is None: new_codes = FrozenList( - _ensure_frozen(level_codes, lev, copy=copy)._shallow_copy() + _coerce_indexer_frozen(level_codes, lev, copy=copy).view() for lev, level_codes in zip(self._levels, 
codes) ) else: @@ -829,9 +830,7 @@ def _set_codes( new_codes = list(self._codes) for lev_num, level_codes in zip(level_numbers, codes): lev = self.levels[lev_num] - new_codes[lev_num] = _ensure_frozen( - level_codes, lev, copy=copy - )._shallow_copy() + new_codes[lev_num] = _coerce_indexer_frozen(level_codes, lev, copy=copy) new_codes = FrozenList(new_codes) if verify_integrity: @@ -1095,7 +1094,8 @@ def _format_native_types(self, na_rep="nan", **kwargs): if mask.any(): nan_index = len(level) level = np.append(level, na_rep) - level_codes = level_codes.values() + assert not level_codes.flags.writeable # i.e. copy is needed + level_codes = level_codes.copy() # make writeable level_codes[mask] = nan_index new_levels.append(level) new_codes.append(level_codes) @@ -1998,7 +1998,7 @@ def _assert_take_fillable( if mask.any(): masked = [] for new_label in taken: - label_values = new_label.values() + label_values = new_label label_values[mask] = na_value masked.append(np.asarray(label_values)) taken = masked @@ -3431,3 +3431,26 @@ def maybe_droplevels(index, key): pass return index + + +def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray: + """ + Coerce the array_like indexer to the smallest integer dtype that can encode all + of the given categories. + + Parameters + ---------- + array_like : array-like + categories : array-like + copy : bool + + Returns + ------- + np.ndarray + Non-writeable. 
+ """ + array_like = coerce_indexer_dtype(array_like, categories) + if copy: + array_like = array_like.copy() + array_like.flags.writeable = False + return array_like diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 472a404c2a8ef..7cdb5cf31338a 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -210,7 +210,7 @@ def test_metadata_immutable(idx): # ditto for labels with pytest.raises(TypeError, match=mutable_regex): codes[0] = codes[0] - with pytest.raises(TypeError, match=mutable_regex): + with pytest.raises(ValueError, match="assignment destination is read-only"): codes[0][0] = codes[0][0] # and for names names = idx.names diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index c7b219b5ee890..9f6b0325b7b33 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -1,11 +1,7 @@ -import warnings - -import numpy as np import pytest -from pandas.core.indexes.frozen import FrozenList, FrozenNDArray +from pandas.core.indexes.frozen import FrozenList from pandas.tests.test_base import CheckImmutable, CheckStringMixin -import pandas.util.testing as tm class TestFrozenList(CheckImmutable, CheckStringMixin): @@ -55,61 +51,3 @@ def test_tricky_container_to_bytes_raises(self): msg = "^'str' object cannot be interpreted as an integer$" with pytest.raises(TypeError, match=msg): bytes(self.unicode_container) - - -class TestFrozenNDArray(CheckImmutable, CheckStringMixin): - mutable_methods = ("put", "itemset", "fill") - - def setup_method(self, _): - self.lst = [3, 5, 7, -2] - self.klass = FrozenNDArray - - with warnings.catch_warnings(record=True): - warnings.simplefilter("ignore", FutureWarning) - - self.container = FrozenNDArray(self.lst) - self.unicode_container = FrozenNDArray(["\u05d0", "\u05d1", "c"]) - - def test_constructor_warns(self): - # see gh-9031 - with 
tm.assert_produces_warning(FutureWarning): - FrozenNDArray([1, 2, 3]) - - def test_tricky_container_to_bytes(self): - bytes(self.unicode_container) - - def test_shallow_copying(self): - original = self.container.copy() - assert isinstance(self.container.view(), FrozenNDArray) - assert not isinstance(self.container.view(np.ndarray), FrozenNDArray) - assert self.container.view() is not self.container - tm.assert_numpy_array_equal(self.container, original) - - # Shallow copy should be the same too - assert isinstance(self.container._shallow_copy(), FrozenNDArray) - - # setting should not be allowed - def testit(container): - container[0] = 16 - - self.check_mutable_error(testit, self.container) - - def test_values(self): - original = self.container.view(np.ndarray).copy() - n = original[0] + 15 - - vals = self.container.values() - tm.assert_numpy_array_equal(original, vals) - - assert original is not vals - vals[0] = n - - assert isinstance(self.container, FrozenNDArray) - tm.assert_numpy_array_equal(self.container.values(), original) - assert vals[0] == n - - def test_searchsorted(self): - expected = 2 - assert self.container.searchsorted(7) == expected - - assert self.container.searchsorted(value=7) == expected From cb05112fa1f8859279125246d4f41425867db929 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 15:31:45 -0800 Subject: [PATCH 21/49] DEPR: loc with listlikes with missing elements (#29802) --- pandas/core/indexing.py | 16 +-- pandas/io/formats/excel.py | 12 +-- pandas/tests/indexing/test_datetime.py | 16 +-- pandas/tests/indexing/test_floats.py | 28 ++--- pandas/tests/indexing/test_iloc.py | 16 +-- pandas/tests/indexing/test_indexing.py | 56 +++------- pandas/tests/indexing/test_loc.py | 78 ++++++-------- pandas/tests/indexing/test_partial.py | 102 ++++++------------ pandas/tests/io/excel/test_writers.py | 6 +- pandas/tests/series/indexing/test_indexing.py | 21 ++-- pandas/tests/series/indexing/test_numeric.py | 6 +- 11 files changed, 116 
insertions(+), 241 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 67412ed5e5b26..5b875ef3fdc4f 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1176,18 +1176,12 @@ def _validate_read_indexer( # non-missing values), but a bit later in the # code, so we want to avoid warning & then # just raising - - _missing_key_warning = textwrap.dedent( - """ - Passing list-likes to .loc or [] with any missing label will raise - KeyError in the future, you can use .reindex() as an alternative. - - See the documentation here: - https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike""" # noqa: E501 - ) - if not (ax.is_categorical() or ax.is_interval()): - warnings.warn(_missing_key_warning, FutureWarning, stacklevel=6) + raise KeyError( + "Passing list-likes to .loc or [] with any missing labels " + "is no longer supported, see " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501 + ) def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): """ diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index cd0889044094f..0413dcf18d04a 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -393,16 +393,12 @@ def __init__( if not len(Index(cols) & df.columns): raise KeyError("passes columns are not ALL present dataframe") - # deprecatedin gh-17295 - # 1 missing is ok (for now) if len(Index(cols) & df.columns) != len(cols): - warnings.warn( - "Not all names specified in 'columns' are found; " - "this will raise a KeyError in the future", - FutureWarning, - ) + # Deprecated in GH#17295, enforced in 1.0.0 + raise KeyError("Not all names specified in 'columns' are found") + + self.df = df - self.df = df.reindex(columns=cols) self.columns = self.df.columns self.float_format = float_format self.index = index diff --git a/pandas/tests/indexing/test_datetime.py 
b/pandas/tests/indexing/test_datetime.py index ab4a8fe89c6e3..f2e3f7f6b3723 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -2,6 +2,7 @@ from dateutil import tz import numpy as np +import pytest import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range @@ -242,11 +243,8 @@ def test_series_partial_set_datetime(self): Timestamp("2011-01-02"), Timestamp("2011-01-03"), ] - exp = Series( - [np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name="idx"), name="s" - ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[keys] def test_series_partial_set_period(self): # GH 11497 @@ -273,12 +271,8 @@ def test_series_partial_set_period(self): pd.Period("2011-01-02", freq="D"), pd.Period("2011-01-03", freq="D"), ] - exp = Series( - [np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name="idx"), name="s" - ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[keys] - tm.assert_series_equal(result, exp) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[keys] def test_nanosecond_getitem_setitem_with_tz(self): # GH 11679 diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index eadaeaba63a26..0a3b513ff0167 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -726,25 +726,15 @@ def test_floating_misc(self): tm.assert_series_equal(result1, result3) tm.assert_series_equal(result1, result4) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result1 = s[[1.6, 5, 10]] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result2 = s.loc[[1.6, 5, 10]] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result3 = s.loc[[1.6, 5, 10]] - 
tm.assert_series_equal(result1, result2) - tm.assert_series_equal(result1, result3) - tm.assert_series_equal(result1, Series([np.nan, 2, 4], index=[1.6, 5, 10])) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result1 = s[[0, 1, 2]] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result2 = s.loc[[0, 1, 2]] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result3 = s.loc[[0, 1, 2]] - tm.assert_series_equal(result1, result2) - tm.assert_series_equal(result1, result3) - tm.assert_series_equal(result1, Series([0.0, np.nan, np.nan], index=[0, 1, 2])) + with pytest.raises(KeyError, match="with any missing labels"): + s[[1.6, 5, 10]] + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[1.6, 5, 10]] + + with pytest.raises(KeyError, match="with any missing labels"): + s[[0, 1, 2]] + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[0, 1, 2]] result1 = s.loc[[2.5, 5]] result2 = s.loc[[2.5, 5]] diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index d826d89f85ef5..e4d387fd3ac38 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -728,20 +728,8 @@ def test_iloc_non_unique_indexing(self): df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000}) df2 = concat([df2, 2 * df2, 3 * df2]) - sidx = df2.index.to_series() - expected = df2.iloc[idx[idx <= sidx.max()]] - - new_list = [] - for r, s in expected.iterrows(): - new_list.append(s) - new_list.append(s * 2) - new_list.append(s * 3) - - expected = DataFrame(new_list) - expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])], sort=True) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df2.loc[idx] - tm.assert_frame_equal(result, expected, check_index_type=False) + with pytest.raises(KeyError, match="with any missing labels"): + df2.loc[idx] def test_iloc_empty_list_indexer_is_ok(self): 
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 09a66efb6a312..e53e02ed750cb 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -299,32 +299,13 @@ def test_dups_fancy_indexing(self): tm.assert_frame_equal(result, expected) rows = ["C", "B", "E"] - expected = DataFrame( - { - "test": [11, 9, np.nan], - "test1": [7.0, 6, np.nan], - "other": ["d", "c", np.nan], - }, - index=rows, - ) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[rows] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[rows] # see GH5553, make sure we use the right indexer rows = ["F", "G", "H", "C", "B", "E"] - expected = DataFrame( - { - "test": [np.nan, np.nan, np.nan, 11, 9, np.nan], - "test1": [np.nan, np.nan, np.nan, 7.0, 6, np.nan], - "other": [np.nan, np.nan, np.nan, "d", "c", np.nan], - }, - index=rows, - ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[rows] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[rows] # List containing only missing label dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) @@ -340,38 +321,25 @@ def test_dups_fancy_indexing(self): # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[[0, 8, 0]] - expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) - tm.assert_frame_equal(result, expected, check_index_type=False) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[[0, 8, 0]] df = DataFrame({"A": list("abc")}) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[[0, 8, 0]] - expected = DataFrame({"A": ["a", np.nan, "a"]}, index=[0, 8, 0]) - 
tm.assert_frame_equal(result, expected, check_index_type=False) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[[0, 8, 0]] # non unique with non unique selector df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"]) - expected = DataFrame( - {"test": [5, 7, 5, 7, np.nan]}, index=["A", "A", "A", "A", "E"] - ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[["A", "A", "E"]] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[["A", "A", "E"]] def test_dups_fancy_indexing2(self): # GH 5835 # dups on index and missing values df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"]) - expected = pd.concat( - [df.loc[:, ["A", "B"]], DataFrame(np.nan, columns=["C"], index=df.index)], - axis=1, - ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[:, ["A", "B", "C"]] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[:, ["A", "B", "C"]] # GH 6504, multi-axis indexing df = DataFrame( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index d3af3f6322ef2..cb523efb78cf4 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -159,48 +159,46 @@ def test_loc_getitem_label_list_with_missing(self): self.check_result( "loc", [0, 1, 2], "indexer", [0, 1, 2], typs=["empty"], fails=KeyError, ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.check_result( - "loc", - [0, 2, 10], - "ix", - [0, 2, 10], - typs=["ints", "uints", "floats"], - axes=0, - fails=KeyError, - ) + self.check_result( + "loc", + [0, 2, 10], + "ix", + [0, 2, 10], + typs=["ints", "uints", "floats"], + axes=0, + fails=KeyError, + ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.check_result( - "loc", - [3, 6, 7], - "ix", - 
[3, 6, 7], - typs=["ints", "uints", "floats"], - axes=1, - fails=KeyError, - ) + self.check_result( + "loc", + [3, 6, 7], + "ix", + [3, 6, 7], + typs=["ints", "uints", "floats"], + axes=1, + fails=KeyError, + ) # GH 17758 - MultiIndex and missing keys - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.check_result( - "loc", - [(1, 3), (1, 4), (2, 5)], - "ix", - [(1, 3), (1, 4), (2, 5)], - typs=["multi"], - axes=0, - ) + self.check_result( + "loc", + [(1, 3), (1, 4), (2, 5)], + "ix", + [(1, 3), (1, 4), (2, 5)], + typs=["multi"], + axes=0, + fails=KeyError, + ) def test_getitem_label_list_with_missing(self): s = Series(range(3), index=["a", "b", "c"]) # consistency - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="with any missing labels"): s[["a", "d"]] s = Series(range(3)) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="with any missing labels"): s[[0, 3]] def test_loc_getitem_label_list_fails(self): @@ -305,10 +303,8 @@ def test_loc_to_fail(self): s.loc[["4"]] s.loc[-1] = 3 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[-1, -2]] - expected = Series([3, np.nan], index=[-1, -2]) - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[-1, -2]] s["a"] = 2 msg = ( @@ -354,10 +350,8 @@ def test_loc_getitem_list_with_fail(self): s.loc[[3]] # a non-match and a match - with tm.assert_produces_warning(FutureWarning): - expected = s.loc[[2, 3]] - result = s.reindex([2, 3]) - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[2, 3]] def test_loc_getitem_label_slice(self): @@ -1034,10 +1028,8 @@ def test_series_loc_getitem_label_list_missing_values(): ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64" ) s = Series([2, 5, 8, 11], 
date_range("2001-01-01", freq="D", periods=4)) - expected = Series([11.0, 5.0, 11.0, np.nan], index=key) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[key] - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[key] @pytest.mark.parametrize( diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 0fb71bfea76c0..aa49edd51aa39 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -186,17 +186,15 @@ def test_series_partial_set(self): # loc equiv to .reindex expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="with any missing labels"): result = ser.loc[[3, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) result = ser.reindex([3, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="with any missing labels"): result = ser.loc[[3, 2, 3, "x"]] - tm.assert_series_equal(result, expected, check_index_type=True) result = ser.reindex([3, 2, 3, "x"]) tm.assert_series_equal(result, expected, check_index_type=True) @@ -206,9 +204,8 @@ def test_series_partial_set(self): tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="with any missing labels"): result = ser.loc[[2, 2, "x", 1]] - tm.assert_series_equal(result, expected, check_index_type=True) result = ser.reindex([2, 2, "x", 1]) tm.assert_series_equal(result, expected, check_index_type=True) @@ -222,54 +219,48 @@ def 
test_series_partial_set(self): ser.loc[[3, 3, 3]] expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[[2, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[2, 2, 3]] result = ser.reindex([2, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[3, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[3, 4, 4]] result = s.reindex([3, 4, 4]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[5, 3, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[5, 3, 3]] result = s.reindex([5, 3, 3]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[5, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[5, 4, 4]] result = s.reindex([5, 4, 4]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]) expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): 
- result = s.loc[[7, 2, 2]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[7, 2, 2]] result = s.reindex([7, 2, 2]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[4, 5, 5]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[4, 5, 5]] result = s.reindex([4, 5, 5]) tm.assert_series_equal(result, expected, check_index_type=True) @@ -286,28 +277,19 @@ def test_series_partial_set_with_name(self): ser = Series([0.1, 0.2], index=idx, name="s") # loc - exp_idx = Index([3, 2, 3], dtype="int64", name="idx") - expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name="s") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[[3, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[3, 2, 3]] - exp_idx = Index([3, 2, 3, "x"], dtype="object", name="idx") - expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, name="s") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[[3, 2, 3, "x"]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[3, 2, 3, "x"]] exp_idx = Index([2, 2, 1], dtype="int64", name="idx") expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s") result = ser.loc[[2, 2, 1]] tm.assert_series_equal(result, expected, check_index_type=True) - exp_idx = Index([2, 2, "x", 1], dtype="object", name="idx") - expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name="s") - with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[[2, 2, "x", 1]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[2, 2, "x", 1]] # raises as nothing in in the index msg = ( @@ -317,46 +299,28 @@ def test_series_partial_set_with_name(self): with pytest.raises(KeyError, match=msg): ser.loc[[3, 3, 3]] - exp_idx = Index([2, 2, 3], dtype="int64", name="idx") - expected = Series([0.2, 0.2, np.nan], index=exp_idx, name="s") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[[2, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[2, 2, 3]] - exp_idx = Index([3, 4, 4], dtype="int64", name="idx") - expected = Series([0.3, np.nan, np.nan], index=exp_idx, name="s") idx = Index([1, 2, 3], dtype="int64", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]] - exp_idx = Index([5, 3, 3], dtype="int64", name="idx") - expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name="s") idx = Index([1, 2, 3, 4], dtype="int64", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]] - exp_idx = Index([5, 4, 4], dtype="int64", name="idx") - expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name="s") idx = Index([1, 2, 3, 4], 
dtype="int64", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]] - exp_idx = Index([7, 2, 2], dtype="int64", name="idx") - expected = Series([0.4, np.nan, np.nan], index=exp_idx, name="s") idx = Index([4, 5, 6, 7], dtype="int64", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]] - exp_idx = Index([4, 5, 5], dtype="int64", name="idx") - expected = Series([0.4, np.nan, np.nan], index=exp_idx, name="s") idx = Index([1, 2, 3, 4], dtype="int64", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]] # iloc exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx") diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index a7730e079a1bb..b1be0a1a2fece 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1009,13 +1009,9 @@ def test_invalid_columns(self, path): # see gh-10982 write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="Not all names specified"): 
write_frame.to_excel(path, "test1", columns=["B", "C"]) - expected = write_frame.reindex(columns=["B", "C"]) - read_frame = pd.read_excel(path, "test1", index_col=0) - tm.assert_frame_equal(expected, read_frame) - with pytest.raises( KeyError, match="'passes columns are not ALL present dataframe'" ): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 5aba2920999d5..173bc9d9d6409 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -52,15 +52,11 @@ def test_basic_getitem_with_labels(datetime_series): s = Series(np.random.randn(10), index=list(range(0, 20, 2))) inds = [0, 2, 5, 7, 8] arr_inds = np.array([0, 2, 5, 7, 8]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s[inds] - expected = s.reindex(inds) - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s[inds] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s[arr_inds] - expected = s.reindex(arr_inds) - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s[arr_inds] # GH12089 # with tz for values @@ -262,12 +258,11 @@ def test_getitem_dups_with_missing(): # breaks reindex, so need to use .loc internally # GH 4246 s = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - expected = s.loc[["foo", "bar", "bah", "bam"]] + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[["foo", "bar", "bah", "bam"]] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s[["foo", "bar", "bah", "bam"]] - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s[["foo", "bar", "bah", "bam"]] def test_getitem_dups(): diff --git 
a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index 60b89c01cc22d..426a98b00827e 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -123,12 +123,10 @@ def test_get_nan_multiple(): s = pd.Float64Index(range(10)).to_series() idx = [2, 30] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - tm.assert_series_equal(s.get(idx), Series([2, np.nan], index=idx)) + assert s.get(idx) is None idx = [2, np.nan] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - tm.assert_series_equal(s.get(idx), Series([2, np.nan], index=idx)) + assert s.get(idx) is None # GH 17295 - all missing keys idx = [20, 30] From 5264f71a8d158060286e77f83ad600e846f6c18f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 30 Nov 2019 09:42:53 -0800 Subject: [PATCH 22/49] CLN: assorted cleanups (#29933) --- doc/redirects.csv | 168 -------------------------------- doc/source/whatsnew/v0.25.1.rst | 2 +- doc/source/whatsnew/v1.0.0.rst | 20 ++-- pandas/io/pytables.py | 7 +- 4 files changed, 13 insertions(+), 184 deletions(-) diff --git a/doc/redirects.csv b/doc/redirects.csv index f124fdb840ce0..b8dac83e301d5 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -829,174 +829,6 @@ generated/pandas.MultiIndex.to_frame,../reference/api/pandas.MultiIndex.to_frame generated/pandas.notna,../reference/api/pandas.notna generated/pandas.notnull,../reference/api/pandas.notnull generated/pandas.option_context,../reference/api/pandas.option_context -generated/pandas.Panel.abs,../reference/api/pandas.Panel.abs -generated/pandas.Panel.add,../reference/api/pandas.Panel.add -generated/pandas.Panel.add_prefix,../reference/api/pandas.Panel.add_prefix -generated/pandas.Panel.add_suffix,../reference/api/pandas.Panel.add_suffix -generated/pandas.Panel.agg,../reference/api/pandas.Panel.agg -generated/pandas.Panel.aggregate,../reference/api/pandas.Panel.aggregate 
-generated/pandas.Panel.align,../reference/api/pandas.Panel.align -generated/pandas.Panel.all,../reference/api/pandas.Panel.all -generated/pandas.Panel.any,../reference/api/pandas.Panel.any -generated/pandas.Panel.apply,../reference/api/pandas.Panel.apply -generated/pandas.Panel.as_blocks,../reference/api/pandas.Panel.as_blocks -generated/pandas.Panel.asfreq,../reference/api/pandas.Panel.asfreq -generated/pandas.Panel.as_matrix,../reference/api/pandas.Panel.as_matrix -generated/pandas.Panel.asof,../reference/api/pandas.Panel.asof -generated/pandas.Panel.astype,../reference/api/pandas.Panel.astype -generated/pandas.Panel.at,../reference/api/pandas.Panel.at -generated/pandas.Panel.at_time,../reference/api/pandas.Panel.at_time -generated/pandas.Panel.axes,../reference/api/pandas.Panel.axes -generated/pandas.Panel.between_time,../reference/api/pandas.Panel.between_time -generated/pandas.Panel.bfill,../reference/api/pandas.Panel.bfill -generated/pandas.Panel.blocks,../reference/api/pandas.Panel.blocks -generated/pandas.Panel.bool,../reference/api/pandas.Panel.bool -generated/pandas.Panel.clip,../reference/api/pandas.Panel.clip -generated/pandas.Panel.clip_lower,../reference/api/pandas.Panel.clip_lower -generated/pandas.Panel.clip_upper,../reference/api/pandas.Panel.clip_upper -generated/pandas.Panel.compound,../reference/api/pandas.Panel.compound -generated/pandas.Panel.conform,../reference/api/pandas.Panel.conform -generated/pandas.Panel.convert_objects,../reference/api/pandas.Panel.convert_objects -generated/pandas.Panel.copy,../reference/api/pandas.Panel.copy -generated/pandas.Panel.count,../reference/api/pandas.Panel.count -generated/pandas.Panel.cummax,../reference/api/pandas.Panel.cummax -generated/pandas.Panel.cummin,../reference/api/pandas.Panel.cummin -generated/pandas.Panel.cumprod,../reference/api/pandas.Panel.cumprod -generated/pandas.Panel.cumsum,../reference/api/pandas.Panel.cumsum -generated/pandas.Panel.describe,../reference/api/pandas.Panel.describe 
-generated/pandas.Panel.div,../reference/api/pandas.Panel.div -generated/pandas.Panel.divide,../reference/api/pandas.Panel.divide -generated/pandas.Panel.drop,../reference/api/pandas.Panel.drop -generated/pandas.Panel.droplevel,../reference/api/pandas.Panel.droplevel -generated/pandas.Panel.dropna,../reference/api/pandas.Panel.dropna -generated/pandas.Panel.dtypes,../reference/api/pandas.Panel.dtypes -generated/pandas.Panel.empty,../reference/api/pandas.Panel.empty -generated/pandas.Panel.eq,../reference/api/pandas.Panel.eq -generated/pandas.Panel.equals,../reference/api/pandas.Panel.equals -generated/pandas.Panel.ffill,../reference/api/pandas.Panel.ffill -generated/pandas.Panel.fillna,../reference/api/pandas.Panel.fillna -generated/pandas.Panel.filter,../reference/api/pandas.Panel.filter -generated/pandas.Panel.first,../reference/api/pandas.Panel.first -generated/pandas.Panel.first_valid_index,../reference/api/pandas.Panel.first_valid_index -generated/pandas.Panel.floordiv,../reference/api/pandas.Panel.floordiv -generated/pandas.Panel.from_dict,../reference/api/pandas.Panel.from_dict -generated/pandas.Panel.fromDict,../reference/api/pandas.Panel.fromDict -generated/pandas.Panel.ge,../reference/api/pandas.Panel.ge -generated/pandas.Panel.get_dtype_counts,../reference/api/pandas.Panel.get_dtype_counts -generated/pandas.Panel.get,../reference/api/pandas.Panel.get -generated/pandas.Panel.get_value,../reference/api/pandas.Panel.get_value -generated/pandas.Panel.get_values,../reference/api/pandas.Panel.get_values -generated/pandas.Panel.groupby,../reference/api/pandas.Panel.groupby -generated/pandas.Panel.gt,../reference/api/pandas.Panel.gt -generated/pandas.Panel.head,../reference/api/pandas.Panel.head -generated/pandas.Panel,../reference/api/pandas.Panel -generated/pandas.Panel.iat,../reference/api/pandas.Panel.iat -generated/pandas.Panel.iloc,../reference/api/pandas.Panel.iloc -generated/pandas.Panel.infer_objects,../reference/api/pandas.Panel.infer_objects 
-generated/pandas.Panel.interpolate,../reference/api/pandas.Panel.interpolate -generated/pandas.Panel.is_copy,../reference/api/pandas.Panel.is_copy -generated/pandas.Panel.isna,../reference/api/pandas.Panel.isna -generated/pandas.Panel.isnull,../reference/api/pandas.Panel.isnull -generated/pandas.Panel.items,../reference/api/pandas.Panel.items -generated/pandas.Panel.__iter__,../reference/api/pandas.Panel.__iter__ -generated/pandas.Panel.iteritems,../reference/api/pandas.Panel.iteritems -generated/pandas.Panel.ix,../reference/api/pandas.Panel.ix -generated/pandas.Panel.join,../reference/api/pandas.Panel.join -generated/pandas.Panel.keys,../reference/api/pandas.Panel.keys -generated/pandas.Panel.kurt,../reference/api/pandas.Panel.kurt -generated/pandas.Panel.kurtosis,../reference/api/pandas.Panel.kurtosis -generated/pandas.Panel.last,../reference/api/pandas.Panel.last -generated/pandas.Panel.last_valid_index,../reference/api/pandas.Panel.last_valid_index -generated/pandas.Panel.le,../reference/api/pandas.Panel.le -generated/pandas.Panel.loc,../reference/api/pandas.Panel.loc -generated/pandas.Panel.lt,../reference/api/pandas.Panel.lt -generated/pandas.Panel.mad,../reference/api/pandas.Panel.mad -generated/pandas.Panel.major_axis,../reference/api/pandas.Panel.major_axis -generated/pandas.Panel.major_xs,../reference/api/pandas.Panel.major_xs -generated/pandas.Panel.mask,../reference/api/pandas.Panel.mask -generated/pandas.Panel.max,../reference/api/pandas.Panel.max -generated/pandas.Panel.mean,../reference/api/pandas.Panel.mean -generated/pandas.Panel.median,../reference/api/pandas.Panel.median -generated/pandas.Panel.min,../reference/api/pandas.Panel.min -generated/pandas.Panel.minor_axis,../reference/api/pandas.Panel.minor_axis -generated/pandas.Panel.minor_xs,../reference/api/pandas.Panel.minor_xs -generated/pandas.Panel.mod,../reference/api/pandas.Panel.mod -generated/pandas.Panel.mul,../reference/api/pandas.Panel.mul 
-generated/pandas.Panel.multiply,../reference/api/pandas.Panel.multiply -generated/pandas.Panel.ndim,../reference/api/pandas.Panel.ndim -generated/pandas.Panel.ne,../reference/api/pandas.Panel.ne -generated/pandas.Panel.notna,../reference/api/pandas.Panel.notna -generated/pandas.Panel.notnull,../reference/api/pandas.Panel.notnull -generated/pandas.Panel.pct_change,../reference/api/pandas.Panel.pct_change -generated/pandas.Panel.pipe,../reference/api/pandas.Panel.pipe -generated/pandas.Panel.pop,../reference/api/pandas.Panel.pop -generated/pandas.Panel.pow,../reference/api/pandas.Panel.pow -generated/pandas.Panel.prod,../reference/api/pandas.Panel.prod -generated/pandas.Panel.product,../reference/api/pandas.Panel.product -generated/pandas.Panel.radd,../reference/api/pandas.Panel.radd -generated/pandas.Panel.rank,../reference/api/pandas.Panel.rank -generated/pandas.Panel.rdiv,../reference/api/pandas.Panel.rdiv -generated/pandas.Panel.reindex_axis,../reference/api/pandas.Panel.reindex_axis -generated/pandas.Panel.reindex,../reference/api/pandas.Panel.reindex -generated/pandas.Panel.reindex_like,../reference/api/pandas.Panel.reindex_like -generated/pandas.Panel.rename_axis,../reference/api/pandas.Panel.rename_axis -generated/pandas.Panel.rename,../reference/api/pandas.Panel.rename -generated/pandas.Panel.replace,../reference/api/pandas.Panel.replace -generated/pandas.Panel.resample,../reference/api/pandas.Panel.resample -generated/pandas.Panel.rfloordiv,../reference/api/pandas.Panel.rfloordiv -generated/pandas.Panel.rmod,../reference/api/pandas.Panel.rmod -generated/pandas.Panel.rmul,../reference/api/pandas.Panel.rmul -generated/pandas.Panel.round,../reference/api/pandas.Panel.round -generated/pandas.Panel.rpow,../reference/api/pandas.Panel.rpow -generated/pandas.Panel.rsub,../reference/api/pandas.Panel.rsub -generated/pandas.Panel.rtruediv,../reference/api/pandas.Panel.rtruediv -generated/pandas.Panel.sample,../reference/api/pandas.Panel.sample 
-generated/pandas.Panel.select,../reference/api/pandas.Panel.select -generated/pandas.Panel.sem,../reference/api/pandas.Panel.sem -generated/pandas.Panel.set_axis,../reference/api/pandas.Panel.set_axis -generated/pandas.Panel.set_value,../reference/api/pandas.Panel.set_value -generated/pandas.Panel.shape,../reference/api/pandas.Panel.shape -generated/pandas.Panel.shift,../reference/api/pandas.Panel.shift -generated/pandas.Panel.size,../reference/api/pandas.Panel.size -generated/pandas.Panel.skew,../reference/api/pandas.Panel.skew -generated/pandas.Panel.slice_shift,../reference/api/pandas.Panel.slice_shift -generated/pandas.Panel.sort_index,../reference/api/pandas.Panel.sort_index -generated/pandas.Panel.sort_values,../reference/api/pandas.Panel.sort_values -generated/pandas.Panel.squeeze,../reference/api/pandas.Panel.squeeze -generated/pandas.Panel.std,../reference/api/pandas.Panel.std -generated/pandas.Panel.sub,../reference/api/pandas.Panel.sub -generated/pandas.Panel.subtract,../reference/api/pandas.Panel.subtract -generated/pandas.Panel.sum,../reference/api/pandas.Panel.sum -generated/pandas.Panel.swapaxes,../reference/api/pandas.Panel.swapaxes -generated/pandas.Panel.swaplevel,../reference/api/pandas.Panel.swaplevel -generated/pandas.Panel.tail,../reference/api/pandas.Panel.tail -generated/pandas.Panel.take,../reference/api/pandas.Panel.take -generated/pandas.Panel.timetuple,../reference/api/pandas.Panel.timetuple -generated/pandas.Panel.to_clipboard,../reference/api/pandas.Panel.to_clipboard -generated/pandas.Panel.to_csv,../reference/api/pandas.Panel.to_csv -generated/pandas.Panel.to_dense,../reference/api/pandas.Panel.to_dense -generated/pandas.Panel.to_excel,../reference/api/pandas.Panel.to_excel -generated/pandas.Panel.to_frame,../reference/api/pandas.Panel.to_frame -generated/pandas.Panel.to_hdf,../reference/api/pandas.Panel.to_hdf -generated/pandas.Panel.to_json,../reference/api/pandas.Panel.to_json 
-generated/pandas.Panel.to_latex,../reference/api/pandas.Panel.to_latex -generated/pandas.Panel.to_msgpack,../reference/api/pandas.Panel.to_msgpack -generated/pandas.Panel.to_pickle,../reference/api/pandas.Panel.to_pickle -generated/pandas.Panel.to_sparse,../reference/api/pandas.Panel.to_sparse -generated/pandas.Panel.to_sql,../reference/api/pandas.Panel.to_sql -generated/pandas.Panel.to_xarray,../reference/api/pandas.Panel.to_xarray -generated/pandas.Panel.transform,../reference/api/pandas.Panel.transform -generated/pandas.Panel.transpose,../reference/api/pandas.Panel.transpose -generated/pandas.Panel.truediv,../reference/api/pandas.Panel.truediv -generated/pandas.Panel.truncate,../reference/api/pandas.Panel.truncate -generated/pandas.Panel.tshift,../reference/api/pandas.Panel.tshift -generated/pandas.Panel.tz_convert,../reference/api/pandas.Panel.tz_convert -generated/pandas.Panel.tz_localize,../reference/api/pandas.Panel.tz_localize -generated/pandas.Panel.update,../reference/api/pandas.Panel.update -generated/pandas.Panel.values,../reference/api/pandas.Panel.values -generated/pandas.Panel.var,../reference/api/pandas.Panel.var -generated/pandas.Panel.where,../reference/api/pandas.Panel.where -generated/pandas.Panel.xs,../reference/api/pandas.Panel.xs generated/pandas.Period.asfreq,../reference/api/pandas.Period.asfreq generated/pandas.Period.day,../reference/api/pandas.Period.day generated/pandas.Period.dayofweek,../reference/api/pandas.Period.dayofweek diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 2e9524fea89b1..944021ca0fcae 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -9,7 +9,7 @@ including other versions of pandas. I/O and LZMA ~~~~~~~~~~~~ -Some users may unknowingly have an incomplete Python installation lacking the `lzma` module from the standard library. In this case, `import pandas` failed due to an `ImportError` (:issue: `27575`). 
+Some users may unknowingly have an incomplete Python installation lacking the `lzma` module from the standard library. In this case, `import pandas` failed due to an `ImportError` (:issue:`27575`). Pandas will now warn, rather than raising an `ImportError` if the `lzma` module is not present. Any subsequent attempt to use `lzma` methods will raise a `RuntimeError`. A possible fix for the lack of the `lzma` module is to ensure you have the necessary libraries and then re-install Python. For example, on MacOS installing Python with `pyenv` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like `xz`). Compilation will succeed, but Python might fail at run time. The issue can be solved by installing the necessary dependencies and then re-installing Python. diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 05aba65888c55..7ffd3a071aa59 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -407,7 +407,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously deprecated :meth:`Index.summary` (:issue:`18217`) - Removed the previously deprecated "fastpath" keyword from the :class:`Index` constructor (:issue:`23110`) - Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`) -- Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to False (:issue:`27600`) +- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. 
It now defaults to ``False`` (:issue:`27600`) - Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`) - Removed the previously deprecated "by" keyword from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`) - Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`) @@ -455,7 +455,6 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously deprecated :attr:`DatetimeIndex.offset` (:issue:`20730`) - Removed the previously deprecated :meth:`DatetimeIndex.asobject`, :meth:`TimedeltaIndex.asobject`, :meth:`PeriodIndex.asobject`, use ``astype(object)`` instead (:issue:`29801`) - Removed previously deprecated "order" argument from :func:`factorize` (:issue:`19751`) -- Removed previously deprecated "v" argument from :meth:`FrozenNDarray.searchsorted`, use "value" instead (:issue:`22672`) - :func:`read_stata` and :meth:`DataFrame.to_stata` no longer supports the "encoding" argument (:issue:`21400`) - In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`) - Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) @@ -493,7 +492,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) +- Added test to assert the :func:`fillna` raises the correct ``ValueError`` message when the value isn't a value from categories (:issue:`13628`) - Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) - :meth:`DataFrame.reindex` with a :class:`CategoricalIndex` would fail when the targets 
contained duplicates, and wouldn't fail if the source contained duplicates (:issue:`28107`) - Bug in :meth:`Categorical.astype` not allowing for casting to extension dtypes (:issue:`28668`) @@ -503,7 +502,7 @@ Categorical - Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) - Using date accessors on a categorical dtyped :class:`Series` of datetimes was not returning an object of the same type as if one used the :meth:`.str.` / :meth:`.dt.` on a :class:`Series` of that type. E.g. when accessing :meth:`Series.dt.tz_localize` on a - :class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue: `27952`) + :class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue:`27952`) - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` that would give incorrect results on categorical data (:issue:`26988`) @@ -541,7 +540,7 @@ Timezones Numeric ^^^^^^^ - Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) -- :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth: `DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) +- :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth:`DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) - Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) - Bug in :meth:`Series.var` not computing the right value with a nullable 
integer dtype series not passing through ddof argument (:issue:`29128`) @@ -638,21 +637,22 @@ Groupby/resample/rolling - - Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`) - Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`) -- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`) -- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue: `15584`). -- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue: `19248`). +- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`) +- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`). +- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`). 
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) - Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`) - Bug in :meth:`DataFrameGroupBy.rolling().quantile()` ignoring ``interpolation`` keyword argument (:issue:`28779`) - Bug in :meth:`DataFrame.groupby` where ``any``, ``all``, ``nunique`` and transform functions would incorrectly handle duplicate column labels (:issue:`21668`) +- Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) - Reshaping ^^^^^^^^^ - Bug in :meth:`DataFrame.apply` that caused incorrect output with empty :class:`DataFrame` (:issue:`28202`, :issue:`21959`) -- Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue: `28301`) +- Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue:`28301`) - Bug in :meth:`pivot_table` not returning correct type ``float`` when ``margins=True`` and ``aggfunc='mean'`` (:issue:`24893`) - Bug :func:`merge_asof` could not use :class:`datetime.timedelta` for ``tolerance`` kwarg (:issue:`28098`) - Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`) @@ -685,7 +685,7 @@ Other - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. 
(:issue:`26023`) - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) -- :meth:`SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue: 28479) +- :meth:`SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue:`28479`) - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) - Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`) - Fix :class:`AbstractHolidayCalendar` to return correct results for diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index dae16dfdef01f..29835a9bd0c00 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4010,7 +4010,7 @@ def write(self, **kwargs): to): write out the indices and the values using _write_array (e.g. 
a CArray) create an indexing table so that we can search """ - raise NotImplementedError("WORKTable needs to implement write") + raise NotImplementedError("WORMTable needs to implement write") class AppendableTable(Table): @@ -4033,7 +4033,7 @@ def write( dropna=False, nan_rep=None, data_columns=None, - errors="strict", # not used hre, but passed to super + errors="strict", # not used here, but passed to super ): if not append and self.is_exists: @@ -4067,9 +4067,6 @@ def write( # create the table self._handle.create_table(self.group, **options) - else: - pass - # table = self.table # update my info self.attrs.info = self.info From ce821118093131dec1b2c24ec0e43319c95a8c59 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 30 Nov 2019 09:47:09 -0800 Subject: [PATCH 23/49] DEPR: remove itemsize, data, base, flags, strides (#29918) --- doc/redirects.csv | 8 -- doc/source/reference/indexing.rst | 3 - doc/source/reference/series.rst | 5 -- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/arrays/categorical.py | 7 -- pandas/core/arrays/period.py | 8 -- pandas/core/base.py | 85 +------------------ pandas/core/indexes/interval.py | 14 --- pandas/core/indexes/period.py | 37 +------- .../tests/indexes/interval/test_interval.py | 11 --- pandas/tests/indexes/test_category.py | 6 +- pandas/tests/test_base.py | 16 +--- 12 files changed, 9 insertions(+), 192 deletions(-) diff --git a/doc/redirects.csv b/doc/redirects.csv index b8dac83e301d5..46021d052679f 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -622,7 +622,6 @@ generated/pandas.Index.asi8,../reference/api/pandas.Index.asi8 generated/pandas.Index.asof,../reference/api/pandas.Index.asof generated/pandas.Index.asof_locs,../reference/api/pandas.Index.asof_locs generated/pandas.Index.astype,../reference/api/pandas.Index.astype -generated/pandas.Index.base,../reference/api/pandas.Index.base generated/pandas.Index.contains,../reference/api/pandas.Index.contains 
generated/pandas.Index.copy,../reference/api/pandas.Index.copy generated/pandas.Index.data,../reference/api/pandas.Index.data @@ -639,7 +638,6 @@ generated/pandas.Index.empty,../reference/api/pandas.Index.empty generated/pandas.Index.equals,../reference/api/pandas.Index.equals generated/pandas.Index.factorize,../reference/api/pandas.Index.factorize generated/pandas.Index.fillna,../reference/api/pandas.Index.fillna -generated/pandas.Index.flags,../reference/api/pandas.Index.flags generated/pandas.Index.format,../reference/api/pandas.Index.format generated/pandas.Index.get_duplicates,../reference/api/pandas.Index.get_duplicates generated/pandas.Index.get_indexer_for,../reference/api/pandas.Index.get_indexer_for @@ -679,7 +677,6 @@ generated/pandas.Index.is_object,../reference/api/pandas.Index.is_object generated/pandas.Index.is_type_compatible,../reference/api/pandas.Index.is_type_compatible generated/pandas.Index.is_unique,../reference/api/pandas.Index.is_unique generated/pandas.Index.item,../reference/api/pandas.Index.item -generated/pandas.Index.itemsize,../reference/api/pandas.Index.itemsize generated/pandas.Index.join,../reference/api/pandas.Index.join generated/pandas.Index.map,../reference/api/pandas.Index.map generated/pandas.Index.max,../reference/api/pandas.Index.max @@ -711,7 +708,6 @@ generated/pandas.Index.sort,../reference/api/pandas.Index.sort generated/pandas.Index.sortlevel,../reference/api/pandas.Index.sortlevel generated/pandas.Index.sort_values,../reference/api/pandas.Index.sort_values generated/pandas.Index.str,../reference/api/pandas.Index.str -generated/pandas.Index.strides,../reference/api/pandas.Index.strides generated/pandas.Index.summary,../reference/api/pandas.Index.summary generated/pandas.Index.symmetric_difference,../reference/api/pandas.Index.symmetric_difference generated/pandas.Index.take,../reference/api/pandas.Index.take @@ -938,7 +934,6 @@ generated/pandas.Series.at,../reference/api/pandas.Series.at 
generated/pandas.Series.at_time,../reference/api/pandas.Series.at_time generated/pandas.Series.autocorr,../reference/api/pandas.Series.autocorr generated/pandas.Series.axes,../reference/api/pandas.Series.axes -generated/pandas.Series.base,../reference/api/pandas.Series.base generated/pandas.Series.between,../reference/api/pandas.Series.between generated/pandas.Series.between_time,../reference/api/pandas.Series.between_time generated/pandas.Series.bfill,../reference/api/pandas.Series.bfill @@ -1047,7 +1042,6 @@ generated/pandas.Series.fillna,../reference/api/pandas.Series.fillna generated/pandas.Series.filter,../reference/api/pandas.Series.filter generated/pandas.Series.first,../reference/api/pandas.Series.first generated/pandas.Series.first_valid_index,../reference/api/pandas.Series.first_valid_index -generated/pandas.Series.flags,../reference/api/pandas.Series.flags generated/pandas.Series.floordiv,../reference/api/pandas.Series.floordiv generated/pandas.Series.from_array,../reference/api/pandas.Series.from_array generated/pandas.Series.from_csv,../reference/api/pandas.Series.from_csv @@ -1080,7 +1074,6 @@ generated/pandas.Series.isnull,../reference/api/pandas.Series.isnull generated/pandas.Series.is_unique,../reference/api/pandas.Series.is_unique generated/pandas.Series.item,../reference/api/pandas.Series.item generated/pandas.Series.items,../reference/api/pandas.Series.items -generated/pandas.Series.itemsize,../reference/api/pandas.Series.itemsize generated/pandas.Series.__iter__,../reference/api/pandas.Series.__iter__ generated/pandas.Series.iteritems,../reference/api/pandas.Series.iteritems generated/pandas.Series.ix,../reference/api/pandas.Series.ix @@ -1193,7 +1186,6 @@ generated/pandas.Series.str.find,../reference/api/pandas.Series.str.find generated/pandas.Series.str.get_dummies,../reference/api/pandas.Series.str.get_dummies generated/pandas.Series.str.get,../reference/api/pandas.Series.str.get generated/pandas.Series.str,../reference/api/pandas.Series.str 
-generated/pandas.Series.strides,../reference/api/pandas.Series.strides generated/pandas.Series.str.index,../reference/api/pandas.Series.str.index generated/pandas.Series.str.isalnum,../reference/api/pandas.Series.str.isalnum generated/pandas.Series.str.isalpha,../reference/api/pandas.Series.str.isalpha diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 448f020cfa56f..8edea28c17318 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -42,9 +42,6 @@ Properties Index.ndim Index.size Index.empty - Index.strides - Index.itemsize - Index.base Index.T Index.memory_usage diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 528cc8a0c3920..91843c7975a2c 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -33,16 +33,11 @@ Attributes Series.nbytes Series.ndim Series.size - Series.strides - Series.itemsize - Series.base Series.T Series.memory_usage Series.hasnans - Series.flags Series.empty Series.dtypes - Series.data Series.name Series.put diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7ffd3a071aa59..3f8d9d3916797 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -462,6 +462,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- Changed the default ``fill_value`` in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) - Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, - :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) +- Removed the previously deprecated :attr:`Series.base`, :attr:`Index.base`, :attr:`Categorical.base`, :attr:`Series.flags`, :attr:`Index.flags`, :attr:`PeriodArray.flags`, :attr:`Series.strides`, :attr:`Index.strides`, :attr:`Series.itemsize`, :attr:`Index.itemsize`, :attr:`Series.data`, :attr:`Index.data` (:issue:`20721`) - Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) - Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`) - Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index bb4ceea420d8d..ec1f5d2d6214c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -527,13 +527,6 @@ def tolist(self) -> list: to_list = tolist - @property - def base(self) -> None: - """ - compat, we are always our own object - """ - return None - @classmethod def _from_inferred_categories( cls, inferred_categories, inferred_codes, dtype, true_values=None diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 41a8c48452647..1012abd0b5d13 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -618,14 +618,6 @@ def astype(self, dtype, copy=True): return self.asfreq(dtype.freq) return super().astype(dtype, copy=copy) - 
@property - def flags(self): - # TODO: remove - # We need this since reduction.SeriesBinGrouper uses values.flags - # Ideally, we wouldn't be passing objects down there in the first - # place. - return self._data.flags - # ------------------------------------------------------------------ # Arithmetic Methods _create_comparison_method = classmethod(_period_array_cmp) diff --git a/pandas/core/base.py b/pandas/core/base.py index 83d6ac76cdd98..066a7628be364 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -628,15 +628,7 @@ class IndexOpsMixin: # ndarray compatibility __array_priority__ = 1000 _deprecations: FrozenSet[str] = frozenset( - [ - "tolist", # tolist is not deprecated, just suppressed in the __dir__ - "base", - "data", - "item", - "itemsize", - "flags", - "strides", - ] + ["tolist", "item"] # tolist is not deprecated, just suppressed in the __dir__ ) def transpose(self, *args, **kwargs): @@ -707,36 +699,6 @@ def item(self): ) return self.values.item() - @property - def data(self): - """ - Return the data pointer of the underlying data. - - .. deprecated:: 0.23.0 - """ - warnings.warn( - "{obj}.data is deprecated and will be removed " - "in a future version".format(obj=type(self).__name__), - FutureWarning, - stacklevel=2, - ) - return self.values.data - - @property - def itemsize(self): - """ - Return the size of the dtype of the item of the underlying data. - - .. deprecated:: 0.23.0 - """ - warnings.warn( - "{obj}.itemsize is deprecated and will be removed " - "in a future version".format(obj=type(self).__name__), - FutureWarning, - stacklevel=2, - ) - return self._ndarray_values.itemsize - @property def nbytes(self): """ @@ -744,21 +706,6 @@ def nbytes(self): """ return self._values.nbytes - @property - def strides(self): - """ - Return the strides of the underlying data. - - .. 
deprecated:: 0.23.0 - """ - warnings.warn( - "{obj}.strides is deprecated and will be removed " - "in a future version".format(obj=type(self).__name__), - FutureWarning, - stacklevel=2, - ) - return self._ndarray_values.strides - @property def size(self): """ @@ -766,36 +713,6 @@ def size(self): """ return len(self._values) - @property - def flags(self): - """ - Return the ndarray.flags for the underlying data. - - .. deprecated:: 0.23.0 - """ - warnings.warn( - "{obj}.flags is deprecated and will be removed " - "in a future version".format(obj=type(self).__name__), - FutureWarning, - stacklevel=2, - ) - return self.values.flags - - @property - def base(self): - """ - Return the base object if the memory of the underlying data is shared. - - .. deprecated:: 0.23.0 - """ - warnings.warn( - "{obj}.base is deprecated and will be removed " - "in a future version".format(obj=type(self).__name__), - FutureWarning, - stacklevel=2, - ) - return self.values.base - @property def array(self) -> ExtensionArray: """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index a9e119f3c5f87..56957b2f879ec 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -2,7 +2,6 @@ from operator import le, lt import textwrap from typing import Any, Optional, Tuple, Union -import warnings import numpy as np @@ -455,19 +454,6 @@ def size(self): # Avoid materializing ndarray[Interval] return self._data.size - @property - def itemsize(self): - msg = ( - "IntervalIndex.itemsize is deprecated and will be removed in " - "a future version" - ) - warnings.warn(msg, FutureWarning, stacklevel=2) - - # suppress the warning from the underlying left/right itemsize - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - return self.left.itemsize + self.right.itemsize - def __len__(self) -> int: return len(self.left) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index cdd0e600c888d..b3476dcb12abd 100644 --- 
a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -911,20 +911,9 @@ def __setstate__(self, state): _unpickle_compat = __setstate__ - @property - def flags(self): - """ return the ndarray.flags for the underlying data """ - warnings.warn( - "{obj}.flags is deprecated and will be removed " - "in a future version".format(obj=type(self).__name__), - FutureWarning, - stacklevel=2, - ) - return self._ndarray_values.flags - def item(self): """ - return the first element of the underlying data as a python + Return the first element of the underlying data as a python scalar .. deprecated:: 0.25.0 @@ -943,30 +932,6 @@ def item(self): # copy numpy's message here because Py26 raises an IndexError raise ValueError("can only convert an array of size 1 to a Python scalar") - @property - def data(self): - """ return the data pointer of the underlying data """ - warnings.warn( - "{obj}.data is deprecated and will be removed " - "in a future version".format(obj=type(self).__name__), - FutureWarning, - stacklevel=2, - ) - return np.asarray(self._data).data - - @property - def base(self): - """ return the base object if the memory of the underlying data is - shared - """ - warnings.warn( - "{obj}.base is deprecated and will be removed " - "in a future version".format(obj=type(self).__name__), - FutureWarning, - stacklevel=2, - ) - return np.asarray(self._data) - def memory_usage(self, deep=False): result = super().memory_usage(deep=deep) if hasattr(self, "_cache") and "_int64index" in self._cache: diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index f3c8c5cb6efa1..6ad7dfb22f2b3 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -836,17 +836,6 @@ def test_nbytes(self): expected = 64 # 4 * 8 * 2 assert result == expected - def test_itemsize(self): - # GH 19209 - left = np.arange(0, 4, dtype="i8") - right = np.arange(1, 5, dtype="i8") - 
expected = 16 # 8 * 2 - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = IntervalIndex.from_arrays(left, right).itemsize - - assert result == expected - @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) def test_set_closed(self, name, closed, new_closed): # GH 21670 diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 84f98a55376f7..86219d77542af 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -788,8 +788,10 @@ def test_ensure_copied_data(self, indices): # Index.__new__ is honored. # # Must be tested separately from other indexes because - # self.value is not an ndarray. - _base = lambda ar: ar if ar.base is None else ar.base + # self.values is not an ndarray. + # GH#29918 Index.base has been removed + # FIXME: is this test still meaningful? + _base = lambda ar: ar if getattr(ar, "base", None) is None else ar.base result = CategoricalIndex(indices.values, copy=True) tm.assert_index_equal(indices, result) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index e65388be2ba7d..d515a015cdbec 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -333,20 +333,8 @@ def test_ndarray_compat_properties(self): assert getattr(o, p, None) is not None # deprecated properties - for p in ["flags", "strides", "itemsize"]: - with tm.assert_produces_warning(FutureWarning): - assert getattr(o, p, None) is not None - - with tm.assert_produces_warning(FutureWarning): - assert hasattr(o, "base") - - # If we have a datetime-like dtype then needs a view to work - # but the user is responsible for that - try: - with tm.assert_produces_warning(FutureWarning): - assert o.data is not None - except ValueError: - pass + for p in ["flags", "strides", "itemsize", "base", "data"]: + assert not hasattr(o, p) with pytest.raises(ValueError): with tm.assert_produces_warning(FutureWarning): From 
f51f0987f0d7ed20ef8cd0ce2c02697a9f2426c8 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sat, 30 Nov 2019 19:48:10 +0200 Subject: [PATCH 24/49] Annotated functions (#29908) --- pandas/core/indexing.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 5b875ef3fdc4f..0896339bc28c7 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -242,7 +242,7 @@ def _has_valid_tuple(self, key: Tuple): "[{types}] types".format(types=self._valid_types) ) - def _is_nested_tuple_indexer(self, tup: Tuple): + def _is_nested_tuple_indexer(self, tup: Tuple) -> bool: if any(isinstance(ax, ABCMultiIndex) for ax in self.obj.axes): return any(is_nested_tuple(tup, ax) for ax in self.obj.axes) return False @@ -275,10 +275,10 @@ def _convert_slice_indexer(self, key: slice, axis: int): ax = self.obj._get_axis(min(axis, self.ndim - 1)) return ax._convert_slice_indexer(key, kind=self.name) - def _has_valid_setitem_indexer(self, indexer): + def _has_valid_setitem_indexer(self, indexer) -> bool: return True - def _has_valid_positional_setitem_indexer(self, indexer): + def _has_valid_positional_setitem_indexer(self, indexer) -> bool: """ validate that an positional indexer cannot enlarge its target will raise if needed, does not modify the indexer externally """ @@ -1314,7 +1314,7 @@ def __init__(self, name, obj): super().__init__(name, obj) @Appender(_NDFrameIndexer._validate_key.__doc__) - def _validate_key(self, key, axis: int): + def _validate_key(self, key, axis: int) -> bool: if isinstance(key, slice): return True @@ -1685,7 +1685,7 @@ def _validate_key(self, key, axis: int): if not is_list_like_indexer(key): self._convert_scalar_indexer(key, axis) - def _is_scalar_access(self, key: Tuple): + def _is_scalar_access(self, key: Tuple) -> bool: # this is a shortcut accessor to both .loc and .iloc # that provide the equivalent 
access of .at and .iat # a) avoid getting things via sections and (to minimize dtype changes) @@ -1998,7 +1998,7 @@ def _validate_key(self, key, axis: int): def _has_valid_setitem_indexer(self, indexer): self._has_valid_positional_setitem_indexer(indexer) - def _is_scalar_access(self, key: Tuple): + def _is_scalar_access(self, key: Tuple) -> bool: # this is a shortcut accessor to both .loc and .iloc # that provide the equivalent access of .at and .iat # a) avoid getting things via sections and (to minimize dtype changes) @@ -2022,7 +2022,7 @@ def _getitem_scalar(self, key): values = self.obj._get_value(*key, takeable=True) return values - def _validate_integer(self, key: int, axis: int): + def _validate_integer(self, key: int, axis: int) -> None: """ Check that 'key' is a valid position in the desired axis. @@ -2448,7 +2448,7 @@ def maybe_convert_ix(*args): return args -def is_nested_tuple(tup, labels): +def is_nested_tuple(tup, labels) -> bool: # check for a compatible nested tuple and multiindexes among the axes if not isinstance(tup, tuple): return False @@ -2461,12 +2461,12 @@ def is_nested_tuple(tup, labels): return False -def is_label_like(key): +def is_label_like(key) -> bool: # select a label or row return not isinstance(key, slice) and not is_list_like_indexer(key) -def need_slice(obj): +def need_slice(obj) -> bool: return ( obj.start is not None or obj.stop is not None @@ -2487,7 +2487,7 @@ def _non_reducing_slice(slice_): if isinstance(slice_, kinds): slice_ = IndexSlice[:, slice_] - def pred(part): + def pred(part) -> bool: # true when slice does *not* reduce, False when part is a tuple, # i.e. 
MultiIndex slice return (isinstance(part, slice) or is_list_like(part)) and not isinstance( @@ -2519,7 +2519,7 @@ def _maybe_numeric_slice(df, slice_, include_bool=False): return slice_ -def _can_do_equal_len(labels, value, plane_indexer, lplane_indexer, obj): +def _can_do_equal_len(labels, value, plane_indexer, lplane_indexer, obj) -> bool: """ return True if we have an equal len settable """ if not len(labels) == 1 or not np.iterable(value) or is_scalar(plane_indexer[0]): return False From 6ffbc4efee864e3c6b48a27d82ea0435797ac0c6 Mon Sep 17 00:00:00 2001 From: Tambe Tabitha Achere Date: Sun, 1 Dec 2019 08:08:05 +0100 Subject: [PATCH 25/49] A closing paranthesis has been added to resolve a typo (#29942) --- doc/source/getting_started/basics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index 6301fee7775cf..d489d35dc1226 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -2006,7 +2006,7 @@ The number of columns of each type in a ``DataFrame`` can be found by calling Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``, -or a passed ``Series``, then it will be preserved in DataFrame operations. Furthermore, +or a passed ``Series``), then it will be preserved in DataFrame operations. Furthermore, different numeric dtypes will **NOT** be combined. The following example will give you a taste. .. 
ipython:: python From 9d528bf2bd9cdf766cd7f171079205159fa83f96 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sun, 1 Dec 2019 19:45:22 +0200 Subject: [PATCH 26/49] CI: added unwanted pattern test case for "foo.__class__" instead of type(foo) (#29937) --- ci/code_checks.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 4e25fd0ddb5ea..ceb13c52ded9c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -194,6 +194,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG + invgrep -R --include=*.{py,pyx} '\.__class__' pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG INVGREP_APPEND=" <- trailing whitespaces found" invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" * From 4e33ba094ae15c3722283fcc76ca6b90ba5be51b Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sun, 1 Dec 2019 17:50:19 +0000 Subject: [PATCH 27/49] CLN: OrderedDict -> Dict (#29923) --- pandas/core/frame.py | 6 +-- pandas/io/excel/_base.py | 5 +- pandas/io/stata.py | 17 +++--- .../tests/groupby/aggregate/test_aggregate.py | 53 +++++++------------ pandas/tests/groupby/aggregate/test_other.py | 4 +- pandas/tests/groupby/test_categorical.py | 3 +- pandas/tests/groupby/test_groupby.py | 5 +- .../tests/indexes/multi/test_constructor.py | 16 +++--- pandas/tests/indexes/multi/test_conversion.py | 16 +++--- pandas/tests/indexes/test_setops.py | 15 +++--- pandas/tests/io/test_stata.py | 3 +- 11 files changed, 54 insertions(+), 89 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5dfa7002abfca..ca943111b7e9f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9,7 +9,7 @@ labeling information """ import 
collections -from collections import OrderedDict, abc +from collections import abc from io import StringIO import itertools import sys @@ -8170,10 +8170,10 @@ def isin(self, values): def _from_nested_dict(data): # TODO: this should be seriously cythonized - new_data = OrderedDict() + new_data = {} for index, s in data.items(): for col, v in s.items(): - new_data[col] = new_data.get(col, OrderedDict()) + new_data[col] = new_data.get(col, {}) new_data[col][index] = v return new_data diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c442f0d9bf66c..44254f54cbc7a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1,5 +1,4 @@ import abc -from collections import OrderedDict from datetime import date, datetime, timedelta from io import BytesIO import os @@ -429,9 +428,9 @@ def parse( sheets = [sheet_name] # handle same-type duplicates. - sheets = list(OrderedDict.fromkeys(sheets).keys()) + sheets = list(dict.fromkeys(sheets).keys()) - output = OrderedDict() + output = {} for asheetname in sheets: if verbose: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 59bb4e3bf236a..b592b560bb5a0 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -10,7 +10,6 @@ http://www.statsmodels.org/devel/ """ -from collections import OrderedDict import datetime from io import BytesIO import os @@ -1676,7 +1675,7 @@ def read( else: data_formatted.append((col, data[col])) if requires_type_conversion: - data = DataFrame.from_dict(OrderedDict(data_formatted)) + data = DataFrame.from_dict(dict(data_formatted)) del data_formatted data = self._do_convert_missing(data, convert_missing) @@ -1715,7 +1714,7 @@ def any_startswith(x: str) -> bool: convert = True retyped_data.append((col, data[col].astype(dtype))) if convert: - data = DataFrame.from_dict(OrderedDict(retyped_data)) + data = DataFrame.from_dict(dict(retyped_data)) if index_col is not None: data = data.set_index(data.pop(index_col)) @@ -1845,7 +1844,7 @@ def _do_convert_categoricals( 
cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) - data = DataFrame.from_dict(OrderedDict(cat_converted_data)) + data = DataFrame.from_dict(dict(cat_converted_data)) return data @property @@ -2194,7 +2193,7 @@ def _prepare_categoricals(self, data): data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) - return DataFrame.from_dict(OrderedDict(data_formatted)) + return DataFrame.from_dict(dict(data_formatted)) def _replace_nans(self, data): # return data @@ -2673,7 +2672,7 @@ def __init__(self, df, columns, version=117, byteorder=None): self.df = df self.columns = columns - self._gso_table = OrderedDict((("", (0, 0)),)) + self._gso_table = {"": (0, 0)} if byteorder is None: byteorder = sys.byteorder self._byteorder = _set_endianness(byteorder) @@ -2703,7 +2702,7 @@ def generate_table(self): Returns ------- - gso_table : OrderedDict + gso_table : dict Ordered dictionary using the string found as keys and their lookup position (v,o) as values gso_df : DataFrame @@ -2761,7 +2760,7 @@ def generate_blob(self, gso_table): Parameters ---------- - gso_table : OrderedDict + gso_table : dict Ordered dictionary (str, vo) Returns @@ -2991,7 +2990,7 @@ def _write_map(self): the map with 0s. 
The second call writes the final map locations when all blocks have been written.""" if self._map is None: - self._map = OrderedDict( + self._map = dict( ( ("stata_data", 0), ("map", self._file.tell()), diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index e4de2147586f5..9543c9d5b59de 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1,7 +1,6 @@ """ test .agg behavior / note that .apply is tested generally in test_groupby.py """ -from collections import OrderedDict import functools import numpy as np @@ -175,18 +174,14 @@ def test_aggregate_str_func(tsframe, groupbyfunc): tm.assert_frame_equal(result, expected) # group frame by function dict - result = grouped.agg( - OrderedDict([["A", "var"], ["B", "std"], ["C", "mean"], ["D", "sem"]]) - ) + result = grouped.agg({"A": "var", "B": "std", "C": "mean", "D": "sem"}) expected = DataFrame( - OrderedDict( - [ - ["A", grouped["A"].var()], - ["B", grouped["B"].std()], - ["C", grouped["C"].mean()], - ["D", grouped["D"].sem()], - ] - ) + { + "A": grouped["A"].var(), + "B": grouped["B"].std(), + "C": grouped["C"].mean(), + "D": grouped["D"].sem(), + } ) tm.assert_frame_equal(result, expected) @@ -261,22 +256,20 @@ def test_multiple_functions_tuples_and_non_tuples(df): def test_more_flexible_frame_multi_function(df): grouped = df.groupby("A") - exmean = grouped.agg(OrderedDict([["C", np.mean], ["D", np.mean]])) - exstd = grouped.agg(OrderedDict([["C", np.std], ["D", np.std]])) + exmean = grouped.agg({"C": np.mean, "D": np.mean}) + exstd = grouped.agg({"C": np.std, "D": np.std}) expected = concat([exmean, exstd], keys=["mean", "std"], axis=1) expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) - d = OrderedDict([["C", [np.mean, np.std]], ["D", [np.mean, np.std]]]) + d = {"C": [np.mean, np.std], "D": [np.mean, np.std]} result = grouped.aggregate(d) 
tm.assert_frame_equal(result, expected) # be careful - result = grouped.aggregate(OrderedDict([["C", np.mean], ["D", [np.mean, np.std]]])) - expected = grouped.aggregate( - OrderedDict([["C", np.mean], ["D", [np.mean, np.std]]]) - ) + result = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) + expected = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) tm.assert_frame_equal(result, expected) def foo(x): @@ -288,13 +281,11 @@ def bar(x): # this uses column selection & renaming msg = r"nested renamer is not supported" with pytest.raises(SpecificationError, match=msg): - d = OrderedDict( - [["C", np.mean], ["D", OrderedDict([["foo", np.mean], ["bar", np.std]])]] - ) + d = dict([["C", np.mean], ["D", dict([["foo", np.mean], ["bar", np.std]])]]) grouped.aggregate(d) # But without renaming, these functions are OK - d = OrderedDict([["C", [np.mean]], ["D", [foo, bar]]]) + d = {"C": [np.mean], "D": [foo, bar]} grouped.aggregate(d) @@ -303,26 +294,20 @@ def test_multi_function_flexible_mix(df): grouped = df.groupby("A") # Expected - d = OrderedDict( - [["C", OrderedDict([["foo", "mean"], ["bar", "std"]])], ["D", {"sum": "sum"}]] - ) + d = {"C": {"foo": "mean", "bar": "std"}, "D": {"sum": "sum"}} # this uses column selection & renaming msg = r"nested renamer is not supported" with pytest.raises(SpecificationError, match=msg): grouped.aggregate(d) # Test 1 - d = OrderedDict( - [["C", OrderedDict([["foo", "mean"], ["bar", "std"]])], ["D", "sum"]] - ) + d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"} # this uses column selection & renaming with pytest.raises(SpecificationError, match=msg): grouped.aggregate(d) # Test 2 - d = OrderedDict( - [["C", OrderedDict([["foo", "mean"], ["bar", "std"]])], ["D", ["sum"]]] - ) + d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"} # this uses column selection & renaming with pytest.raises(SpecificationError, match=msg): grouped.aggregate(d) @@ -642,9 +627,7 @@ def test_maybe_mangle_lambdas_args(self): assert 
func["A"][0](0, 2, b=3) == (0, 2, 3) def test_maybe_mangle_lambdas_named(self): - func = OrderedDict( - [("C", np.mean), ("D", OrderedDict([("foo", np.mean), ("bar", np.mean)]))] - ) + func = {"C": np.mean, "D": {"foo": np.mean, "bar": np.mean}} result = _maybe_mangle_lambdas(func) assert result == func diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index f14384928b979..765bc3bab5d4a 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -2,7 +2,6 @@ test all other .agg behavior """ -from collections import OrderedDict import datetime as dt from functools import partial @@ -96,8 +95,7 @@ def test_agg_period_index(): index = period_range(start="1999-01", periods=5, freq="M") s1 = Series(np.random.rand(len(index)), index=index) s2 = Series(np.random.rand(len(index)), index=index) - series = [("s1", s1), ("s2", s2)] - df = DataFrame.from_dict(OrderedDict(series)) + df = DataFrame.from_dict({"s1": s1, "s2": s2}) grouped = df.groupby(df.index.month) list(grouped) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 5f78e4860f1e9..89ffcd9ee313e 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1,4 +1,3 @@ -from collections import OrderedDict from datetime import datetime import numpy as np @@ -1204,7 +1203,7 @@ def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): # GH 24880 expected = Series(data=data, index=index, name="C") result = df_cat.groupby(["A", "B"], observed=observed)["C"].apply( - lambda x: OrderedDict([("min", x.min()), ("max", x.max())]) + lambda x: {"min": x.min(), "max": x.max()} ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index a6b9b0e35f865..3a16642641fca 100644 --- a/pandas/tests/groupby/test_groupby.py +++ 
b/pandas/tests/groupby/test_groupby.py @@ -1,4 +1,3 @@ -from collections import OrderedDict from datetime import datetime from decimal import Decimal from io import StringIO @@ -598,7 +597,7 @@ def test_groupby_as_index_agg(df): expected = grouped.mean() tm.assert_frame_equal(result, expected) - result2 = grouped.agg(OrderedDict([["C", np.mean], ["D", np.sum]])) + result2 = grouped.agg({"C": np.mean, "D": np.sum}) expected2 = grouped.mean() expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) @@ -617,7 +616,7 @@ def test_groupby_as_index_agg(df): expected = grouped.mean() tm.assert_frame_equal(result, expected) - result2 = grouped.agg(OrderedDict([["C", np.mean], ["D", np.sum]])) + result2 = grouped.agg({"C": np.mean, "D": np.sum}) expected2 = grouped.mean() expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index c0ec889d170d6..90e993a807bd2 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -1,5 +1,3 @@ -from collections import OrderedDict - import numpy as np import pytest @@ -654,14 +652,12 @@ def test_from_frame_error(non_frame): def test_from_frame_dtype_fidelity(): # GH 22420 df = pd.DataFrame( - OrderedDict( - [ - ("dates", pd.date_range("19910905", periods=6, tz="US/Eastern")), - ("a", [1, 1, 1, 2, 2, 2]), - ("b", pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True)), - ("c", ["x", "x", "y", "z", "x", "y"]), - ] - ) + { + "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } ) original_dtypes = df.dtypes.to_dict() diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index a0b17ae8924b7..fab4f72dc153b 100644 --- 
a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -1,5 +1,3 @@ -from collections import OrderedDict - import numpy as np import pytest @@ -107,14 +105,12 @@ def test_to_frame_dtype_fidelity(): original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} expected_df = pd.DataFrame( - OrderedDict( - [ - ("dates", pd.date_range("19910905", periods=6, tz="US/Eastern")), - ("a", [1, 1, 1, 2, 2, 2]), - ("b", pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True)), - ("c", ["x", "x", "y", "z", "x", "y"]), - ] - ) + { + "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } ) df = mi.to_frame(index=False) df_dtypes = df.dtypes.to_dict() diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index d5b23653e8a72..a7e2363ec422e 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -2,7 +2,6 @@ The tests in this package are to ensure the proper resultant dtypes of set operations. 
""" -from collections import OrderedDict import itertools as it import numpy as np @@ -16,14 +15,12 @@ from pandas.tests.indexes.conftest import indices_dict import pandas.util.testing as tm -COMPATIBLE_INCONSISTENT_PAIRS = OrderedDict( - [ - ((Int64Index, RangeIndex), (tm.makeIntIndex, tm.makeRangeIndex)), - ((Float64Index, Int64Index), (tm.makeFloatIndex, tm.makeIntIndex)), - ((Float64Index, RangeIndex), (tm.makeFloatIndex, tm.makeIntIndex)), - ((Float64Index, UInt64Index), (tm.makeFloatIndex, tm.makeUIntIndex)), - ] -) +COMPATIBLE_INCONSISTENT_PAIRS = { + (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex), + (Float64Index, Int64Index): (tm.makeFloatIndex, tm.makeIntIndex), + (Float64Index, RangeIndex): (tm.makeFloatIndex, tm.makeIntIndex), + (Float64Index, UInt64Index): (tm.makeFloatIndex, tm.makeUIntIndex), +} @pytest.fixture(params=it.combinations(indices_dict, 2), ids="-".join) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 2cc80a6e5565d..4203d0b0241ff 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1,4 +1,3 @@ -from collections import OrderedDict import datetime as dt from datetime import datetime import gzip @@ -1029,7 +1028,7 @@ def test_categorical_order(self, file): cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) - expected = DataFrame.from_dict(OrderedDict(cols)) + expected = DataFrame.from_dict(dict(cols)) # Read with and with out categoricals, ensure order is identical file = getattr(self, file) From 9333e3d8e360e0049f6aedca78721dda0449a871 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Dec 2019 10:11:52 -0800 Subject: [PATCH 28/49] DEPR: Categorical.ravel, get_dtype_counts, dtype_str, to_dense (#29900) --- asv_bench/benchmarks/frame_methods.py | 2 +- doc/redirects.csv | 5 -- doc/source/reference/frame.rst | 2 - doc/source/reference/indexing.rst | 1 - doc/source/reference/series.rst | 1 - 
doc/source/whatsnew/v1.0.0.rst | 4 ++ pandas/core/arrays/categorical.py | 18 ------ pandas/core/dtypes/common.py | 4 -- pandas/core/generic.py | 67 +--------------------- pandas/core/indexes/base.py | 17 +----- pandas/tests/extension/test_categorical.py | 5 +- pandas/tests/generic/test_generic.py | 20 ------- pandas/tests/indexes/multi/test_format.py | 7 --- pandas/tests/indexes/period/test_period.py | 11 ---- pandas/tests/indexes/test_common.py | 6 -- 15 files changed, 8 insertions(+), 162 deletions(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index eb9a0e83271f1..9647693d4ed6b 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -565,7 +565,7 @@ def setup(self): def time_frame_get_dtype_counts(self): with warnings.catch_warnings(record=True): - self.df.get_dtype_counts() + self.df._data.get_dtype_counts() def time_info(self): self.df.info() diff --git a/doc/redirects.csv b/doc/redirects.csv index 46021d052679f..4d171bc3d400d 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -358,7 +358,6 @@ generated/pandas.DataFrame.from_dict,../reference/api/pandas.DataFrame.from_dict generated/pandas.DataFrame.from_items,../reference/api/pandas.DataFrame.from_items generated/pandas.DataFrame.from_records,../reference/api/pandas.DataFrame.from_records generated/pandas.DataFrame.ge,../reference/api/pandas.DataFrame.ge -generated/pandas.DataFrame.get_dtype_counts,../reference/api/pandas.DataFrame.get_dtype_counts generated/pandas.DataFrame.get,../reference/api/pandas.DataFrame.get generated/pandas.DataFrame.get_value,../reference/api/pandas.DataFrame.get_value generated/pandas.DataFrame.get_values,../reference/api/pandas.DataFrame.get_values @@ -486,7 +485,6 @@ generated/pandas.DataFrame.T,../reference/api/pandas.DataFrame.T generated/pandas.DataFrame.timetuple,../reference/api/pandas.DataFrame.timetuple 
generated/pandas.DataFrame.to_clipboard,../reference/api/pandas.DataFrame.to_clipboard generated/pandas.DataFrame.to_csv,../reference/api/pandas.DataFrame.to_csv -generated/pandas.DataFrame.to_dense,../reference/api/pandas.DataFrame.to_dense generated/pandas.DataFrame.to_dict,../reference/api/pandas.DataFrame.to_dict generated/pandas.DataFrame.to_excel,../reference/api/pandas.DataFrame.to_excel generated/pandas.DataFrame.to_feather,../reference/api/pandas.DataFrame.to_feather @@ -632,7 +630,6 @@ generated/pandas.Index.drop,../reference/api/pandas.Index.drop generated/pandas.Index.droplevel,../reference/api/pandas.Index.droplevel generated/pandas.Index.dropna,../reference/api/pandas.Index.dropna generated/pandas.Index.dtype,../reference/api/pandas.Index.dtype -generated/pandas.Index.dtype_str,../reference/api/pandas.Index.dtype_str generated/pandas.Index.duplicated,../reference/api/pandas.Index.duplicated generated/pandas.Index.empty,../reference/api/pandas.Index.empty generated/pandas.Index.equals,../reference/api/pandas.Index.equals @@ -1046,7 +1043,6 @@ generated/pandas.Series.floordiv,../reference/api/pandas.Series.floordiv generated/pandas.Series.from_array,../reference/api/pandas.Series.from_array generated/pandas.Series.from_csv,../reference/api/pandas.Series.from_csv generated/pandas.Series.ge,../reference/api/pandas.Series.ge -generated/pandas.Series.get_dtype_counts,../reference/api/pandas.Series.get_dtype_counts generated/pandas.Series.get,../reference/api/pandas.Series.get generated/pandas.Series.get_value,../reference/api/pandas.Series.get_value generated/pandas.Series.get_values,../reference/api/pandas.Series.get_values @@ -1235,7 +1231,6 @@ generated/pandas.Series.T,../reference/api/pandas.Series.T generated/pandas.Series.timetuple,../reference/api/pandas.Series.timetuple generated/pandas.Series.to_clipboard,../reference/api/pandas.Series.to_clipboard generated/pandas.Series.to_csv,../reference/api/pandas.Series.to_csv 
-generated/pandas.Series.to_dense,../reference/api/pandas.Series.to_dense generated/pandas.Series.to_dict,../reference/api/pandas.Series.to_dict generated/pandas.Series.to_excel,../reference/api/pandas.Series.to_excel generated/pandas.Series.to_frame,../reference/api/pandas.Series.to_frame diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 5c860f2d4cb03..2604af4e33a89 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -28,7 +28,6 @@ Attributes and underlying data :toctree: api/ DataFrame.dtypes - DataFrame.get_dtype_counts DataFrame.select_dtypes DataFrame.values DataFrame.get_values @@ -363,7 +362,6 @@ Serialization / IO / conversion DataFrame.to_msgpack DataFrame.to_gbq DataFrame.to_records - DataFrame.to_dense DataFrame.to_string DataFrame.to_clipboard DataFrame.style diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 8edea28c17318..c155b5e3fcb37 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -32,7 +32,6 @@ Properties Index.has_duplicates Index.hasnans Index.dtype - Index.dtype_str Index.inferred_type Index.is_all_dates Index.shape diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 91843c7975a2c..2485b94ab4d09 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -579,7 +579,6 @@ Serialization / IO / conversion Series.to_sql Series.to_msgpack Series.to_json - Series.to_dense Series.to_string Series.to_clipboard Series.to_latex diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3f8d9d3916797..7d3f61ccf4e9f 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -459,6 +459,10 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`) - Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) - Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) +- Removed the previously deprecated :meth:`Series.to_dense`, :meth:`DataFrame.to_dense` (:issue:`26684`) +- Removed the previously deprecated :meth:`Index.dtype_str`, use ``str(index.dtype)`` instead (:issue:`27106`) +- :meth:`Categorical.ravel` returns a :class:`Categorical` instead of a ``ndarray`` (:issue:`27199`) +- Removed previously deprecated :meth:`Series.get_dtype_counts` and :meth:`DataFrame.get_dtype_counts` (:issue:`27145`) - Changed the default ``fill_value`` in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) - Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, - :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ec1f5d2d6214c..0dc972011833a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1689,24 +1689,6 @@ def _values_for_rank(self): ) return values - def ravel(self, order="C"): - """ - Return a flattened (numpy) array. - - For internal compatibility with numpy arrays. 
- - Returns - ------- - numpy.array - """ - warn( - "Categorical.ravel will return a Categorical object instead " - "of an ndarray in a future version.", - FutureWarning, - stacklevel=2, - ) - return np.array(self) - def view(self, dtype=None): if dtype is not None: raise NotImplementedError(dtype) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index d981a1d6e4aa4..43810df18b0aa 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -269,10 +269,6 @@ def is_sparse(arr) -> bool: bool Whether or not the array-like is a pandas sparse array. - See Also - -------- - Series.to_dense : Return dense representation of a Series. - Examples -------- Returns `True` if the parameter is a 1-D pandas sparse array. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b13aee238efb3..42b8214e07d49 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -171,7 +171,7 @@ class NDFrame(PandasObject, SelectionMixin): ] _internal_names_set: Set[str] = set(_internal_names) _accessors: Set[str] = set() - _deprecations: FrozenSet[str] = frozenset(["get_dtype_counts", "get_values", "ix"]) + _deprecations: FrozenSet[str] = frozenset(["get_values", "ix"]) _metadata: List[str] = [] _is_copy = None _data: BlockManager @@ -1988,26 +1988,6 @@ def __array_wrap__(self, result, context=None): # values = self.values # return dict(typestr=values.dtype.str,shape=values.shape,data=values) - def to_dense(self): - """ - Return dense representation of Series/DataFrame (as opposed to sparse). - - .. deprecated:: 0.25.0 - - Returns - ------- - %(klass)s - Dense %(klass)s. 
- """ - warnings.warn( - "DataFrame/Series.to_dense is deprecated " - "and will be removed in a future version", - FutureWarning, - stacklevel=2, - ) - # compat - return self - # ---------------------------------------------------------------------- # Picklability @@ -5520,51 +5500,6 @@ def get_values(self): def _internal_get_values(self): return self.values - def get_dtype_counts(self): - """ - Return counts of unique dtypes in this object. - - .. deprecated:: 0.25.0 - - Use `.dtypes.value_counts()` instead. - - Returns - ------- - dtype : Series - Series with the count of columns with each dtype. - - See Also - -------- - dtypes : Return the dtypes in this object. - - Examples - -------- - >>> a = [['a', 1, 1.0], ['b', 2, 2.0], ['c', 3, 3.0]] - >>> df = pd.DataFrame(a, columns=['str', 'int', 'float']) - >>> df - str int float - 0 a 1 1.0 - 1 b 2 2.0 - 2 c 3 3.0 - - >>> df.get_dtype_counts() - float64 1 - int64 1 - object 1 - dtype: int64 - """ - warnings.warn( - "`get_dtype_counts` has been deprecated and will be " - "removed in a future version. For DataFrames use " - "`.dtypes.value_counts()", - FutureWarning, - stacklevel=2, - ) - - from pandas import Series - - return Series(self._data.get_dtype_counts()) - @property def dtypes(self): """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4a3fa26c3460e..c2352c94f1316 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -214,7 +214,7 @@ class Index(IndexOpsMixin, PandasObject): _deprecations: FrozenSet[str] = ( PandasObject._deprecations | IndexOpsMixin._deprecations - | frozenset(["asobject", "contains", "dtype_str", "get_values", "set_value"]) + | frozenset(["contains", "get_values", "set_value"]) ) # To hand over control to subclasses @@ -670,21 +670,6 @@ def dtype(self): """ return self._data.dtype - @property - def dtype_str(self): - """ - Return the dtype str of the underlying data. - - .. 
deprecated:: 0.25.0 - """ - warnings.warn( - "`dtype_str` has been deprecated. Call `str` on the " - "dtype attribute instead.", - FutureWarning, - stacklevel=2, - ) - return str(self.dtype) - def ravel(self, order="C"): """ Return an ndarray of the flattened values of the underlying data. diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 528053aa8c7f1..dff1e58641ade 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -93,10 +93,7 @@ class TestConstructors(base.BaseConstructorsTests): class TestReshaping(base.BaseReshapingTests): - def test_ravel(self, data): - # GH#27199 Categorical.ravel returns self until after deprecation cycle - with tm.assert_produces_warning(FutureWarning): - data.ravel() + pass class TestGetitem(base.BaseGetitemTests): diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index c180511e31619..0912a8901dc6a 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -950,23 +950,3 @@ def test_axis_classmethods(self, box): assert obj._get_axis_number(v) == box._get_axis_number(v) assert obj._get_axis_name(v) == box._get_axis_name(v) assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v) - - def test_deprecated_to_dense(self): - # GH 26557: DEPR - # Deprecated 0.25.0 - - df = pd.DataFrame({"A": [1, 2, 3]}) - with tm.assert_produces_warning(FutureWarning): - result = df.to_dense() - tm.assert_frame_equal(result, df) - - ser = pd.Series([1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - result = ser.to_dense() - tm.assert_series_equal(result, ser) - - def test_deprecated_get_dtype_counts(self): - # GH 18262 - df = DataFrame([1]) - with tm.assert_produces_warning(FutureWarning): - df.get_dtype_counts() diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index a7f58b9ea78bd..3a8063aed8d20 
100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -7,13 +7,6 @@ import pandas.util.testing as tm -def test_dtype_str(indices): - with tm.assert_produces_warning(FutureWarning): - dtype = indices.dtype_str - assert isinstance(dtype, str) - assert dtype == str(indices.dtype) - - def test_format(idx): idx.format() idx[:0].format() diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index d75bd7bb21827..a07a87080804f 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -156,17 +156,6 @@ def test_shallow_copy_changing_freq_raises(self): with pytest.raises(IncompatibleFrequency, match=msg): pi._shallow_copy(pi, freq="H") - def test_dtype_str(self): - pi = pd.PeriodIndex([], freq="M") - with tm.assert_produces_warning(FutureWarning): - assert pi.dtype_str == "period[M]" - assert pi.dtype_str == str(pi.dtype) - - with tm.assert_produces_warning(FutureWarning): - pi = pd.PeriodIndex([], freq="3M") - assert pi.dtype_str == "period[3M]" - assert pi.dtype_str == str(pi.dtype) - def test_view_asi8(self): idx = pd.PeriodIndex([], freq="M") diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 558ba04b657a1..9e60b91db5e18 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -158,12 +158,6 @@ def test_set_name_methods(self, indices): assert indices.name == name assert indices.names == [name] - def test_dtype_str(self, indices): - with tm.assert_produces_warning(FutureWarning): - dtype = indices.dtype_str - assert isinstance(dtype, str) - assert dtype == str(indices.dtype) - def test_hash_error(self, indices): index = indices with pytest.raises( From c4fa6a52f7737aecda08f6b0f2d6c27476298ae1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 1 Dec 2019 13:32:56 -0800 Subject: [PATCH 29/49] BUG: merge_asof with tz_aware left index and right 
column (#29940) --- doc/source/whatsnew/v1.0.0.rst | 3 +- pandas/core/reshape/merge.py | 2 +- pandas/tests/reshape/merge/test_merge_asof.py | 31 +++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7d3f61ccf4e9f..63f6d3b694b2e 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -668,7 +668,8 @@ Reshaping - Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) - Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`) - Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) - +- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) +- Sparse ^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index fdd31b3b7c022..d671fff568891 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1027,7 +1027,7 @@ def _get_merge_keys(self): ) ] else: - left_keys = [self.left.index.values] + left_keys = [self.left.index._values] if left_drop: self.left = self.left._drop_labels_or_levels(left_drop) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index e12aad870f1c1..b2e764c5463fa 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1303,3 +1303,34 @@ def test_int_type_tolerance(self, any_int_dtype): result = pd.merge_asof(left, right, on="a", tolerance=10) tm.assert_frame_equal(result, expected) + + def test_merge_index_column_tz(self): + # GH 29864 + index = pd.date_range("2019-10-01", freq="30min", periods=5, tz="UTC") + left = pd.DataFrame([0.9, 0.8, 0.7, 0.6], 
columns=["xyz"], index=index[1:]) + right = pd.DataFrame({"from_date": index, "abc": [2.46] * 4 + [2.19]}) + result = pd.merge_asof( + left=left, right=right, left_index=True, right_on=["from_date"] + ) + expected = pd.DataFrame( + { + "xyz": [0.9, 0.8, 0.7, 0.6], + "from_date": index[1:], + "abc": [2.46] * 3 + [2.19], + }, + index=pd.Index([1, 2, 3, 4]), + ) + tm.assert_frame_equal(result, expected) + + result = pd.merge_asof( + left=right, right=left, right_index=True, left_on=["from_date"] + ) + expected = pd.DataFrame( + { + "from_date": index, + "abc": [2.46] * 4 + [2.19], + "xyz": [np.nan, 0.9, 0.8, 0.7, 0.6], + }, + index=pd.Index([0, 1, 2, 3, 4]), + ) + tm.assert_frame_equal(result, expected) From 9f279b5b5d2046ee6131472ab0f08e754a2058a8 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sun, 1 Dec 2019 23:35:40 +0200 Subject: [PATCH 30/49] STY: F-strings and repr (#29938) --- pandas/tests/tseries/offsets/test_fiscal.py | 7 +- pandas/tests/tseries/offsets/test_offsets.py | 11 +- pandas/tseries/offsets.py | 107 ++++------- pandas/util/_decorators.py | 35 ++-- pandas/util/testing.py | 180 +++++++------------ 5 files changed, 128 insertions(+), 212 deletions(-) diff --git a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py index 8b1aaafb94e0b..c97e0b8493f9c 100644 --- a/pandas/tests/tseries/offsets/test_fiscal.py +++ b/pandas/tests/tseries/offsets/test_fiscal.py @@ -79,10 +79,9 @@ def test_get_offset(): for name, expected in pairs: offset = get_offset(name) - assert ( - offset == expected - ), "Expected {name!r} to yield {expected!r} (actual: {offset!r})".format( - name=name, expected=expected, offset=offset + assert offset == expected, ( + f"Expected {repr(name)} to yield {repr(expected)} " + f"(actual: {repr(offset)})" ) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index ae78d5a55bb5e..458d69c1d3216 100644 
--- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -3969,10 +3969,9 @@ def test_get_offset(): for name, expected in pairs: offset = get_offset(name) - assert ( - offset == expected - ), "Expected {name!r} to yield {expected!r} (actual: {offset!r})".format( - name=name, expected=expected, offset=offset + assert offset == expected, ( + f"Expected {repr(name)} to yield {repr(expected)} " + f"(actual: {repr(offset)})" ) @@ -4170,9 +4169,9 @@ def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset): def _make_timestamp(self, string, hrs_offset, tz): if hrs_offset >= 0: - offset_string = "{hrs:02d}00".format(hrs=hrs_offset) + offset_string = f"{hrs_offset:02d}00" else: - offset_string = "-{hrs:02d}00".format(hrs=-1 * hrs_offset) + offset_string = f"-{(hrs_offset * -1):02}00" return Timestamp(string + offset_string).tz_convert(tz) def test_springforward_plural(self): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 9c0bceb1d5110..96a9ad1e4d5f2 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -359,8 +359,8 @@ def apply_index(self, i): kwd = set(kwds) - relativedelta_fast raise NotImplementedError( "DateOffset with relativedelta " - "keyword(s) {kwd} not able to be " - "applied vectorized".format(kwd=kwd) + f"keyword(s) {kwd} not able to be " + "applied vectorized" ) def isAnchored(self): @@ -379,7 +379,7 @@ def _repr_attrs(self): continue elif attr not in exclude: value = getattr(self, attr) - attrs.append("{attr}={value}".format(attr=attr, value=value)) + attrs.append(f"{attr}={value}") out = "" if attrs: @@ -449,7 +449,7 @@ def freqstr(self): return repr(self) if self.n != 1: - fstr = "{n}{code}".format(n=self.n, code=code) + fstr = f"{self.n}{code}" else: fstr = code @@ -467,7 +467,7 @@ def _offset_str(self): @property def nanos(self): - raise ValueError("{name} is a non-fixed frequency".format(name=self)) + raise ValueError(f"{self} is a non-fixed 
frequency") class SingleConstructorOffset(DateOffset): @@ -475,7 +475,7 @@ class SingleConstructorOffset(DateOffset): def _from_name(cls, suffix=None): # default _from_name calls cls with no args if suffix: - raise ValueError("Bad freq suffix {suffix}".format(suffix=suffix)) + raise ValueError(f"Bad freq suffix {suffix}") return cls() @@ -513,7 +513,7 @@ def offset(self): def _repr_attrs(self): if self.offset: - attrs = ["offset={offset!r}".format(offset=self.offset)] + attrs = [f"offset={repr(self.offset)}"] else: attrs = None out = "" @@ -966,10 +966,10 @@ def _onOffset(self, dt): def _repr_attrs(self): out = super()._repr_attrs() hours = ",".join( - "{}-{}".format(st.strftime("%H:%M"), en.strftime("%H:%M")) + f'{st.strftime("%H:%M")}-{en.strftime("%H:%M")}' for st, en in zip(self.start, self.end) ) - attrs = ["{prefix}={hours}".format(prefix=self._prefix, hours=hours)] + attrs = [f"{self._prefix}={hours}"] out += ": " + ", ".join(attrs) return out @@ -1113,7 +1113,7 @@ def name(self): return self.rule_code else: month = ccalendar.MONTH_ALIASES[self.n] - return "{code}-{month}".format(code=self.rule_code, month=month) + return f"{self.rule_code}-{month}" def onOffset(self, dt): if self.normalize and not _is_normalized(dt): @@ -1296,9 +1296,10 @@ def __init__(self, n=1, normalize=False, day_of_month=None): else: object.__setattr__(self, "day_of_month", int(day_of_month)) if not self._min_day_of_month <= self.day_of_month <= 27: - msg = "day_of_month must be {min}<=day_of_month<=27, got {day}" raise ValueError( - msg.format(min=self._min_day_of_month, day=self.day_of_month) + "day_of_month must be " + f"{self._min_day_of_month}<=day_of_month<=27, " + f"got {self.day_of_month}" ) @classmethod @@ -1307,7 +1308,7 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - suffix = "-{day_of_month}".format(day_of_month=self.day_of_month) + suffix = f"-{self.day_of_month}" return self._prefix + suffix @apply_wraps @@ -1527,9 +1528,7 @@ def __init__(self, n=1,
normalize=False, weekday=None): if self.weekday is not None: if self.weekday < 0 or self.weekday > 6: - raise ValueError( - "Day must be 0<=day<=6, got {day}".format(day=self.weekday) - ) + raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") def isAnchored(self): return self.n == 1 and self.weekday is not None @@ -1541,9 +1540,7 @@ def apply(self, other): if not isinstance(other, datetime): raise TypeError( - "Cannot add {typ} to {cls}".format( - typ=type(other).__name__, cls=type(self).__name__ - ) + f"Cannot add {type(other).__name__} to {type(self).__name__}" ) k = self.n @@ -1621,7 +1618,7 @@ def rule_code(self): suffix = "" if self.weekday is not None: weekday = ccalendar.int_to_weekday[self.weekday] - suffix = "-{weekday}".format(weekday=weekday) + suffix = f"-{weekday}" return self._prefix + suffix @classmethod @@ -1690,13 +1687,9 @@ def __init__(self, n=1, normalize=False, week=0, weekday=0): object.__setattr__(self, "week", week) if self.weekday < 0 or self.weekday > 6: - raise ValueError( - "Day must be 0<=day<=6, got {day}".format(day=self.weekday) - ) + raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") if self.week < 0 or self.week > 3: - raise ValueError( - "Week must be 0<=week<=3, got {week}".format(week=self.week) - ) + raise ValueError(f"Week must be 0<=week<=3, got {self.week}") def _get_offset_day(self, other): """ @@ -1719,16 +1712,12 @@ def _get_offset_day(self, other): @property def rule_code(self): weekday = ccalendar.int_to_weekday.get(self.weekday, "") - return "{prefix}-{week}{weekday}".format( - prefix=self._prefix, week=self.week + 1, weekday=weekday - ) + return f"{self._prefix}-{self.week + 1}{weekday}" @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError( - "Prefix {prefix!r} requires a suffix.".format(prefix=cls._prefix) - ) + raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") # TODO: handle n here... # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) 
week = int(suffix[0]) - 1 @@ -1768,9 +1757,7 @@ def __init__(self, n=1, normalize=False, weekday=0): raise ValueError("N cannot be 0") if self.weekday < 0 or self.weekday > 6: - raise ValueError( - "Day must be 0<=day<=6, got {day}".format(day=self.weekday) - ) + raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") def _get_offset_day(self, other): """ @@ -1794,14 +1781,12 @@ def _get_offset_day(self, other): @property def rule_code(self): weekday = ccalendar.int_to_weekday.get(self.weekday, "") - return "{prefix}-{weekday}".format(prefix=self._prefix, weekday=weekday) + return f"{self._prefix}-{weekday}" @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError( - "Prefix {prefix!r} requires a suffix.".format(prefix=cls._prefix) - ) + raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") # TODO: handle n here... weekday = ccalendar.weekday_to_int[suffix] return cls(weekday=weekday) @@ -1847,7 +1832,7 @@ def _from_name(cls, suffix=None): @property def rule_code(self): month = ccalendar.MONTH_ALIASES[self.startingMonth] - return "{prefix}-{month}".format(prefix=self._prefix, month=month) + return f"{self._prefix}-{month}" @apply_wraps def apply(self, other): @@ -1990,7 +1975,7 @@ def _from_name(cls, suffix=None): @property def rule_code(self): month = ccalendar.MONTH_ALIASES[self.month] - return "{prefix}-{month}".format(prefix=self._prefix, month=month) + return f"{self._prefix}-{month}" class BYearEnd(YearOffset): @@ -2104,9 +2089,7 @@ def __init__( raise ValueError("N cannot be 0") if self.variation not in ["nearest", "last"]: - raise ValueError( - "{variation} is not a valid variation".format(variation=self.variation) - ) + raise ValueError(f"{self.variation} is not a valid variation") def isAnchored(self): return ( @@ -2211,7 +2194,7 @@ def get_year_end(self, dt): def rule_code(self): prefix = self._prefix suffix = self.get_rule_code_suffix() - return "{prefix}-{suffix}".format(prefix=prefix, suffix=suffix) + return 
f"{prefix}-{suffix}" def _get_suffix_prefix(self): if self.variation == "nearest": @@ -2223,9 +2206,7 @@ def get_rule_code_suffix(self): prefix = self._get_suffix_prefix() month = ccalendar.MONTH_ALIASES[self.startingMonth] weekday = ccalendar.int_to_weekday[self.weekday] - return "{prefix}-{month}-{weekday}".format( - prefix=prefix, month=month, weekday=weekday - ) + return f"{prefix}-{month}-{weekday}" @classmethod def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): @@ -2234,9 +2215,7 @@ def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): elif varion_code == "L": variation = "last" else: - raise ValueError( - "Unable to parse varion_code: {code}".format(code=varion_code) - ) + raise ValueError(f"Unable to parse varion_code: {varion_code}") startingMonth = ccalendar.MONTH_TO_CAL_NUM[startingMonth_code] weekday = ccalendar.weekday_to_int[weekday_code] @@ -2461,9 +2440,7 @@ def onOffset(self, dt): def rule_code(self): suffix = self._offset.get_rule_code_suffix() qtr = self.qtr_with_extra_week - return "{prefix}-{suffix}-{qtr}".format( - prefix=self._prefix, suffix=suffix, qtr=qtr - ) + return f"{self._prefix}-{suffix}-{qtr}" @classmethod def _from_name(cls, *args): @@ -2532,12 +2509,11 @@ def f(self, other): except AttributeError: # comparing with a non-Tick object raise TypeError( - "Invalid comparison between {cls} and {typ}".format( - cls=type(self).__name__, typ=type(other).__name__ - ) + f"Invalid comparison between {type(self).__name__} " + f"and {type(other).__name__}" ) - f.__name__ = "__{opname}__".format(opname=op.__name__) + f.__name__ = f"__{op.__name__}__" return f @@ -2572,8 +2548,7 @@ def __add__(self, other): return NotImplemented except OverflowError: raise OverflowError( - "the add operation between {self} and {other} " - "will overflow".format(self=self, other=other) + f"the add operation between {self} and {other} will overflow" ) def __eq__(self, other) -> bool: @@ -2645,9 +2620,7 @@ def apply(self, other): 
elif isinstance(other, type(self)): return type(self)(self.n + other.n) - raise ApplyTypeError( - "Unhandled type: {type_str}".format(type_str=type(other).__name__) - ) + raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") def isAnchored(self): return False @@ -2783,9 +2756,7 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()): # faster than cur + offset next_date = offset.apply(cur) if next_date <= cur: - raise ValueError( - "Offset {offset} did not increment date".format(offset=offset) - ) + raise ValueError(f"Offset {offset} did not increment date") cur = next_date else: while cur >= end: @@ -2799,9 +2770,7 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()): # faster than cur + offset next_date = offset.apply(cur) if next_date >= cur: - raise ValueError( - "Offset {offset} did not decrement date".format(offset=offset) - ) + raise ValueError(f"Offset {offset} did not decrement date") cur = next_date diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index b8f17cd848292..2684b90e33b7e 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -58,7 +58,7 @@ def deprecate( alt_name = alt_name or alternative.__name__ klass = klass or FutureWarning - warning_msg = msg or "{} is deprecated, use {} instead".format(name, alt_name) + warning_msg = msg or f"{name} is deprecated, use {alt_name} instead" @wraps(alternative) def wrapper(*args, **kwargs) -> Callable[..., Any]: @@ -66,12 +66,12 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: return alternative(*args, **kwargs) # adding deprecated directive to the docstring - msg = msg or "Use `{alt_name}` instead.".format(alt_name=alt_name) + msg = msg or f"Use `{alt_name}` instead." doc_error_msg = ( "deprecate needs a correctly formatted docstring in " "the target function (should have a one liner short " "summary, and opening quotes should be in their own " - "line). Found:\n{}".format(alternative.__doc__) + f"line). 
Found:\n{alternative.__doc__}" ) # when python is running in optimized mode (i.e. `-OO`), docstrings are @@ -182,10 +182,10 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: if old_arg_value is not None: if new_arg_name is None: msg = ( - "the '{old_name}' keyword is deprecated and will be " - "removed in a future version. " - "Please take steps to stop the use of '{old_name}'" - ).format(old_name=old_arg_name) + f"the {repr(old_arg_name)} keyword is deprecated and " + "will be removed in a future version. Please take " + f"steps to stop the use of {repr(old_arg_name)}" + ) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) kwargs[old_arg_name] = old_arg_value return func(*args, **kwargs) @@ -196,26 +196,23 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: else: new_arg_value = mapping.get(old_arg_value, old_arg_value) msg = ( - "the {old_name}={old_val!r} keyword is deprecated, " - "use {new_name}={new_val!r} instead" - ).format( - old_name=old_arg_name, - old_val=old_arg_value, - new_name=new_arg_name, - new_val=new_arg_value, + f"the {old_arg_name}={repr(old_arg_value)} keyword is " + "deprecated, use " + f"{new_arg_name}={repr(new_arg_value)} instead" ) else: new_arg_value = old_arg_value msg = ( - "the '{old_name}' keyword is deprecated, " - "use '{new_name}' instead" - ).format(old_name=old_arg_name, new_name=new_arg_name) + f"the {repr(old_arg_name)}' keyword is deprecated, " + f"use {repr(new_arg_name)} instead" + ) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) if kwargs.get(new_arg_name) is not None: msg = ( - "Can only specify '{old_name}' or '{new_name}', not both" - ).format(old_name=old_arg_name, new_name=new_arg_name) + f"Can only specify {repr(old_arg_name)} " + f"or {repr(new_arg_name)}, not both" + ) raise TypeError(msg) else: kwargs[new_arg_name] = new_arg_value diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 9adbf4cee5d74..6350b1075f4a0 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py 
@@ -119,7 +119,7 @@ def round_trip_pickle(obj, path=None): """ if path is None: - path = "__{random_bytes}__.pickle".format(random_bytes=rands(10)) + path = f"__{rands(10)}__.pickle" with ensure_clean(path) as path: pd.to_pickle(obj, path) return pd.read_pickle(path) @@ -216,10 +216,9 @@ def decompress_file(path, compression): if len(zip_names) == 1: f = zip_file.open(zip_names.pop()) else: - raise ValueError("ZIP file {} error. Only one file per ZIP.".format(path)) + raise ValueError(f"ZIP file {path} error. Only one file per ZIP.") else: - msg = "Unrecognized compression type: {}".format(compression) - raise ValueError(msg) + raise ValueError(f"Unrecognized compression type: {compression}") try: yield f @@ -264,8 +263,7 @@ def write_to_compressed(compression, path, data, dest="test"): elif compression == "xz": compress_method = _get_lzma_file(lzma) else: - msg = "Unrecognized compression type: {}".format(compression) - raise ValueError(msg) + raise ValueError(f"Unrecognized compression type: {compression}") if compression == "zip": mode = "w" @@ -379,17 +377,15 @@ def _check_isinstance(left, right, cls): ------ AssertionError : Either `left` or `right` is not an instance of `cls`. 
""" - - err_msg = "{name} Expected type {exp_type}, found {act_type} instead" cls_name = cls.__name__ if not isinstance(left, cls): raise AssertionError( - err_msg.format(name=cls_name, exp_type=cls, act_type=type(left)) + f"{cls_name} Expected type {cls}, found {type(left)} instead" ) if not isinstance(right, cls): raise AssertionError( - err_msg.format(name=cls_name, exp_type=cls, act_type=type(right)) + f"{cls_name} Expected type {cls}, found {type(right)} instead" ) @@ -510,16 +506,12 @@ def ensure_clean(filename=None, return_filelike=False): try: os.close(fd) except OSError: - print( - "Couldn't close file descriptor: {fdesc} (file: {fname})".format( - fdesc=fd, fname=filename - ) - ) + print(f"Couldn't close file descriptor: {fd} (file: {filename})") try: if os.path.exists(filename): os.remove(filename) except OSError as e: - print("Exception on removing file: {error}".format(error=e)) + print(f"Exception on removing file: {e}") @contextmanager @@ -634,16 +626,16 @@ def _get_ilevel_values(index, level): # level comparison if left.nlevels != right.nlevels: - msg1 = "{obj} levels are different".format(obj=obj) - msg2 = "{nlevels}, {left}".format(nlevels=left.nlevels, left=left) - msg3 = "{nlevels}, {right}".format(nlevels=right.nlevels, right=right) + msg1 = f"{obj} levels are different" + msg2 = f"{left.nlevels}, {left}" + msg3 = f"{right.nlevels}, {right}" raise_assert_detail(obj, msg1, msg2, msg3) # length comparison if len(left) != len(right): - msg1 = "{obj} length are different".format(obj=obj) - msg2 = "{length}, {left}".format(length=len(left), left=left) - msg3 = "{length}, {right}".format(length=len(right), right=right) + msg1 = f"{obj} length are different" + msg2 = f"{len(left)}, {left}" + msg3 = f"{len(right)}, {right}" raise_assert_detail(obj, msg1, msg2, msg3) # MultiIndex special comparison for little-friendly error messages @@ -656,7 +648,7 @@ def _get_ilevel_values(index, level): llevel = _get_ilevel_values(left, level) rlevel = 
_get_ilevel_values(right, level) - lobj = "MultiIndex level [{level}]".format(level=level) + lobj = f"MultiIndex level [{level}]" assert_index_equal( llevel, rlevel, @@ -673,9 +665,7 @@ def _get_ilevel_values(index, level): if check_exact and check_categorical: if not left.equals(right): diff = np.sum((left.values != right.values).astype(int)) * 100.0 / len(left) - msg = "{obj} values are different ({pct} %)".format( - obj=obj, pct=np.round(diff, 5) - ) + msg = f"{obj} values are different ({np.round(diff, 5)} %)" raise_assert_detail(obj, msg, left, right) else: _testing.assert_almost_equal( @@ -698,9 +688,7 @@ def _get_ilevel_values(index, level): if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): - assert_categorical_equal( - left.values, right.values, obj="{obj} category".format(obj=obj) - ) + assert_categorical_equal(left.values, right.values, obj=f"{obj} category") def assert_class_equal(left, right, exact=True, obj="Input"): @@ -722,11 +710,11 @@ def repr_class(x): # allow equivalence of Int64Index/RangeIndex types = {type(left).__name__, type(right).__name__} if len(types - {"Int64Index", "RangeIndex"}): - msg = "{obj} classes are not equivalent".format(obj=obj) + msg = f"{obj} classes are not equivalent" raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) elif exact: if type(left) != type(right): - msg = "{obj} classes are different".format(obj=obj) + msg = f"{obj} classes are different" raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) @@ -770,7 +758,7 @@ def assert_attr_equal(attr, left, right, obj="Attributes"): if result: return True else: - msg = 'Attribute "{attr}" are different'.format(attr=attr) + msg = f'Attribute "{attr}" are different' raise_assert_detail(obj, msg, left_attr, right_attr) @@ -828,25 +816,20 @@ def assert_categorical_equal( _check_isinstance(left, right, Categorical) if check_category_order: - assert_index_equal( - left.categories, right.categories, 
obj="{obj}.categories".format(obj=obj) - ) + assert_index_equal(left.categories, right.categories, obj=f"{obj}.categories") assert_numpy_array_equal( - left.codes, - right.codes, - check_dtype=check_dtype, - obj="{obj}.codes".format(obj=obj), + left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes", ) else: assert_index_equal( left.categories.sort_values(), right.categories.sort_values(), - obj="{obj}.categories".format(obj=obj), + obj=f"{obj}.categories", ) assert_index_equal( left.categories.take(left.codes), right.categories.take(right.codes), - obj="{obj}.values".format(obj=obj), + obj=f"{obj}.values", ) assert_attr_equal("ordered", left, right, obj=obj) @@ -869,21 +852,15 @@ def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray") """ _check_isinstance(left, right, IntervalArray) - assert_index_equal( - left.left, right.left, exact=exact, obj="{obj}.left".format(obj=obj) - ) - assert_index_equal( - left.right, right.right, exact=exact, obj="{obj}.left".format(obj=obj) - ) + assert_index_equal(left.left, right.left, exact=exact, obj=f"{obj}.left") + assert_index_equal(left.right, right.right, exact=exact, obj=f"{obj}.left") assert_attr_equal("closed", left, right, obj=obj) def assert_period_array_equal(left, right, obj="PeriodArray"): _check_isinstance(left, right, PeriodArray) - assert_numpy_array_equal( - left._data, right._data, obj="{obj}.values".format(obj=obj) - ) + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}.values") assert_attr_equal("freq", left, right, obj=obj) @@ -891,7 +868,7 @@ def assert_datetime_array_equal(left, right, obj="DatetimeArray"): __tracebackhide__ = True _check_isinstance(left, right, DatetimeArray) - assert_numpy_array_equal(left._data, right._data, obj="{obj}._data".format(obj=obj)) + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") assert_attr_equal("freq", left, right, obj=obj) assert_attr_equal("tz", left, right, obj=obj) @@ -899,7 +876,7 @@ def 
assert_datetime_array_equal(left, right, obj="DatetimeArray"): def assert_timedelta_array_equal(left, right, obj="TimedeltaArray"): __tracebackhide__ = True _check_isinstance(left, right, TimedeltaArray) - assert_numpy_array_equal(left._data, right._data, obj="{obj}._data".format(obj=obj)) + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") assert_attr_equal("freq", left, right, obj=obj) @@ -916,16 +893,14 @@ def raise_assert_detail(obj, message, left, right, diff=None): elif is_categorical_dtype(right): right = repr(right) - msg = """{obj} are different + msg = f"""{obj} are different {message} [left]: {left} -[right]: {right}""".format( - obj=obj, message=message, left=left, right=right - ) +[right]: {right}""" if diff is not None: - msg += "\n[diff]: {diff}".format(diff=diff) + msg += f"\n[diff]: {diff}" raise AssertionError(msg) @@ -973,21 +948,16 @@ def _get_base(obj): if check_same == "same": if left_base is not right_base: - msg = "{left!r} is not {right!r}".format(left=left_base, right=right_base) - raise AssertionError(msg) + raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}") elif check_same == "copy": if left_base is right_base: - msg = "{left!r} is {right!r}".format(left=left_base, right=right_base) - raise AssertionError(msg) + raise AssertionError(f"{repr(left_base)} is {repr(right_base)}") def _raise(left, right, err_msg): if err_msg is None: if left.shape != right.shape: raise_assert_detail( - obj, - "{obj} shapes are different".format(obj=obj), - left.shape, - right.shape, + obj, f"{obj} shapes are different", left.shape, right.shape, ) diff = 0 @@ -997,9 +967,7 @@ def _raise(left, right, err_msg): diff += 1 diff = diff * 100.0 / left.size - msg = "{obj} values are different ({pct} %)".format( - obj=obj, pct=np.round(diff, 5) - ) + msg = f"{obj} values are different ({np.round(diff, 5)} %)" raise_assert_detail(obj, msg, left, right) raise AssertionError(err_msg) @@ -1128,8 +1096,8 @@ def assert_series_equal( # 
length comparison if len(left) != len(right): - msg1 = "{len}, {left}".format(len=len(left), left=left.index) - msg2 = "{len}, {right}".format(len=len(right), right=right.index) + msg1 = f"{len(left)}, {left.index}" + msg2 = f"{len(right)}, {right.index}" raise_assert_detail(obj, "Series length are different", msg1, msg2) # index comparison @@ -1141,7 +1109,7 @@ def assert_series_equal( check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj="{obj}.index".format(obj=obj), + obj=f"{obj}.index", ) if check_dtype: @@ -1155,16 +1123,14 @@ def assert_series_equal( ): pass else: - assert_attr_equal( - "dtype", left, right, obj="Attributes of {obj}".format(obj=obj) - ) + assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") if check_exact: assert_numpy_array_equal( left._internal_get_values(), right._internal_get_values(), check_dtype=check_dtype, - obj="{obj}".format(obj=obj), + obj=str(obj), ) elif check_datetimelike_compat: # we want to check only if we have compat dtypes @@ -1176,8 +1142,9 @@ def assert_series_equal( # vs Timestamp) but will compare equal if not Index(left.values).equals(Index(right.values)): msg = ( - "[datetimelike_compat=True] {left} is not equal to {right}." - ).format(left=left.values, right=right.values) + f"[datetimelike_compat=True] {left.values} " + f"is not equal to {right.values}." 
+ ) raise AssertionError(msg) else: assert_numpy_array_equal( @@ -1205,7 +1172,7 @@ def assert_series_equal( right._internal_get_values(), check_less_precise=check_less_precise, check_dtype=check_dtype, - obj="{obj}".format(obj=obj), + obj=str(obj), ) # metadata comparison @@ -1214,9 +1181,7 @@ def assert_series_equal( if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): - assert_categorical_equal( - left.values, right.values, obj="{obj} category".format(obj=obj) - ) + assert_categorical_equal(left.values, right.values, obj=f"{obj} category") # This could be refactored to use the NDFrame.equals method @@ -1336,10 +1301,7 @@ def assert_frame_equal( # shape comparison if left.shape != right.shape: raise_assert_detail( - obj, - "{obj} shape mismatch".format(obj=obj), - "{shape!r}".format(shape=left.shape), - "{shape!r}".format(shape=right.shape), + obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}", ) if check_like: @@ -1354,7 +1316,7 @@ def assert_frame_equal( check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj="{obj}.index".format(obj=obj), + obj=f"{obj}.index", ) # column comparison @@ -1366,7 +1328,7 @@ def assert_frame_equal( check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj="{obj}.columns".format(obj=obj), + obj=f"{obj}.columns", ) # compare by blocks @@ -1396,7 +1358,7 @@ def assert_frame_equal( check_names=check_names, check_datetimelike_compat=check_datetimelike_compat, check_categorical=check_categorical, - obj="{obj}.iloc[:, {idx}]".format(obj=obj, idx=i), + obj=f"{obj}.iloc[:, {i}]", ) @@ -1562,7 +1524,7 @@ def assert_sp_array_equal( def assert_contains_all(iterable, dic): for k in iterable: - assert k in dic, "Did not contain item: '{key!r}'".format(key=k) + assert k in dic, f"Did not contain item: {repr(k)}" def assert_copy(iter1, iter2, **eql_kwargs): @@ -1577,9 +1539,9 @@ def 
assert_copy(iter1, iter2, **eql_kwargs): for elem1, elem2 in zip(iter1, iter2): assert_almost_equal(elem1, elem2, **eql_kwargs) msg = ( - "Expected object {obj1!r} and object {obj2!r} to be " + f"Expected object {repr(type(elem1))} and object {repr(type(elem2))} to be " "different objects, but they were the same object." - ).format(obj1=type(elem1), obj2=type(elem2)) + ) assert elem1 is not elem2, msg @@ -1926,8 +1888,8 @@ def makeCustomIndex( return idx elif idx_type is not None: raise ValueError( - '"{idx_type}" is not a legal value for `idx_type`, ' - 'use "i"/"f"/"s"/"u"/"dt/"p"/"td".'.format(idx_type=idx_type) + f"{repr(idx_type)} is not a legal value for `idx_type`, " + "use 'i'/'f'/'s'/'u'/'dt'/'p'/'td'." ) if len(ndupe_l) < nlevels: @@ -1949,7 +1911,7 @@ def keyfunc(x): div_factor = nentries // ndupe_l[i] + 1 cnt = Counter() for j in range(div_factor): - label = "{prefix}_l{i}_g{j}".format(prefix=prefix, i=i, j=j) + label = f"{prefix}_l{i}_g{j}" cnt[label] = ndupe_l[i] # cute Counter trick result = sorted(cnt.elements(), key=keyfunc)[:nentries] @@ -2066,7 +2028,7 @@ def makeCustomDataframe( # by default, generate data based on location if data_gen_f is None: - data_gen_f = lambda r, c: "R{rows}C{cols}".format(rows=r, cols=c) + data_gen_f = lambda r, c: f"R{r}C{c}" data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)] @@ -2370,17 +2332,13 @@ def wrapper(*args, **kwargs): errno = getattr(err.reason, "errno", None) if errno in skip_errnos: - skip( - "Skipping test due to known errno" - " and error {error}".format(error=err) - ) + skip(f"Skipping test due to known errno and error {err}") e_str = str(err) if any(m.lower() in e_str.lower() for m in _skip_on_messages): skip( - "Skipping test because exception " - "message is known and error {error}".format(error=err) + f"Skipping test because exception message is known and error {err}" ) if not isinstance(err, error_classes): @@ -2389,10 +2347,7 @@ def wrapper(*args, **kwargs): if raise_on_error 
or can_connect(url, error_classes): raise else: - skip( - "Skipping test due to lack of connectivity" - " and error {error}".format(error=err) - ) + skip(f"Skipping test due to lack of connectivity and error {err}") return wrapper @@ -2504,12 +2459,8 @@ class for all warnings. To check that no warning is returned, caller = getframeinfo(stack()[2][0]) msg = ( "Warning not set with correct stacklevel. " - "File where warning is raised: {actual} != " - "{caller}. Warning message: {message}" - ).format( - actual=actual_warning.filename, - caller=caller.filename, - message=actual_warning.message, + f"File where warning is raised: {actual_warning.filename} != " + f"{caller.filename}. Warning message: {actual_warning.message}" ) assert actual_warning.filename == caller.filename, msg else: @@ -2522,13 +2473,14 @@ class for all warnings. To check that no warning is returned, ) ) if expected_warning: - msg = "Did not see expected warning of class {name!r}.".format( - name=expected_warning.__name__ + msg = ( + f"Did not see expected warning of class " + f"{repr(expected_warning.__name__)}" ) assert saw_warning, msg if raise_on_extra_warnings and extra_warnings: raise AssertionError( - "Caused unexpected warning(s): {!r}.".format(extra_warnings) + f"Caused unexpected warning(s): {repr(extra_warnings)}" ) From 5787b2055a51cb6b2747ec088b8ddeb04f495dcc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 1 Dec 2019 13:38:46 -0800 Subject: [PATCH 31/49] DEPR: tz_convert in the Timestamp constructor raises (#29929) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/_libs/tslibs/timestamps.pyx | 5 ++--- pandas/tests/scalar/timestamp/test_timestamp.py | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 63f6d3b694b2e..aaa1bf744baf9 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -466,6 +466,7 @@ or ``matplotlib.Axes.plot``. 
See :ref:`plotting.formatters` for more. - Changed the default ``fill_value`` in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) - Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, - :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) +- Passing a tz-aware ``datetime.datetime`` or :class:`Timestamp` into the :class:`Timestamp` constructor with the ``tz`` argument now raises a ``ValueError`` (:issue:`23621`) - Removed the previously deprecated :attr:`Series.base`, :attr:`Index.base`, :attr:`Categorical.base`, :attr:`Series.flags`, :attr:`Index.flags`, :attr:`PeriodArray.flags`, :attr:`Series.strides`, :attr:`Index.strides`, :attr:`Series.itemsize`, :attr:`Index.itemsize`, :attr:`Series.data`, :attr:`Index.data` (:issue:`20721`) - Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) - Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index fbe71a0a6d198..e4e7f65db8dea 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -401,9 +401,8 @@ class Timestamp(_Timestamp): freq = None if getattr(ts_input, 'tzinfo', None) is not None and tz is not None: - warnings.warn("Passing a datetime or Timestamp with tzinfo and the" - " tz parameter will raise in the future. Use" - " tz_convert instead.", FutureWarning) + raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with the" + " tz parameter. 
Use tz_convert instead.") ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index d6251ffc7940d..512a83ed304d1 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -675,11 +675,13 @@ def test_constructor_invalid_frequency(self): Timestamp("2012-01-01", freq=[]) @pytest.mark.parametrize("box", [datetime, Timestamp]) - def test_depreciate_tz_and_tzinfo_in_datetime_input(self, box): + def test_raise_tz_and_tzinfo_in_datetime_input(self, box): # GH 23579 kwargs = {"year": 2018, "month": 1, "day": 1, "tzinfo": utc} - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(ValueError, match="Cannot pass a datetime or Timestamp"): Timestamp(box(**kwargs), tz="US/Pacific") + with pytest.raises(ValueError, match="Cannot pass a datetime or Timestamp"): + Timestamp(box(**kwargs), tzinfo=pytz.timezone("US/Pacific")) def test_dont_convert_dateutil_utc_to_pytz_utc(self): result = Timestamp(datetime(2018, 1, 1), tz=tzutc()) From a251ba3b0ed7abbfb46e3b30386f13fc3e867b09 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Dec 2019 14:22:05 -0800 Subject: [PATCH 32/49] CLN: make kwargs explicit in pytables constructors (#29936) --- pandas/io/pytables.py | 86 ++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 29835a9bd0c00..6629c0dd30e19 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1706,7 +1706,6 @@ class IndexCol: name: str cname: str - kind_attr: str def __init__( self, @@ -1717,12 +1716,10 @@ def __init__( cname: Optional[str] = None, itemsize=None, axis=None, - kind_attr: Optional[str] = None, pos=None, freq=None, tz=None, index_name=None, - **kwargs, ): if not isinstance(name, str): @@ -1734,7 +1731,6 @@ def __init__( self.itemsize = itemsize 
self.name = name self.cname = cname or name - self.kind_attr = kind_attr or f"{name}_kind" self.axis = axis self.pos = pos self.freq = freq @@ -1751,7 +1747,10 @@ def __init__( # constructor annotations. assert isinstance(self.name, str) assert isinstance(self.cname, str) - assert isinstance(self.kind_attr, str) + + @property + def kind_attr(self) -> str: + return f"{self.name}_kind" def set_pos(self, pos: int): """ set the position of this column in the Table """ @@ -2044,11 +2043,12 @@ class DataCol(IndexCol): _info_fields = ["tz", "ordered"] @classmethod - def create_for_block(cls, i=None, name=None, cname=None, version=None, **kwargs): + def create_for_block( + cls, i: int, name=None, version=None, pos: Optional[int] = None + ): """ return a new datacol with the block i """ - if cname is None: - cname = name or f"values_block_{i}" + cname = name or f"values_block_{i}" if name is None: name = cname @@ -2063,27 +2063,24 @@ def create_for_block(cls, i=None, name=None, cname=None, version=None, **kwargs) except IndexError: pass - return cls(name=name, cname=cname, **kwargs) + return cls(name=name, cname=cname, pos=pos) def __init__( - self, - values=None, - kind=None, - typ=None, - cname=None, - data=None, - meta=None, - metadata=None, - block=None, - **kwargs, + self, name: str, values=None, kind=None, typ=None, cname=None, pos=None, ): - super().__init__(values=values, kind=kind, typ=typ, cname=cname, **kwargs) + super().__init__( + name=name, values=values, kind=kind, typ=typ, pos=pos, cname=cname + ) self.dtype = None - self.dtype_attr = f"{self.name}_dtype" - self.meta = meta - self.meta_attr = f"{self.name}_meta" - self.set_data(data) - self.set_metadata(metadata) + self.data = None + + @property + def dtype_attr(self) -> str: + return f"{self.name}_dtype" + + @property + def meta_attr(self) -> str: + return f"{self.name}_meta" def __repr__(self) -> str: temp = tuple( @@ -3156,8 +3153,15 @@ class Table(Fixed): is_table = True is_shape_reversed = False - def 
__init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + index_axes: List[IndexCol] + non_index_axes: List[Tuple[int, Any]] + values_axes: List[DataCol] + data_columns: List + metadata: List + info: Dict + + def __init__(self, parent: HDFStore, group: "Node", **kwargs): + super().__init__(parent, group, **kwargs) self.index_axes = [] self.non_index_axes = [] self.values_axes = [] @@ -3303,18 +3307,18 @@ def queryables(self) -> Dict[str, Any]: """ return a dict of the kinds allowable columns for this object """ # compute the values_axes queryables - return dict( - [(a.cname, a) for a in self.index_axes] - + [ - (self.storage_obj_type._AXIS_NAMES[axis], None) - for axis, values in self.non_index_axes - ] - + [ - (v.cname, v) - for v in self.values_axes - if v.name in set(self.data_columns) - ] - ) + d1 = [(a.cname, a) for a in self.index_axes] + d2 = [ + (self.storage_obj_type._AXIS_NAMES[axis], None) + for axis, values in self.non_index_axes + ] + d3 = [ + (v.cname, v) for v in self.values_axes if v.name in set(self.data_columns) + ] + + return dict(d1 + d2 + d3) # type: ignore + # error: List comprehension has incompatible type + # List[Tuple[Any, None]]; expected List[Tuple[str, IndexCol]] def index_cols(self): """ return a list of my index cols """ @@ -4432,9 +4436,7 @@ def indexables(self): for i, n in enumerate(d._v_names): assert isinstance(n, str) - dc = GenericDataIndexableCol( - name=n, pos=i, values=[n], version=self.version - ) + dc = GenericDataIndexableCol(name=n, pos=i, values=[n]) self._indexables.append(dc) return self._indexables From 0624ec95077123353a6c39ed21ae49bbe68a0870 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Dec 2019 14:23:27 -0800 Subject: [PATCH 33/49] DEPR: dropna multiple axes, fillna int for td64, from_codes with floats, Series.nonzero (#29875) --- doc/source/whatsnew/v1.0.0.rst | 4 ++ pandas/core/arrays/categorical.py | 18 +----- pandas/core/frame.py | 60 ++++++++----------- pandas/core/internals/blocks.py 
| 12 ++-- pandas/core/series.py | 49 --------------- .../arrays/categorical/test_constructors.py | 10 ++-- pandas/tests/frame/test_missing.py | 17 ++---- pandas/tests/series/test_missing.py | 22 +++---- 8 files changed, 52 insertions(+), 140 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index aaa1bf744baf9..4e8a471239610 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -459,6 +459,10 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`) - Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) - Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) +- Passing an integer to :meth:`Series.fillna` or :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype now raises ``TypeError`` (:issue:`24694`) +- Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`) +- Removed previously deprecated :meth:`Series.nonzero`, use `to_numpy().nonzero()` instead (:issue:`24048`) +- Passing floating dtype ``codes`` to :meth:`Categorical.from_codes` is no longer supported, pass ``codes.astype(np.int64)`` instead (:issue:`21775`) - Removed the previously deprecated :meth:`Series.to_dense`, :meth:`DataFrame.to_dense` (:issue:`26684`) - Removed the previously deprecated :meth:`Index.dtype_str`, use ``str(index.dtype)`` instead (:issue:`27106`) - :meth:`Categorical.ravel` returns a :class:`Categorical` instead of a ``ndarray`` (:issue:`27199`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0dc972011833a..46aab31770fde 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -27,7 +27,6 @@ 
is_dict_like, is_dtype_equal, is_extension_array_dtype, - is_float_dtype, is_integer_dtype, is_iterator, is_list_like, @@ -646,22 +645,7 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): codes = np.asarray(codes) # #21767 if len(codes) and not is_integer_dtype(codes): - msg = "codes need to be array-like integers" - if is_float_dtype(codes): - icodes = codes.astype("i8") - if (icodes == codes).all(): - msg = None - codes = icodes - warn( - ( - "float codes will be disallowed in the future and " - "raise a ValueError" - ), - FutureWarning, - stacklevel=2, - ) - if msg: - raise ValueError(msg) + raise ValueError("codes need to be array-like integers") if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1): raise ValueError("codes need to be between -1 and len(categories)-1") diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ca943111b7e9f..0b690363a2178 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4475,7 +4475,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): * 0, or 'index' : Drop rows which contain missing values. * 1, or 'columns' : Drop columns which contain missing value. - .. deprecated:: 0.23.0 + .. versionchanged:: 1.0.0 Pass tuple or list to drop on multiple axes. Only a single axis is allowed. @@ -4565,43 +4565,35 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): inplace = validate_bool_kwarg(inplace, "inplace") if isinstance(axis, (tuple, list)): # GH20987 - msg = ( - "supplying multiple axes to axis is deprecated and " - "will be removed in a future version." 
- ) - warnings.warn(msg, FutureWarning, stacklevel=2) + raise TypeError("supplying multiple axes to axis is no longer supported.") - result = self - for ax in axis: - result = result.dropna(how=how, thresh=thresh, subset=subset, axis=ax) + axis = self._get_axis_number(axis) + agg_axis = 1 - axis + + agg_obj = self + if subset is not None: + ax = self._get_axis(agg_axis) + indices = ax.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(list(np.compress(check, subset))) + agg_obj = self.take(indices, axis=agg_axis) + + count = agg_obj.count(axis=agg_axis) + + if thresh is not None: + mask = count >= thresh + elif how == "any": + mask = count == len(agg_obj._get_axis(agg_axis)) + elif how == "all": + mask = count > 0 else: - axis = self._get_axis_number(axis) - agg_axis = 1 - axis - - agg_obj = self - if subset is not None: - ax = self._get_axis(agg_axis) - indices = ax.get_indexer_for(subset) - check = indices == -1 - if check.any(): - raise KeyError(list(np.compress(check, subset))) - agg_obj = self.take(indices, axis=agg_axis) - - count = agg_obj.count(axis=agg_axis) - - if thresh is not None: - mask = count >= thresh - elif how == "any": - mask = count == len(agg_obj._get_axis(agg_axis)) - elif how == "all": - mask = count > 0 + if how is not None: + raise ValueError("invalid how option: {h}".format(h=how)) else: - if how is not None: - raise ValueError("invalid how option: {h}".format(h=how)) - else: - raise TypeError("must specify how or thresh") + raise TypeError("must specify how or thresh") - result = self.loc(axis=axis)[mask] + result = self.loc(axis=axis)[mask] if inplace: self._update_inplace(result) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b0382755f2edb..8dd39473ee1f4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2444,15 +2444,11 @@ def fillna(self, value, **kwargs): # interpreted as nanoseconds if is_integer(value): # Deprecation GH#24694, 
GH#19233 - warnings.warn( - "Passing integers to fillna is deprecated, will " - "raise a TypeError in a future version. To retain " - "the old behavior, pass pd.Timedelta(seconds=n) " - "instead.", - FutureWarning, - stacklevel=6, + raise TypeError( + "Passing integers to fillna for timedelta64[ns] dtype is no " + "longer supporetd. To obtain the old behavior, pass " + "`pd.Timedelta(seconds=n)` instead." ) - value = Timedelta(value, unit="s") return super().fillna(value, **kwargs) def should_store(self, value): diff --git a/pandas/core/series.py b/pandas/core/series.py index 56039605651ac..a8232f137f3ef 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -528,55 +528,6 @@ def compress(self, condition, *args, **kwargs): nv.validate_compress(args, kwargs) return self[condition] - def nonzero(self): - """ - Return the *integer* indices of the elements that are non-zero. - - .. deprecated:: 0.24.0 - Please use .to_numpy().nonzero() as a replacement. - - This method is equivalent to calling `numpy.nonzero` on the - series data. For compatibility with NumPy, the return value is - the same (a tuple with an array of indices for each dimension), - but it will always be a one-item tuple because series only have - one dimension. - - Returns - ------- - numpy.ndarray - Indices of elements that are non-zero. - - See Also - -------- - numpy.nonzero - - Examples - -------- - >>> s = pd.Series([0, 3, 0, 4]) - >>> s.nonzero() - (array([1, 3]),) - >>> s.iloc[s.nonzero()[0]] - 1 3 - 3 4 - dtype: int64 - - # same return although index of s is different - >>> s = pd.Series([0, 3, 0, 4], index=['a', 'b', 'c', 'd']) - >>> s.nonzero() - (array([1, 3]),) - >>> s.iloc[s.nonzero()[0]] - b 3 - d 4 - dtype: int64 - """ - msg = ( - "Series.nonzero() is deprecated " - "and will be removed in a future version." 
- "Use Series.to_numpy().nonzero() instead" - ) - warnings.warn(msg, FutureWarning, stacklevel=2) - return self._values.nonzero() - def put(self, *args, **kwargs): """ Apply the `put` method to its `values` attribute if it has one. diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 59017a1442cb4..14bb9b88eee88 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -529,13 +529,11 @@ def test_from_codes_with_float(self): # empty codes should not raise for floats Categorical.from_codes([], dtype.categories) - with tm.assert_produces_warning(FutureWarning): - cat = Categorical.from_codes(codes, dtype.categories) - tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="i1")) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype.categories) - with tm.assert_produces_warning(FutureWarning): - cat = Categorical.from_codes(codes, dtype=dtype) - tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="i1")) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype=dtype) codes = [1.1, 2.0, 0] # non-integer with pytest.raises(ValueError, match="codes need to be array-like integers"): diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 24510ff9338ca..0b77c0067e5f2 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -165,23 +165,16 @@ def test_dropna_multiple_axes(self): [7, np.nan, 8, 9], ] ) - cp = df.copy() # GH20987 - with tm.assert_produces_warning(FutureWarning): - result = df.dropna(how="all", axis=[0, 1]) - with tm.assert_produces_warning(FutureWarning): - result2 = df.dropna(how="all", axis=(0, 1)) - expected = df.dropna(how="all").dropna(how="all", axis=1) - - tm.assert_frame_equal(result, expected) - 
tm.assert_frame_equal(result2, expected) - tm.assert_frame_equal(df, cp) + with pytest.raises(TypeError, match="supplying multiple axes"): + df.dropna(how="all", axis=[0, 1]) + with pytest.raises(TypeError, match="supplying multiple axes"): + df.dropna(how="all", axis=(0, 1)) inp = df.copy() - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="supplying multiple axes"): inp.dropna(how="all", axis=(0, 1), inplace=True) - tm.assert_frame_equal(inp, expected) def test_dropna_tz_aware_datetime(self): # GH13407 diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 81bf1edbe86df..09f1db25a3e31 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -16,6 +16,7 @@ MultiIndex, NaT, Series, + Timedelta, Timestamp, date_range, isna, @@ -60,8 +61,7 @@ def test_timedelta_fillna(self): td = s.diff() # reg fillna - with tm.assert_produces_warning(FutureWarning): - result = td.fillna(0) + result = td.fillna(Timedelta(seconds=0)) expected = Series( [ timedelta(0), @@ -73,8 +73,10 @@ def test_timedelta_fillna(self): tm.assert_series_equal(result, expected) # interpreted as seconds, deprecated - with tm.assert_produces_warning(FutureWarning): - result = td.fillna(1) + with pytest.raises(TypeError, match="Passing integers to fillna"): + td.fillna(1) + + result = td.fillna(Timedelta(seconds=1)) expected = Series( [ timedelta(seconds=1), @@ -122,16 +124,14 @@ def test_timedelta_fillna(self): # ffill td[2] = np.nan result = td.ffill() - with tm.assert_produces_warning(FutureWarning): - expected = td.fillna(0) + expected = td.fillna(Timedelta(seconds=0)) expected[0] = np.nan tm.assert_series_equal(result, expected) # bfill td[2] = np.nan result = td.bfill() - with tm.assert_produces_warning(FutureWarning): - expected = td.fillna(0) + expected = td.fillna(Timedelta(seconds=0)) expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1) tm.assert_series_equal(result, expected) @@ 
-1597,12 +1597,6 @@ def test_series_interpolate_intraday(self): tm.assert_numpy_array_equal(result.values, exp.values) - def test_nonzero_warning(self): - # GH 24048 - ser = pd.Series([1, 0, 3, 4]) - with tm.assert_produces_warning(FutureWarning): - ser.nonzero() - @pytest.mark.parametrize( "ind", [ From f5c102f1683ee0c63079800e3b2c5937d04ee2c7 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 1 Dec 2019 22:24:49 +0000 Subject: [PATCH 34/49] Convert core/indexes/base.py to f-strings (#29903) --- pandas/core/indexes/base.py | 70 ++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c2352c94f1316..9f96d09b0d3dd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -120,7 +120,7 @@ def cmp_method(self, other): return result return ops.invalid_comparison(self, other, op) - name = "__{name}__".format(name=op.__name__) + name = f"__{op.__name__}__" return set_function_name(cmp_method, name, cls) @@ -136,7 +136,7 @@ def index_arithmetic_method(self, other): return (Index(result[0]), Index(result[1])) return Index(result) - name = "__{name}__".format(name=op.__name__) + name = f"__{op.__name__}__" # TODO: docstring? 
return set_function_name(index_arithmetic_method, name, cls) @@ -441,7 +441,7 @@ def __new__( except IncompatibleFrequency: pass if kwargs: - raise TypeError(f"Unexpected keyword arguments {set(kwargs)!r}") + raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") return cls._simple_new(subarr, name, **kwargs) elif hasattr(data, "__array__"): @@ -753,8 +753,7 @@ def astype(self, dtype, copy=True): self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype ) except (TypeError, ValueError): - msg = "Cannot cast {name} to dtype {dtype}" - raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) + raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}") _index_shared_docs[ "take" @@ -799,8 +798,10 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): ) else: if allow_fill and fill_value is not None: - msg = "Unable to fill values because {0} cannot contain NA" - raise ValueError(msg.format(type(self).__name__)) + cls_name = type(self).__name__ + raise ValueError( + f"Unable to fill values because {cls_name} cannot contain NA" + ) taken = self.values.take(indices) return self._shallow_copy(taken) @@ -1271,9 +1272,7 @@ def _set_names(self, values, level=None): # All items in 'name' need to be hashable: for name in values: if not is_hashable(name): - raise TypeError( - "{}.name must be a hashable type".format(type(self).__name__) - ) + raise TypeError(f"{type(self).__name__}.name must be a hashable type") self.name = values[0] names = property(fset=_set_names, fget=_get_names) @@ -1441,13 +1440,11 @@ def _validate_index_level(self, level): ) elif level > 0: raise IndexError( - "Too many levels: Index has only 1 level, not %d" % (level + 1) + f"Too many levels: Index has only 1 level, not {level + 1}" ) elif level != self.name: raise KeyError( - "Requested level ({}) does not match index name ({})".format( - level, self.name - ) + f"Requested level ({level}) does not match index name ({self.name})" ) def 
_get_level_number(self, level): @@ -1543,9 +1540,8 @@ def droplevel(self, level=0): return self if len(level) >= self.nlevels: raise ValueError( - "Cannot remove {} levels from an index with {} " - "levels: at least one level must be " - "left.".format(len(level), self.nlevels) + f"Cannot remove {len(level)} levels from an index with {self.nlevels} " + "levels: at least one level must be left." ) # The two checks above guarantee that here self is a MultiIndex @@ -1999,7 +1995,7 @@ def fillna(self, value=None, downcast=None): @Appender(_index_shared_docs["dropna"]) def dropna(self, how="any"): if how not in ("any", "all"): - raise ValueError("invalid how option: {0}".format(how)) + raise ValueError(f"invalid how option: {how}") if self.hasnans: return self._shallow_copy(self.values[~self._isnan]) @@ -2273,10 +2269,8 @@ def __xor__(self, other): def __nonzero__(self): raise ValueError( - "The truth value of a {0} is ambiguous. " - "Use a.empty, a.bool(), a.item(), a.any() or a.all().".format( - type(self).__name__ - ) + f"The truth value of a {type(self).__name__} is ambiguous. " + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." ) __bool__ = __nonzero__ @@ -2339,7 +2333,7 @@ def _validate_sort_keyword(self, sort): if sort not in [None, False]: raise ValueError( "The 'sort' keyword only takes the values of " - "None or False; {0} was passed.".format(sort) + f"None or False; {sort} was passed." 
) def union(self, other, sort=None): @@ -2466,10 +2460,9 @@ def _union(self, other, sort): if sort is None: try: result = algos.safe_sort(result) - except TypeError as e: + except TypeError as err: warnings.warn( - "{}, sort order is undefined for " - "incomparable objects".format(e), + f"{err}, sort order is undefined for incomparable objects", RuntimeWarning, stacklevel=3, ) @@ -2924,8 +2917,8 @@ def _get_fill_indexer_searchsorted(self, target, method, limit=None): """ if limit is not None: raise ValueError( - "limit argument for %r method only well-defined " - "if index and target are monotonic" % method + f"limit argument for {repr(method)} method only well-defined " + "if index and target are monotonic" ) side = "left" if method == "pad" else "right" @@ -3212,10 +3205,8 @@ def _invalid_indexer(self, form, key): Consistent invalid indexer message. """ raise TypeError( - "cannot do {form} indexing on {klass} with these " - "indexers [{key}] of {kind}".format( - form=form, klass=type(self), key=key, kind=type(key) - ) + f"cannot do {form} indexing on {type(self)} with these " + f"indexers [{key}] of {type(key)}" ) # -------------------------------------------------------------------- @@ -3977,8 +3968,8 @@ def _scalar_data_error(cls, data): # We return the TypeError so that we can raise it from the constructor # in order to keep mypy happy return TypeError( - "{0}(...) must be called with a collection of some " - "kind, {1} was passed".format(cls.__name__, repr(data)) + f"{cls.__name__}(...) must be called with a collection of some " + f"kind, {repr(data)} was passed" ) @classmethod @@ -4022,8 +4013,7 @@ def _assert_can_do_op(self, value): Check value is valid for scalar op. 
""" if not is_scalar(value): - msg = "'value' must be a scalar, passed: {0}" - raise TypeError(msg.format(type(value).__name__)) + raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}") def _is_memory_usage_qualified(self) -> bool: """ @@ -4098,7 +4088,7 @@ def contains(self, key) -> bool: return key in self def __hash__(self): - raise TypeError("unhashable type: %r" % type(self).__name__) + raise TypeError(f"unhashable type: {repr(type(self).__name__)}") def __setitem__(self, key, value): raise TypeError("Index does not support mutable operations") @@ -5037,8 +5027,8 @@ def get_slice_bound(self, label, side, kind): slc = lib.maybe_indices_to_slice(slc.astype("i8"), len(self)) if isinstance(slc, np.ndarray): raise KeyError( - "Cannot get %s slice bound for non-unique " - "label: %r" % (side, original_label) + f"Cannot get {side} slice bound for non-unique " + f"label: {repr(original_label)}" ) if isinstance(slc, slice): @@ -5196,7 +5186,7 @@ def drop(self, labels, errors="raise"): mask = indexer == -1 if mask.any(): if errors != "ignore": - raise KeyError("{} not found in axis".format(labels[mask])) + raise KeyError(f"{labels[mask]} not found in axis") indexer = indexer[~mask] return self.delete(indexer) From 6684d7684387c779ebab7ee0ed7249133fc19607 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Dec 2019 15:10:30 -0800 Subject: [PATCH 35/49] CLN: make kwargs explicit for pytables read_ methods (#29935) --- pandas/io/pytables.py | 80 +++++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 18 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 6629c0dd30e19..05bde5a948248 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -8,7 +8,17 @@ import itertools import os import re -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Hashable, + List, + Optional, + Tuple, + Type, + Union, +) import 
warnings import numpy as np @@ -2781,13 +2791,16 @@ def read_array( else: return ret - def read_index(self, key: str, **kwargs) -> Index: + def read_index( + self, key: str, start: Optional[int] = None, stop: Optional[int] = None + ) -> Index: variety = _ensure_decoded(getattr(self.attrs, f"{key}_variety")) if variety == "multi": - return self.read_multi_index(key, **kwargs) + return self.read_multi_index(key, start=start, stop=stop) elif variety == "regular": - _, index = self.read_index_node(getattr(self.group, key), **kwargs) + node = getattr(self.group, key) + _, index = self.read_index_node(node, start=start, stop=stop) return index else: # pragma: no cover raise TypeError(f"unrecognized index variety: {variety}") @@ -2840,7 +2853,9 @@ def write_multi_index(self, key: str, index: MultiIndex): label_key = f"{key}_label{i}" self.write_array(label_key, level_codes) - def read_multi_index(self, key: str, **kwargs) -> MultiIndex: + def read_multi_index( + self, key: str, start: Optional[int] = None, stop: Optional[int] = None + ) -> MultiIndex: nlevels = getattr(self.attrs, f"{key}_nlevels") levels = [] @@ -2848,12 +2863,13 @@ def read_multi_index(self, key: str, **kwargs) -> MultiIndex: names = [] for i in range(nlevels): level_key = f"{key}_level{i}" - name, lev = self.read_index_node(getattr(self.group, level_key), **kwargs) + node = getattr(self.group, level_key) + name, lev = self.read_index_node(node, start=start, stop=stop) levels.append(lev) names.append(name) label_key = f"{key}_label{i}" - level_codes = self.read_array(label_key, **kwargs) + level_codes = self.read_array(label_key, start=start, stop=stop) codes.append(level_codes) return MultiIndex( @@ -3014,6 +3030,8 @@ class SeriesFixed(GenericFixed): pandas_kind = "series" attributes = ["name"] + name: Optional[Hashable] + @property def shape(self): try: @@ -3021,10 +3039,16 @@ def shape(self): except (TypeError, AttributeError): return None - def read(self, **kwargs): - kwargs = 
self.validate_read(kwargs) - index = self.read_index("index", **kwargs) - values = self.read_array("values", **kwargs) + def read( + self, + where=None, + columns=None, + start: Optional[int] = None, + stop: Optional[int] = None, + ): + self.validate_read({"where": where, "columns": columns}) + index = self.read_index("index", start=start, stop=stop) + values = self.read_array("values", start=start, stop=stop) return Series(values, index=index, name=self.name) def write(self, obj, **kwargs): @@ -3038,6 +3062,8 @@ class BlockManagerFixed(GenericFixed): attributes = ["ndim", "nblocks"] is_shape_reversed = False + nblocks: int + @property def shape(self): try: @@ -3069,10 +3095,15 @@ def shape(self): except AttributeError: return None - def read(self, start=None, stop=None, **kwargs): + def read( + self, + where=None, + columns=None, + start: Optional[int] = None, + stop: Optional[int] = None, + ): # start, stop applied to rows, so 0th axis only - - kwargs = self.validate_read(kwargs) + self.validate_read({"columns": columns, "where": where}) select_axis = self.obj_type()._get_block_manager_axis(0) axes = [] @@ -4360,14 +4391,21 @@ def write(self, obj, data_columns=None, **kwargs): obj.columns = [name] return super().write(obj=obj, data_columns=obj.columns.tolist(), **kwargs) - def read(self, columns=None, **kwargs): + def read( + self, + where=None, + columns=None, + start: Optional[int] = None, + stop: Optional[int] = None, + ): is_multi_index = self.is_multi_index if columns is not None and is_multi_index: + assert isinstance(self.levels, list) # needed for mypy for n in self.levels: if n not in columns: columns.insert(0, n) - s = super().read(columns=columns, **kwargs) + s = super().read(where=where, columns=columns, start=start, stop=stop) if is_multi_index: s.set_index(self.levels, inplace=True) @@ -4468,9 +4506,15 @@ def write(self, obj, data_columns=None, **kwargs): data_columns.insert(0, n) return super().write(obj=obj, data_columns=data_columns, **kwargs) - 
def read(self, **kwargs): + def read( + self, + where=None, + columns=None, + start: Optional[int] = None, + stop: Optional[int] = None, + ): - df = super().read(**kwargs) + df = super().read(where=where, columns=columns, start=start, stop=stop) df = df.set_index(self.levels) # remove names for 'level_%d' From b1e68cb7eb7e31e618b524a568e9ffb59d44a008 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Dec 2019 15:20:36 -0800 Subject: [PATCH 36/49] CLN: explicit signature for to_hdf (#29939) --- pandas/core/generic.py | 67 +++++++++++++++++++++++++++++------------- pandas/io/pytables.py | 26 ++++++++++------ 2 files changed, 64 insertions(+), 29 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 42b8214e07d49..48500a9a428d0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2403,7 +2403,19 @@ def to_json( indent=indent, ) - def to_hdf(self, path_or_buf, key, **kwargs): + def to_hdf( + self, + path_or_buf, + key: str, + mode: str = "a", + complevel: Optional[int] = None, + complib: Optional[str] = None, + append: bool_t = False, + format: Optional[str] = None, + errors: str = "strict", + encoding: str = "UTF-8", + **kwargs, + ): """ Write the contained data to an HDF5 file using HDFStore. @@ -2431,7 +2443,20 @@ def to_hdf(self, path_or_buf, key, **kwargs): - 'a': append, an existing file is opened for reading and writing, and if the file does not exist it is created. - 'r+': similar to 'a', but the file must already exist. - format : {'fixed', 'table'}, default 'fixed' + complevel : {0-9}, optional + Specifies a compression level for data. + A value of 0 disables compression. + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' + Specifies the compression library to be used. + As of v0.20.2 these additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. 
+ Specifying a compression library which is not available issues + a ValueError. + append : bool, default False + For Table formats, append the input data to the existing. + format : {'fixed', 'table', None}, default 'fixed' Possible values: - 'fixed': Fixed format. Fast writing/reading. Not-appendable, @@ -2439,32 +2464,22 @@ def to_hdf(self, path_or_buf, key, **kwargs): - 'table': Table format. Write as a PyTables Table structure which may perform worse but allow more flexible operations like searching / selecting subsets of the data. - append : bool, default False - For Table formats, append the input data to the existing. + - If None, pd.get_option('io.hdf.default_format') is checked, + followed by fallback to "fixed" + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + encoding : str, default "UTF-8" data_columns : list of columns or True, optional List of columns to create as indexed data columns for on-disk queries, or True to use all columns. By default only the axes of the object are indexed. See :ref:`io.hdf5-query-data-columns`. Applicable only to format='table'. - complevel : {0-9}, optional - Specifies a compression level for data. - A value of 0 disables compression. - complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' - Specifies the compression library to be used. - As of v0.20.2 these additional compressors for Blosc are supported - (default if no compressor specified: 'blosc:blosclz'): - {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', - 'blosc:zlib', 'blosc:zstd'}. - Specifying a compression library which is not available issues - a ValueError. fletcher32 : bool, default False If applying compression use the fletcher32 checksum. dropna : bool, default False If true, ALL nan rows will not be written to store. - errors : str, default 'strict' - Specifies how encoding and decoding errors are to be handled. 
- See the errors argument for :func:`open` for a full list - of options. See Also -------- @@ -2506,7 +2521,19 @@ def to_hdf(self, path_or_buf, key, **kwargs): """ from pandas.io import pytables - pytables.to_hdf(path_or_buf, key, self, **kwargs) + pytables.to_hdf( + path_or_buf, + key, + self, + mode=mode, + complevel=complevel, + complib=complib, + append=append, + format=format, + errors=errors, + encoding=encoding, + **kwargs, + ) def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs): """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 05bde5a948248..6ef821fc52d46 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -53,6 +53,7 @@ concat, isna, ) +from pandas._typing import FrameOrSeries from pandas.core.arrays.categorical import Categorical import pandas.core.common as com from pandas.core.computation.pytables import PyTablesExpr, maybe_expression @@ -251,20 +252,27 @@ def _tables(): def to_hdf( path_or_buf, - key, - value, - mode=None, + key: str, + value: FrameOrSeries, + mode: str = "a", complevel: Optional[int] = None, - complib=None, - append=None, + complib: Optional[str] = None, + append: bool = False, + format: Optional[str] = None, + errors: str = "strict", + encoding: str = "UTF-8", **kwargs, ): """ store this object, close it if we opened it """ if append: - f = lambda store: store.append(key, value, **kwargs) + f = lambda store: store.append( + key, value, format=format, errors=errors, encoding=encoding, **kwargs + ) else: - f = lambda store: store.put(key, value, **kwargs) + f = lambda store: store.put( + key, value, format=format, errors=errors, encoding=encoding, **kwargs + ) path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, str): @@ -1042,7 +1050,7 @@ def append( format=None, append=True, columns=None, - dropna=None, + dropna: Optional[bool] = None, **kwargs, ): """ @@ -1070,7 +1078,7 @@ def append( chunksize : size to chunk the writing expectedrows : expected TOTAL row size of this 
table encoding : default None, provide an encoding for strings - dropna : bool, default False + dropna : bool, default False Do not write an ALL nan row to the store settable by the option 'io.hdf.dropna_table'. From 9ed267ef60d9b829c974cd29269e064c2ddce029 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Sun, 1 Dec 2019 23:22:14 +0000 Subject: [PATCH 37/49] TST: add test for rolling max/min/mean with DatetimeIndex over different frequencies (#29932) --- pandas/tests/window/test_timeseries_window.py | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 02969a6c6e822..46582b4b50c84 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -535,15 +535,34 @@ def test_ragged_max(self): expected["B"] = [0.0, 1, 2, 3, 4] tm.assert_frame_equal(result, expected) - def test_minutes_freq_max(self): + @pytest.mark.parametrize( + "freq, op, result_data", + [ + ("ms", "min", [0.0] * 10), + ("ms", "mean", [0.0] * 9 + [2.0 / 9]), + ("ms", "max", [0.0] * 9 + [2.0]), + ("s", "min", [0.0] * 10), + ("s", "mean", [0.0] * 9 + [2.0 / 9]), + ("s", "max", [0.0] * 9 + [2.0]), + ("min", "min", [0.0] * 10), + ("min", "mean", [0.0] * 9 + [2.0 / 9]), + ("min", "max", [0.0] * 9 + [2.0]), + ("h", "min", [0.0] * 10), + ("h", "mean", [0.0] * 9 + [2.0 / 9]), + ("h", "max", [0.0] * 9 + [2.0]), + ("D", "min", [0.0] * 10), + ("D", "mean", [0.0] * 9 + [2.0 / 9]), + ("D", "max", [0.0] * 9 + [2.0]), + ], + ) + def test_freqs_ops(self, freq, op, result_data): # GH 21096 - n = 10 - index = date_range(start="2018-1-1 01:00:00", freq="1min", periods=n) + index = date_range(start="2018-1-1 01:00:00", freq=f"1{freq}", periods=10) s = Series(data=0, index=index) s.iloc[1] = np.nan s.iloc[-1] = 2 - result = s.rolling(window=f"{n}min").max() - expected = Series(data=[0] * (n - 1) + [2.0], index=index) + result = 
getattr(s.rolling(window=f"10{freq}"), op)() + expected = Series(data=result_data, index=index) tm.assert_series_equal(result, expected) From 4bec9c4442b2f17226337661aa008f85638db176 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Dec 2019 15:29:54 -0800 Subject: [PATCH 38/49] CLN: BlockManager.apply (#29825) --- pandas/core/generic.py | 6 +-- pandas/core/internals/blocks.py | 14 +++---- pandas/core/internals/managers.py | 62 ++++++++++++------------------- 3 files changed, 32 insertions(+), 50 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 48500a9a428d0..59fd35666efe6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5571,7 +5571,7 @@ def _to_dict_of_blocks(self, copy=True): for k, v, in self._data.to_dict(copy=copy).items() } - def astype(self, dtype, copy=True, errors="raise"): + def astype(self, dtype, copy: bool_t = True, errors: str = "raise"): """ Cast a pandas object to a specified dtype ``dtype``. @@ -5697,10 +5697,10 @@ def astype(self, dtype, copy=True, errors="raise"): elif is_extension_array_dtype(dtype) and self.ndim > 1: # GH 18099/22869: columnwise conversion to extension dtype # GH 24704: use iloc to handle duplicate column names - results = ( + results = [ self.iloc[:, i].astype(dtype, copy=copy) for i in range(len(self.columns)) - ) + ] else: # else, only a single dtype is given diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8dd39473ee1f4..8a543832b50fe 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -523,16 +523,14 @@ def f(mask, val, idx): return self.split_and_operate(None, f, False) - def astype(self, dtype, copy=False, errors="raise", **kwargs): - return self._astype(dtype, copy=copy, errors=errors, **kwargs) - - def _astype(self, dtype, copy=False, errors="raise", **kwargs): - """Coerce to the new type + def astype(self, dtype, copy: bool = False, errors: str = "raise"): + """ + Coerce to the new dtype. 
Parameters ---------- dtype : str, dtype convertible - copy : boolean, default False + copy : bool, default False copy if indicated errors : str, {'raise', 'ignore'}, default 'ignore' - ``raise`` : allow exceptions to be raised @@ -2142,7 +2140,7 @@ def _maybe_coerce_values(self, values): assert isinstance(values, np.ndarray), type(values) return values - def _astype(self, dtype, **kwargs): + def astype(self, dtype, copy: bool = False, errors: str = "raise"): """ these automatically copy, so copy=True has no effect raise on an except if raise == True @@ -2158,7 +2156,7 @@ def _astype(self, dtype, **kwargs): return self.make_block(values) # delegate - return super()._astype(dtype=dtype, **kwargs) + return super().astype(dtype=dtype, copy=copy, errors=errors) def _can_hold_element(self, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 0fe95a4b7f370..9adfe41b68e4f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -340,33 +340,20 @@ def _verify_integrity(self): "tot_items: {1}".format(len(self.items), tot_items) ) - def apply( - self, - f, - axes=None, - filter=None, - do_integrity_check=False, - consolidate=True, - **kwargs, - ): + def apply(self, f: str, filter=None, **kwargs): """ - iterate over the blocks, collect and create a new block manager + Iterate over the blocks, collect and create a new BlockManager. Parameters ---------- - f : the callable or function name to operate on at the block level - axes : optional (if not supplied, use self.axes) + f : str + Name of the Block method to apply. filter : list, if supplied, only call the block if the filter is in the block - do_integrity_check : boolean, default False. Do the block manager - integrity check - consolidate: boolean, default True. 
Join together blocks having same - dtype Returns ------- - Block Manager (new object) - + BlockManager """ result_blocks = [] @@ -380,8 +367,7 @@ def apply( else: kwargs["filter"] = filter_locs - if consolidate: - self._consolidate_inplace() + self._consolidate_inplace() if f == "where": align_copy = True @@ -429,11 +415,8 @@ def apply( result_blocks = _extend_blocks(applied, result_blocks) if len(result_blocks) == 0: - return self.make_empty(axes or self.axes) - bm = type(self)( - result_blocks, axes or self.axes, do_integrity_check=do_integrity_check - ) - bm._consolidate_inplace() + return self.make_empty(self.axes) + bm = type(self)(result_blocks, self.axes, do_integrity_check=False) return bm def quantile( @@ -540,8 +523,8 @@ def get_axe(block, qs, axes): [make_block(values, ndim=1, placement=np.arange(len(values)))], axes[0] ) - def isna(self, func, **kwargs): - return self.apply("apply", func=func, **kwargs) + def isna(self, func): + return self.apply("apply", func=func) def where(self, **kwargs): return self.apply("where", **kwargs) @@ -567,8 +550,8 @@ def fillna(self, **kwargs): def downcast(self, **kwargs): return self.apply("downcast", **kwargs) - def astype(self, dtype, **kwargs): - return self.apply("astype", dtype=dtype, **kwargs) + def astype(self, dtype, copy: bool = False, errors: str = "raise"): + return self.apply("astype", dtype=dtype, copy=copy, errors=errors) def convert(self, **kwargs): return self.apply("convert", **kwargs) @@ -768,14 +751,19 @@ def copy(self, deep=True): """ # this preserves the notion of view copying of axes if deep: - if deep == "all": - copy = lambda ax: ax.copy(deep=True) - else: - copy = lambda ax: ax.view() - new_axes = [copy(ax) for ax in self.axes] + + def copy_func(ax): + if deep == "all": + return ax.copy(deep=True) + else: + return ax.view() + + new_axes = [copy_func(ax) for ax in self.axes] else: new_axes = list(self.axes) - return self.apply("copy", axes=new_axes, deep=deep, do_integrity_check=False) + res = 
self.apply("copy", deep=deep) + res.axes = new_axes + return res def as_array(self, transpose=False, items=None): """Convert the blockmanager data into an numpy array. @@ -1527,10 +1515,6 @@ def get_slice(self, slobj, axis=0): def index(self): return self.axes[0] - def convert(self, **kwargs): - """ convert the whole block as one """ - return self.apply("convert", **kwargs) - @property def dtype(self): return self._block.dtype From 7ea4e6197f58affbc59acf0b2f79cd3fac0a4c03 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 2 Dec 2019 00:41:26 +0100 Subject: [PATCH 39/49] ENH: add NA scalar for missing value indicator, use in StringArray. (#29597) --- doc/source/user_guide/missing_data.rst | 149 ++++++++++++++++++++- doc/source/whatsnew/v1.0.0.rst | 44 ++++++ pandas/__init__.py | 1 + pandas/_libs/lib.pyx | 5 +- pandas/_libs/missing.pxd | 5 + pandas/_libs/missing.pyx | 138 ++++++++++++++++++- pandas/_libs/testing.pyx | 8 +- pandas/core/api.py | 2 + pandas/core/arrays/numpy_.py | 3 + pandas/core/arrays/string_.py | 39 +++--- pandas/core/dtypes/missing.py | 11 ++ pandas/io/formats/format.py | 3 + pandas/tests/api/test_api.py | 2 +- pandas/tests/arrays/string_/test_string.py | 21 ++- pandas/tests/extension/test_string.py | 8 +- pandas/tests/scalar/test_na_scalar.py | 131 ++++++++++++++++++ 16 files changed, 530 insertions(+), 40 deletions(-) create mode 100644 pandas/tests/scalar/test_na_scalar.py diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 6c36a6470f841..11957cfa265f5 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -12,10 +12,10 @@ pandas. .. note:: The choice of using ``NaN`` internally to denote missing data was largely - for simplicity and performance reasons. It differs from the MaskedArray - approach of, for example, :mod:`scikits.timeseries`. 
We are hopeful that - NumPy will soon be able to provide a native NA type solution (similar to R) - performant enough to be used in pandas. + for simplicity and performance reasons. + Starting from pandas 1.0, some optional data types start experimenting + with a native ``NA`` scalar using a mask-based approach. See + :ref:`here <missing_data.NA>` for more. See the :ref:`cookbook` for some advanced strategies. @@ -110,7 +110,7 @@ pandas objects provide compatibility between ``NaT`` and ``NaN``. .. _missing.inserting: Inserting missing data ----------------------- +~~~~~~~~~~~~~~~~~~~~~~ You can insert missing values by simply assigning to containers. The actual missing value used will be chosen based on the dtype. @@ -135,9 +135,10 @@ For object containers, pandas will use the value given: s.loc[1] = np.nan s +.. _missing_data.calculations: Calculations with missing data ------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Missing values propagate naturally through arithmetic operations between pandas objects. @@ -771,3 +772,139 @@ the ``dtype="Int64"``. s See :ref:`integer_na` for more. + + +.. _missing_data.NA: + +Experimental ``NA`` scalar to denote missing values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + Experimental: the behaviour of ``pd.NA`` can still change without warning. + +.. versionadded:: 1.0.0 + +Starting from pandas 1.0, an experimental ``pd.NA`` value (singleton) is +available to represent scalar missing values. At this moment, it is used in +the nullable :doc:`integer <integer_na>`, boolean and +:ref:`dedicated string <text.types>` data types as the missing value indicator. + +The goal of ``pd.NA`` is to provide a "missing" indicator that can be used +consistently across data types (instead of ``np.nan``, ``None`` or ``pd.NaT`` +depending on the data type). + +For example, when having missing values in a Series with the nullable integer +dtype, it will use ``pd.NA``: + +..
ipython:: python + + s = pd.Series([1, 2, None], dtype="Int64") + s + s[2] + s[2] is pd.NA + +Currently, pandas does not yet use those data types by default (when creating +a DataFrame or Series, or when reading in data), so you need to specify +the dtype explicitly. + +Propagation in arithmetic and comparison operations +--------------------------------------------------- + +In general, missing values *propagate* in operations involving ``pd.NA``. When +one of the operands is unknown, the outcome of the operation is also unknown. + +For example, ``pd.NA`` propagates in arithmetic operations, similarly to +``np.nan``: + +.. ipython:: python + + pd.NA + 1 + "a" * pd.NA + +In equality and comparison operations, ``pd.NA`` also propagates. This deviates +from the behaviour of ``np.nan``, where comparisons with ``np.nan`` always +return ``False``. + +.. ipython:: python + + pd.NA == 1 + pd.NA == pd.NA + pd.NA < 2.5 + +To check if a value is equal to ``pd.NA``, the :func:`isna` function can be +used: + +.. ipython:: python + + pd.isna(pd.NA) + +Exceptions to this basic propagation rule are *reductions* (such as the +mean or the minimum), where pandas defaults to skipping missing values. See +:ref:`above <missing_data.calculations>` for more. + +Logical operations +------------------ + +For logical operations, ``pd.NA`` follows the rules of the +`three-valued logic <https://en.wikipedia.org/wiki/Three-valued_logic>`__ (or +*Kleene logic*, similarly to R, SQL and Julia). This logic means to only +propagate missing values when it is logically required. + +For example, for the logical "or" operation (``|``), if one of the operands +is ``True``, we already know the result will be ``True``, regardless of the +other value (so regardless of whether the missing value would be ``True`` or ``False``). +In this case, ``pd.NA`` does not propagate: + +.. ipython:: python + + True | False + True | pd.NA + pd.NA | True + +On the other hand, if one of the operands is ``False``, the result depends +on the value of the other operand.
Therefore, in this case ``pd.NA`` +propagates: + +.. ipython:: python + + False | True + False | False + False | pd.NA + +The behaviour of the logical "and" operation (``&``) can be derived using +similar logic (where now ``pd.NA`` will not propagate if one of the operands +is already ``False``): + +.. ipython:: python + + False & True + False & False + False & pd.NA + +.. ipython:: python + + True & True + True & False + True & pd.NA + + +``NA`` in a boolean context +--------------------------- + +Since the actual value of an NA is unknown, it is ambiguous to convert NA +to a boolean value. The following raises an error: + +.. ipython:: python + :okexcept: + + bool(pd.NA) + +This also means that ``pd.NA`` cannot be used in a context where it is +evaluated to a boolean, such as ``if condition: ...`` where ``condition`` can +potentially be ``pd.NA``. In such cases, :func:`isna` can be used to check +for ``pd.NA`` or ``condition`` being ``pd.NA`` can be avoided, for example by +filling missing values beforehand. + +A similar situation occurs when using Series or DataFrame objects in ``if`` +statements, see :ref:`gotchas.truth`. diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4e8a471239610..87635314d2cd0 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -102,6 +102,50 @@ String accessor methods returning integers will return a value with :class:`Int6 We recommend explicitly using the ``string`` data type when working with strings. See :ref:`text.types` for more. +.. _whatsnew_100.NA: + +Experimental ``NA`` scalar to denote missing values +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A new ``pd.NA`` value (singleton) is introduced to represent scalar missing +values. Up to now, ``np.nan`` is used for this for float data, ``np.nan`` or +``None`` for object-dtype data and ``pd.NaT`` for datetime-like data. 
The +goal of ``pd.NA`` is to provide a "missing" indicator that can be used +consistently across data types. For now, the nullable integer and boolean +data types and the new string data type make use of ``pd.NA`` (:issue:`28095`). + +.. warning:: + + Experimental: the behaviour of ``pd.NA`` can still change without warning. + +For example, creating a Series using the nullable integer dtype: + +.. ipython:: python + + s = pd.Series([1, 2, None], dtype="Int64") + s + s[2] + +Compared to ``np.nan``, ``pd.NA`` behaves differently in certain operations. +In addition to arithmetic operations, ``pd.NA`` also propagates as "missing" +or "unknown" in comparison operations: + +.. ipython:: python + + np.nan > 1 + pd.NA > 1 + +For logical operations, ``pd.NA`` follows the rules of the +`three-valued logic <https://en.wikipedia.org/wiki/Three-valued_logic>`__ (or +*Kleene logic*). For example: + +.. ipython:: python + + pd.NA | True + +For more, see :ref:`NA section <missing_data.NA>` in the user guide on missing +data. + .. _whatsnew_100.boolean: Boolean data type with missing values support diff --git a/pandas/__init__.py b/pandas/__init__.py index d6f3458b4d604..a60aa08b89f84 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -70,6 +70,7 @@ StringDtype, BooleanDtype, # missing + NA, isna, isnull, notna, diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 780f93291cee8..41c15ab4de5e1 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -57,7 +57,7 @@ from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64 from pandas._libs.tslibs.timezones cimport get_timezone, tz_compare from pandas._libs.missing cimport ( - checknull, isnaobj, is_null_datetime64, is_null_timedelta64, is_null_period + checknull, isnaobj, is_null_datetime64, is_null_timedelta64, is_null_period, C_NA ) @@ -160,6 +160,7 @@ def is_scalar(val: object) -> bool: or PyTime_Check(val) # We differ from numpy, which claims that None is not scalar; # see np.isscalar + or val is C_NA or val is None or isinstance(val, (Fraction, Number)) or
util.is_period_object(val) @@ -1494,7 +1495,7 @@ cdef class Validator: f'must define is_value_typed') cdef bint is_valid_null(self, object value) except -1: - return value is None or util.is_nan(value) + return value is None or value is C_NA or util.is_nan(value) cdef bint is_array_typed(self) except -1: return False diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd index d0dd306680ae8..d4303ac28b9a5 100644 --- a/pandas/_libs/missing.pxd +++ b/pandas/_libs/missing.pxd @@ -9,3 +9,8 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr) cdef bint is_null_datetime64(v) cdef bint is_null_timedelta64(v) cdef bint is_null_period(v) + +cdef class C_NAType: + pass + +cdef C_NAType C_NA diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 9568ddb7fe53f..9bf955ad369e7 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -1,6 +1,8 @@ import cython from cython import Py_ssize_t +import numbers + import numpy as np cimport numpy as cnp from numpy cimport ndarray, int64_t, uint8_t, float64_t @@ -44,7 +46,7 @@ cpdef bint checknull(object val): The difference between `checknull` and `checknull_old` is that `checknull` does *not* consider INF or NEGINF to be NA. 
""" - return is_null_datetimelike(val, inat_is_null=False) + return val is C_NA or is_null_datetimelike(val, inat_is_null=False) cpdef bint checknull_old(object val): @@ -278,3 +280,137 @@ cdef inline bint is_null_period(v): # determine if we have a null for a Period (or integer versions), # excluding np.datetime64('nat') and np.timedelta64('nat') return checknull_with_nat(v) + + +# ----------------------------------------------------------------------------- +# Implementation of NA singleton + + +def _create_binary_propagating_op(name, divmod=False): + + def method(self, other): + if other is C_NA or isinstance(other, str) or isinstance(other, numbers.Number): + if divmod: + return NA, NA + else: + return NA + + return NotImplemented + + method.__name__ = name + return method + + +def _create_unary_propagating_op(name): + def method(self): + return NA + + method.__name__ = name + return method + + +cdef class C_NAType: + pass + + +class NAType(C_NAType): + """ + NA ("not available") missing value indicator. + + .. warning:: + + Experimental: the behaviour of NA can still change without warning. + + .. versionadded:: 1.0.0 + + The NA singleton is a missing value indicator defined by pandas. It is + used in certain new extension dtypes (currently the "string" dtype). 
+ """ + + _instance = None + + def __new__(cls, *args, **kwargs): + if NAType._instance is None: + NAType._instance = C_NAType.__new__(cls, *args, **kwargs) + return NAType._instance + + def __repr__(self) -> str: + return "NA" + + def __str__(self) -> str: + return "NA" + + def __bool__(self): + raise TypeError("boolean value of NA is ambiguous") + + def __hash__(self): + return id(self) + + # Binary arithmetic and comparison ops -> propagate + + __add__ = _create_binary_propagating_op("__add__") + __radd__ = _create_binary_propagating_op("__radd__") + __sub__ = _create_binary_propagating_op("__sub__") + __rsub__ = _create_binary_propagating_op("__rsub__") + __mul__ = _create_binary_propagating_op("__mul__") + __rmul__ = _create_binary_propagating_op("__rmul__") + __matmul__ = _create_binary_propagating_op("__matmul__") + __rmatmul__ = _create_binary_propagating_op("__rmatmul__") + __truediv__ = _create_binary_propagating_op("__truediv__") + __rtruediv__ = _create_binary_propagating_op("__rtruediv__") + __floordiv__ = _create_binary_propagating_op("__floordiv__") + __rfloordiv__ = _create_binary_propagating_op("__rfloordiv__") + __mod__ = _create_binary_propagating_op("__mod__") + __rmod__ = _create_binary_propagating_op("__rmod__") + __divmod__ = _create_binary_propagating_op("__divmod__", divmod=True) + __rdivmod__ = _create_binary_propagating_op("__rdivmod__", divmod=True) + __pow__ = _create_binary_propagating_op("__pow__") + __rpow__ = _create_binary_propagating_op("__rpow__") + # __lshift__ and __rshift__ are not implemented + + __eq__ = _create_binary_propagating_op("__eq__") + __ne__ = _create_binary_propagating_op("__ne__") + __le__ = _create_binary_propagating_op("__le__") + __lt__ = _create_binary_propagating_op("__lt__") + __gt__ = _create_binary_propagating_op("__gt__") + __ge__ = _create_binary_propagating_op("__ge__") + + # Unary ops + + __neg__ = _create_unary_propagating_op("__neg__") + __pos__ = _create_unary_propagating_op("__pos__") + __abs__ = 
_create_unary_propagating_op("__abs__") + __invert__ = _create_unary_propagating_op("__invert__") + + # Logical ops using Kleene logic + + def __and__(self, other): + if other is False: + return False + elif other is True or other is C_NA: + return NA + else: + return NotImplemented + + __rand__ = __and__ + + def __or__(self, other): + if other is True: + return True + elif other is False or other is C_NA: + return NA + else: + return NotImplemented + + __ror__ = __or__ + + def __xor__(self, other): + if other is False or other is True or other is C_NA: + return NA + return NotImplemented + + __rxor__ = __xor__ + + +C_NA = NAType() # C-visible +NA = C_NA # Python-visible diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 141735a97938a..8b847350cb1ff 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -180,13 +180,15 @@ cpdef assert_almost_equal(a, b, # classes can't be the same, to raise error assert_class_equal(a, b, obj=obj) - if a == b: - # object comparison - return True if isna(a) and isna(b): # TODO: Should require same-dtype NA? 
# nan / None comparison return True + + if a == b: + # object comparison + return True + if is_comparable_as_number(a) and is_comparable_as_number(b): if array_equivalent(a, b, strict_nan=True): # inf comparison diff --git a/pandas/core/api.py b/pandas/core/api.py index 65f0178b19187..bf701c0318874 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -55,3 +55,5 @@ # DataFrame needs to be imported after NamedAgg to avoid a circular import from pandas.core.frame import DataFrame # isort:skip + +from pandas._libs.missing import NA diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 6f2bb095a014d..8ba5cd7565850 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -278,6 +278,9 @@ def fillna(self, value=None, method=None, limit=None): return new_values def take(self, indices, allow_fill=False, fill_value=None): + if fill_value is None: + # Primarily for subclasses + fill_value = self.dtype.na_value result = take( self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value ) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 8599b5e39f34a..f6af05ab4d9e7 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1,9 +1,9 @@ import operator -from typing import TYPE_CHECKING, Type +from typing import Type import numpy as np -from pandas._libs import lib +from pandas._libs import lib, missing as libmissing from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import pandas_dtype @@ -17,9 +17,6 @@ from pandas.core.construction import extract_array from pandas.core.missing import isna -if TYPE_CHECKING: - from pandas._typing import Scalar - @register_extension_dtype class StringDtype(ExtensionDtype): @@ -50,16 +47,8 @@ class StringDtype(ExtensionDtype): StringDtype """ - @property - def na_value(self) -> "Scalar": - """ - StringDtype uses :attr:`numpy.nan` as the missing NA value. - - .. 
warning:: - - `na_value` may change in a future release. - """ - return np.nan + #: StringDtype.na_value uses pandas.NA + na_value = libmissing.NA @property def type(self) -> Type: @@ -149,7 +138,7 @@ class StringArray(PandasArray): -------- >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string") - ['This is', 'some text', nan, 'data.'] + ['This is', 'some text', NA, 'data.'] Length: 4, dtype: string Unlike ``object`` dtype arrays, ``StringArray`` doesn't allow non-string @@ -190,10 +179,10 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype: assert dtype == "string" result = super()._from_sequence(scalars, dtype=object, copy=copy) - # convert None to np.nan + # Standardize all missing-like values to NA # TODO: it would be nice to do this in _validate / lib.is_string_array # We are already doing a scan over the values there. - result[result.isna()] = np.nan + result[result.isna()] = StringDtype.na_value return result @classmethod @@ -210,6 +199,12 @@ def __arrow_array__(self, type=None): type = pa.string() return pa.array(self._ndarray, type=type, from_pandas=True) + def _values_for_factorize(self): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = -1 + return arr, -1 + def __setitem__(self, key, value): value = extract_array(value, extract_numpy=True) if isinstance(value, type(self)): @@ -223,9 +218,9 @@ def __setitem__(self, key, value): # validate new items if scalar_value: - if scalar_value is None: - value = np.nan - elif not (isinstance(value, str) or np.isnan(value)): + if isna(value): + value = StringDtype.na_value + elif not isinstance(value, str): raise ValueError( "Cannot set non-string value '{}' into a StringArray.".format(value) ) @@ -283,7 +278,7 @@ def method(self, other): other = other[valid] result = np.empty_like(self._ndarray, dtype="object") - result[mask] = np.nan + result[mask] = StringDtype.na_value result[valid] = op(self._ndarray[valid], other) if op.__name__ in {"add", "radd", "mul", "rmul"}: 
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 205ca193636c6..fc22d5be1ca69 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -80,6 +80,9 @@ def isna(obj): >>> pd.isna('dog') False + >>> pd.isna(pd.NA) + True + >>> pd.isna(np.nan) True @@ -327,6 +330,9 @@ def notna(obj): >>> pd.notna('dog') True + >>> pd.notna(pd.NA) + False + >>> pd.notna(np.nan) False @@ -444,6 +450,9 @@ def array_equivalent(left, right, strict_nan: bool = False) -> bool: if left_value is NaT and right_value is not NaT: return False + elif left_value is libmissing.NA and right_value is not libmissing.NA: + return False + elif isinstance(left_value, float) and np.isnan(left_value): if not isinstance(right_value, float) or not np.isnan(right_value): return False @@ -455,6 +464,8 @@ def array_equivalent(left, right, strict_nan: bool = False) -> bool: if "Cannot compare tz-naive" in str(err): # tzawareness compat failure, see GH#28507 return False + elif "boolean value of NA is ambiguous" in str(err): + return False raise return True diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index f8f5d337185c4..3adf8d7bbdd11 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -35,6 +35,7 @@ from pandas._config.config import get_option, set_option from pandas._libs import lib +from pandas._libs.missing import NA from pandas._libs.tslib import format_array_from_datetime from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT from pandas._libs.tslibs.nattype import NaTType @@ -1223,6 +1224,8 @@ def _format(x): # determine na_rep if x is None or NaT-like if x is None: return "None" + elif x is NA: + return "NA" elif x is NaT or np.isnat(x): return "NaT" except (TypeError, ValueError): diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 85e38d58a6c57..3c0abd7fca830 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -46,7 +46,7 @@ class 
TestPDApi(Base): deprecated_modules: List[str] = [] # misc - misc = ["IndexSlice", "NaT"] + misc = ["IndexSlice", "NaT", "NA"] # top-level classes classes = [ diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 1ce62d8f8b3d9..0dfd75a2042b0 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -9,10 +9,20 @@ import pandas.util.testing as tm +def test_repr_with_NA(): + a = pd.array(["a", pd.NA, "b"], dtype="string") + for obj in [a, pd.Series(a), pd.DataFrame({"a": a})]: + assert "NA" in repr(obj) and "NaN" not in repr(obj) + assert "NA" in str(obj) and "NaN" not in str(obj) + if hasattr(obj, "_repr_html_"): + html_repr = obj._repr_html_() + assert "NA" in html_repr and "NaN" not in html_repr + + def test_none_to_nan(): a = pd.arrays.StringArray._from_sequence(["a", None, "b"]) assert a[1] is not None - assert np.isnan(a[1]) + assert a[1] is pd.NA def test_setitem_validates(): @@ -24,6 +34,15 @@ def test_setitem_validates(): a[:] = np.array([1, 2]) +def test_setitem_with_scalar_string(): + # is_float_dtype considers some strings, like 'd', to be floats + # which can cause issues. 
+ arr = pd.array(["a", "c"], dtype="string") + arr[0] = "d" + expected = pd.array(["d", "c"], dtype="string") + tm.assert_extension_array_equal(arr, expected) + + @pytest.mark.parametrize( "input, method", [ diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 5b872d5b72227..471a1b79d23bc 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -25,7 +25,7 @@ def data(): @pytest.fixture def data_missing(): """Length 2 array with [NA, Valid]""" - return StringArray._from_sequence([np.nan, "A"]) + return StringArray._from_sequence([pd.NA, "A"]) @pytest.fixture @@ -35,17 +35,17 @@ def data_for_sorting(): @pytest.fixture def data_missing_for_sorting(): - return StringArray._from_sequence(["B", np.nan, "A"]) + return StringArray._from_sequence(["B", pd.NA, "A"]) @pytest.fixture def na_value(): - return np.nan + return pd.NA @pytest.fixture def data_for_grouping(): - return StringArray._from_sequence(["B", "B", np.nan, np.nan, "A", "A", "B", "C"]) + return StringArray._from_sequence(["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"]) class TestDtype(base.BaseDtypeTests): diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py new file mode 100644 index 0000000000000..e68e49814245f --- /dev/null +++ b/pandas/tests/scalar/test_na_scalar.py @@ -0,0 +1,131 @@ +import numpy as np +import pytest + +from pandas._libs.missing import NA + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +import pandas.util.testing as tm + + +def test_singleton(): + assert NA is NA + new_NA = type(NA)() + assert new_NA is NA + + +def test_repr(): + assert repr(NA) == "NA" + assert str(NA) == "NA" + + +def test_truthiness(): + with pytest.raises(TypeError): + bool(NA) + + with pytest.raises(TypeError): + not NA + + +def test_hashable(): + assert hash(NA) == hash(NA) + d = {NA: "test"} + assert d[NA] == "test" + + +def test_arithmetic_ops(all_arithmetic_functions): 
+ op = all_arithmetic_functions + + for other in [NA, 1, 1.0, "a", np.int64(1), np.nan]: + if op.__name__ == "rmod" and isinstance(other, str): + continue + if op.__name__ in ("divmod", "rdivmod"): + assert op(NA, other) is (NA, NA) + else: + assert op(NA, other) is NA + + +def test_comparison_ops(): + + for other in [NA, 1, 1.0, "a", np.int64(1), np.nan]: + assert (NA == other) is NA + assert (NA != other) is NA + assert (NA > other) is NA + assert (NA >= other) is NA + assert (NA < other) is NA + assert (NA <= other) is NA + + if isinstance(other, np.int64): + # for numpy scalars we get a deprecation warning and False as result + # for equality or error for larger/lesser than + continue + + assert (other == NA) is NA + assert (other != NA) is NA + assert (other > NA) is NA + assert (other >= NA) is NA + assert (other < NA) is NA + assert (other <= NA) is NA + + +def test_unary_ops(): + assert +NA is NA + assert -NA is NA + assert abs(NA) is NA + assert ~NA is NA + + +def test_logical_and(): + + assert NA & True is NA + assert True & NA is NA + assert NA & False is False + assert False & NA is False + assert NA & NA is NA + + with pytest.raises(TypeError): + NA & 5 + + +def test_logical_or(): + + assert NA | True is True + assert True | NA is True + assert NA | False is NA + assert False | NA is NA + assert NA | NA is NA + + with pytest.raises(TypeError): + NA | 5 + + +def test_logical_xor(): + + assert NA ^ True is NA + assert True ^ NA is NA + assert NA ^ False is NA + assert False ^ NA is NA + assert NA ^ NA is NA + + with pytest.raises(TypeError): + NA ^ 5 + + +def test_logical_not(): + assert ~NA is NA + + +def test_is_scalar(): + assert is_scalar(NA) is True + + +def test_isna(): + assert pd.isna(NA) is True + assert pd.notna(NA) is False + + +def test_series_isna(): + s = pd.Series([1, NA], dtype=object) + expected = pd.Series([False, True]) + tm.assert_series_equal(s.isna(), expected) From 7711a5e049fcfcafac5bfa200c934b0a77119769 Mon Sep 17 00:00:00 2001 
From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Mon, 2 Dec 2019 02:55:05 +0200 Subject: [PATCH 40/49] Added space at the end of the sentence (#29949) --- pandas/core/dtypes/dtypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 9414786424245..89e80b3b42017 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -574,8 +574,8 @@ def update_dtype( "Constructing a CategoricalDtype without specifying " "`ordered` will default to `ordered=False` in a future " "version, which will cause the resulting categorical's " - "`ordered` attribute to change to False; `ordered=True`" - " must be explicitly passed in order to be retained" + "`ordered` attribute to change to False; `ordered=True` " + "must be explicitly passed in order to be retained" ) warnings.warn(msg, FutureWarning, stacklevel=3) From 42b239c504c89517b1ab4c9833cb58556150fbbb Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Mon, 2 Dec 2019 03:24:12 +0200 Subject: [PATCH 41/49] repr() (#29950) --- pandas/core/computation/pytables.py | 7 ++----- pandas/core/dtypes/common.py | 6 ++++-- pandas/core/dtypes/dtypes.py | 17 ++++++++--------- pandas/core/frame.py | 7 +++---- pandas/core/generic.py | 17 +++++++---------- pandas/core/groupby/grouper.py | 2 +- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/interval.py | 8 ++------ pandas/core/internals/concat.py | 4 +--- pandas/core/internals/managers.py | 4 +--- 10 files changed, 30 insertions(+), 44 deletions(-) diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 65e38ff290ce4..8eef37a359a8e 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -52,7 +52,7 @@ def _resolve_name(self): if self.side == "left": # Note: The behavior of __new__ ensures that self.name is a str here if 
self.name not in self.env.queryables: - raise NameError("name {name!r} is not defined".format(name=self.name)) + raise NameError(f"name {repr(self.name)} is not defined") return self.name # resolve the rhs (and allow it to be None) @@ -431,10 +431,7 @@ def visit_Subscript(self, node, **kwargs): try: return self.const_type(value[slobj], self.env) except TypeError: - raise ValueError( - "cannot subscript {value!r} with " - "{slobj!r}".format(value=value, slobj=slobj) - ) + raise ValueError(f"cannot subscript {repr(value)} with {repr(slobj)}") def visit_Attribute(self, node, **kwargs): attr = node.attr diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 43810df18b0aa..602d7d0da95e6 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1861,8 +1861,10 @@ def _validate_date_like_dtype(dtype) -> None: except ValueError as e: raise TypeError("{error}".format(error=e)) if typ != "generic" and typ != "ns": - msg = "{name!r} is too specific of a frequency, try passing {type!r}" - raise ValueError(msg.format(name=dtype.name, type=dtype.type.__name__)) + raise ValueError( + f"{repr(dtype.name)} is too specific of a frequency, " + f"try passing {repr(dtype.type.__name__)}" + ) def pandas_dtype(dtype): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 89e80b3b42017..6dc2b098f1c97 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -319,8 +319,7 @@ def _from_values_or_dtype( if dtype == "category": dtype = CategoricalDtype(categories, ordered) else: - msg = "Unknown dtype {dtype!r}" - raise ValueError(msg.format(dtype=dtype)) + raise ValueError(f"Unknown dtype {repr(dtype)}") elif categories is not None or ordered is not None: raise ValueError( "Cannot specify `categories` or `ordered` together with `dtype`." 
@@ -512,8 +511,9 @@ def validate_categories(categories, fastpath: bool = False): from pandas.core.indexes.base import Index if not fastpath and not is_list_like(categories): - msg = "Parameter 'categories' must be list-like, was {!r}" - raise TypeError(msg.format(categories)) + raise TypeError( + f"Parameter 'categories' must be list-like, was {repr(categories)}" + ) elif not isinstance(categories, ABCIndexClass): categories = Index(categories, tupleize_cols=False) @@ -549,11 +549,10 @@ def update_dtype( # dtype='category' should not change anything return self elif not self.is_dtype(dtype): - msg = ( - "a CategoricalDtype must be passed to perform an update, " - "got {dtype!r}" - ).format(dtype=dtype) - raise ValueError(msg) + raise ValueError( + f"a CategoricalDtype must be passed to perform an update, " + f"got {repr(dtype)}" + ) else: # from here on, dtype is a CategoricalDtype dtype = cast(CategoricalDtype, dtype) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0b690363a2178..fde3d1657b4f2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8123,10 +8123,9 @@ def isin(self, values): else: if not is_list_like(values): raise TypeError( - "only list-like or dict-like objects are " - "allowed to be passed to DataFrame.isin(), " - "you passed a " - "{0!r}".format(type(values).__name__) + f"only list-like or dict-like objects are allowed " + f"to be passed to DataFrame.isin(), " + f"you passed a {repr(type(values).__name__)}" ) return DataFrame( algorithms.isin(self.values.ravel(), values).reshape(self.shape), diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 59fd35666efe6..e19bf9c1c39ea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1864,8 +1864,8 @@ def _drop_labels_or_levels(self, keys, axis=0): def __hash__(self): raise TypeError( - "{0!r} objects are mutable, thus they cannot be" - " hashed".format(type(self).__name__) + f"{repr(type(self).__name__)} objects are mutable, " + f"thus they cannot 
be hashed" ) def __iter__(self): @@ -6567,11 +6567,9 @@ def replace( or is_dict_like(regex) ): raise TypeError( - "'regex' must be a string or a compiled " - "regular expression or a list or dict of " - "strings or regular expressions, you " - "passed a" - " {0!r}".format(type(regex).__name__) + f"'regex' must be a string or a compiled regular expression " + f"or a list or dict of strings or regular expressions, " + f"you passed a {repr(type(regex).__name__)}" ) return self.replace( regex, value, inplace=inplace, limit=limit, regex=True @@ -6597,10 +6595,9 @@ def replace( to_replace=to_replace, value=value, inplace=inplace, regex=regex ) else: - msg = ('Invalid "to_replace" type: ' "{0!r}").format( - type(to_replace).__name__ + raise TypeError( + f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' ) - raise TypeError(msg) # pragma: no cover if inplace: self._update_inplace(new_data) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 9b2f43d8dd484..b0df04f18ff1d 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -206,7 +206,7 @@ def groups(self): def __repr__(self) -> str: attrs_list = ( - f"{attr_name}={getattr(self, attr_name)!r}" + f"{attr_name}={repr(getattr(self, attr_name))}" for attr_name in self._attributes if getattr(self, attr_name) is not None ) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9dcf62d472481..eb3728c1e3bd2 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -475,7 +475,7 @@ def _format_attrs(self): if attrib == "freq": freq = self.freqstr if freq is not None: - freq = f"{freq!r}" + freq = repr(freq) attrs.append(("freq", freq)) return attrs diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 56957b2f879ec..2555caac29a7f 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -83,9 +83,7 @@ def _get_next_label(label): 
elif is_float_dtype(dtype): return np.nextafter(label, np.infty) else: - raise TypeError( - "cannot determine next label for type {typ!r}".format(typ=type(label)) - ) + raise TypeError(f"cannot determine next label for type {repr(type(label))}") def _get_prev_label(label): @@ -99,9 +97,7 @@ def _get_prev_label(label): elif is_float_dtype(dtype): return np.nextafter(label, -np.infty) else: - raise TypeError( - "cannot determine next label for type {typ!r}".format(typ=type(label)) - ) + raise TypeError(f"cannot determine next label for type {repr(type(label))}") def _get_interval_closed_bounds(interval): diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 6c4ab2882d67f..7cb693474c82f 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -121,9 +121,7 @@ def __init__(self, block, shape, indexers=None): self.shape = shape def __repr__(self) -> str: - return "{name}({block!r}, {indexers})".format( - name=type(self).__name__, block=self.block, indexers=self.indexers - ) + return f"{type(self).__name__}({repr(self.block)}, {self.indexers})" @cache_readonly def needs_filling(self): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 9adfe41b68e4f..f312b88d9a0bc 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1905,9 +1905,7 @@ def _compare_or_regex_search(a, b, regex=False): type_names[1] = "ndarray(dtype={dtype})".format(dtype=b.dtype) raise TypeError( - "Cannot compare types {a!r} and {b!r}".format( - a=type_names[0], b=type_names[1] - ) + f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" ) return result From 5fdbaa129713c7ed14265c4616740ba07cacbe0e Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Mon, 2 Dec 2019 03:42:05 +0200 Subject: [PATCH 42/49] repr() (#29948) --- pandas/_libs/interval.pyx | 8 ++++---- pandas/conftest.py | 4 ++-- 
pandas/core/accessor.py | 6 +++--- pandas/core/algorithms.py | 6 ++---- pandas/core/arrays/categorical.py | 6 +++--- pandas/core/arrays/numpy_.py | 2 +- pandas/core/computation/align.py | 8 ++++---- pandas/core/computation/eval.py | 11 +++++------ pandas/core/computation/expr.py | 4 +--- pandas/core/computation/expressions.py | 10 ++++------ pandas/core/computation/ops.py | 19 ++++++++----------- 11 files changed, 37 insertions(+), 47 deletions(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 6bd8693dffebd..a99ddc16ac3af 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -179,8 +179,8 @@ cdef class IntervalMixin: When `other` is not closed exactly the same as self. """ if self.closed != other.closed: - msg = f"'{name}.closed' is '{other.closed}', expected '{self.closed}'." - raise ValueError(msg) + raise ValueError(f"'{name}.closed' is {repr(other.closed)}, " + f"expected {repr(self.closed)}.") cdef _interval_like(other): @@ -316,7 +316,7 @@ cdef class Interval(IntervalMixin): not tz_compare(left.tzinfo, right.tzinfo)): # GH 18538 msg = (f"left and right must have the same time zone, got " - f"'{left.tzinfo}' and '{right.tzinfo}'") + f"{repr(left.tzinfo)} and {repr(right.tzinfo)}") raise ValueError(msg) self.left = left self.right = right @@ -379,7 +379,7 @@ cdef class Interval(IntervalMixin): left, right = self._repr_base() name = type(self).__name__ - repr_str = f'{name}({left!r}, {right!r}, closed={self.closed!r})' + repr_str = f'{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})' return repr_str def __str__(self) -> str: diff --git a/pandas/conftest.py b/pandas/conftest.py index f7d30d537b358..3553a411a27f8 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -88,7 +88,7 @@ def spmatrix(request): return getattr(sparse, request.param + "_matrix") -@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: "axis {!r}".format(x)) +@pytest.fixture(params=[0, 1, "index", "columns"], 
ids=lambda x: f"axis {repr(x)}") def axis(request): """ Fixture for returning the axis numbers of a DataFrame. @@ -99,7 +99,7 @@ def axis(request): axis_frame = axis -@pytest.fixture(params=[0, "index"], ids=lambda x: "axis {!r}".format(x)) +@pytest.fixture(params=[0, "index"], ids=lambda x: f"axis {repr(x)}") def axis_series(request): """ Fixture for returning the axis numbers of a Series. diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 182b07d57ea49..96b7cf8f97c3f 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -183,9 +183,9 @@ def _register_accessor(name, cls): def decorator(accessor): if hasattr(cls, name): warnings.warn( - "registration of accessor {!r} under name {!r} for type " - "{!r} is overriding a preexisting attribute with the same " - "name.".format(accessor, name, cls), + f"registration of accessor {repr(accessor)} under name " + f"{repr(name)} for type {repr(cls)} is overriding a preexisting " + f"attribute with the same name.", UserWarning, stacklevel=2, ) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 18adb12a9ad72..75d2bfc248e23 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1194,10 +1194,8 @@ def compute(self, method): dtype = frame[column].dtype if not self.is_valid_dtype_n_method(dtype): raise TypeError( - ( - "Column {column!r} has dtype {dtype}, cannot use method " - "{method!r} with this dtype" - ).format(column=column, dtype=dtype, method=method) + f"Column {repr(column)} has dtype {dtype}, " + f"cannot use method {repr(method)} with this dtype" ) def get_indexer(current_indexer, other_indexer): diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 46aab31770fde..83f6051b8423f 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1632,7 +1632,7 @@ def sort_values(self, inplace=False, ascending=True, na_position="last"): """ inplace = validate_bool_kwarg(inplace, "inplace") if 
na_position not in ["last", "first"]: - raise ValueError(f"invalid na_position: {na_position!r}") + raise ValueError(f"invalid na_position: {repr(na_position)}") sorted_idx = nargsort(self, ascending=ascending, na_position=na_position) @@ -1769,8 +1769,8 @@ def fillna(self, value=None, method=None, limit=None): else: raise TypeError( - '"value" parameter must be a scalar, dict ' - f'or Series, but you passed a {type(value).__name__!r}"' + f"'value' parameter must be a scalar, dict " + f"or Series, but you passed a {type(value).__name__}" ) return self._constructor(codes, dtype=self.dtype, fastpath=True) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 8ba5cd7565850..604bac4bf9b61 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -45,7 +45,7 @@ def __init__(self, dtype): self._type = dtype.type def __repr__(self) -> str: - return "PandasDtype({!r})".format(self.name) + return f"PandasDtype({repr(self.name)})" @property def numpy_dtype(self): diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 197ddd999fd37..9390eb47d07ee 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -100,10 +100,10 @@ def _align_core(terms): ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) if ordm >= 1 and reindexer_size >= 10000: w = ( - "Alignment difference on axis {axis} is larger " - "than an order of magnitude on term {term!r}, by " - "more than {ordm:.4g}; performance may suffer" - ).format(axis=axis, term=terms[i].name, ordm=ordm) + f"Alignment difference on axis {axis} is larger " + f"than an order of magnitude on term {repr(terms[i].name)}, " + f"by more than {ordm:.4g}; performance may suffer" + ) warnings.warn(w, category=PerformanceWarning, stacklevel=6) f = partial(ti.reindex, reindexer, axis=axis, copy=False) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 598680ca6c2de..6dc110e3f8d07 100644 --- 
a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -47,8 +47,7 @@ def _check_engine(engine): if engine not in _engines: valid = list(_engines.keys()) raise KeyError( - "Invalid engine {engine!r} passed, valid engines are" - " {valid}".format(engine=engine, valid=valid) + f"Invalid engine {repr(engine)} passed, valid engines are {valid}" ) # TODO: validate this in a more general way (thinking of future engines @@ -82,8 +81,8 @@ def _check_parser(parser: str): if parser not in _parsers: raise KeyError( - "Invalid parser {parser!r} passed, valid parsers are" - " {valid}".format(parser=parser, valid=_parsers.keys()) + f"Invalid parser {repr(parser)} passed, " + f"valid parsers are {_parsers.keys()}" ) @@ -93,8 +92,8 @@ def _check_resolvers(resolvers): if not hasattr(resolver, "__getitem__"): name = type(resolver).__name__ raise TypeError( - "Resolver of type {name!r} does not implement " - "the __getitem__ method".format(name=name) + f"Resolver of type {repr(name)} does not " + f"implement the __getitem__ method" ) diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index e608f82b03ade..9330586bbce68 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -294,9 +294,7 @@ def _node_not_implemented(node_name, cls): """ def f(self, *args, **kwargs): - raise NotImplementedError( - "{name!r} nodes are not implemented".format(name=node_name) - ) + raise NotImplementedError(f"{repr(node_name)} nodes are not implemented") return f diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 77999d2c166fd..1a493bc58a227 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -175,17 +175,15 @@ def _bool_arith_check( if _has_bool_dtype(a) and _has_bool_dtype(b): if op_str in unsupported: warnings.warn( - "evaluating in Python space because the {op!r} " - "operator is not supported by numexpr for " - "the bool dtype, 
use {alt_op!r} instead".format( - op=op_str, alt_op=unsupported[op_str] - ) + f"evaluating in Python space because the {repr(op_str)} " + f"operator is not supported by numexpr for " + f"the bool dtype, use {repr(unsupported[op_str])} instead" ) return False if op_str in not_allowed: raise NotImplementedError( - "operator {op!r} not implemented for bool dtypes".format(op=op_str) + f"operator {repr(op_str)} not implemented for bool dtypes" ) return True diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 4852e498537f2..fe02963e4782d 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -56,11 +56,12 @@ class UndefinedVariableError(NameError): """ def __init__(self, name, is_local: bool): + base_msg = f"{repr(name)} is not defined" if is_local: - msg = "local variable {0!r} is not defined" + msg = f"local variable {base_msg}" else: - msg = "name {0!r} is not defined" - super().__init__(msg.format(name)) + msg = f"name {base_msg}" + super().__init__(msg) class Term: @@ -143,10 +144,7 @@ def type(self): @property def raw(self) -> str: - return pprint_thing( - "{0}(name={1!r}, type={2})" - "".format(type(self).__name__, self.name, self.type) - ) + return f"{type(self).__name__}(name={repr(self.name)}, type={self.type})" @property def is_datetime(self) -> bool: @@ -374,8 +372,7 @@ def __init__(self, op: str, lhs, rhs, **kwargs): # has to be made a list for python3 keys = list(_binary_ops_dict.keys()) raise ValueError( - "Invalid binary operator {0!r}, valid" - " operators are {1}".format(op, keys) + f"Invalid binary operator {repr(op)}, valid operators are {keys}" ) def __call__(self, env): @@ -548,8 +545,8 @@ def __init__(self, op: str, operand): self.func = _unary_ops_dict[op] except KeyError: raise ValueError( - "Invalid unary operator {0!r}, valid operators " - "are {1}".format(op, _unary_ops_syms) + f"Invalid unary operator {repr(op)}, " + f"valid operators are {_unary_ops_syms}" ) def __call__(self, env): 
From 8324cbeea23cded3f4445fbe413b9347118e83e2 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Mon, 2 Dec 2019 03:44:55 +0200 Subject: [PATCH 43/49] Typing (#29947) --- pandas/_libs/tslibs/offsets.pyx | 3 ++- pandas/core/arrays/sparse/dtype.py | 2 +- pandas/core/dtypes/base.py | 4 ++-- pandas/core/dtypes/dtypes.py | 6 +++--- pandas/core/indexes/frozen.py | 4 +++- pandas/io/pytables.py | 4 ++-- pandas/io/stata.py | 3 ++- pandas/tseries/offsets.py | 4 ++-- 8 files changed, 17 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index c8985c365741d..41420dbceef9d 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1,6 +1,7 @@ import cython import time +from typing import Any from cpython.datetime cimport (PyDateTime_IMPORT, PyDateTime_Check, PyDelta_Check, @@ -328,7 +329,7 @@ class _BaseOffset: def __setattr__(self, name, value): raise AttributeError("DateOffset objects are immutable.") - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: if isinstance(other, str): try: # GH#23524 if to_offset fails, we are dealing with an diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 0124304727ab3..4fb64ec9255e1 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -90,7 +90,7 @@ def __hash__(self): # __eq__, so we explicitly do it here. return super().__hash__() - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: # We have to override __eq__ to handle NA values in _metadata. # The base class does simple == checks, which fail for NA. 
if isinstance(other, str): diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 8acdf32c8768e..063014cbe970d 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -1,5 +1,5 @@ """Extend pandas with custom array types""" -from typing import List, Optional, Tuple, Type +from typing import Any, List, Optional, Tuple, Type import numpy as np @@ -86,7 +86,7 @@ def __from_arrow__( def __str__(self) -> str: return self.name - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: """ Check whether 'other' is equal to self. diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 6dc2b098f1c97..9e16ba670344e 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -764,7 +764,7 @@ def __hash__(self) -> int: # TODO: update this. return hash(str(self)) - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: if isinstance(other, str): return other == self.name @@ -903,7 +903,7 @@ def __hash__(self) -> int: # make myself hashable return hash(str(self)) - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: if isinstance(other, str): return other == self.name or other == self.name.title() @@ -1076,7 +1076,7 @@ def __hash__(self) -> int: # make myself hashable return hash(str(self)) - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: if isinstance(other, str): return other.lower() in (self.name.lower(), str(self).lower()) elif not isinstance(other, IntervalDtype): diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 2ea83ba889fd2..27f88933f9998 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -7,6 +7,8 @@ """ +from typing import Any + from pandas.core.base import PandasObject from pandas.io.formats.printing import pprint_thing @@ -71,7 +73,7 @@ def __radd__(self, other): other = list(other) return type(self)(other + list(self)) - def __eq__(self, other) 
-> bool: + def __eq__(self, other: Any) -> bool: if isinstance(other, (tuple, FrozenList)): other = list(other) return super().__eq__(other) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 6ef821fc52d46..c1ea1bd78baef 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1787,7 +1787,7 @@ def __repr__(self) -> str: ) ) - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: """ compare 2 col items """ return all( getattr(self, a, None) == getattr(other, a, None) @@ -2113,7 +2113,7 @@ def __repr__(self) -> str: ) ) - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: """ compare 2 col items """ return all( getattr(self, a, None) == getattr(other, a, None) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index b592b560bb5a0..eaecc0627e693 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -15,6 +15,7 @@ import os import struct import sys +from typing import Any import warnings from dateutil.relativedelta import relativedelta @@ -857,7 +858,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"{type(self)}({self})" - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: return ( isinstance(other, type(self)) and self.string == other.string diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 96a9ad1e4d5f2..75698f7354bf9 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,7 +1,7 @@ from datetime import date, datetime, timedelta import functools import operator -from typing import Optional +from typing import Any, Optional from dateutil.easter import easter import numpy as np @@ -2551,7 +2551,7 @@ def __add__(self, other): f"the add operation between {self} and {other} will overflow" ) - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: if isinstance(other, str): from pandas.tseries.frequencies import to_offset From 9b02d54138bc5775c9da9679eb15ce07805e472e Mon Sep 17 00:00:00 2001 From: 
jbrockmendel Date: Sun, 1 Dec 2019 17:45:36 -0800 Subject: [PATCH 44/49] CLN: fix pytables passing too many kwargs (#29951) --- pandas/io/pytables.py | 56 +++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c1ea1bd78baef..39e9d467b652f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1431,7 +1431,12 @@ def _validate_format(self, format: str, kwargs: Dict[str, Any]) -> Dict[str, Any return kwargs def _create_storer( - self, group, format=None, value=None, **kwargs + self, + group, + format=None, + value=None, + encoding: str = "UTF-8", + errors: str = "strict", ) -> Union["GenericFixed", "Table"]: """ return a suitable class to operate """ @@ -1439,8 +1444,7 @@ def error(t): # return instead of raising so mypy can tell where we are raising return TypeError( f"cannot properly create the storer for: [{t}] [group->" - f"{group},value->{type(value)},format->{format}," - f"kwargs->{kwargs}]" + f"{group},value->{type(value)},format->{format}" ) pt = _ensure_decoded(getattr(group._v_attrs, "pandas_type", None)) @@ -1476,7 +1480,9 @@ def error(t): # a storer node if "table" not in pt: try: - return globals()[_STORER_MAP[pt]](self, group, **kwargs) + return globals()[_STORER_MAP[pt]]( + self, group, encoding=encoding, errors=errors + ) except KeyError: raise error("_STORER_MAP") @@ -1517,7 +1523,9 @@ def error(t): pass try: - return globals()[_TABLE_MAP[tt]](self, group, **kwargs) + return globals()[_TABLE_MAP[tt]]( + self, group, encoding=encoding, errors=errors + ) except KeyError: raise error("_TABLE_MAP") @@ -1526,11 +1534,20 @@ def _write_to_group( key: str, value, format, + axes=None, index=True, append=False, complib=None, + complevel: Optional[int] = None, + fletcher32=None, + min_itemsize=None, + chunksize=None, + expectedrows=None, + dropna=False, + nan_rep=None, + data_columns=None, encoding=None, - **kwargs, + errors: str = "strict", ): 
group = self.get_node(key) @@ -1565,7 +1582,7 @@ def _write_to_group( group = self._handle.create_group(path, p) path = new_path - s = self._create_storer(group, format, value, encoding=encoding, **kwargs) + s = self._create_storer(group, format, value, encoding=encoding, errors=errors) if append: # raise if we are trying to append to a Fixed format, # or a table that exists (and we are putting) @@ -1580,7 +1597,20 @@ def _write_to_group( raise ValueError("Compression not supported on Fixed format stores") # write the object - s.write(obj=value, append=append, complib=complib, **kwargs) + s.write( + obj=value, + axes=axes, + append=append, + complib=complib, + complevel=complevel, + fletcher32=fletcher32, + min_itemsize=min_itemsize, + chunksize=chunksize, + expectedrows=expectedrows, + dropna=dropna, + nan_rep=nan_rep, + data_columns=data_columns, + ) if isinstance(s, Table) and index: s.create_index(columns=index) @@ -2524,10 +2554,11 @@ class Fixed: ndim: int parent: HDFStore group: "Node" + errors: str is_table = False def __init__( - self, parent: HDFStore, group: "Node", encoding=None, errors="strict", **kwargs + self, parent: HDFStore, group: "Node", encoding=None, errors: str = "strict" ): assert isinstance(parent, HDFStore), type(parent) assert _table_mod is not None # needed for mypy @@ -3199,8 +3230,10 @@ class Table(Fixed): metadata: List info: Dict - def __init__(self, parent: HDFStore, group: "Node", **kwargs): - super().__init__(parent, group, **kwargs) + def __init__( + self, parent: HDFStore, group: "Node", encoding=None, errors: str = "strict" + ): + super().__init__(parent, group, encoding=encoding, errors=errors) self.index_axes = [] self.non_index_axes = [] self.values_axes = [] @@ -4076,7 +4109,6 @@ def write( dropna=False, nan_rep=None, data_columns=None, - errors="strict", # not used here, but passed to super ): if not append and self.is_exists: From 7e791e45304fed6ac1bec2942b3b211816a7f3bd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: 
Sun, 1 Dec 2019 20:26:17 -0800 Subject: [PATCH 45/49] DEPR: DTI/TDI/PI constructor arguments (#29930) --- doc/source/whatsnew/v0.15.0.rst | 5 +- doc/source/whatsnew/v1.0.0.rst | 2 + pandas/core/indexes/datetimes.py | 37 +--------- pandas/core/indexes/period.py | 27 +------ pandas/core/indexes/timedeltas.py | 28 -------- .../indexes/datetimes/test_construction.py | 14 ---- .../tests/indexes/period/test_construction.py | 71 ++----------------- pandas/tests/indexes/period/test_period.py | 5 -- .../indexes/timedeltas/test_construction.py | 14 ---- 9 files changed, 11 insertions(+), 192 deletions(-) diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst index c27ada6ef3b58..b328e549e8899 100644 --- a/doc/source/whatsnew/v0.15.0.rst +++ b/doc/source/whatsnew/v0.15.0.rst @@ -312,14 +312,13 @@ Timezone handling improvements previously this resulted in ``Exception`` or ``TypeError`` (:issue:`7812`) .. ipython:: python - :okwarning: ts = pd.Timestamp('2014-08-01 09:00', tz='US/Eastern') ts ts.tz_localize(None) - didx = pd.DatetimeIndex(start='2014-08-01 09:00', freq='H', - periods=10, tz='US/Eastern') + didx = pd.date_range(start='2014-08-01 09:00', freq='H', + periods=10, tz='US/Eastern') didx didx.tz_localize(None) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 87635314d2cd0..9847324147618 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -465,6 +465,8 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- :meth:`pandas.Series.str.cat` now defaults to aligning ``others``, using ``join='left'`` (:issue:`27611`) - :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`) - :meth:`Series.where` with ``Categorical`` dtype (or :meth:`DataFrame.where` with ``Categorical`` column) no longer allows setting new categories (:issue:`24114`) +- :class:`DatetimeIndex`, :class:`TimedeltaIndex`, and :class:`PeriodIndex` constructors no longer allow ``start``, ``end``, and ``periods`` keywords, use :func:`date_range`, :func:`timedelta_range`, and :func:`period_range` instead (:issue:`23919`) +- :class:`DatetimeIndex` and :class:`TimedeltaIndex` constructors no longer have a ``verify_integrity`` keyword argument (:issue:`23919`) - :func:`core.internals.blocks.make_block` no longer accepts the "fastpath" keyword(:issue:`19265`) - :meth:`Block.make_block_same_class` no longer accepts the "dtype" keyword(:issue:`19434`) - Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. 
(:issue:`23601`) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 0d368845ea4f2..cef8155f0bfa3 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -54,11 +54,10 @@ def _new_DatetimeIndex(cls, d): result = cls._simple_new(data, **d) else: with warnings.catch_warnings(): - # we ignore warnings from passing verify_integrity=False # TODO: If we knew what was going in to **d, we might be able to # go through _simple_new instead warnings.simplefilter("ignore") - result = cls.__new__(cls, verify_integrity=False, **d) + result = cls.__new__(cls, **d) return result @@ -263,9 +262,6 @@ def __new__( cls, data=None, freq=None, - start=None, - end=None, - periods=None, tz=None, normalize=False, closed=None, @@ -275,39 +271,8 @@ def __new__( dtype=None, copy=False, name=None, - verify_integrity=None, ): - if verify_integrity is not None: - warnings.warn( - "The 'verify_integrity' argument is deprecated, " - "will be removed in a future version.", - FutureWarning, - stacklevel=2, - ) - else: - verify_integrity = True - - if data is None: - dtarr = DatetimeArray._generate_range( - start, - end, - periods, - freq=freq, - tz=tz, - normalize=normalize, - closed=closed, - ambiguous=ambiguous, - ) - warnings.warn( - "Creating a DatetimeIndex by passing range " - "endpoints is deprecated. 
Use " - "`pandas.date_range` instead.", - FutureWarning, - stacklevel=2, - ) - return cls._simple_new(dtarr._data, freq=dtarr.freq, tz=dtarr.tz, name=name) - if is_scalar(data): raise TypeError( "{cls}() must be called with a " diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b3476dcb12abd..d63de10d92921 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -187,9 +187,6 @@ def __new__( data=None, ordinal=None, freq=None, - start=None, - end=None, - periods=None, tz=None, dtype=None, copy=False, @@ -219,29 +216,9 @@ def __new__( if data is None and ordinal is None: # range-based. - data, freq2 = PeriodArray._generate_range(start, end, periods, freq, fields) - # PeriodArray._generate range does validate that fields is + data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields) + # PeriodArray._generate range does validation that fields is # empty when really using the range-based constructor. - if not fields: - msg = ( - "Creating a PeriodIndex by passing range " - "endpoints is deprecated. Use " - "`pandas.period_range` instead." - ) - # period_range differs from PeriodIndex for cases like - # start="2000", periods=4 - # PeriodIndex interprets that as A-DEC freq. - # period_range interprets it as 'D' freq. - cond = freq is None and ( - (start and not isinstance(start, Period)) - or (end and not isinstance(end, Period)) - ) - if cond: - msg += ( - " Note that the default `freq` may differ. Pass " - "'freq=\"{}\"' to ensure the same output." 
- ).format(freq2.freqstr) - warnings.warn(msg, FutureWarning, stacklevel=2) freq = freq2 data = PeriodArray(data, freq=freq) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 7a7720f730312..57cb170dc0ae8 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -186,40 +186,12 @@ def __new__( data=None, unit=None, freq=None, - start=None, - end=None, - periods=None, closed=None, dtype=_TD_DTYPE, copy=False, name=None, - verify_integrity=None, ): - if verify_integrity is not None: - warnings.warn( - "The 'verify_integrity' argument is deprecated, " - "will be removed in a future version.", - FutureWarning, - stacklevel=2, - ) - else: - verify_integrity = True - - if data is None: - freq, freq_infer = dtl.maybe_infer_freq(freq) - warnings.warn( - "Creating a TimedeltaIndex by passing range " - "endpoints is deprecated. Use " - "`pandas.timedelta_range` instead.", - FutureWarning, - stacklevel=2, - ) - result = TimedeltaArray._generate_range( - start, end, periods, freq, closed=closed - ) - return cls._simple_new(result._data, freq=freq, name=name) - if is_scalar(data): raise TypeError( "{cls}() must be called with a " diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 4ef1cbd5af958..88290bf31ba9d 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -485,16 +485,6 @@ def test_construction_with_ndarray(self): expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B") tm.assert_index_equal(result, expected) - def test_verify_integrity_deprecated(self): - # GH#23919 - with tm.assert_produces_warning(FutureWarning): - DatetimeIndex(["1/1/2000"], verify_integrity=False) - - def test_range_kwargs_deprecated(self): - # GH#23919 - with tm.assert_produces_warning(FutureWarning): - DatetimeIndex(start="1/1/2000", end="1/10/2000", freq="D") - 
def test_integer_values_and_tz_deprecated(self): # GH-24559 values = np.array([946684800000000000]) @@ -517,10 +507,6 @@ def test_constructor_coverage(self): with pytest.raises(TypeError, match=msg): date_range(start="1/1/2000", periods="foo", freq="D") - with pytest.raises(ValueError): - with tm.assert_produces_warning(FutureWarning): - DatetimeIndex(start="1/1/2000", end="1/10/2000") - with pytest.raises(TypeError): DatetimeIndex("1/1/2000") diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 1973cb7f4740d..2adce0b7f8b44 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -35,10 +35,7 @@ def test_construction_base_constructor(self): def test_constructor_use_start_freq(self): # GH #1118 p = Period("4/2/2012", freq="B") - with tm.assert_produces_warning(FutureWarning): - index = PeriodIndex(start=p, periods=10) expected = period_range(start="4/2/2012", periods=10, freq="B") - tm.assert_index_equal(index, expected) index = period_range(start=p, periods=10) tm.assert_index_equal(index, expected) @@ -68,12 +65,6 @@ def test_constructor_field_arrays(self): with pytest.raises(ValueError, match=msg): PeriodIndex(year=years, month=months, freq="2M") - msg = "Can either instantiate from fields or endpoints, but not both" - with pytest.raises(ValueError, match=msg): - PeriodIndex( - year=years, month=months, freq="M", start=Period("2007-01", freq="M") - ) - years = [2007, 2007, 2007] months = [1, 2, 3] idx = PeriodIndex(year=years, month=months, freq="M") @@ -115,26 +106,6 @@ def test_constructor_invalid_quarters(self): PeriodIndex(year=range(2000, 2004), quarter=list(range(4)), freq="Q-DEC") def test_constructor_corner(self): - msg = "Not enough parameters to construct Period range" - with pytest.raises(ValueError, match=msg): - PeriodIndex(periods=10, freq="A") - - start = Period("2007", freq="A-JUN") - end = Period("2010", freq="A-DEC") - 
- msg = "start and end must have same freq" - with pytest.raises(ValueError, match=msg): - PeriodIndex(start=start, end=end) - - msg = ( - "Of the three parameters: start, end, and periods, exactly two" - " must be specified" - ) - with pytest.raises(ValueError, match=msg): - PeriodIndex(start=start) - with pytest.raises(ValueError, match=msg): - PeriodIndex(end=end) - result = period_range("2007-01", periods=10.5, freq="M") exp = period_range("2007-01", periods=10, freq="M") tm.assert_index_equal(result, exp) @@ -368,27 +339,20 @@ def test_constructor_year_and_quarter(self): p = PeriodIndex(lops) tm.assert_index_equal(p, idx) - @pytest.mark.parametrize( - "func, warning", [(PeriodIndex, FutureWarning), (period_range, None)] - ) - def test_constructor_freq_mult(self, func, warning): + def test_constructor_freq_mult(self): # GH #7811 - with tm.assert_produces_warning(warning): - # must be the same, but for sure... - pidx = func(start="2014-01", freq="2M", periods=4) + pidx = period_range(start="2014-01", freq="2M", periods=4) expected = PeriodIndex(["2014-01", "2014-03", "2014-05", "2014-07"], freq="2M") tm.assert_index_equal(pidx, expected) - with tm.assert_produces_warning(warning): - pidx = func(start="2014-01-02", end="2014-01-15", freq="3D") + pidx = period_range(start="2014-01-02", end="2014-01-15", freq="3D") expected = PeriodIndex( ["2014-01-02", "2014-01-05", "2014-01-08", "2014-01-11", "2014-01-14"], freq="3D", ) tm.assert_index_equal(pidx, expected) - with tm.assert_produces_warning(warning): - pidx = func(end="2014-01-01 17:00", freq="4H", periods=3) + pidx = period_range(end="2014-01-01 17:00", freq="4H", periods=3) expected = PeriodIndex( ["2014-01-01 09:00", "2014-01-01 13:00", "2014-01-01 17:00"], freq="4H" ) @@ -425,18 +389,6 @@ def test_constructor_freq_combined(self): expected = PeriodIndex(["2016-01-01 00:00", "2016-01-02 01:00"], freq="25H") tm.assert_index_equal(pidx, expected) - def test_constructor_range_based_deprecated(self): - with 
tm.assert_produces_warning(FutureWarning): - pi = PeriodIndex(freq="A", start="1/1/2001", end="12/1/2009") - assert len(pi) == 9 - - def test_constructor_range_based_deprecated_different_freq(self): - with tm.assert_produces_warning(FutureWarning) as m: - PeriodIndex(start="2000", periods=2) - - (warning,) = m - assert 'freq="A-DEC"' in str(warning.message) - def test_constructor(self): pi = period_range(freq="A", start="1/1/2001", end="12/1/2009") assert len(pi) == 9 @@ -507,21 +459,6 @@ def test_constructor(self): with pytest.raises(IncompatibleFrequency, match=msg): PeriodIndex(vals) - def test_constructor_error(self): - start = Period("02-Apr-2005", "B") - end_intv = Period("2006-12-31", ("w", 1)) - - msg = "start and end must have same freq" - with pytest.raises(ValueError, match=msg): - PeriodIndex(start=start, end=end_intv) - - msg = ( - "Of the three parameters: start, end, and periods, " - "exactly two must be specified" - ) - with pytest.raises(ValueError, match=msg): - PeriodIndex(start=start) - @pytest.mark.parametrize( "freq", ["M", "Q", "A", "D", "B", "T", "S", "L", "U", "N", "H"] ) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index a07a87080804f..14bf6490a706b 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -529,15 +529,10 @@ def test_pindex_qaccess(self): assert s["05Q4"] == s[2] def test_pindex_multiples(self): - with tm.assert_produces_warning(FutureWarning): - pi = PeriodIndex(start="1/1/11", end="12/31/11", freq="2M") expected = PeriodIndex( ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"], freq="2M", ) - tm.assert_index_equal(pi, expected) - assert pi.freq == offsets.MonthEnd(2) - assert pi.freqstr == "2M" pi = period_range(start="1/1/11", end="12/31/11", freq="2M") tm.assert_index_equal(pi, expected) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py 
b/pandas/tests/indexes/timedeltas/test_construction.py index 3cf86bea1d6de..c8feb9e2a853a 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -10,16 +10,6 @@ class TestTimedeltaIndex: - def test_verify_integrity_deprecated(self): - # GH#23919 - with tm.assert_produces_warning(FutureWarning): - TimedeltaIndex(["1 Day"], verify_integrity=False) - - def test_range_kwargs_deprecated(self): - # GH#23919 - with tm.assert_produces_warning(FutureWarning): - TimedeltaIndex(start="1 Day", end="3 Days", freq="D") - def test_int64_nocopy(self): # GH#23539 check that a copy isn't made when we pass int64 data # and copy=False @@ -166,10 +156,6 @@ def test_constructor_coverage(self): with pytest.raises(TypeError, match=msg): timedelta_range(start="1 days", periods="foo", freq="D") - with pytest.raises(ValueError): - with tm.assert_produces_warning(FutureWarning): - TimedeltaIndex(start="1 days", end="10 days") - with pytest.raises(TypeError): TimedeltaIndex("1 days") From 37526c1ddbe0952dffe16ac0fe158a00aa7b397f Mon Sep 17 00:00:00 2001 From: Mak Sze Chun Date: Mon, 2 Dec 2019 18:37:11 +0800 Subject: [PATCH 46/49] API/DEPR: Change default skipna behaviour + deprecate numeric_only in Categorical.min and max (#27929) --- doc/source/whatsnew/v1.0.0.rst | 22 +++++++++ pandas/core/arrays/categorical.py | 38 +++++++-------- pandas/core/series.py | 4 +- .../arrays/categorical/test_analytics.py | 46 ++++++++++++------- pandas/tests/reductions/test_reductions.py | 32 ++++++------- 5 files changed, 85 insertions(+), 57 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9847324147618..b45bec37e84eb 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -304,6 +304,26 @@ The following methods now also correctly output values for unobserved categories df.groupby(["cat_1", "cat_2"], observed=False)["value"].count() +By default 
:meth:`Categorical.min` now returns the minimum instead of np.nan +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When :class:`Categorical` contains ``np.nan``, +:meth:`Categorical.min` no longer returns ``np.nan`` by default (skipna=True) (:issue:`25303`) + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: pd.Categorical([1, 2, np.nan], ordered=True).min() + Out[1]: nan + + +*pandas 1.0.0* + +.. ipython:: python + + pd.Categorical([1, 2, np.nan], ordered=True).min() + .. _whatsnew_1000.api_breaking.deps: Increased minimum versions for dependencies @@ -410,6 +430,8 @@ Deprecations - :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) - :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`) - :meth:`Categorical.take_nd` is deprecated, use :meth:`Categorical.take` instead (:issue:`27745`) +- The parameter ``numeric_only`` of :meth:`Categorical.min` and :meth:`Categorical.max` is deprecated and replaced with ``skipna`` (:issue:`25303`) +- .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 83f6051b8423f..f4a20b808292a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2123,7 +2123,8 @@ def _reduce(self, name, axis=0, **kwargs): raise TypeError(f"Categorical cannot perform the operation {name}") return func(**kwargs) - def min(self, numeric_only=None, **kwargs): + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def min(self, skipna=True): """ The minimum value of the object. 
@@ -2139,17 +2140,18 @@ def min(self, numeric_only=None, **kwargs): min : the minimum of this `Categorical` """ self.check_for_ordered("min") - if numeric_only: - good = self._codes != -1 - pointer = self._codes[good].min(**kwargs) - else: - pointer = self._codes.min(**kwargs) - if pointer == -1: - return np.nan + good = self._codes != -1 + if not good.all(): + if skipna: + pointer = self._codes[good].min() + else: + return np.nan else: - return self.categories[pointer] + pointer = self._codes.min() + return self.categories[pointer] - def max(self, numeric_only=None, **kwargs): + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def max(self, skipna=True): """ The maximum value of the object. @@ -2165,15 +2167,15 @@ def max(self, numeric_only=None, **kwargs): max : the maximum of this `Categorical` """ self.check_for_ordered("max") - if numeric_only: - good = self._codes != -1 - pointer = self._codes[good].max(**kwargs) - else: - pointer = self._codes.max(**kwargs) - if pointer == -1: - return np.nan + good = self._codes != -1 + if not good.all(): + if skipna: + pointer = self._codes[good].max() + else: + return np.nan else: - return self.categories[pointer] + pointer = self._codes.max() + return self.categories[pointer] def mode(self, dropna=True): """ diff --git a/pandas/core/series.py b/pandas/core/series.py index a8232f137f3ef..11e87a4eed27f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3820,9 +3820,7 @@ def _reduce( self._get_axis_number(axis) if isinstance(delegate, Categorical): - # TODO deprecate numeric_only argument for Categorical and use - # skipna as well, see GH25303 - return delegate._reduce(name, numeric_only=numeric_only, **kwds) + return delegate._reduce(name, skipna=skipna, **kwds) elif isinstance(delegate, ExtensionArray): # dispatch to ExtensionArray interface return delegate._reduce(name, skipna=skipna, **kwds) diff --git a/pandas/tests/arrays/categorical/test_analytics.py 
b/pandas/tests/arrays/categorical/test_analytics.py index 279f1492d7dad..637a47eba0597 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -35,31 +35,43 @@ def test_min_max(self): assert _min == "d" assert _max == "a" + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_with_nan(self, skipna): + # GH 25303 cat = Categorical( [np.nan, "b", "c", np.nan], categories=["d", "c", "b", "a"], ordered=True ) - _min = cat.min() - _max = cat.max() - assert np.isnan(_min) - assert _max == "b" + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) - _min = cat.min(numeric_only=True) - assert _min == "c" - _max = cat.max(numeric_only=True) - assert _max == "b" + if skipna is False: + assert np.isnan(_min) + assert np.isnan(_max) + else: + assert _min == "c" + assert _max == "b" cat = Categorical( [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True ) - _min = cat.min() - _max = cat.max() - assert np.isnan(_min) - assert _max == 1 - - _min = cat.min(numeric_only=True) - assert _min == 2 - _max = cat.max(numeric_only=True) - assert _max == 1 + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) + + if skipna is False: + assert np.isnan(_min) + assert np.isnan(_max) + else: + assert _min == 2 + assert _max == 1 + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_deprecate_numeric_only_min_max(self, method): + # GH 25303 + cat = Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + with tm.assert_produces_warning(expected_warning=FutureWarning): + getattr(cat, method)(numeric_only=True) @pytest.mark.parametrize( "values,categories,exp_mode", diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index b0ef0c58ca65a..80d148c919ab2 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1043,7 +1043,7 @@ def 
test_min_max(self): ) _min = cat.min() _max = cat.max() - assert np.isnan(_min) + assert _min == "c" assert _max == "b" cat = Series( @@ -1053,30 +1053,24 @@ def test_min_max(self): ) _min = cat.min() _max = cat.max() - assert np.isnan(_min) + assert _min == 2 assert _max == 1 - def test_min_max_numeric_only(self): - # TODO deprecate numeric_only argument for Categorical and use - # skipna as well, see GH25303 + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_skipna(self, skipna): + # GH 25303 cat = Series( Categorical(["a", "b", np.nan, "a"], categories=["b", "a"], ordered=True) ) + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) - _min = cat.min() - _max = cat.max() - assert np.isnan(_min) - assert _max == "a" - - _min = cat.min(numeric_only=True) - _max = cat.max(numeric_only=True) - assert _min == "b" - assert _max == "a" - - _min = cat.min(numeric_only=False) - _max = cat.max(numeric_only=False) - assert np.isnan(_min) - assert _max == "a" + if skipna is True: + assert _min == "b" + assert _max == "a" + else: + assert np.isnan(_min) + assert np.isnan(_max) class TestSeriesMode: From 28f4a8ae25deba681bff8a1f9ec049ae2b2eca9a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Dec 2019 04:34:18 -0800 Subject: [PATCH 47/49] CLN: small things in pytables (#29958) --- pandas/io/pytables.py | 49 +++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 39e9d467b652f..5a42df92ddf84 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -312,7 +312,7 @@ def read_hdf(path_or_buf, key=None, mode: str = "r", **kwargs): key : object, optional The group identifier in the store. Can be omitted if the HDF file contains a single pandas object. - mode : {'r', 'r+', 'a'}, optional + mode : {'r', 'r+', 'a'}, default 'r' Mode to use when opening the file. Ignored if path_or_buf is a :class:`pandas.HDFStore`. Default is 'r'. 
where : list, optional @@ -417,7 +417,7 @@ def read_hdf(path_or_buf, key=None, mode: str = "r", **kwargs): raise -def _is_metadata_of(group, parent_group) -> bool: +def _is_metadata_of(group: "Node", parent_group: "Node") -> bool: """Check if a given group is a metadata group for a given parent_group.""" if group._v_depth <= parent_group._v_depth: return False @@ -932,9 +932,7 @@ def func(_start, _stop, _where): # retrieve the objs, _where is always passed as a set of # coordinates here objs = [ - t.read( - where=_where, columns=columns, start=_start, stop=_stop, **kwargs - ) + t.read(where=_where, columns=columns, start=_start, stop=_stop) for t in tbls ] @@ -957,7 +955,7 @@ def func(_start, _stop, _where): return it.get_result(coordinates=True) - def put(self, key: str, value, format=None, append=False, **kwargs): + def put(self, key: str, value: FrameOrSeries, format=None, append=False, **kwargs): """ Store object in HDFStore. @@ -986,8 +984,8 @@ def put(self, key: str, value, format=None, append=False, **kwargs): """ if format is None: format = get_option("io.hdf.default_format") or "fixed" - kwargs = self._validate_format(format, kwargs) - self._write_to_group(key, value, append=append, **kwargs) + format = self._validate_format(format) + self._write_to_group(key, value, format=format, append=append, **kwargs) def remove(self, key: str, where=None, start=None, stop=None): """ @@ -1046,7 +1044,7 @@ def remove(self, key: str, where=None, start=None, stop=None): def append( self, key: str, - value, + value: FrameOrSeries, format=None, append=True, columns=None, @@ -1096,8 +1094,10 @@ def append( dropna = get_option("io.hdf.dropna_table") if format is None: format = get_option("io.hdf.default_format") or "table" - kwargs = self._validate_format(format, kwargs) - self._write_to_group(key, value, append=append, dropna=dropna, **kwargs) + format = self._validate_format(format) + self._write_to_group( + key, value, format=format, append=append, dropna=dropna, **kwargs 
+ ) def append_to_multiple( self, @@ -1418,17 +1418,16 @@ def _check_if_open(self): if not self.is_open: raise ClosedFileError(f"{self._path} file is not open!") - def _validate_format(self, format: str, kwargs: Dict[str, Any]) -> Dict[str, Any]: - """ validate / deprecate formats; return the new kwargs """ - kwargs = kwargs.copy() + def _validate_format(self, format: str) -> str: + """ validate / deprecate formats """ # validate try: - kwargs["format"] = _FORMAT_MAP[format.lower()] + format = _FORMAT_MAP[format.lower()] except KeyError: raise TypeError(f"invalid HDFStore format specified [{format}]") - return kwargs + return format def _create_storer( self, @@ -1532,7 +1531,7 @@ def error(t): def _write_to_group( self, key: str, - value, + value: FrameOrSeries, format, axes=None, index=True, @@ -1615,10 +1614,10 @@ def _write_to_group( if isinstance(s, Table) and index: s.create_index(columns=index) - def _read_group(self, group: "Node", **kwargs): + def _read_group(self, group: "Node"): s = self._create_storer(group) s.infer_axes() - return s.read(**kwargs) + return s.read() class TableIterator: @@ -2752,28 +2751,22 @@ def f(values, freq=None, tz=None): return klass - def validate_read(self, kwargs: Dict[str, Any]) -> Dict[str, Any]: + def validate_read(self, columns, where): """ - remove table keywords from kwargs and return raise if any keywords are passed which are not-None """ - kwargs = copy.copy(kwargs) - - columns = kwargs.pop("columns", None) if columns is not None: raise TypeError( "cannot pass a column specification when reading " "a Fixed format store. this store must be " "selected in its entirety" ) - where = kwargs.pop("where", None) if where is not None: raise TypeError( "cannot pass a where specification when reading " "from a Fixed format store. 
this store must be " "selected in its entirety" ) - return kwargs @property def is_exists(self) -> bool: @@ -3085,7 +3078,7 @@ def read( start: Optional[int] = None, stop: Optional[int] = None, ): - self.validate_read({"where": where, "columns": columns}) + self.validate_read(columns, where) index = self.read_index("index", start=start, stop=stop) values = self.read_array("values", start=start, stop=stop) return Series(values, index=index, name=self.name) @@ -3142,7 +3135,7 @@ def read( stop: Optional[int] = None, ): # start, stop applied to rows, so 0th axis only - self.validate_read({"columns": columns, "where": where}) + self.validate_read(columns, where) select_axis = self.obj_type()._get_block_manager_axis(0) axes = [] From a97abc50cbd50bbacb0f4af6541cb3d5ece35eda Mon Sep 17 00:00:00 2001 From: Koushik <42416901+koushikgk@users.noreply.github.com> Date: Mon, 2 Dec 2019 05:35:15 -0700 Subject: [PATCH 48/49] DOC : Typo fix in userguide/Styling (#29956) --- doc/source/user_guide/style.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index 5e026e3a7d78f..633827eb79f46 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -677,7 +677,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notice that you're able share the styles even though they're data aware. The styles are re-evaluated on the new DataFrame they've been `use`d upon." + "Notice that you're able to share the styles even though they're data aware. The styles are re-evaluated on the new DataFrame they've been `use`d upon." 
] }, { From 0c2b1db198ce628ca889ad25c1179fff4ab3337c Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Mon, 2 Dec 2019 14:45:02 +0200 Subject: [PATCH 49/49] repr() (#29959) --- pandas/io/html.py | 26 +++++------- pandas/io/parsers.py | 41 +++++++++---------- pandas/plotting/_core.py | 2 +- pandas/tests/computation/test_eval.py | 8 ++-- pandas/tests/frame/test_alter_axes.py | 4 +- pandas/tests/frame/test_query_eval.py | 2 +- .../indexes/timedeltas/test_timedelta.py | 2 +- pandas/tests/io/parser/test_unsupported.py | 6 +-- pandas/tests/io/test_html.py | 4 +- pandas/tests/test_strings.py | 6 +-- 10 files changed, 45 insertions(+), 56 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 5f38f866e1643..b8cb6679a9562 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -560,9 +560,7 @@ def _parse_tables(self, doc, match, attrs): unique_tables.add(table) if not result: - raise ValueError( - "No tables found matching pattern {patt!r}".format(patt=match.pattern) - ) + raise ValueError(f"No tables found matching pattern {repr(match.pattern)}") return result def _text_getter(self, obj): @@ -618,7 +616,7 @@ def _build_xpath_expr(attrs) -> str: if "class_" in attrs: attrs["class"] = attrs.pop("class_") - s = ["@{key}={val!r}".format(key=k, val=v) for k, v in attrs.items()] + s = [f"@{k}={repr(v)}" for k, v in attrs.items()] return "[{expr}]".format(expr=" and ".join(s)) @@ -661,8 +659,7 @@ def _parse_tables(self, doc, match, kwargs): # 1. check all descendants for the given pattern and only search tables # 2. 
go up the tree until we find a table - query = "//table//*[re:test(text(), {patt!r})]/ancestor::table" - xpath_expr = query.format(patt=pattern) + xpath_expr = f"//table//*[re:test(text(), {repr(pattern)})]/ancestor::table" # if any table attributes were given build an xpath expression to # search for them @@ -682,9 +679,7 @@ def _parse_tables(self, doc, match, kwargs): elem.getparent().remove(elem) if not tables: - raise ValueError( - "No tables found matching regex {patt!r}".format(patt=pattern) - ) + raise ValueError(f"No tables found matching regex {repr(pattern)}") return tables def _equals_tag(self, obj, tag): @@ -833,8 +828,7 @@ def _parser_dispatch(flavor): valid_parsers = list(_valid_parsers.keys()) if flavor not in valid_parsers: raise ValueError( - "{invalid!r} is not a valid flavor, valid flavors " - "are {valid}".format(invalid=flavor, valid=valid_parsers) + f"{repr(flavor)} is not a valid flavor, valid flavors are {valid_parsers}" ) if flavor in ("bs4", "html5lib"): @@ -863,13 +857,13 @@ def _validate_flavor(flavor): elif isinstance(flavor, abc.Iterable): if not all(isinstance(flav, str) for flav in flavor): raise TypeError( - "Object of type {typ!r} is not an iterable of " - "strings".format(typ=type(flavor).__name__) + f"Object of type {repr(type(flavor).__name__)} " + f"is not an iterable of strings" ) else: - fmt = "{flavor!r}" if isinstance(flavor, str) else "{flavor}" - fmt += " is not a valid flavor" - raise ValueError(fmt.format(flavor=flavor)) + msg = repr(flavor) if isinstance(flavor, str) else str(flavor) + msg += " is not a valid flavor" + raise ValueError(msg) flavor = tuple(flavor) valid_flavors = set(_valid_parsers) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index bbec148b8745d..7403e6d254d03 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -972,10 +972,10 @@ def _clean_options(self, options, engine): elif engine not in ("python", "python-fwf"): # wait until regex engine integrated fallback_reason = ( - "the 
'c' engine does not support" - " regex separators (separators > 1 char and" - r" different from '\s+' are" - " interpreted as regex)" + "the 'c' engine does not support " + "regex separators (separators > 1 char and " + r"different from '\s+' are " + "interpreted as regex)" ) engine = "python" elif delim_whitespace: @@ -990,9 +990,9 @@ def _clean_options(self, options, engine): encodeable = False if not encodeable and engine not in ("python", "python-fwf"): fallback_reason = ( - "the separator encoded in {encoding}" - " is > 1 char long, and the 'c' engine" - " does not support such separators".format(encoding=encoding) + "the separator encoded in {encoding} " + "is > 1 char long, and the 'c' engine " + "does not support such separators".format(encoding=encoding) ) engine = "python" @@ -1021,21 +1021,19 @@ def _clean_options(self, options, engine): if "python" in engine: for arg in _python_unsupported: if fallback_reason and result[arg] != _c_parser_defaults[arg]: - msg = ( - "Falling back to the 'python' engine because" - " {reason}, but this causes {option!r} to be" - " ignored as it is not supported by the 'python'" - " engine." - ).format(reason=fallback_reason, option=arg) - raise ValueError(msg) + raise ValueError( + f"Falling back to the 'python' engine because " + f"{fallback_reason}, but this causes {repr(arg)} to be " + f"ignored as it is not supported by the 'python' engine." + ) del result[arg] if fallback_reason: warnings.warn( ( - "Falling back to the 'python' engine because" - " {0}; you can avoid this warning by specifying" - " engine='python'." + "Falling back to the 'python' engine because " + "{0}; you can avoid this warning by specifying " + "engine='python'." 
).format(fallback_reason), ParserWarning, stacklevel=5, @@ -1056,8 +1054,8 @@ def _clean_options(self, options, engine): depr_default = _deprecated_defaults[arg] msg = ( - "The '{arg}' argument has been deprecated " - "and will be removed in a future version.".format(arg=arg) + f"The {repr(arg)} argument has been deprecated and will be " + f"removed in a future version." ) if result.get(arg, depr_default) != depr_default: @@ -1081,9 +1079,8 @@ def _clean_options(self, options, engine): if converters is not None: if not isinstance(converters, dict): raise TypeError( - "Type converters must be a dict or" - " subclass, input was " - "a {0!r}".format(type(converters).__name__) + f"Type converters must be a dict or subclass, " + f"input was a {repr(type(converters).__name__)}" ) else: converters = {} diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index beb276478070e..375e6fe2b02c7 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -743,7 +743,7 @@ def _get_call_args(backend_name, data, args, kwargs): if args and isinstance(data, ABCSeries): positional_args = str(args)[1:-1] keyword_args = ", ".join( - f"{name}={value!r}" for (name, default), value in zip(arg_def, args) + f"{name}={repr(value)}" for (name, default), value in zip(arg_def, args) ) msg = ( "`Series.plot()` should not be called with positional " diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 1146b486a3eb4..2208fbf933387 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1114,11 +1114,11 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): if not is_python_engine: assert len(w) == 1 msg = str(w[0].message) + loged = np.log10(s.size - df.shape[1]) expected = ( - "Alignment difference on axis {0} is larger" - " than an order of magnitude on term {1!r}, " - "by more than {2:.4g}; performance may suffer" - "".format(1, "df", np.log10(s.size - df.shape[1])) + 
f"Alignment difference on axis 1 is larger " + f"than an order of magnitude on term 'df', " + f"by more than {loged:.4g}; performance may suffer" ) assert msg == expected diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 6206b333d29e1..b52f24f9e06f1 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -342,7 +342,7 @@ def __init__(self, name, color): self.color = color def __str__(self) -> str: - return "".format(self=self) + return f"" # necessary for pretty KeyError __repr__ = __str__ @@ -419,7 +419,7 @@ def __init__(self, name, color): self.color = color def __str__(self) -> str: - return "".format(self=self) + return f"" thing1 = Thing("One", "red") thing2 = Thing("Two", "blue") diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index cd1bee356ed8e..abd8ef98ff871 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -27,7 +27,7 @@ def engine(request): def skip_if_no_pandas_parser(parser): if parser != "pandas": - pytest.skip("cannot evaluate with parser {0!r}".format(parser)) + pytest.skip(f"cannot evaluate with parser {repr(parser)}") class TestCompat: diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index ba0af7dd8136c..d59b6c18f6042 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -234,7 +234,7 @@ def test_pickle(self): def test_hash_error(self): index = timedelta_range("1 days", periods=10) with pytest.raises( - TypeError, match=("unhashable type: {0.__name__!r}".format(type(index))) + TypeError, match=(f"unhashable type: {repr(type(index).__name__)}") ): hash(index) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index b23ddf5bd9292..07ab41b47bf27 100644 --- 
a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -96,9 +96,9 @@ def test_python_engine(self, python_engine): for default in py_unsupported: msg = ( - "The {default!r} option is not supported with the {python_engine!r}" - " engine" - ).format(default=default, python_engine=python_engine) + f"The {repr(default)} option is not " + f"supported with the {repr(python_engine)} engine" + ) kwargs = {default: object()} with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index d8d617ceeebff..353946a311c1a 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -902,8 +902,8 @@ def test_computer_sales_page(self, datapath): def test_wikipedia_states_table(self, datapath): data = datapath("io", "data", "html", "wikipedia_states.html") - assert os.path.isfile(data), "{data!r} is not a file".format(data=data) - assert os.path.getsize(data), "{data!r} is an empty file".format(data=data) + assert os.path.isfile(data), f"{repr(data)} is not a file" + assert os.path.getsize(data), f"{repr(data)} is an empty file" result = self.read_html(data, "Arizona", header=1)[0] assert result["sq mi"].dtype == np.dtype("float64") diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 3c97b75ecfa0c..0e2f8ee6543e1 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -296,10 +296,8 @@ def test_api_per_method( else: # GH 23011, GH 23163 msg = ( - "Cannot use .str.{name} with values of inferred dtype " - "{inferred_dtype!r}.".format( - name=method_name, inferred_dtype=inferred_dtype - ) + f"Cannot use .str.{method_name} with values of " + f"inferred dtype {repr(inferred_dtype)}." ) with pytest.raises(TypeError, match=msg): method(*args, **kwargs)