From 1102a33d9776ed316cade079e22be6daa76c9e42 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 2 Oct 2018 22:31:36 +0200 Subject: [PATCH 01/11] DOC/CLN: clean-up shared_docs in generic.py (#20074) --- pandas/core/frame.py | 9 +++-- pandas/core/generic.py | 65 ++++++++++-------------------------- pandas/core/panel.py | 16 +++++++-- pandas/core/series.py | 5 +-- pandas/core/sparse/series.py | 7 ++-- 5 files changed, 44 insertions(+), 58 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4e8b4e3a6bec..15cebb88faea7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3629,7 +3629,8 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, fill_axis=fill_axis, broadcast_axis=broadcast_axis) - @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.reindex.__doc__) @rewrite_axis_style_signature('labels', [('method', None), ('copy', True), ('level', None), @@ -4479,7 +4480,8 @@ def f(vals): # ---------------------------------------------------------------------- # Sorting - @Appender(_shared_docs['sort_values'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.sort_values.__doc__) def sort_values(self, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'): inplace = validate_bool_kwarg(inplace, 'inplace') @@ -4521,7 +4523,8 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, else: return self._constructor(new_data).__finalize__(self) - @Appender(_shared_docs['sort_index'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.sort_index.__doc__) def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, by=None): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 393e7caae5fab..8fed92f7ed6b9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ 
-643,7 +643,8 @@ def _set_axis(self, axis, labels): self._data.set_axis(axis, labels) self._clear_item_cache() - _shared_docs['transpose'] = """ + def transpose(self, *args, **kwargs): + """ Permute the dimensions of the %(klass)s Parameters @@ -663,9 +664,6 @@ def _set_axis(self, axis, labels): y : same as input """ - @Appender(_shared_docs['transpose'] % _shared_doc_kwargs) - def transpose(self, *args, **kwargs): - # construct the args axes, kwargs = self._construct_axes_from_arguments(args, kwargs, require_all=True) @@ -965,9 +963,8 @@ def swaplevel(self, i=-2, j=-1, axis=0): # ---------------------------------------------------------------------- # Rename - # TODO: define separate funcs for DataFrame, Series and Panel so you can - # get completion on keyword arguments. - _shared_docs['rename'] = """ + def rename(self, *args, **kwargs): + """ Alter axes input function or functions. Function / dict values must be unique (1-to-1). Labels not contained in a dict / Series will be left as-is. Extra labels listed don't throw an error. Alternatively, change @@ -975,13 +972,11 @@ def swaplevel(self, i=-2, j=-1, axis=0): Parameters ---------- - %(optional_mapper)s %(axes)s : scalar, list-like, dict-like or function, optional Scalar or list-like will alter the ``Series.name`` attribute, and raise on DataFrame or Panel. dict-like or functions are transformations to apply to that axis' values - %(optional_axis)s copy : boolean, default True Also copy underlying data inplace : boolean, default False @@ -1069,12 +1064,6 @@ def swaplevel(self, i=-2, j=-1, axis=0): See the :ref:`user guide ` for more. 
""" - - @Appender(_shared_docs['rename'] % dict(axes='axes keywords for this' - ' object', klass='NDFrame', - optional_mapper='', - optional_axis='')) - def rename(self, *args, **kwargs): axes, kwargs = self._construct_axes_from_arguments(args, kwargs) copy = kwargs.pop('copy', True) inplace = kwargs.pop('inplace', False) @@ -1127,8 +1116,6 @@ def f(x): else: return result.__finalize__(self) - rename.__doc__ = _shared_docs['rename'] - def rename_axis(self, mapper, axis=0, copy=True, inplace=False): """ Alter the name of the index or columns. @@ -3024,7 +3011,8 @@ def __delitem__(self, key): except KeyError: pass - _shared_docs['_take'] = """ + def _take(self, indices, axis=0, is_copy=True): + """ Return the elements in the given *positional* indices along an axis. This means that we are not indexing according to actual values in @@ -3055,9 +3043,6 @@ def __delitem__(self, key): numpy.ndarray.take numpy.take """ - - @Appender(_shared_docs['_take']) - def _take(self, indices, axis=0, is_copy=True): self._consolidate_inplace() new_data = self._data.take(indices, @@ -3072,7 +3057,8 @@ def _take(self, indices, axis=0, is_copy=True): return result - _shared_docs['take'] = """ + def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs): + """ Return the elements in the given *positional* indices along an axis. 
This means that we are not indexing according to actual values in @@ -3155,9 +3141,6 @@ class max_speed 1 monkey mammal NaN 3 lion mammal 80.5 """ - - @Appender(_shared_docs['take']) - def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs): if convert is not None: msg = ("The 'convert' parameter is deprecated " "and will be removed in a future version.") @@ -3580,7 +3563,9 @@ def add_suffix(self, suffix): mapper = {self._info_axis_name: f} return self.rename(**mapper) - _shared_docs['sort_values'] = """ + def sort_values(self, by=None, axis=0, ascending=True, inplace=False, + kind='quicksort', na_position='last'): + """ Sort by the values along either axis Parameters @@ -3665,17 +3650,12 @@ def add_suffix(self, suffix): 0 A 2 0 1 A 1 1 """ - - def sort_values(self, by=None, axis=0, ascending=True, inplace=False, - kind='quicksort', na_position='last'): - """ - NOT IMPLEMENTED: do not call this method, as sorting values is not - supported for Panel objects and will raise an error. 
- """ raise NotImplementedError("sort_values has not been implemented " "on Panel or Panel4D objects.") - _shared_docs['sort_index'] = """ + def sort_index(self, axis=0, level=None, ascending=True, inplace=False, + kind='quicksort', na_position='last', sort_remaining=True): + """ Sort object by labels (along an axis) Parameters @@ -3703,10 +3683,6 @@ def sort_values(self, by=None, axis=0, ascending=True, inplace=False, ------- sorted_obj : %(klass)s """ - - @Appender(_shared_docs['sort_index'] % dict(axes="axes", klass="NDFrame")) - def sort_index(self, axis=0, level=None, ascending=True, inplace=False, - kind='quicksort', na_position='last', sort_remaining=True): inplace = validate_bool_kwarg(inplace, 'inplace') axis = self._get_axis_number(axis) axis_name = self._get_axis_name(axis) @@ -3724,7 +3700,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, new_axis = labels.take(sort_index) return self.reindex(**{axis_name: new_axis}) - _shared_docs['reindex'] = """ + def reindex(self, *args, **kwargs): + """ Conform %(klass)s to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index. 
A new object is produced unless the new index is equivalent to the current one and @@ -3920,14 +3897,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, ------- reindexed : %(klass)s """ - - # TODO: Decide if we care about having different examples for different - # kinds - - @Appender(_shared_docs['reindex'] % dict(axes="axes", klass="NDFrame", - optional_labels="", - optional_axis="")) - def reindex(self, *args, **kwargs): + # TODO: Decide if we care about having different examples for different + # kinds # construct the args axes, kwargs = self._construct_axes_from_arguments(args, kwargs) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 81d1e83ee6870..1e2d4000413bb 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1215,7 +1215,8 @@ def _wrap_result(self, result, axis): return self._construct_return_type(result, axes) - @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.reindex.__doc__) def reindex(self, *args, **kwargs): major = kwargs.pop("major", None) minor = kwargs.pop('minor', None) @@ -1236,7 +1237,8 @@ def reindex(self, *args, **kwargs): kwargs.pop('labels', None) return super(Panel, self).reindex(**kwargs) - @Appender(_shared_docs['rename'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.rename.__doc__) def rename(self, items=None, major_axis=None, minor_axis=None, **kwargs): major_axis = (major_axis if major_axis is not None else kwargs.pop('major', None)) @@ -1253,7 +1255,8 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, copy=copy, limit=limit, fill_value=fill_value) - @Appender(_shared_docs['transpose'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.transpose.__doc__) def transpose(self, *args, **kwargs): # check if a list of axes was passed in instead as a # single *args element @@ -1536,6 +1539,13 @@ def _extract_axis(self, data, axis=0, 
intersect=False): return ensure_index(index) + def sort_values(self, *args, **kwargs): + """ + NOT IMPLEMENTED: do not call this method, as sorting values is not + supported for Panel objects and will raise an error. + """ + super(Panel, self).sort_values(*args, **kwargs) + Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0, stat_axis=1, aliases={'major': 'major_axis', diff --git a/pandas/core/series.py b/pandas/core/series.py index 83f80c305c5eb..82198c2b3edd5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3496,7 +3496,8 @@ def rename(self, index=None, **kwargs): return self._set_name(index, inplace=kwargs.get('inplace')) return super(Series, self).rename(index=index, **kwargs) - @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(generic.NDFrame.reindex.__doc__) def reindex(self, index=None, **kwargs): return super(Series, self).reindex(index=index, **kwargs) @@ -3680,7 +3681,7 @@ def memory_usage(self, index=True, deep=False): v += self.index.memory_usage(deep=deep) return v - @Appender(generic._shared_docs['_take']) + @Appender(generic.NDFrame._take.__doc__) def _take(self, indices, axis=0, is_copy=False): indices = ensure_platform_int(indices) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 8ac5d81f23bb2..97cd3a0a1fb6a 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -19,7 +19,7 @@ import pandas.core.indexes.base as ibase import pandas.core.ops as ops import pandas._libs.index as libindex -from pandas.util._decorators import Appender +from pandas.util._decorators import Appender, Substitution from pandas.core.sparse.array import ( make_sparse, SparseArray, @@ -563,7 +563,8 @@ def copy(self, deep=True): return self._constructor(new_data, sparse_index=self.sp_index, fill_value=self.fill_value).__finalize__(self) - @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs) + 
@Substitution(**_shared_doc_kwargs) + @Appender(generic.NDFrame.reindex.__doc__) def reindex(self, index=None, method=None, copy=True, limit=None, **kwargs): @@ -592,7 +593,7 @@ def sparse_reindex(self, new_index): sparse_index=new_index, fill_value=self.fill_value).__finalize__(self) - @Appender(generic._shared_docs['take']) + @Appender(generic.NDFrame.take.__doc__) def take(self, indices, axis=0, convert=None, *args, **kwargs): if convert is not None: msg = ("The 'convert' parameter is deprecated " From 8e749a33b5f814bded42044a4182449d5d6c8213 Mon Sep 17 00:00:00 2001 From: Pamela Wu Date: Tue, 2 Oct 2018 17:14:48 -0400 Subject: [PATCH 02/11] CLN GH22874 replace bare excepts in pandas/io/pytables.py (#22919) --- pandas/io/pytables.py | 49 ++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c57b1c3e211f6..fc9e415ed38f7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -258,7 +258,7 @@ def _tables(): try: _table_file_open_policy_is_strict = ( tables.file._FILE_OPEN_POLICY == 'strict') - except: + except AttributeError: pass return _table_mod @@ -395,11 +395,11 @@ def read_hdf(path_or_buf, key=None, mode='r', **kwargs): 'contains multiple datasets.') key = candidate_only_group._v_pathname return store.select(key, auto_close=auto_close, **kwargs) - except: + except (ValueError, TypeError): # if there is an error, close the store try: store.close() - except: + except AttributeError: pass raise @@ -517,7 +517,7 @@ def __getattr__(self, name): """ allow attribute access to get stores """ try: return self.get(name) - except: + except (KeyError, ClosedFileError): pass raise AttributeError("'%s' object has no attribute '%s'" % (type(self).__name__, name)) @@ -675,7 +675,7 @@ def flush(self, fsync=False): if fsync: try: os.fsync(self._handle.fileno()) - except: + except OSError: pass def get(self, key): @@ -1161,7 +1161,7 @@ def get_node(self, key): if 
not key.startswith('/'): key = '/' + key return self._handle.get_node(self.root, key) - except: + except _table_mod.exceptions.NoSuchNodeError: return None def get_storer(self, key): @@ -1270,7 +1270,7 @@ def _validate_format(self, format, kwargs): # validate try: kwargs['format'] = _FORMAT_MAP[format.lower()] - except: + except KeyError: raise TypeError("invalid HDFStore format specified [{0}]" .format(format)) @@ -1307,7 +1307,7 @@ def error(t): try: pt = _TYPE_MAP[type(value)] - except: + except KeyError: error('_TYPE_MAP') # we are actually a table @@ -1318,7 +1318,7 @@ def error(t): if u('table') not in pt: try: return globals()[_STORER_MAP[pt]](self, group, **kwargs) - except: + except KeyError: error('_STORER_MAP') # existing node (and must be a table) @@ -1354,12 +1354,12 @@ def error(t): fields = group.table._v_attrs.fields if len(fields) == 1 and fields[0] == u('value'): tt = u('legacy_frame') - except: + except IndexError: pass try: return globals()[_TABLE_MAP[tt]](self, group, **kwargs) - except: + except KeyError: error('_TABLE_MAP') def _write_to_group(self, key, value, format, index=True, append=False, @@ -1624,7 +1624,7 @@ def is_indexed(self): """ return whether I am an indexed column """ try: return getattr(self.table.cols, self.cname).is_indexed - except: + except AttributeError: False def copy(self): @@ -1654,9 +1654,10 @@ def convert(self, values, nan_rep, encoding, errors): kwargs['freq'] = _ensure_decoded(self.freq) if self.index_name is not None: kwargs['name'] = _ensure_decoded(self.index_name) + # making an Index instance could throw a number of different errors try: self.values = Index(values, **kwargs) - except: + except Exception: # noqa: E722 # if the output freq is different that what we recorded, # it should be None (see also 'doc example part 2') @@ -1869,7 +1870,7 @@ def create_for_block( m = re.search(r"values_block_(\d+)", name) if m: name = "values_%s" % m.groups()[0] - except: + except IndexError: pass return cls(name=name, 
cname=cname, **kwargs) @@ -2232,7 +2233,7 @@ def convert(self, values, nan_rep, encoding, errors): try: self.data = self.data.astype(dtype, copy=False) - except: + except TypeError: self.data = self.data.astype('O', copy=False) # convert nans / decode @@ -2325,7 +2326,7 @@ def set_version(self): self.version = tuple(int(x) for x in version.split('.')) if len(self.version) == 2: self.version = self.version + (0,) - except: + except AttributeError: self.version = (0, 0, 0) @property @@ -2769,7 +2770,7 @@ def write_array(self, key, value, items=None): else: try: items = list(items) - except: + except TypeError: pass ws = performance_doc % (inferred_type, key, items) warnings.warn(ws, PerformanceWarning, stacklevel=7) @@ -2843,7 +2844,7 @@ class SeriesFixed(GenericFixed): def shape(self): try: return len(getattr(self.group, 'values')), - except: + except (TypeError, AttributeError): return None def read(self, **kwargs): @@ -2961,7 +2962,7 @@ def shape(self): shape = shape[::-1] return shape - except: + except AttributeError: return None def read(self, start=None, stop=None, **kwargs): @@ -3495,7 +3496,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, if axes is None: try: axes = _AXES_MAP[type(obj)] - except: + except KeyError: raise TypeError("cannot properly create the storer for: " "[group->%s,value->%s]" % (self.group._v_name, type(obj))) @@ -3614,7 +3615,7 @@ def get_blk_items(mgr, blocks): b, b_items = by_items.pop(items) new_blocks.append(b) new_blk_items.append(b_items) - except: + except (IndexError, KeyError): raise ValueError( "cannot match existing table structure for [%s] on " "appending data" % ','.join(pprint_thing(item) for @@ -3642,7 +3643,7 @@ def get_blk_items(mgr, blocks): if existing_table is not None and validate: try: existing_col = existing_table.values_axes[i] - except: + except (IndexError, KeyError): raise ValueError("Incompatible appended table [%s] with " "existing table [%s]" % (blocks, existing_table.values_axes)) @@ 
-4460,7 +4461,7 @@ def _get_info(info, name): """ get/create the info for this name """ try: idx = info[name] - except: + except KeyError: idx = info[name] = dict() return idx @@ -4782,7 +4783,7 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs): ) self.coordinates = where - except: + except ValueError: pass if self.coordinates is None: From c44bad24996f9e747f2119fa0c6a90d893f6e2aa Mon Sep 17 00:00:00 2001 From: Pamela Wu Date: Tue, 2 Oct 2018 17:16:25 -0400 Subject: [PATCH 03/11] CLN GH22873 Replace base excepts in pandas/core (#22901) --- doc/source/whatsnew/v0.24.0.txt | 1 - pandas/core/computation/pytables.py | 2 +- pandas/core/dtypes/common.py | 2 +- pandas/core/dtypes/dtypes.py | 8 ++++---- pandas/core/frame.py | 4 ++-- pandas/core/indexes/frozen.py | 2 +- pandas/core/indexes/multi.py | 12 +++++++----- pandas/core/indexing.py | 6 +++--- pandas/core/internals/blocks.py | 10 +++++----- pandas/core/nanops.py | 5 +++-- pandas/core/ops.py | 3 ++- pandas/core/sparse/array.py | 2 +- pandas/core/tools/datetimes.py | 10 +++++----- pandas/core/window.py | 2 +- 14 files changed, 36 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 851c1a3fbd6e9..f83185173c3e3 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -834,4 +834,3 @@ Other - :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now returns the correct n values when keep != 'all' also when tied on the first columns (:issue:`22752`) - :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly. 
- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) -- diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 2bd1b0c5b3507..e08df3e340138 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -411,7 +411,7 @@ def visit_Subscript(self, node, **kwargs): slobj = self.visit(node.slice) try: value = value.value - except: + except AttributeError: pass try: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index e2b9e246aee50..5f0b71d4505c2 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -467,7 +467,7 @@ def is_timedelta64_dtype(arr_or_dtype): return False try: tipo = _get_dtype_type(arr_or_dtype) - except: + except (TypeError, ValueError, SyntaxError): return False return issubclass(tipo, np.timedelta64) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index d879ded4f0f09..fe5cc9389a8ba 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -358,11 +358,11 @@ def construct_from_string(cls, string): try: if string == 'category': return cls() - except: + else: + raise TypeError("cannot construct a CategoricalDtype") + except AttributeError: pass - raise TypeError("cannot construct a CategoricalDtype") - @staticmethod def validate_ordered(ordered): """ @@ -519,7 +519,7 @@ def __new__(cls, unit=None, tz=None): if m is not None: unit = m.groupdict()['unit'] tz = m.groupdict()['tz'] - except: + except TypeError: raise ValueError("could not construct DatetimeTZDtype") elif isinstance(unit, compat.string_types): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 15cebb88faea7..abe8a519afe1b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3260,7 +3260,7 @@ def _ensure_valid_index(self, value): if not len(self.index) and is_list_like(value): try: value = Series(value) - except: + except 
(ValueError, NotImplementedError, TypeError): raise ValueError('Cannot set a frame with no defined index ' 'and a value that cannot be converted to a ' 'Series') @@ -7750,7 +7750,7 @@ def convert(v): values = np.array([convert(v) for v in values]) else: values = convert(values) - except: + except (ValueError, TypeError): values = convert(values) else: diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 5a37e03b700f9..289970aaf3a82 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -139,7 +139,7 @@ def searchsorted(self, value, side="left", sorter=None): # xref: https://github.com/numpy/numpy/issues/5370 try: value = self.dtype.type(value) - except: + except ValueError: pass return super(FrozenNDArray, self).searchsorted( diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3e6b934e1e863..119a607fc0e68 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -6,6 +6,7 @@ import numpy as np from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp +from pandas._libs import tslibs from pandas.compat import range, zip, lrange, lzip, map from pandas.compat.numpy import function as nv @@ -1002,12 +1003,13 @@ def _try_mi(k): return _try_mi(key) except (KeyError): raise - except: + except (IndexError, ValueError, TypeError): pass try: return _try_mi(Timestamp(key)) - except: + except (KeyError, TypeError, + IndexError, ValueError, tslibs.OutOfBoundsDatetime): pass raise InvalidIndexError(key) @@ -1686,7 +1688,7 @@ def append(self, other): # if all(isinstance(x, MultiIndex) for x in other): try: return MultiIndex.from_tuples(new_tuples, names=self.names) - except: + except (TypeError, IndexError): return Index(new_tuples) def argsort(self, *args, **kwargs): @@ -2315,7 +2317,7 @@ def maybe_droplevels(indexer, levels, drop_level): for i in sorted(levels, reverse=True): try: new_index = new_index.droplevel(i) - except: + except ValueError: # no 
dropping here return orig_index @@ -2818,7 +2820,7 @@ def _convert_can_do_setop(self, other): msg = 'other must be a MultiIndex or a list of tuples' try: other = MultiIndex.from_tuples(other) - except: + except TypeError: raise TypeError(msg) else: result_names = self.names if self.names == other.names else None diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b63f874abff85..150518aadcfd9 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2146,7 +2146,7 @@ def _getitem_tuple(self, tup): self._has_valid_tuple(tup) try: return self._getitem_lowerdim(tup) - except: + except IndexingError: pass retval = self.obj @@ -2705,13 +2705,13 @@ def maybe_droplevels(index, key): for _ in key: try: index = index.droplevel(0) - except: + except ValueError: # we have dropped too much, so back out return original_index else: try: index = index.droplevel(0) - except: + except ValueError: pass return index diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6576db9f642a6..0e57dd33b1c4e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -666,7 +666,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, newb = make_block(values, placement=self.mgr_locs, klass=klass, ndim=self.ndim) - except: + except Exception: # noqa: E722 if errors == 'raise': raise newb = self.copy() if copy else self @@ -1142,7 +1142,7 @@ def check_int_bool(self, inplace): # a fill na type method try: m = missing.clean_fill_method(method) - except: + except ValueError: m = None if m is not None: @@ -1157,7 +1157,7 @@ def check_int_bool(self, inplace): # try an interp method try: m = missing.clean_interp_method(method, **kwargs) - except: + except ValueError: m = None if m is not None: @@ -2438,7 +2438,7 @@ def set(self, locs, values, check=False): try: if (self.values[locs] == values).all(): return - except: + except (IndexError, ValueError): pass try: self.values[locs] = values @@ -3172,7 
+3172,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, def __len__(self): try: return self.sp_index.length - except: + except AttributeError: return 0 def copy(self, deep=True, mgr=None): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 7619d47cbc8f9..232d030da7f1e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -503,7 +503,8 @@ def reduction(values, axis=None, skipna=True): try: result = getattr(values, meth)(axis, dtype=dtype_max) result.fill(np.nan) - except: + except (AttributeError, TypeError, + ValueError, np.core._internal.AxisError): result = np.nan else: result = getattr(values, meth)(axis) @@ -815,7 +816,7 @@ def _ensure_numeric(x): elif is_object_dtype(x): try: x = x.astype(np.complex128) - except: + except (TypeError, ValueError): x = x.astype(np.float64) else: if not np.any(x.imag): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 70fe7de0a973e..ad187b08e0742 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1545,7 +1545,8 @@ def na_op(x, y): y = bool(y) try: result = libops.scalar_binop(x, y, op) - except: + except (TypeError, ValueError, AttributeError, + OverflowError, NotImplementedError): raise TypeError("cannot compare a dtyped [{dtype}] array " "with a scalar of type [{typ}]" .format(dtype=x.dtype, diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index eb07e5ef6c85f..186a2490a5f2e 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -306,7 +306,7 @@ def __setstate__(self, state): def __len__(self): try: return self.sp_index.length - except: + except AttributeError: return 0 def __unicode__(self): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 4a5290a90313d..eb8d2b0b6c809 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -244,7 +244,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, if format == '%Y%m%d': try: result = 
_attempt_YYYYMMDD(arg, errors=errors) - except: + except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): raise ValueError("cannot convert the input to " "'%Y%m%d' date format") @@ -334,7 +334,7 @@ def _adjust_to_origin(arg, origin, unit): raise ValueError("unit must be 'D' for origin='julian'") try: arg = arg - j0 - except: + except TypeError: raise ValueError("incompatible 'arg' type for given " "'origin'='julian'") @@ -731,21 +731,21 @@ def calc_with_mask(carg, mask): # try intlike / strings that are ints try: return calc(arg.astype(np.int64)) - except: + except ValueError: pass # a float with actual np.nan try: carg = arg.astype(np.float64) return calc_with_mask(carg, notna(carg)) - except: + except ValueError: pass # string with NaN-like try: mask = ~algorithms.isin(arg, list(tslib.nat_strings)) return calc_with_mask(arg, mask) - except: + except ValueError: pass return None diff --git a/pandas/core/window.py b/pandas/core/window.py index 5cdf62d5a5537..4281d66a640e3 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -2504,7 +2504,7 @@ def _offset(window, center): offset = (window - 1) / 2. 
if center else 0 try: return int(offset) - except: + except TypeError: return offset.astype(int) From 08ecba8dab4a35ad3cad89fe02c7240674938b97 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 2 Oct 2018 14:22:53 -0700 Subject: [PATCH 04/11] BUG: fix DataFrame+DataFrame op with timedelta64 dtype (#22696) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/frame.py | 2 +- pandas/core/ops.py | 42 +++++++++++++++++++++++++-- pandas/tests/frame/test_arithmetic.py | 15 ++++++++++ 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f83185173c3e3..9b71ab656920d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -666,7 +666,7 @@ Timedelta - Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`) - Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`) - Fixed bug where subtracting :class:`Timedelta` from an object-dtyped array would raise ``TypeError`` (:issue:`21980`) -- +- Fixed bug in adding a :class:`DataFrame` with all-`timedelta64[ns]` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`) - Timezones diff --git a/pandas/core/frame.py b/pandas/core/frame.py index abe8a519afe1b..138d1017aa43d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4889,7 +4889,7 @@ def _arith_op(left, right): left, right = ops.fill_binop(left, right, fill_value) return func(left, right) - if this._is_mixed_type or other._is_mixed_type: + if ops.should_series_dispatch(this, other, func): # iterate over columns return ops.dispatch_to_series(this, other, _arith_op) else: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index ad187b08e0742..8171840c96b6e 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -900,6 +900,42 @@ def invalid_comparison(left, 
right, op): return res_values +# ----------------------------------------------------------------------------- +# Dispatch logic + +def should_series_dispatch(left, right, op): + """ + Identify cases where a DataFrame operation should dispatch to its + Series counterpart. + + Parameters + ---------- + left : DataFrame + right : DataFrame + op : binary operator + + Returns + ------- + override : bool + """ + if left._is_mixed_type or right._is_mixed_type: + return True + + if not len(left.columns) or not len(right.columns): + # ensure obj.dtypes[0] exists for each obj + return False + + ldtype = left.dtypes.iloc[0] + rdtype = right.dtypes.iloc[0] + + if ((is_timedelta64_dtype(ldtype) and is_integer_dtype(rdtype)) or + (is_timedelta64_dtype(rdtype) and is_integer_dtype(ldtype))): + # numpy integer dtypes as timedelta64 dtypes in this scenario + return True + + return False + + # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory # methods @@ -1803,8 +1839,10 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): other = _align_method_FRAME(self, other, axis) - if isinstance(other, ABCDataFrame): # Another DataFrame - return self._combine_frame(other, na_op, fill_value, level) + if isinstance(other, ABCDataFrame): + # Another DataFrame + pass_op = op if should_series_dispatch(self, other, op) else na_op + return self._combine_frame(other, pass_op, fill_value, level) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, fill_value=fill_value, axis=axis, diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 2b08897864db0..2eb11c3a2e2f7 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -266,3 +266,18 @@ def test_df_bool_mul_int(self): result = 1 * df kinds = result.dtypes.apply(lambda x: x.kind) assert (kinds == 'i').all() + + def 
test_td64_df_add_int_frame(self): + # GH#22696 Check that we don't dispatch to numpy implementation, + # which treats int64 as m8[ns] + tdi = pd.timedelta_range('1', periods=3) + df = tdi.to_frame() + other = pd.DataFrame([1, 2, 3], index=tdi) # indexed like `df` + with pytest.raises(TypeError): + df + other + with pytest.raises(TypeError): + other + df + with pytest.raises(TypeError): + df - other + with pytest.raises(TypeError): + other - df From b0f9a104f323d687a56ea878ff78ff005f37b42d Mon Sep 17 00:00:00 2001 From: Tony Tao <34781056+tonytao2012@users.noreply.github.com> Date: Tue, 2 Oct 2018 19:01:08 -0500 Subject: [PATCH 05/11] DOC GH22893 Fix docstring of groupby in pandas/core/generic.py (#22920) --- pandas/core/generic.py | 101 +++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 28 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8fed92f7ed6b9..cc157cc7228a8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7034,8 +7034,12 @@ def clip_lower(self, threshold, axis=None, inplace=False): def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, **kwargs): """ - Group series using mapper (dict or key function, apply given function - to group, return result as series) or by a series of columns. + Group DataFrame or Series using a mapper or by a Series of columns. + + A groupby operation involves some combination of splitting the + object, applying a function, and combining the results. This can be + used to group large amounts of data and compute operations on these + groups. Parameters ---------- @@ -7048,54 +7052,95 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, values are used as-is determine the groups. A label or list of labels may be passed to group by the columns in ``self``. Notice that a tuple is interpreted a (single) key. 
- axis : int, default 0 + axis : {0 or 'index', 1 or 'columns'}, default 0 + Split along rows (0) or columns (1). level : int, level name, or sequence of such, default None If the axis is a MultiIndex (hierarchical), group by a particular - level or levels - as_index : boolean, default True + level or levels. + as_index : bool, default True For aggregated output, return object with group labels as the index. Only relevant for DataFrame input. as_index=False is - effectively "SQL-style" grouped output - sort : boolean, default True + effectively "SQL-style" grouped output. + sort : bool, default True Sort group keys. Get better performance by turning this off. Note this does not influence the order of observations within each - group. groupby preserves the order of rows within each group. - group_keys : boolean, default True - When calling apply, add group keys to index to identify pieces - squeeze : boolean, default False - reduce the dimensionality of the return type if possible, - otherwise return a consistent type - observed : boolean, default False - This only applies if any of the groupers are Categoricals + group. Groupby preserves the order of rows within each group. + group_keys : bool, default True + When calling apply, add group keys to index to identify pieces. + squeeze : bool, default False + Reduce the dimensionality of the return type if possible, + otherwise return a consistent type. + observed : bool, default False + This only applies if any of the groupers are Categoricals. If True: only show observed values for categorical groupers. If False: show all values for categorical groupers. .. versionadded:: 0.23.0 + **kwargs + Optional, only accepts keyword argument 'mutated' and is passed + to groupby. + Returns ------- - GroupBy object + DataFrameGroupBy or SeriesGroupBy + Depends on the calling object and returns groupby object that + contains information about the groups. 
- Examples + See Also -------- - DataFrame results - - >>> data.groupby(func, axis=0).mean() - >>> data.groupby(['col1', 'col2'])['col3'].mean() - - DataFrame with hierarchical index - - >>> data.groupby(['col1', 'col2']).mean() + resample : Convenience method for frequency conversion and resampling + of time series. Notes ----- See the `user guide `_ for more. - See also + Examples -------- - resample : Convenience method for frequency conversion and resampling - of time series. + >>> df = pd.DataFrame({'Animal' : ['Falcon', 'Falcon', + ... 'Parrot', 'Parrot'], + ... 'Max Speed' : [380., 370., 24., 26.]}) + >>> df + Animal Max Speed + 0 Falcon 380.0 + 1 Falcon 370.0 + 2 Parrot 24.0 + 3 Parrot 26.0 + >>> df.groupby(['Animal']).mean() + Max Speed + Animal + Falcon 375.0 + Parrot 25.0 + + **Hierarchical Indexes** + + We can groupby different levels of a hierarchical index + using the `level` parameter: + + >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], + ... ['Captive', 'Wild', 'Captive', 'Wild']] + >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) + >>> df = pd.DataFrame({'Max Speed' : [390., 350., 30., 20.]}, + ... 
index=index) + >>> df + Max Speed + Animal Type + Falcon Captive 390.0 + Wild 350.0 + Parrot Captive 30.0 + Wild 20.0 + >>> df.groupby(level=0).mean() + Max Speed + Animal + Falcon 370.0 + Parrot 25.0 + >>> df.groupby(level=1).mean() + Max Speed + Type + Captive 210.0 + Wild 185.0 """ from pandas.core.groupby.groupby import groupby From 04ea51ddf7623b897aaaf2e504952d3c11e88205 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 3 Oct 2018 09:24:36 +0200 Subject: [PATCH 06/11] CLN: small clean-up of IntervalIndex (#22956) --- pandas/core/arrays/interval.py | 7 +---- pandas/core/indexes/interval.py | 49 ++++++--------------------------- 2 files changed, 9 insertions(+), 47 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 90df596b98296..134999f05364f 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -108,12 +108,7 @@ class IntervalArray(IntervalMixin, ExtensionArray): _na_value = _fill_value = np.nan def __new__(cls, data, closed=None, dtype=None, copy=False, - fastpath=False, verify_integrity=True): - - if fastpath: - return cls._simple_new(data.left, data.right, closed, - copy=copy, dtype=dtype, - verify_integrity=False) + verify_integrity=True): if isinstance(data, ABCSeries) and is_interval_dtype(data): data = data.values diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 4b125580bd7e0..f72f87aeb2af6 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -146,17 +146,13 @@ class IntervalIndex(IntervalMixin, Index): _mask = None def __new__(cls, data, closed=None, dtype=None, copy=False, - name=None, fastpath=False, verify_integrity=True): - - if fastpath: - return cls._simple_new(data, name) + name=None, verify_integrity=True): if name is None and hasattr(data, 'name'): name = data.name with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray(data, closed=closed, copy=copy, dtype=dtype, - 
fastpath=fastpath, verify_integrity=verify_integrity) return cls._simple_new(array, name) @@ -187,14 +183,6 @@ def _shallow_copy(self, left=None, right=None, **kwargs): attributes.update(kwargs) return self._simple_new(result, **attributes) - @cache_readonly - def hasnans(self): - """ - Return if the IntervalIndex has any nans; enables various performance - speedups - """ - return self._isnan.any() - @cache_readonly def _isnan(self): """Return a mask indicating if each value is NA""" @@ -206,10 +194,6 @@ def _isnan(self): def _engine(self): return IntervalTree(self.left, self.right, closed=self.closed) - @property - def _constructor(self): - return type(self) - def __contains__(self, key): """ return a boolean if this key is IN the index @@ -394,18 +378,7 @@ def _values(self): @cache_readonly def _ndarray_values(self): - left = self.left - right = self.right - mask = self._isnan - closed = self.closed - - result = np.empty(len(left), dtype=object) - for i in range(len(left)): - if mask[i]: - result[i] = np.nan - else: - result[i] = Interval(left[i], right[i], closed) - return result + return np.array(self._data) def __array__(self, result=None): """ the array interface, return my values """ @@ -892,18 +865,12 @@ def take(self, indices, axis=0, allow_fill=True, return self._simple_new(result, **attributes) def __getitem__(self, value): - mask = self._isnan[value] - if is_scalar(mask) and mask: - return self._na_value - - left = self.left[value] - right = self.right[value] - - # scalar - if not isinstance(left, Index): - return Interval(left, right, self.closed) - - return self._shallow_copy(left, right) + result = self._data[value] + if isinstance(result, IntervalArray): + return self._shallow_copy(result) + else: + # scalar + return result # __repr__ associated methods are based on MultiIndex From 03181f0569c8b1f93f620a2986b4f174f9b6179b Mon Sep 17 00:00:00 2001 From: Wenhuan Date: Wed, 3 Oct 2018 15:28:07 +0800 Subject: [PATCH 07/11] BUG: fix Series(extension 
array) + extension array values addition (#22479) --- pandas/core/ops.py | 2 +- pandas/tests/extension/base/ops.py | 6 ++++++ pandas/tests/extension/json/test_json.py | 5 +++++ pandas/tests/extension/test_categorical.py | 6 ++++++ pandas/tests/extension/test_integer.py | 6 ++++++ 5 files changed, 24 insertions(+), 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 8171840c96b6e..a02152a123b48 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1218,7 +1218,7 @@ def dispatch_to_extension_op(op, left, right): new_right = [new_right] new_right = list(new_right) elif is_extension_array_dtype(right) and type(left) != type(right): - new_right = list(new_right) + new_right = list(right) else: new_right = right diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 05351c56862b8..ee4a92146128b 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -77,6 +77,12 @@ def test_divmod(self, data): self._check_divmod_op(s, divmod, 1, exc=TypeError) self._check_divmod_op(1, ops.rdivmod, s, exc=TypeError) + def test_add_series_with_extension_array(self, data): + s = pd.Series(data) + result = s + data + expected = pd.Series(data + data) + self.assert_series_equal(result, expected) + def test_error(self, data, all_arithmetic_operators): # invalid ops op_name = all_arithmetic_operators diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 0126d771caf7f..93f10b7fbfc23 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -261,6 +261,11 @@ class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests): def test_error(self, data, all_arithmetic_operators): pass + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + with tm.assert_raises_regex(TypeError, "unsupported"): + ser + data + class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests): pass diff 
--git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index ff66f53eab6f6..c588552572aed 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -22,6 +22,7 @@ from pandas.api.types import CategoricalDtype from pandas import Categorical from pandas.tests.extension import base +import pandas.util.testing as tm def make_data(): @@ -202,6 +203,11 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): else: pytest.skip('rmod never called when string is first argument') + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + with tm.assert_raises_regex(TypeError, "cannot perform"): + ser + data + class TestComparisonOps(base.BaseComparisonOpsTests): diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 7aa33006dadda..fa5c89d85e548 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -143,6 +143,12 @@ def test_error(self, data, all_arithmetic_operators): # other specific errors tested in the integer array specific tests pass + @pytest.mark.xfail(reason="EA is listified. 
GH-22922", strict=True) + def test_add_series_with_extension_array(self, data): + super(TestArithmeticOps, self).test_add_series_with_extension_array( + data + ) + class TestComparisonOps(base.BaseComparisonOpsTests): From e756e991d57c2656906d0a3e8fc76950844e3f3e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 3 Oct 2018 02:19:27 -0700 Subject: [PATCH 08/11] CLN: Use is_period_dtype instead of ABCPeriodIndex checks (#22958) --- pandas/core/arrays/period.py | 2 +- pandas/core/indexes/datetimelike.py | 18 +++++++++--------- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/range.py | 18 +++++++++--------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 481d5313f0e25..41b4c5c669efc 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -264,7 +264,7 @@ def asfreq(self, freq=None, how='E'): if self.hasnans: new_data[self._isnan] = iNaT - return self._simple_new(new_data, self.name, freq=freq) + return self._shallow_copy(new_data, freq=freq) # ------------------------------------------------------------------ # Arithmetic Methods diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 37a12a588db03..1ec30ecbb3a3b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -21,6 +21,7 @@ is_list_like, is_scalar, is_bool_dtype, + is_period_dtype, is_categorical_dtype, is_datetime_or_timedelta_dtype, is_float_dtype, @@ -28,7 +29,7 @@ is_object_dtype, is_string_dtype) from pandas.core.dtypes.generic import ( - ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass) + ABCIndex, ABCSeries, ABCIndexClass) from pandas.core.dtypes.missing import isna from pandas.core import common as com, algorithms, ops @@ -239,9 +240,8 @@ def equals(self, other): # have different timezone return False - # ToDo: Remove this when PeriodDtype is added - elif isinstance(self, ABCPeriodIndex): - if not 
isinstance(other, ABCPeriodIndex): + elif is_period_dtype(self): + if not is_period_dtype(other): return False if self.freq != other.freq: return False @@ -359,7 +359,7 @@ def sort_values(self, return_indexer=False, ascending=True): attribs = self._get_attributes_dict() freq = attribs['freq'] - if freq is not None and not isinstance(self, ABCPeriodIndex): + if freq is not None and not is_period_dtype(self): if freq.n > 0 and not ascending: freq = freq * -1 elif freq.n < 0 and ascending: @@ -386,8 +386,8 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=fill_value, na_value=iNaT) - # keep freq in PeriodIndex, reset otherwise - freq = self.freq if isinstance(self, ABCPeriodIndex) else None + # keep freq in PeriodArray/Index, reset otherwise + freq = self.freq if is_period_dtype(self) else None return self._shallow_copy(taken, freq=freq) _can_hold_na = True @@ -618,7 +618,7 @@ def repeat(self, repeats, *args, **kwargs): Analogous to ndarray.repeat """ nv.validate_repeat(args, kwargs) - if isinstance(self, ABCPeriodIndex): + if is_period_dtype(self): freq = self.freq else: freq = None @@ -673,7 +673,7 @@ def _concat_same_dtype(self, to_concat, name): attribs = self._get_attributes_dict() attribs['name'] = name - if not isinstance(self, ABCPeriodIndex): + if not is_period_dtype(self): # reset freq attribs['freq'] = None diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 119a607fc0e68..6091df776a01b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1001,7 +1001,7 @@ def _try_mi(k): (compat.PY3 and isinstance(key, compat.string_types))): try: return _try_mi(key) - except (KeyError): + except KeyError: raise except (IndexError, ValueError, TypeError): pass diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 981bfddeadac1..fd8e17c369f5a 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -512,33 +512,33 @@ def __getitem__(self, key): # This is 
basically PySlice_GetIndicesEx, but delegation to our # super routines if we don't have integers - l = len(self) + length = len(self) # complete missing slice information step = 1 if key.step is None else key.step if key.start is None: - start = l - 1 if step < 0 else 0 + start = length - 1 if step < 0 else 0 else: start = key.start if start < 0: - start += l + start += length if start < 0: start = -1 if step < 0 else 0 - if start >= l: - start = l - 1 if step < 0 else l + if start >= length: + start = length - 1 if step < 0 else length if key.stop is None: - stop = -1 if step < 0 else l + stop = -1 if step < 0 else length else: stop = key.stop if stop < 0: - stop += l + stop += length if stop < 0: stop = -1 - if stop > l: - stop = l + if stop > length: + stop = length # delegate non-integer slices if (start != int(start) or From 3e3256bb6038111812b4b28f6b3b049214d83d2d Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Wed, 3 Oct 2018 12:23:22 +0100 Subject: [PATCH 09/11] Allow passing a mask to NanOps (#22865) --- pandas/core/nanops.py | 404 ++++++++++++++++++++++++++++++++---- pandas/tests/test_nanops.py | 36 +++- 2 files changed, 391 insertions(+), 49 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 232d030da7f1e..2884bc1a19491 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,12 +1,16 @@ -import itertools import functools +import itertools import operator import warnings from distutils.version import LooseVersion import numpy as np + +import pandas.core.common as com from pandas import compat from pandas._libs import tslibs, lib +from pandas.core.config import get_option +from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.common import ( _get_dtype, is_float, is_scalar, @@ -17,10 +21,7 @@ is_datetime64_dtype, is_timedelta64_dtype, is_datetime_or_timedelta_dtype, is_int_or_datetime_dtype, is_any_int_dtype) -from pandas.core.dtypes.cast import _int64_max, 
maybe_upcast_putmask from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype -from pandas.core.config import get_option -import pandas.core.common as com _BOTTLENECK_INSTALLED = False _MIN_BOTTLENECK_VERSION = '1.0.0' @@ -200,16 +201,18 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): def _get_values(values, skipna, fill_value=None, fill_value_typ=None, - isfinite=False, copy=True): + isfinite=False, copy=True, mask=None): """ utility to get the values view, mask, dtype if necessary copy and mask using the specified fill_value copy = True will force the copy """ values = com.values_from_object(values) - if isfinite: - mask = _isfinite(values) - else: - mask = isna(values) + + if mask is None: + if isfinite: + mask = _isfinite(values) + else: + mask = isna(values) dtype = values.dtype dtype_ok = _na_ok_dtype(dtype) @@ -315,19 +318,98 @@ def _na_for_min_count(values, axis): return result -def nanany(values, axis=None, skipna=True): - values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna) +def nanany(values, axis=None, skipna=True, mask=None): + """ + Check if any elements along an axis evaluate to True. + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2]) + >>> nanops.nanany(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([np.nan]) + >>> nanops.nanany(s) + False + """ + values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna, + mask=mask) return values.any(axis) -def nanall(values, axis=None, skipna=True): - values, mask, dtype, _ = _get_values(values, skipna, True, copy=skipna) +def nanall(values, axis=None, skipna=True, mask=None): + """ + Check if all elements along an axis evaluate to True. 
+ + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanall(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 0]) + >>> nanops.nanall(s) + False + """ + values, mask, dtype, _ = _get_values(values, skipna, True, copy=skipna, + mask=mask) return values.all(axis) @disallow('M8') -def nansum(values, axis=None, skipna=True, min_count=0): - values, mask, dtype, dtype_max = _get_values(values, skipna, 0) +def nansum(values, axis=None, skipna=True, min_count=0, mask=None): + """ + Sum the elements along an axis ignoring NaNs + + Parameters + ---------- + values : ndarray[dtype] + axis: int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : dtype + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nansum(s) + 3.0 + """ + values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask) dtype_sum = dtype_max if is_float_dtype(dtype): dtype_sum = dtype @@ -341,9 +423,32 @@ def nansum(values, axis=None, skipna=True, min_count=0): @disallow('M8') @bottleneck_switch() -def nanmean(values, axis=None, skipna=True): - values, mask, dtype, dtype_max = _get_values(values, skipna, 0) +def nanmean(values, axis=None, skipna=True, mask=None): + """ + Compute the mean of the element along an axis ignoring NaNs + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanmean(s) + 1.5 + """ + values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask) dtype_sum = dtype_max dtype_count = np.float64 if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype): @@ -367,15 +472,36 @@ def nanmean(values, axis=None, skipna=True): @disallow('M8') @bottleneck_switch() -def nanmedian(values, axis=None, skipna=True): +def nanmedian(values, axis=None, skipna=True, mask=None): + """ + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 2]) + >>> nanops.nanmedian(s) + 2.0 + """ def get_median(x): mask = notna(x) if not skipna and not mask.all(): return np.nan return np.nanmedian(x[mask]) - values, mask, dtype, dtype_max = _get_values(values, skipna) + values, mask, dtype, dtype_max = _get_values(values, skipna, mask=mask) if not is_float_dtype(values): values = values.astype('f8') values[mask] = np.nan @@ -431,18 +557,73 @@ def _get_counts_nanvar(mask, axis, ddof, dtype=float): @disallow('M8') @bottleneck_switch(ddof=1) -def nanstd(values, axis=None, skipna=True, ddof=1): - result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof)) +def nanstd(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the standard deviation along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. 
+ mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanstd(s) + 1.0 + """ + result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, + mask=mask)) return _wrap_results(result, values.dtype) @disallow('M8') @bottleneck_switch(ddof=1) -def nanvar(values, axis=None, skipna=True, ddof=1): +def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the variance along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanvar(s) + 1.0 + """ values = com.values_from_object(values) dtype = values.dtype - mask = isna(values) + if mask is None: + mask = isna(values) if is_any_int_dtype(values): values = values.astype('f8') values[mask] = np.nan @@ -465,7 +646,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1): avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count if axis is not None: avg = np.expand_dims(avg, axis) - sqr = _ensure_numeric((avg - values)**2) + sqr = _ensure_numeric((avg - values) ** 2) np.putmask(sqr, mask, 0) result = sqr.sum(axis=axis, dtype=np.float64) / d @@ -478,12 +659,41 @@ def nanvar(values, axis=None, skipna=True, ddof=1): @disallow('M8', 'm8') -def nansem(values, axis=None, skipna=True, ddof=1): +def nansem(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the standard error in the mean along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nansem(s) + 0.5773502691896258 + """ + # This checks if non-numeric-like data is passed with numeric_only=False # and raises a TypeError otherwise - nanvar(values, axis, skipna, ddof=ddof) + nanvar(values, axis, skipna, ddof=ddof, mask=mask) - mask = isna(values) + if mask is None: + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count, _ = _get_counts_nanvar(mask, axis, ddof, values.dtype) @@ -494,9 +704,9 @@ def nansem(values, axis=None, skipna=True, ddof=1): def _nanminmax(meth, fill_value_typ): @bottleneck_switch() - def reduction(values, axis=None, skipna=True): + def reduction(values, axis=None, skipna=True, mask=None): values, mask, dtype, dtype_max = _get_values( - values, skipna, fill_value_typ=fill_value_typ, ) + values, skipna, fill_value_typ=fill_value_typ, mask=mask) if ((axis is not None and values.shape[axis] == 0) or values.size == 0): @@ -521,39 +731,97 @@ def reduction(values, axis=None, skipna=True): @disallow('O') -def nanargmax(values, axis=None, skipna=True): +def nanargmax(values, axis=None, skipna=True, mask=None): """ - Returns -1 in the NA case + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + -------- + result : int + The index of max value in specified axis or -1 in the NA case + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmax(s) + 4 """ - values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf') + values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf', + mask=mask) result = values.argmax(axis) result = _maybe_arg_null_out(result, axis, mask, skipna) return result @disallow('O') -def nanargmin(values, axis=None, skipna=True): +def nanargmin(values, 
axis=None, skipna=True, mask=None): """ - Returns -1 in the NA case + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + -------- + result : int + The index of min value in specified axis or -1 in the NA case + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmin(s) + 0 """ - values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf') + values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf', + mask=mask) result = values.argmin(axis) result = _maybe_arg_null_out(result, axis, mask, skipna) return result @disallow('M8', 'm8') -def nanskew(values, axis=None, skipna=True): +def nanskew(values, axis=None, skipna=True, mask=None): """ Compute the sample skewness. The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G1. The algorithm computes this coefficient directly from the second and third central moment. - """ + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1,np.nan, 1, 2]) + >>> nanops.nanskew(s) + 1.7320508075688787 + """ values = com.values_from_object(values) - mask = isna(values) + if mask is None: + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count = _get_counts(mask, axis) @@ -602,16 +870,38 @@ def nanskew(values, axis=None, skipna=True): @disallow('M8', 'm8') -def nankurt(values, axis=None, skipna=True): - """ Compute the sample excess kurtosis. 
+def nankurt(values, axis=None, skipna=True, mask=None): + """ + Compute the sample excess kurtosis The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G2, computed directly from the second and fourth central moment. + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1,np.nan, 1, 3, 2]) + >>> nanops.nankurt(s) + -1.2892561983471076 """ values = com.values_from_object(values) - mask = isna(values) + if mask is None: + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count = _get_counts(mask, axis) @@ -637,7 +927,7 @@ def nankurt(values, axis=None, skipna=True): with np.errstate(invalid='ignore', divide='ignore'): adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) numer = count * (count + 1) * (count - 1) * m4 - denom = (count - 2) * (count - 3) * m2**2 + denom = (count - 2) * (count - 3) * m2 ** 2 # floating point error # @@ -669,8 +959,34 @@ def nankurt(values, axis=None, skipna=True): @disallow('M8', 'm8') -def nanprod(values, axis=None, skipna=True, min_count=0): - mask = isna(values) +def nanprod(values, axis=None, skipna=True, min_count=0, mask=None): + """ + Parameters + ---------- + values : ndarray[dtype] + axis: int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : dtype + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan]) + >>> nanops.nanprod(s) + 6.0 + + Returns + -------- + The product of all elements on a given axis. 
( NaNs are treated as 1) + """ + if mask is None: + mask = isna(values) if skipna and not is_any_int_dtype(values): values = values.copy() values[mask] = 1 diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index b6c2c65fb6dce..b06463d3c07aa 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1,19 +1,19 @@ # -*- coding: utf-8 -*- from __future__ import division, print_function +import warnings from functools import partial -import pytest -import warnings import numpy as np +import pytest import pandas as pd -from pandas import Series, isna -from pandas.core.dtypes.common import is_integer_dtype import pandas.core.nanops as nanops -import pandas.util.testing as tm import pandas.util._test_decorators as td +import pandas.util.testing as tm +from pandas import Series, isna from pandas.compat.numpy import _np_version_under1p13 +from pandas.core.dtypes.common import is_integer_dtype use_bn = nanops._USE_BOTTLENECK @@ -1041,3 +1041,29 @@ def test_numpy_ops_np_version_under1p13(numpy_op, expected): assert result == expected else: assert result == expected + + +@pytest.mark.parametrize("operation", [ + nanops.nanany, + nanops.nanall, + nanops.nansum, + nanops.nanmean, + nanops.nanmedian, + nanops.nanstd, + nanops.nanvar, + nanops.nansem, + nanops.nanargmax, + nanops.nanargmin, + nanops.nanmax, + nanops.nanmin, + nanops.nanskew, + nanops.nankurt, + nanops.nanprod, +]) +def test_nanops_independent_of_mask_param(operation): + # GH22764 + s = pd.Series([1, 2, np.nan, 3, np.nan, 4]) + mask = s.isna() + median_expected = operation(s) + median_result = operation(s, mask=mask) + assert median_expected == median_result From 15d32bbad832908c9d06a9019e613bb6b35d6878 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 3 Oct 2018 04:32:35 -0700 Subject: [PATCH 10/11] [CLN] Dispatch (some) Frame ops to Series, avoiding _data.eval (#22019) * avoid casting to object dtype in mixed-type frames * Dispatch to Series ops in 
_combine_match_columns * comment * docstring * flake8 fixup * dont bother with try_cast_result * revert non-central change * simplify * revert try_cast_results * revert non-central changes * Fixup typo syntaxerror * simplify assertion * use dispatch_to_series in combine_match_columns * Pass unwrapped op where appropriate * catch correct error * whatsnew note * comment * whatsnew section * remove unnecessary tester * doc fixup --- doc/source/whatsnew/v0.24.0.txt | 29 ++++++++++++++++ pandas/core/frame.py | 7 +--- pandas/core/ops.py | 17 ++++++++-- pandas/tests/arithmetic/test_timedelta64.py | 34 ++++++++----------- .../tests/frame/test_axis_select_reindex.py | 2 +- pandas/tests/reshape/test_pivot.py | 8 +++-- pandas/tests/series/test_operators.py | 10 +++--- 7 files changed, 70 insertions(+), 37 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 9b71ab656920d..700916ba6066e 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -532,6 +532,35 @@ Current Behavior: ... OverflowError: Trying to coerce negative values to unsigned integers +.. _whatsnew_0240.api.crosstab_dtypes: + +Crosstab Preserves Dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`crosstab` will now preserve dtypes in some cases that previously would +cast from integer dtype to floating dtype (:issue:`22019`). + +Previous Behavior: + +.. code-block:: ipython + + In [3]: df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4], + ...: 'c': [1, 1, np.nan, 1, 1]}) + In [4]: pd.crosstab(df.a, df.b, normalize='columns') + Out[4]: + b 3 4 + a + 1 0.5 0.0 + 2 0.5 1.0 + +Current Behavior: + +..
code-block:: ipython + + In [3]: df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4], + ...: 'c': [1, 1, np.nan, 1, 1]}) + In [4]: pd.crosstab(df.a, df.b, normalize='columns') + Datetimelike API Changes ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 138d1017aa43d..ff7590f6d5358 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4899,7 +4899,6 @@ def _arith_op(left, right): copy=False) def _combine_match_index(self, other, func, level=None): - assert isinstance(other, Series) left, right = self.align(other, join='outer', axis=0, level=level, copy=False) assert left.index.equals(right.index) @@ -4919,11 +4918,7 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True): left, right = self.align(other, join='outer', axis=1, level=level, copy=False) assert left.columns.equals(right.index) - - new_data = left._data.eval(func=func, other=right, - axes=[left.columns, self.index], - try_cast=try_cast) - return self._constructor(new_data) + return ops.dispatch_to_series(left, right, func, axis="columns") def _combine_const(self, other, func, errors='raise', try_cast=True): if lib.is_scalar(other) or np.ndim(other) == 0: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index a02152a123b48..dc99faaf68f51 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1666,7 +1666,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # ----------------------------------------------------------------------------- # DataFrame -def dispatch_to_series(left, right, func, str_rep=None): +def dispatch_to_series(left, right, func, str_rep=None, axis=None): """ Evaluate the frame operation func(left, right) by evaluating column-by-column, dispatching to the Series implementation. 
@@ -1677,6 +1677,7 @@ def dispatch_to_series(left, right, func, str_rep=None): right : scalar or DataFrame func : arithmetic or comparison operator str_rep : str or None, default None + axis : {None, 0, 1, "index", "columns"} Returns ------- @@ -1700,6 +1701,15 @@ def column_op(a, b): return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))} + elif isinstance(right, ABCSeries) and axis == "columns": + # We only get here if called via left._combine_match_columns, + # in which case we specifically want to operate row-by-row + assert right.index.equals(left.columns) + + def column_op(a, b): + return {i: func(a.iloc[:, i], b.iloc[i]) + for i in range(len(a.columns))} + elif isinstance(right, ABCSeries): assert right.index.equals(left.index) # Handle other cases later @@ -1844,7 +1854,10 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): pass_op = op if should_series_dispatch(self, other, op) else na_op return self._combine_frame(other, pass_op, fill_value, level) elif isinstance(other, ABCSeries): - return _combine_series_frame(self, other, na_op, + # For these values of `axis`, we end up dispatching to Series op, + # so do not want the masked op. 
+ pass_op = op if axis in [0, "columns", None] else na_op + return _combine_series_frame(self, other, pass_op, fill_value=fill_value, axis=axis, level=level, try_cast=True) else: diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 5050922173564..a09efe6d4761c 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -505,33 +505,25 @@ def test_tdi_add_dt64_array(self, box_df_broadcast_failure): # ------------------------------------------------------------------ # Operations with int-like others - def test_td64arr_add_int_series_invalid(self, box_df_broadcast_failure, - tdser): - box = box_df_broadcast_failure + def test_td64arr_add_int_series_invalid(self, box, tdser): tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): tdser + Series([2, 3, 4]) - def test_td64arr_radd_int_series_invalid(self, box_df_broadcast_failure, - tdser): - box = box_df_broadcast_failure + def test_td64arr_radd_int_series_invalid(self, box, tdser): tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): Series([2, 3, 4]) + tdser - def test_td64arr_sub_int_series_invalid(self, box_df_broadcast_failure, - tdser): - box = box_df_broadcast_failure + def test_td64arr_sub_int_series_invalid(self, box, tdser): tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): tdser - Series([2, 3, 4]) - def test_td64arr_rsub_int_series_invalid(self, box_df_broadcast_failure, - tdser): - box = box_df_broadcast_failure + def test_td64arr_rsub_int_series_invalid(self, box, tdser): tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): @@ -605,9 +597,10 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser): 
Series([1, 2, 3]) # TODO: Add DataFrame in here? ], ids=lambda x: type(x).__name__) - def test_td64arr_add_sub_numeric_arr_invalid( - self, box_df_broadcast_failure, vec, dtype, tdser): - box = box_df_broadcast_failure + def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype, tdser): + if box is pd.DataFrame and not isinstance(vec, Series): + raise pytest.xfail(reason="Tries to broadcast incorrectly") + tdser = tm.box_expected(tdser, box) err = TypeError if box is pd.Index and not dtype.startswith('float'): @@ -930,9 +923,9 @@ def test_td64arr_sub_offset_array(self, box_df_broadcast_failure): @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) - def test_td64arr_with_offset_series(self, names, box_df_broadcast_failure): + def test_td64arr_with_offset_series(self, names, box_df_fail): # GH#18849 - box = box_df_broadcast_failure + box = box_df_fail box2 = Series if box is pd.Index else box tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], @@ -963,10 +956,11 @@ def test_td64arr_with_offset_series(self, names, box_df_broadcast_failure): tm.assert_equal(res3, expected_sub) @pytest.mark.parametrize('obox', [np.array, pd.Index, pd.Series]) - def test_td64arr_addsub_anchored_offset_arraylike( - self, obox, box_df_broadcast_failure): + def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box): # GH#18824 - box = box_df_broadcast_failure + if box is pd.DataFrame and obox is not pd.Series: + raise pytest.xfail(reason="Attempts to broadcast incorrectly") + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) tdi = tm.box_expected(tdi, box) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 0bc74c6890ee9..6186ce4d45ef2 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -721,7 +721,7 @@ def test_align_int_fill_bug(self): result = df1 - df1.mean() expected = df2 - 
df2.mean() - assert_frame_equal(result, expected) + assert_frame_equal(result.astype('f8'), expected) def test_align_multiindex(self): # GH 10665 diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 1ee48d0120c7d..1cb036dccf23c 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1566,8 +1566,9 @@ def test_crosstab_normalize(self): full_normal) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index'), row_normal) - tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns'), - col_normal) + tm.assert_frame_equal( + pd.crosstab(df.a, df.b, normalize='columns').astype('f8'), + col_normal) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize=1), pd.crosstab(df.a, df.b, normalize='columns')) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize=0), @@ -1600,7 +1601,8 @@ def test_crosstab_normalize(self): tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index', margins=True), row_normal_margins) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns', - margins=True), col_normal_margins) + margins=True).astype('f8'), + col_normal_margins) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 601e251d45b4b..f3ab197771d53 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -758,9 +758,6 @@ def test_operators_bitwise(self): def test_scalar_na_cmp_corners(self): s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) - def tester(a, b): - return a & b - with pytest.raises(TypeError): s & datetime(2005, 1, 1) @@ -780,8 +777,11 @@ def tester(a, b): # this is an alignment issue; these are equivalent # https://github.com/pandas-dev/pandas/issues/5284 - pytest.raises(ValueError, lambda: d.__and__(s, axis='columns')) - pytest.raises(ValueError, tester, s, d) + with pytest.raises(TypeError): + 
d.__and__(s, axis='columns') + + with pytest.raises(TypeError): + s & d # this is wrong as its not a boolean result # result = d.__and__(s,axis='index') From fea27f0736a4b8f6626da60a6abc2f6e26b8a365 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 3 Oct 2018 08:49:44 -0500 Subject: [PATCH 11/11] CI: pin moto to 1.3.4 (#22959) --- ci/travis-27.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/travis-27.yaml b/ci/travis-27.yaml index a921bcb46dba4..6955db363ca1f 100644 --- a/ci/travis-27.yaml +++ b/ci/travis-27.yaml @@ -44,7 +44,7 @@ dependencies: # universal - pytest - pytest-xdist - - moto + - moto==1.3.4 - hypothesis>=3.58.0 - pip: - backports.lzma