DEPR: removal of deprecation warnings for float indexers in a positional setting, and raise a TypeError, xref pandas-dev#4892 BUG: index type coercion when setting with an integer-like closes pandas-dev#11836
jreback · Feb 12, 2016 · 5f7c9e9 · 5f7c9e9
1 parent 0181ef4
commit 5f7c9e9
Show file tree

Hide file tree

Showing 10 changed files with 547 additions and 227 deletions.
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -52,6 +52,7 @@ Highlights include:
 - ``pd.test()`` top-level nose test runner is available (:issue:`4327`)
 - Adding support for a ``RangeIndex`` as a specialized form of the ``Int64Index`` for memory savings, see :ref:`here <whatsnew_0180.enhancements.rangeindex>`.
 - API breaking ``.resample`` changes to make it more ``.groupby`` like, see :ref:`here <whatsnew_0180.breaking.resample>`.
+- Removal of support for deprecated float indexers; these will now raise a ``TypeError``, see :ref:`here <whatsnew_0180.float_indexers>`.
 
 See the :ref:`v0.18.0 Whatsnew <whatsnew_0180>` overview for an extensive list
 of all enhancements and bugs that have been fixed in 0.18.0.

diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
@@ -21,6 +21,7 @@ Highlights include:
 - ``pd.test()`` top-level nose test runner is available (:issue:`4327`)
 - Adding support for a ``RangeIndex`` as a specialized form of the ``Int64Index`` for memory savings, see :ref:`here <whatsnew_0180.enhancements.rangeindex>`.
 - API breaking ``.resample`` changes to make it more ``.groupby`` like, see :ref:`here <whatsnew_0180.breaking.resample>`.
+- Removal of support for deprecated float indexers; these will now raise a ``TypeError``, see :ref:`here <whatsnew_0180.float_indexers>`.
 
 Check the :ref:`API Changes <whatsnew_0180.api_breaking>` and :ref:`deprecations <whatsnew_0180.deprecations>` before updating.
 
@@ -865,9 +866,45 @@ Deprecations
   is better handled by matplotlib's `style sheets`_ (:issue:`11783`).
 
 
+.. _style sheets: http://matplotlib.org/users/style_sheets.html
 
+.. _whatsnew_0180.float_indexers:
 
-.. _style sheets: http://matplotlib.org/users/style_sheets.html
+Removal of deprecated float indexers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In :issue:`4892` indexing with floating point numbers on a non-``Float64Index`` was deprecated (in version 0.14.0).
+In 0.18.0, this deprecation warning is removed and these will now raise a ``TypeError``. (:issue:`12165`)
+
+Previous Behavior:
+
+.. code-block:: python
+
+   In [1]: s = Series([1,2,3])
+   In [2]: s[1.0]
+   FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point
+   Out[2]: 2
+
+   In [3]: s.iloc[1.0]
+   FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point
+   Out[3]: 2
+
+   In [4]: s.loc[1.0]
+   FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point
+   Out[4]: 2
+
+New Behavior:
+
+.. code-block:: python
+
+   In [4]: s[1.0]
+   TypeError: cannot do label indexing on <class 'pandas.indexes.range.RangeIndex'> with these indexers [1.0] of <type 'float'>
+
+   In [4]: s.iloc[1.0]
+   TypeError: cannot do label indexing on <class 'pandas.indexes.range.RangeIndex'> with these indexers [1.0] of <type 'float'>
+
+   In [4]: s.loc[1.0]
+   TypeError: cannot do label indexing on <class 'pandas.indexes.range.RangeIndex'> with these indexers [1.0] of <type 'float'>
 
 .. _whatsnew_0180.prior_deprecations:
 

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -115,7 +115,11 @@ def _get_setitem_indexer(self, key):
 
         try:
             return self._convert_to_indexer(key, is_setter=True)
-        except TypeError:
+        except TypeError as e:
+
+            # invalid indexer type vs 'other' indexing errors
+            if 'cannot do' in str(e):
+                raise
             raise IndexingError(key)
 
     def __setitem__(self, key, value):
@@ -312,6 +316,18 @@ def _setitem_with_indexer(self, indexer, value):
                     index = self.obj.index
                     new_index = index.insert(len(index), indexer)
 
+                    # we have a coerced indexer, e.g. a float
+                    # that matches in an Int64Index, so
+                    # we will not create a duplicate index, rather
+                    # index to that element
+                    # e.g. 0.0 -> 0
+                    # GH12246
+                    if index.is_unique:
+                        new_indexer = index.get_indexer([new_index[-1]])
+                        if (new_indexer != -1).any():
+                            return self._setitem_with_indexer(new_indexer,
+                                                              value)
+
                     # this preserves dtype of the value
                     new_values = Series([value])._values
                     if len(self.obj._values):
@@ -1091,8 +1107,17 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
         """
         labels = self.obj._get_axis(axis)
 
-        # if we are a scalar indexer and not type correct raise
-        obj = self._convert_scalar_indexer(obj, axis)
+        if isinstance(obj, slice):
+            return self._convert_slice_indexer(obj, axis)
+
+        # try to find out correct indexer, if not type correct raise
+        try:
+            obj = self._convert_scalar_indexer(obj, axis)
+        except TypeError:
+
+            # but we will allow setting
+            if is_setter:
+                pass
 
         # see if we are positional in nature
         is_int_index = labels.is_integer()
@@ -1131,10 +1156,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
 
             return obj
 
-        if isinstance(obj, slice):
-            return self._convert_slice_indexer(obj, axis)
-
-        elif is_nested_tuple(obj, labels):
+        if is_nested_tuple(obj, labels):
             return labels.get_locs(obj)
         elif is_list_like_indexer(obj):
             if is_bool_indexer(obj):
@@ -1278,7 +1300,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
 
         labels = obj._get_axis(axis)
         indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop,
-                                       slice_obj.step)
+                                       slice_obj.step, kind=self.name)
 
         if isinstance(indexer, slice):
             return self._slice(indexer, axis=axis, kind='iloc')

diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
@@ -21,12 +21,12 @@
 from pandas.core.missing import _clean_reindex_fill_method
 from pandas.core.common import (isnull, array_equivalent,
                                 is_object_dtype, is_datetimetz, ABCSeries,
-                                ABCPeriodIndex,
+                                ABCPeriodIndex, ABCMultiIndex,
                                 _values_from_object, is_float, is_integer,
                                 is_iterator, is_categorical_dtype,
                                 _ensure_object, _ensure_int64, is_bool_indexer,
                                 is_list_like, is_bool_dtype,
-                                is_integer_dtype)
+                                is_integer_dtype, is_float_dtype)
 from pandas.core.strings import StringAccessorMixin
 
 from pandas.core.config import get_option
@@ -162,7 +162,46 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
 
             if dtype is not None:
                 try:
-                    data = np.array(data, dtype=dtype, copy=copy)
+
+                    # we need to avoid having numpy coerce
+                    # things that look like ints/floats to ints unless
+                    # they are actually ints, e.g. '0' and 0.0
+                    # should not be coerced
+                    # GH 11836
+                    if is_integer_dtype(dtype):
+                        inferred = lib.infer_dtype(data)
+                        if inferred == 'integer':
+                            data = np.array(data, copy=copy, dtype=dtype)
+                        elif inferred in ['floating', 'mixed-integer-float']:
+
+                            # if we are actually all equal to integers
+                            # then coerce to integer
+                            from .numeric import Int64Index, Float64Index
+                            try:
+                                res = data.astype('i8')
+                                if (res == data).all():
+                                    return Int64Index(res, copy=copy,
+                                                      name=name)
+                            except (TypeError, ValueError):
+                                pass
+
+                            # return an actual float index
+                            return Float64Index(data, copy=copy, dtype=dtype,
+                                                name=name)
+
+                        elif inferred == 'string':
+                            pass
+                        else:
+                            data = data.astype(dtype)
+                    elif is_float_dtype(dtype):
+                        inferred = lib.infer_dtype(data)
+                        if inferred == 'string':
+                            pass
+                        else:
+                            data = data.astype(dtype)
+                    else:
+                        data = np.array(data, dtype=dtype, copy=copy)
+
                 except (TypeError, ValueError):
                     pass
 
@@ -930,35 +969,32 @@ def _convert_scalar_indexer(self, key, kind=None):
         kind : optional, type of the indexing operation (loc/ix/iloc/None)
 
         right now we are converting
-        floats -> ints if the index supports it
         """
 
-        def to_int():
-            ikey = int(key)
-            if ikey != key:
-                return self._invalid_indexer('label', key)
-            return ikey
-
         if kind == 'iloc':
             if is_integer(key):
                 return key
-            elif is_float(key):
-                key = to_int()
-                warnings.warn("scalar indexers for index type {0} should be "
-                              "integers and not floating point".format(
-                                  type(self).__name__),
-                              FutureWarning, stacklevel=5)
-                return key
             return self._invalid_indexer('label', key)
+        else:
 
-        if is_float(key):
-            if isnull(key):
-                return self._invalid_indexer('label', key)
-            warnings.warn("scalar indexers for index type {0} should be "
-                          "integers and not floating point".format(
-                              type(self).__name__),
-                          FutureWarning, stacklevel=3)
-            return to_int()
+            if len(self):
+
+                # we can safely disallow
+                # if we are not a MultiIndex
+                # or a Float64Index
+                # or have mixed inferred type (IOW we have the possibility
+                # of a float in with say strings)
+                if is_float(key):
+                    if not (isinstance(self, ABCMultiIndex,) or
+                            self.is_floating() or self.is_mixed()):
+                        return self._invalid_indexer('label', key)
+
+                # we can disallow integers with loc
+                # if the index could not contain an integer
+                elif is_integer(key) and kind == 'loc':
+                    if not (isinstance(self, ABCMultiIndex,) or
+                            self.holds_integer() or self.is_mixed()):
+                        return self._invalid_indexer('label', key)
 
         return key
 
@@ -991,14 +1027,6 @@ def f(c):
                 v = getattr(key, c)
                 if v is None or is_integer(v):
                     return v
-
-                # warn if it's a convertible float
-                if v == int(v):
-                    warnings.warn("slice indexers when using iloc should be "
-                                  "integers and not floating point",
-                                  FutureWarning, stacklevel=7)
-                    return int(v)
-
                 self._invalid_indexer('slice {0} value'.format(c), v)
 
             return slice(*[f(c) for c in ['start', 'stop', 'step']])
@@ -1057,7 +1085,7 @@ def is_int(v):
             indexer = key
         else:
             try:
-                indexer = self.slice_indexer(start, stop, step)
+                indexer = self.slice_indexer(start, stop, step, kind=kind)
             except Exception:
                 if is_index_slice:
                     if self.is_integer():
@@ -1891,10 +1919,7 @@ def get_value(self, series, key):
         s = _values_from_object(series)
         k = _values_from_object(key)
 
-        # prevent integer truncation bug in indexing
-        if is_float(k) and not self.is_floating():
-            raise KeyError
-
+        k = self._convert_scalar_indexer(k, kind='getitem')
         try:
             return self._engine.get_value(s, k,
                                           tz=getattr(series.dtype, 'tz', None))
@@ -2236,6 +2261,7 @@ def reindex(self, target, method=None, level=None, limit=None,
             if self.equals(target):
                 indexer = None
             else:
+
                 if self.is_unique:
                     indexer = self.get_indexer(target, method=method,
                                                limit=limit,
@@ -2722,7 +2748,9 @@ def _maybe_cast_slice_bound(self, label, side, kind):
         # datetimelike Indexes
         # reject them
         if is_float(label):
-            self._invalid_indexer('slice', label)
+            if not (kind in ['ix'] and (self.holds_integer() or
+                                        self.is_floating())):
+                self._invalid_indexer('slice', label)
 
         # we are trying to find integer bounds on a non-integer based index
         # this is rejected (generally .loc gets you here)

diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py
@@ -42,9 +42,14 @@ def _maybe_cast_slice_bound(self, label, side, kind):
         """
 
         # we are a numeric index, so we accept
-        # integer/floats directly
-        if not (com.is_integer(label) or com.is_float(label)):
-            self._invalid_indexer('slice', label)
+        # integer directly
+        if com.is_integer(label):
+            pass
+
+        # disallow floats unless we are a float index or indexing with .ix
+        elif com.is_float(label):
+            if not (self.is_floating() or kind in ['ix']):
+                self._invalid_indexer('slice', label)
 
         return label
 
@@ -200,6 +205,18 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
 
         if dtype is None:
             dtype = np.float64
+        dtype = np.dtype(dtype)
+
+        # allow integer / object dtypes to be passed, but coerce to float64
+        if dtype.kind in ['i', 'O']:
+            dtype = np.float64
+
+        elif dtype.kind in ['f']:
+            pass
+
+        else:
+            raise TypeError("cannot support {0} dtype in "
+                            "Float64Index".format(dtype))
 
         try:
             subarr = np.array(data, dtype=dtype, copy=copy)