Merge pull request #4850 from jreback/loc_float

BUG/ENH: provide better .loc based semantics for float based indices, continuing not to fall back (related GH236)
pandas-dev · Sep 25, 2013 · 0fa4d06 · 0fa4d06
2 parents d2f95f2 + 60efe85
commit 0fa4d06
Show file tree

Hide file tree

Showing 19 changed files with 1,080 additions and 317 deletions.
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
@@ -1199,22 +1199,109 @@ numpy array.  For instance,
   dflookup = DataFrame(np.random.rand(20,4), columns = ['A','B','C','D'])
   dflookup.lookup(list(range(0,10,2)), ['B','C','A','B','D'])
 
-Setting values in mixed-type DataFrame
---------------------------------------
+.. _indexing.float64index:
 
-.. _indexing.mixed_type_setting:
+Float64Index
+------------
+
+.. versionadded:: 0.13.0
 
-Setting values on a mixed-type DataFrame or Panel is supported when using
-scalar values, though setting arbitrary vectors is not yet supported:
+By default a ``Float64Index`` will be automatically created when passing floating, or mixed-integer-floating values in index creation.
+This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the
+same.
 
 .. ipython:: python
 
-   df2 = df[:4]
-   df2['foo'] = 'bar'
-   print(df2)
-   df2.ix[2] = np.nan
-   print(df2)
-   print(df2.dtypes)
+   indexf = Index([1.5, 2, 3, 4.5, 5])
+   indexf
+   sf = Series(range(5),index=indexf)
+   sf
+
+Scalar selection for ``[],.ix,.loc`` will always be label based. An integer will match an equal float index (e.g. ``3`` is equivalent to ``3.0``)
+
+.. ipython:: python
+
+   sf[3]
+   sf[3.0]
+   sf.ix[3]
+   sf.ix[3.0]
+   sf.loc[3]
+   sf.loc[3.0]
+
+The only positional indexing is via ``iloc``
+
+.. ipython:: python
+
+   sf.iloc[3]
+
+A scalar index that is not found will raise ``KeyError``
+
+Slicing is ALWAYS on the values of the index, for ``[],ix,loc`` and ALWAYS positional with ``iloc``
+
+.. ipython:: python
+
+   sf[2:4]
+   sf.ix[2:4]
+   sf.loc[2:4]
+   sf.iloc[2:4]
+
+In float indexes, slicing using floats is allowed
+
+.. ipython:: python
+
+   sf[2.1:4.6]
+   sf.loc[2.1:4.6]
+
+In non-float indexes, slicing using floats will raise a ``TypeError``
+
+.. code-block:: python
+
+   In [1]: Series(range(5))[3.5]
+   TypeError: the label [3.5] is not a proper indexer for this index type (Int64Index)
+
+   In [2]: Series(range(5))[3.5:4.5]
+   TypeError: the slice start [3.5] is not a proper indexer for this index type (Int64Index)
+
+Using a scalar float indexer will be deprecated in a future version, but is allowed for now.
+
+.. code-block:: python
+
+   In [3]: Series(range(5))[3.0]
+   Out[3]: 3
+
+Here is a typical use-case for using this type of indexing. Imagine that you have a somewhat
+irregular timedelta-like indexing scheme, but the data is recorded as floats. This could for
+example be millisecond offsets.
+
+.. ipython:: python
+
+   dfir = concat([DataFrame(randn(5,2),
+                     index=np.arange(5) * 250.0,
+                     columns=list('AB')),
+                  DataFrame(randn(6,2),
+                     index=np.arange(4,10) * 250.1,
+                     columns=list('AB'))])
+   dfir
+
+Selection operations then will always work on a value basis, for all selection operators.
+
+.. ipython:: python
+
+   dfir[0:1000.4]
+   dfir.loc[0:1001,'A']
+   dfir.loc[1000.4]
+
+You could then easily pick out the first 1 second (1000 ms) of data.
+
+.. ipython:: python
+
+   dfir[0:1000]
+
+Of course if you need integer based selection, then use ``iloc``
+
+.. ipython:: python
+
+   dfir.iloc[0:5]
 
 .. _indexing.view_versus_copy:
 

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -229,6 +229,10 @@ API Changes
     add top-level ``to_timedelta`` function
   - ``NDFrame`` now is compatible with Python's toplevel ``abs()`` function (:issue:`4821`).
   - raise a ``TypeError`` on invalid comparison ops on Series/DataFrame (e.g. integer/datetime) (:issue:`4968`)
+  - Added a new index type, ``Float64Index``. This will be automatically created when passing floating values in index creation.
+    This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the same.
+    Indexing on other index types is preserved (including positional fallback for ``[],ix``), with the exception that floating point slicing
+    on indexes other than ``Float64Index`` will raise a ``TypeError``, e.g. ``Series(range(5))[3.5:4.5]`` (:issue:`263`)
 
 Internal Refactoring
 ~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
@@ -116,6 +116,72 @@ Indexing API Changes
        p
        p.loc[:,:,'C']
 
+Float64Index API Change
+~~~~~~~~~~~~~~~~~~~~~~~
+
+  - Added a new index type, ``Float64Index``. This will be automatically created when passing floating values in index creation.
+    This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the
+    same. See :ref:`the docs<indexing.float64index>`, (:issue:`263`)
+
+    A ``Float64Index`` is constructed by default for floating type values.
+
+    .. ipython:: python
+
+       index = Index([1.5, 2, 3, 4.5, 5])
+       index
+       s = Series(range(5),index=index)
+       s
+
+    Scalar selection for ``[],.ix,.loc`` will always be label based. An integer will match an equal float index (e.g. ``3`` is equivalent to ``3.0``)
+
+    .. ipython:: python
+
+       s[3]
+       s.ix[3]
+       s.loc[3]
+
+    The only positional indexing is via ``iloc``
+
+    .. ipython:: python
+
+       s.iloc[3]
+
+    A scalar index that is not found will raise ``KeyError``
+
+    Slicing is ALWAYS on the values of the index, for ``[],ix,loc`` and ALWAYS positional with ``iloc``
+
+    .. ipython:: python
+
+       s[2:4]
+       s.ix[2:4]
+       s.loc[2:4]
+       s.iloc[2:4]
+
+    In float indexes, slicing using floats is allowed
+
+    .. ipython:: python
+
+       s[2.1:4.6]
+       s.loc[2.1:4.6]
+
+  - Indexing on other index types is preserved (including positional fallback for ``[],ix``), with the exception that floating point slicing
+    on indexes other than ``Float64Index`` will now raise a ``TypeError``.
+
+    .. code-block:: python
+
+       In [1]: Series(range(5))[3.5]
+       TypeError: the label [3.5] is not a proper indexer for this index type (Int64Index)
+
+       In [2]: Series(range(5))[3.5:4.5]
+       TypeError: the slice start [3.5] is not a proper indexer for this index type (Int64Index)
+
+    Using a scalar float indexer will be deprecated in a future version, but is allowed for now.
+
+    .. code-block:: python
+
+       In [3]: Series(range(5))[3.0]
+       Out[3]: 3
+
 HDFStore API Changes
 ~~~~~~~~~~~~~~~~~~~~
 

diff --git a/doc/source/v0.4.x.txt b/doc/source/v0.4.x.txt
@@ -15,8 +15,7 @@ New Features
   with choice of join method (ENH56_)
 - :ref:`Added <indexing.get_level_values>` method ``get_level_values`` to
   ``MultiIndex`` (:issue:`188`)
-- :ref:`Set <indexing.mixed_type_setting>` values in mixed-type
-  ``DataFrame`` objects via ``.ix`` indexing attribute (:issue:`135`)
+- Set values in mixed-type ``DataFrame`` objects via ``.ix`` indexing attribute (:issue:`135`)
 - Added new ``DataFrame`` :ref:`methods <basics.dtypes>`
   ``get_dtype_counts`` and property ``dtypes`` (ENHdc_)
 - Added :ref:`ignore_index <merging.ignore_index>` option to

diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py
@@ -778,7 +778,7 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs):
 class TestAlignment(object):
 
     index_types = 'i', 'u', 'dt'
-    lhs_index_types = index_types + ('f', 's')  # 'p'
+    lhs_index_types = index_types + ('s',)  # 'p'
 
     def check_align_nested_unary_op(self, engine, parser):
         skip_if_no_ne(engine)

diff --git a/pandas/core/api.py b/pandas/core/api.py
@@ -8,7 +8,7 @@
 from pandas.core.categorical import Categorical, Factor
 from pandas.core.format import (set_printoptions, reset_printoptions,
                                 set_eng_float_format)
-from pandas.core.index import Index, Int64Index, MultiIndex
+from pandas.core.index import Index, Int64Index, Float64Index, MultiIndex
 
 from pandas.core.series import Series, TimeSeries
 from pandas.core.frame import DataFrame

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2050,7 +2050,7 @@ def eval(self, expr, **kwargs):
         kwargs['local_dict'] = _ensure_scope(resolvers=resolvers, **kwargs)
         return _eval(expr, **kwargs)
 
-    def _slice(self, slobj, axis=0, raise_on_error=False):
+    def _slice(self, slobj, axis=0, raise_on_error=False, typ=None):
         axis = self._get_block_manager_axis(axis)
         new_data = self._data.get_slice(
             slobj, axis=axis, raise_on_error=raise_on_error)