diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index beb803282ebe3..4d1354a515b1c 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -717,6 +717,10 @@ values NOT in the categories, similarly to how you can reindex ANY pandas index. Int64Index and RangeIndex ~~~~~~~~~~~~~~~~~~~~~~~~~ +.. warning:: + + Indexing on an integer-based Index with floats has been clarified in 0.18.0; for a summary of the changes, see :ref:`here <whatsnew_0180.float_indexers>`. + ``Int64Index`` is a fundamental basic index in *pandas*. This is an Immutable array implementing an ordered, sliceable set. Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects. @@ -736,7 +740,6 @@ Float64Index operations by about 30x and boolean indexing operations on the ``Float64Index`` itself are about 2x as fast. - .. versionadded:: 0.13.0 By default a ``Float64Index`` will be automatically created when passing floating, or mixed-integer-floating values in index creation. @@ -797,12 +800,12 @@ In non-float indexes, slicing using floats will raise a ``TypeError`` .. warning:: - Using a scalar float indexer has been removed in 0.18.0, so the following will raise a ``TypeError`` + Using a scalar float indexer for ``.iloc`` has been removed in 0.18.0, so the following will raise a ``TypeError`` .. code-block:: python - In [3]: pd.Series(range(5))[3.0] - TypeError: cannot do label indexing on <class 'pandas.indexes.range.RangeIndex'> with these indexers [3.0] of <type 'float'> + In [3]: pd.Series(range(5)).iloc[3.0] + TypeError: cannot do positional indexing on <class 'pandas.indexes.range.RangeIndex'> with these indexers [3.0] of <type 'float'> Further the treatment of ``.ix`` with a float indexer on a non-float index, will be label based, and thus coerce the index. diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 98bc50bae9260..7494f8ae88307 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -53,6 +53,10 @@ advanced indexing. but instead subclass ``PandasObject``, similarly to the rest of the pandas objects.
This should be a transparent change with only very limited API implications (See the :ref:`Internal Refactoring <whatsnew_0150.refactoring>`) +.. warning:: + + Indexing on an integer-based Index with floats has been clarified in 0.18.0; for a summary of the changes, see :ref:`here <whatsnew_0180.float_indexers>`. + See the :ref:`MultiIndex / Advanced Indexing <advanced>` for ``MultiIndex`` and more advanced indexing documentation. See the :ref:`cookbook` for some advanced strategies diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index bdc20d964a06a..d4a5224f6f775 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -1024,11 +1024,11 @@ Removal of deprecated float indexers ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In :issue:`4892` indexing with floating point numbers on a non-``Float64Index`` was deprecated (in version 0.14.0). -In 0.18.0, this deprecation warning is removed and these will now raise a ``TypeError``. (:issue:`12165`) +In 0.18.0, this deprecation warning is removed and these will now raise a ``TypeError``. (:issue:`12165`, :issue:`12333`) .. ipython:: python - s = pd.Series([1,2,3]) + s = pd.Series([1, 2, 3], index=[4, 5, 6]) s s2 = pd.Series([1, 2, 3], index=list('abc')) s2 @@ -1037,15 +1037,18 @@ Previous Behavior: .. code-block:: python - In [2]: s[1.0] + # this is label indexing + In [2]: s[5.0] FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point Out[2]: 2 + # this is positional indexing In [3]: s.iloc[1.0] FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point Out[3]: 2 - In [4]: s.loc[1.0] + # this is label indexing + In [4]: s.loc[5.0] FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point Out[4]: 2 @@ -1062,33 +1065,54 @@ Previous Behavior: New Behavior: +For ``.iloc``, getting & setting via a float scalar will always raise. + .. 
code-block:: python - In [2]: s[1.0] - TypeError: cannot do label indexing on <class 'pandas.indexes.range.RangeIndex'> with these indexers [1.0] of <type 'float'> + In [3]: s.iloc[2.0] + TypeError: cannot do positional indexing on <class 'pandas.indexes.numeric.Int64Index'> with these indexers [2.0] of <type 'float'> - In [3]: s.iloc[1.0] - TypeError: cannot do label indexing on <class 'pandas.indexes.range.RangeIndex'> with these indexers [1.0] of <type 'float'> +Other indexers will coerce an integer-like float to the equivalent integer for both getting and setting. The ``FutureWarning`` has been dropped for ``.loc``, ``.ix`` and ``[]``. - In [4]: s.loc[1.0] - TypeError: cannot do label indexing on <class 'pandas.indexes.range.RangeIndex'> with these indexers [1.0] of <type 'float'> +.. ipython:: python - # .ix will now cause this to be a label lookup and coerce to and Index - In [5]: s2.ix[1.0] = 10 + s[5.0] + s.loc[5.0] + s.ix[5.0] - In [6]: s2 - Out[3]: - a 1 - b 2 - c 3 - 1.0 10 - dtype: int64 +and setting + +.. ipython:: python + + s_copy = s.copy() + s_copy[5.0] = 10 + s_copy + s_copy = s.copy() + s_copy.loc[5.0] = 10 + s_copy + s_copy = s.copy() + s_copy.ix[5.0] = 10 + s_copy + +Slicing will also coerce integer-like floats to integers for a non-``Float64Index``. + +.. ipython:: python + + s.loc[5.0:6] + s.ix[5.0:6] + +Note that for floats that are NOT coercible to ints, the label-based bounds will be excluded. + +.. ipython:: python + + s.loc[5.1:6] + s.ix[5.1:6] Float indexing on a ``Float64Index`` is unchanged. .. ipython:: python - s = pd.Series([1,2,3],index=np.arange(3.)) + s = pd.Series([1, 2, 3], index=np.arange(3.)) s[1.0] s[1.0:2.5] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6c2d4f7919ac6..80f3d0d66ca9a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2016,7 +2016,7 @@ def _getitem_array(self, key): # with all other indexing behavior if isinstance(key, Series) and not key.index.equals(self.index): warnings.warn("Boolean Series key will be reindexed to match " - "DataFrame index.", UserWarning) + "DataFrame index.", UserWarning, stacklevel=3) elif len(key) != len(self.index): raise ValueError('Item wrong length %d instead of %d.' 
% (len(key), len(self.index))) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 03fa072db83da..b0dd2596fccd5 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -995,6 +995,10 @@ def _getitem_axis(self, key, axis=0): return self._getitem_iterable(key, axis=axis) else: + + # maybe coerce a float scalar to integer + key = labels._maybe_cast_indexer(key) + if is_integer(key): if axis == 0 and isinstance(labels, MultiIndex): try: diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 8a679b1575e26..852cddc456213 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -902,6 +902,7 @@ def _mpl_repr(self): _na_value = np.nan """The expected NA value to use with this index.""" + # introspection @property def is_monotonic(self): """ alias for is_monotonic_increasing (deprecated) """ @@ -954,11 +955,12 @@ def is_categorical(self): return self.inferred_type in ['categorical'] def is_mixed(self): - return 'mixed' in self.inferred_type + return self.inferred_type in ['mixed'] def holds_integer(self): return self.inferred_type in ['integer', 'mixed-integer'] + # validate / convert indexers def _convert_scalar_indexer(self, key, kind=None): """ convert a scalar indexer @@ -966,44 +968,42 @@ def _convert_scalar_indexer(self, key, kind=None): Parameters ---------- key : label of the slice bound - kind : optional, type of the indexing operation (loc/ix/iloc/None) - - right now we are converting + kind : {'ix', 'loc', 'getitem', 'iloc'} or None """ + assert kind in ['ix', 'loc', 'getitem', 'iloc', None] + if kind == 'iloc': - if is_integer(key): - return key - return self._invalid_indexer('positional', key) - else: + return self._validate_indexer('positional', key, kind) - if len(self): - - # we can safely disallow - # if we are not a MultiIndex - # or a Float64Index - # or have mixed inferred type (IOW we have the possiblity - # of a float in with say strings) - if is_float(key): - if not (isinstance(self, 
ABCMultiIndex,) or - self.is_floating() or self.is_mixed()): - return self._invalid_indexer('label', key) - - # we can disallow integers with loc - # if could not contain and integer - elif is_integer(key) and kind == 'loc': - if not (isinstance(self, ABCMultiIndex,) or - self.holds_integer() or self.is_mixed()): - return self._invalid_indexer('label', key) + if len(self) and not isinstance(self, ABCMultiIndex,): - return key + # we can raise here if we are definitive that this + # is positional indexing (eg. .ix on with a float) + # or label indexing if we are using a type able + # to be represented in the index - def _convert_slice_indexer_getitem(self, key, is_index_slice=False): - """ called from the getitem slicers, determine how to treat the key - whether positional or not """ - if self.is_integer() or is_index_slice: - return key - return self._convert_slice_indexer(key) + if kind in ['getitem', 'ix'] and is_float(key): + if not self.is_floating(): + return self._invalid_indexer('label', key) + + elif kind in ['loc'] and is_float(key): + + # we want to raise KeyError on string/mixed here + # technically we *could* raise a TypeError + # on anything but mixed though + if self.inferred_type not in ['floating', + 'mixed-integer-float', + 'string', + 'unicode', + 'mixed']: + return self._invalid_indexer('label', key) + + elif kind in ['loc'] and is_integer(key): + if not self.holds_integer(): + return self._invalid_indexer('label', key) + + return key def _convert_slice_indexer(self, key, kind=None): """ @@ -1012,8 +1012,9 @@ def _convert_slice_indexer(self, key, kind=None): Parameters ---------- key : label of the slice bound - kind : optional, type of the indexing operation (loc/ix/iloc/None) + kind : {'ix', 'loc', 'getitem', 'iloc'} or None """ + assert kind in ['ix', 'loc', 'getitem', 'iloc', None] # if we are not a slice, then we are done if not isinstance(key, slice): @@ -1021,38 +1022,14 @@ def _convert_slice_indexer(self, key, kind=None): # validate iloc 
if kind == 'iloc': + return slice(self._validate_indexer('slice', key.start, kind), + self._validate_indexer('slice', key.stop, kind), + self._validate_indexer('slice', key.step, kind)) - # need to coerce to_int if needed - def f(c): - v = getattr(key, c) - if v is None or is_integer(v): - return v - self._invalid_indexer('slice {0} value'.format(c), v) - - return slice(*[f(c) for c in ['start', 'stop', 'step']]) - - # validate slicers - def validate(v): - if v is None or is_integer(v): - return True - - # dissallow floats (except for .ix) - elif is_float(v): - if kind == 'ix': - return True - - return False - - return True - - for c in ['start', 'stop', 'step']: - v = getattr(key, c) - if not validate(v): - self._invalid_indexer('slice {0} value'.format(c), v) - - # figure out if this is a positional indexer + # potentially cast the bounds to integers start, stop, step = key.start, key.stop, key.step + # figure out if this is a positional indexer def is_int(v): return v is None or is_integer(v) @@ -1061,8 +1038,14 @@ def is_int(v): is_positional = is_index_slice and not self.is_integer() if kind == 'getitem': - return self._convert_slice_indexer_getitem( - key, is_index_slice=is_index_slice) + """ + called from the getitem slicers, validate that we are in fact + integers + """ + if self.is_integer() or is_index_slice: + return slice(self._validate_indexer('slice', key.start, kind), + self._validate_indexer('slice', key.stop, kind), + self._validate_indexer('slice', key.step, kind)) # convert the slice to an indexer here @@ -1889,7 +1872,10 @@ def get_loc(self, key, method=None, tolerance=None): raise ValueError('tolerance argument only valid if using pad, ' 'backfill or nearest lookups') key = _values_from_object(key) - return self._engine.get_loc(key) + try: + return self._engine.get_loc(key) + except KeyError: + return self._engine.get_loc(self._maybe_cast_indexer(key)) indexer = self.get_indexer([key], method=method, tolerance=tolerance) if indexer.ndim > 1 or 
indexer.size > 1: @@ -2721,6 +2707,37 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): return slice(start_slice, end_slice, step) + def _maybe_cast_indexer(self, key): + """ + If we have a float key and are not a floating index + then try to cast to an int if equivalent + """ + + if is_float(key) and not self.is_floating(): + try: + ckey = int(key) + if ckey == key: + key = ckey + except (ValueError, TypeError): + pass + return key + + def _validate_indexer(self, form, key, kind): + """ + if we are positional indexer + validate that we have appropriate typed bounds + must be an integer + """ + assert kind in ['ix', 'loc', 'getitem', 'iloc'] + + if key is None: + pass + elif is_integer(key): + pass + elif kind in ['iloc', 'getitem']: + self._invalid_indexer(form, key) + return key + def _maybe_cast_slice_bound(self, label, side, kind): """ This function should be overloaded in subclasses that allow non-trivial @@ -2731,7 +2748,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): ---------- label : object side : {'left', 'right'} - kind : string / None + kind : {'ix', 'loc', 'getitem'} Returns ------- @@ -2742,6 +2759,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): Value of `side` parameter should be validated in caller. """ + assert kind in ['ix', 'loc', 'getitem', None] # We are a plain index here (sub-class override this method if they # wish to have special treatment for floats/ints, e.g. 
Float64Index and @@ -2783,9 +2801,11 @@ def get_slice_bound(self, label, side, kind): ---------- label : object side : {'left', 'right'} - kind : string / None, the type of indexer + kind : {'ix', 'loc', 'getitem'} """ + assert kind in ['ix', 'loc', 'getitem', None] + if side not in ('left', 'right'): raise ValueError("Invalid value for side kwarg," " must be either 'left' or 'right': %s" % @@ -2841,7 +2861,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): If None, defaults to the end step : int, defaults None If None, defaults to 1 - kind : string, defaults None + kind : {'ix', 'loc', 'getitem'} or None Returns ------- diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index fea153b2de391..d14568ceca258 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -1409,6 +1409,7 @@ def _tuple_index(self): return Index(self._values) def get_slice_bound(self, label, side, kind): + if not isinstance(label, tuple): label = label, return self._partial_tup_index(label, side=side) @@ -1743,7 +1744,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): # we have a partial slice (like looking up a partial date # string) start = stop = level_index.slice_indexer(key.start, key.stop, - key.step) + key.step, kind='loc') step = start.step if isinstance(start, slice) or isinstance(stop, slice): diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 0c102637ab70d..4b021c51456b9 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -7,6 +7,7 @@ from pandas.indexes.base import Index, InvalidIndexError from pandas.util.decorators import Appender, cache_readonly import pandas.core.common as com +from pandas.core.common import is_dtype_equal, isnull import pandas.indexes.base as ibase @@ -29,7 +30,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): ---------- label : object side : {'left', 'right'} - kind : string / None + kind : {'ix', 'loc', 'getitem'} Returns ------- @@ 
-40,18 +41,10 @@ def _maybe_cast_slice_bound(self, label, side, kind): Value of `side` parameter should be validated in caller. """ + assert kind in ['ix', 'loc', 'getitem', None] - # we are a numeric index, so we accept - # integer directly - if com.is_integer(label): - pass - - # disallow floats only if we not-strict - elif com.is_float(label): - if not (self.is_floating() or kind in ['ix']): - self._invalid_indexer('slice', label) - - return label + # we will try to coerce to integers + return self._maybe_cast_indexer(label) def _convert_tolerance(self, tolerance): try: @@ -140,6 +133,24 @@ def is_all_dates(self): """ return False + def _convert_scalar_indexer(self, key, kind=None): + """ + convert a scalar indexer + + Parameters + ---------- + key : label of the slice bound + kind : {'ix', 'loc', 'getitem'} or None + """ + + assert kind in ['ix', 'loc', 'getitem', 'iloc', None] + + # don't coerce ilocs to integers + if kind != 'iloc': + key = self._maybe_cast_indexer(key) + return (super(Int64Index, self) + ._convert_scalar_indexer(key, kind=kind)) + def equals(self, other): """ Determines if two Index objects contain the same elements. 
@@ -247,18 +258,13 @@ def _convert_scalar_indexer(self, key, kind=None): Parameters ---------- key : label of the slice bound - kind : optional, type of the indexing operation (loc/ix/iloc/None) - - right now we are converting - floats -> ints if the index supports it + kind : {'ix', 'loc', 'getitem'} or None """ - if kind == 'iloc': - if com.is_integer(key): - return key + assert kind in ['ix', 'loc', 'getitem', 'iloc', None] - return (super(Float64Index, self) - ._convert_scalar_indexer(key, kind=kind)) + if kind == 'iloc': + return self._validate_indexer('positional', key, kind) return key @@ -282,7 +288,7 @@ def _convert_slice_indexer(self, key, kind=None): kind=kind) # translate to locations - return self.slice_indexer(key.start, key.stop, key.step) + return self.slice_indexer(key.start, key.stop, key.step, kind=kind) def _format_native_types(self, na_rep='', float_format=None, decimal='.', quoting=None, **kwargs): @@ -324,7 +330,7 @@ def equals(self, other): try: if not isinstance(other, Float64Index): other = self._constructor(other) - if (not com.is_dtype_equal(self.dtype, other.dtype) or + if (not is_dtype_equal(self.dtype, other.dtype) or self.shape != other.shape): return False left, right = self._values, other._values @@ -380,7 +386,7 @@ def isin(self, values, level=None): if level is not None: self._validate_index_level(level) return lib.ismember_nans(np.array(self), value_set, - com.isnull(list(value_set)).any()) + isnull(list(value_set)).any()) Float64Index._add_numeric_methods() diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index 0bed2ec231dbe..4b06af9240436 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -487,8 +487,8 @@ def __getitem__(self, key): stop = l # delegate non-integer slices - if (start != int(start) and - stop != int(stop) and + if (start != int(start) or + stop != int(stop) or step != int(step)): return super_getitem(key) diff --git a/pandas/tests/frame/test_indexing.py 
b/pandas/tests/frame/test_indexing.py index 264302866b023..2a3ee774af6e5 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -216,7 +216,7 @@ def test_getitem_boolean(self): # we are producing a warning that since the passed boolean # key is not the same as the given index, we will reindex # not sure this is really necessary - with tm.assert_produces_warning(UserWarning): + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): indexer_obj = indexer_obj.reindex(self.tsframe.index[::-1]) subframe_obj = self.tsframe[indexer_obj] assert_frame_equal(subframe_obj, subframe) diff --git a/pandas/tests/indexing/__init__.py b/pandas/tests/indexing/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py new file mode 100644 index 0000000000000..4e31fb350f6ee --- /dev/null +++ b/pandas/tests/indexing/test_categorical.py @@ -0,0 +1,339 @@ +# -*- coding: utf-8 -*- + +import pandas as pd +import numpy as np +from pandas import Series, DataFrame +from pandas.util.testing import assert_series_equal, assert_frame_equal +from pandas.util import testing as tm + + +class TestCategoricalIndex(tm.TestCase): + + def setUp(self): + + self.df = DataFrame({'A': np.arange(6, dtype='int64'), + 'B': Series(list('aabbca')).astype( + 'category', categories=list( + 'cab'))}).set_index('B') + self.df2 = DataFrame({'A': np.arange(6, dtype='int64'), + 'B': Series(list('aabbca')).astype( + 'category', categories=list( + 'cabe'))}).set_index('B') + self.df3 = DataFrame({'A': np.arange(6, dtype='int64'), + 'B': (Series([1, 1, 2, 1, 3, 2]) + .astype('category', categories=[3, 2, 1], + ordered=True))}).set_index('B') + self.df4 = DataFrame({'A': np.arange(6, dtype='int64'), + 'B': (Series([1, 1, 2, 1, 3, 2]) + .astype('category', categories=[3, 2, 1], + ordered=False))}).set_index('B') + + def test_loc_scalar(self): + result = 
self.df.loc['a'] + expected = (DataFrame({'A': [0, 1, 5], + 'B': (Series(list('aaa')) + .astype('category', + categories=list('cab')))}) + .set_index('B')) + assert_frame_equal(result, expected) + + df = self.df.copy() + df.loc['a'] = 20 + expected = (DataFrame({'A': [20, 20, 2, 3, 4, 20], + 'B': (Series(list('aabbca')) + .astype('category', + categories=list('cab')))}) + .set_index('B')) + assert_frame_equal(df, expected) + + # value not in the categories + self.assertRaises(KeyError, lambda: df.loc['d']) + + def f(): + df.loc['d'] = 10 + + self.assertRaises(TypeError, f) + + def f(): + df.loc['d', 'A'] = 10 + + self.assertRaises(TypeError, f) + + def f(): + df.loc['d', 'C'] = 10 + + self.assertRaises(TypeError, f) + + def test_loc_listlike(self): + + # list of labels + result = self.df.loc[['c', 'a']] + expected = self.df.iloc[[4, 0, 1, 5]] + assert_frame_equal(result, expected, check_index_type=True) + + result = self.df2.loc[['a', 'b', 'e']] + exp_index = pd.CategoricalIndex( + list('aaabbe'), categories=list('cabe'), name='B') + expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index) + assert_frame_equal(result, expected, check_index_type=True) + + # element in the categories but not in the values + self.assertRaises(KeyError, lambda: self.df2.loc['e']) + + # assign is ok + df = self.df2.copy() + df.loc['e'] = 20 + result = df.loc[['a', 'b', 'e']] + exp_index = pd.CategoricalIndex( + list('aaabbe'), categories=list('cabe'), name='B') + expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=exp_index) + assert_frame_equal(result, expected) + + df = self.df2.copy() + result = df.loc[['a', 'b', 'e']] + exp_index = pd.CategoricalIndex( + list('aaabbe'), categories=list('cabe'), name='B') + expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index) + assert_frame_equal(result, expected, check_index_type=True) + + # not all labels in the categories + self.assertRaises(KeyError, lambda: self.df2.loc[['a', 'd']]) + + def 
test_loc_listlike_dtypes(self): + # GH 11586 + + # unique categories and codes + index = pd.CategoricalIndex(['a', 'b', 'c']) + df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index) + + # unique slice + res = df.loc[['a', 'b']] + exp = DataFrame({'A': [1, 2], + 'B': [4, 5]}, index=pd.CategoricalIndex(['a', 'b'])) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[['a', 'a', 'b']] + exp = DataFrame({'A': [1, 1, 2], + 'B': [4, 4, 5]}, + index=pd.CategoricalIndex(['a', 'a', 'b'])) + tm.assert_frame_equal(res, exp, check_index_type=True) + + with tm.assertRaisesRegexp( + KeyError, + 'a list-indexer must only include values that are ' + 'in the categories'): + df.loc[['a', 'x']] + + # duplicated categories and codes + index = pd.CategoricalIndex(['a', 'b', 'a']) + df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index) + + # unique slice + res = df.loc[['a', 'b']] + exp = DataFrame({'A': [1, 3, 2], + 'B': [4, 6, 5]}, + index=pd.CategoricalIndex(['a', 'a', 'b'])) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[['a', 'a', 'b']] + exp = DataFrame( + {'A': [1, 3, 1, 3, 2], + 'B': [4, 6, 4, 6, 5 + ]}, index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) + tm.assert_frame_equal(res, exp, check_index_type=True) + + with tm.assertRaisesRegexp( + KeyError, + 'a list-indexer must only include values ' + 'that are in the categories'): + df.loc[['a', 'x']] + + # contains unused category + index = pd.CategoricalIndex( + ['a', 'b', 'a', 'c'], categories=list('abcde')) + df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index) + + res = df.loc[['a', 'b']] + exp = DataFrame({'A': [1, 3, 2], + 'B': [5, 7, 6]}, index=pd.CategoricalIndex( + ['a', 'a', 'b'], categories=list('abcde'))) + tm.assert_frame_equal(res, exp, check_index_type=True) + + res = df.loc[['a', 'e']] + exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]}, + index=pd.CategoricalIndex(['a', 'a', 'e'], 
+ categories=list('abcde'))) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[['a', 'a', 'b']] + exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]}, + index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'], + categories=list('abcde'))) + tm.assert_frame_equal(res, exp, check_index_type=True) + + with tm.assertRaisesRegexp( + KeyError, + 'a list-indexer must only include values ' + 'that are in the categories'): + df.loc[['a', 'x']] + + def test_read_only_source(self): + # GH 10043 + rw_array = np.eye(10) + rw_df = DataFrame(rw_array) + + ro_array = np.eye(10) + ro_array.setflags(write=False) + ro_df = DataFrame(ro_array) + + assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]]) + assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]]) + assert_series_equal(rw_df.iloc[1], ro_df.iloc[1]) + assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3]) + + assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]]) + assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]]) + assert_series_equal(rw_df.loc[1], ro_df.loc[1]) + assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3]) + + def test_reindexing(self): + + # reindexing + # convert to a regular index + result = self.df2.reindex(['a', 'b', 'e']) + expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan], + 'B': Series(list('aaabbe'))}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + result = self.df2.reindex(['a', 'b']) + expected = DataFrame({'A': [0, 1, 5, 2, 3], + 'B': Series(list('aaabb'))}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + result = self.df2.reindex(['e']) + expected = DataFrame({'A': [np.nan], + 'B': Series(['e'])}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + result = self.df2.reindex(['d']) + expected = DataFrame({'A': [np.nan], + 'B': Series(['d'])}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + # since we are actually 
reindexing with a Categorical + # then return a Categorical + cats = list('cabe') + + result = self.df2.reindex(pd.Categorical(['a', 'd'], categories=cats)) + expected = DataFrame({'A': [0, 1, 5, np.nan], + 'B': Series(list('aaad')).astype( + 'category', categories=cats)}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + result = self.df2.reindex(pd.Categorical(['a'], categories=cats)) + expected = DataFrame({'A': [0, 1, 5], + 'B': Series(list('aaa')).astype( + 'category', categories=cats)}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + result = self.df2.reindex(['a', 'b', 'e']) + expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan], + 'B': Series(list('aaabbe'))}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + result = self.df2.reindex(['a', 'b']) + expected = DataFrame({'A': [0, 1, 5, 2, 3], + 'B': Series(list('aaabb'))}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + result = self.df2.reindex(['e']) + expected = DataFrame({'A': [np.nan], + 'B': Series(['e'])}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + # give back the type of categorical that we received + result = self.df2.reindex(pd.Categorical( + ['a', 'd'], categories=cats, ordered=True)) + expected = DataFrame( + {'A': [0, 1, 5, np.nan], + 'B': Series(list('aaad')).astype('category', categories=cats, + ordered=True)}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + result = self.df2.reindex(pd.Categorical( + ['a', 'd'], categories=['a', 'd'])) + expected = DataFrame({'A': [0, 1, 5, np.nan], + 'B': Series(list('aaad')).astype( + 'category', categories=['a', 'd' + ])}).set_index('B') + assert_frame_equal(result, expected, check_index_type=True) + + # passed duplicate indexers are not allowed + self.assertRaises(ValueError, lambda: self.df2.reindex(['a', 'a'])) + + # args NotImplemented ATM + 
self.assertRaises(NotImplementedError, + lambda: self.df2.reindex(['a'], method='ffill')) + self.assertRaises(NotImplementedError, + lambda: self.df2.reindex(['a'], level=1)) + self.assertRaises(NotImplementedError, + lambda: self.df2.reindex(['a'], limit=2)) + + def test_loc_slice(self): + # slicing + # not implemented ATM + # GH9748 + + self.assertRaises(TypeError, lambda: self.df.loc[1:5]) + + # result = df.loc[1:5] + # expected = df.iloc[[1,2,3,4]] + # assert_frame_equal(result, expected) + + def test_boolean_selection(self): + + df3 = self.df3 + df4 = self.df4 + + result = df3[df3.index == 'a'] + expected = df3.iloc[[]] + assert_frame_equal(result, expected) + + result = df4[df4.index == 'a'] + expected = df4.iloc[[]] + assert_frame_equal(result, expected) + + result = df3[df3.index == 1] + expected = df3.iloc[[0, 1, 3]] + assert_frame_equal(result, expected) + + result = df4[df4.index == 1] + expected = df4.iloc[[0, 1, 3]] + assert_frame_equal(result, expected) + + # since we have an ordered categorical + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # categories=[3, 2, 1], + # ordered=True, + # name=u'B') + result = df3[df3.index < 2] + expected = df3.iloc[[4]] + assert_frame_equal(result, expected) + + result = df3[df3.index > 1] + expected = df3.iloc[[]] + assert_frame_equal(result, expected) + + # unordered + # cannot be compared + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # categories=[3, 2, 1], + # ordered=False, + # name=u'B') + self.assertRaises(TypeError, lambda: df4[df4.index < 2]) + self.assertRaises(TypeError, lambda: df4[df4.index > 1]) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py new file mode 100644 index 0000000000000..2a2f8678694de --- /dev/null +++ b/pandas/tests/indexing/test_floats.py @@ -0,0 +1,676 @@ +# -*- coding: utf-8 -*- + +import numpy as np +from pandas import Series, DataFrame, Index, Float64Index +from pandas.util.testing import assert_series_equal, assert_almost_equal +import 
pandas.util.testing as tm + + +class TestFloatIndexers(tm.TestCase): + + def check(self, result, original, indexer, getitem): + """ + comparator for results + we need to take care if we are indexing on a + Series or a frame + """ + if isinstance(original, Series): + expected = original.iloc[indexer] + else: + if getitem: + expected = original.iloc[:, indexer] + else: + expected = original.iloc[indexer] + + assert_almost_equal(result, expected) + + def test_scalar_error(self): + + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + # this duplicates the code below + # but is spefically testing for the error + # message + + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeCategoricalIndex, + tm.makeDateIndex, tm.makeTimedeltaIndex, + tm.makePeriodIndex, tm.makeIntIndex, + tm.makeRangeIndex]: + + i = index(5) + + s = Series(np.arange(len(i)), index=i) + + def f(): + s.iloc[3.0] + self.assertRaisesRegexp(TypeError, + 'cannot do positional indexing', + f) + + def f(): + s.iloc[3.0] = 0 + self.assertRaises(TypeError, f) + + def test_scalar_non_numeric(self): + + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeCategoricalIndex, + tm.makeDateIndex, tm.makeTimedeltaIndex, + tm.makePeriodIndex]: + + i = index(5) + + for s in [Series( + np.arange(len(i)), index=i), DataFrame( + np.random.randn( + len(i), len(i)), index=i, columns=i)]: + + # getting + for idxr, getitem in [(lambda x: x.ix, False), + (lambda x: x.iloc, False), + (lambda x: x, True)]: + + def f(): + idxr(s)[3.0] + + # gettitem on a DataFrame is a KeyError as it is indexing + # via labels on the columns + if getitem and isinstance(s, DataFrame): + error = KeyError + else: + error = TypeError + self.assertRaises(error, f) + + # label based can be a TypeError or KeyError + def f(): + s.loc[3.0] + + if s.index.inferred_type in ['string', 
'unicode', 'mixed']: + error = KeyError + else: + error = TypeError + self.assertRaises(error, f) + + # contains + self.assertFalse(3.0 in s) + + # setting with a float fails with iloc + def f(): + s.iloc[3.0] = 0 + self.assertRaises(TypeError, f) + + # setting with an indexer + if s.index.inferred_type in ['categorical']: + # Value or Type Error + pass + elif s.index.inferred_type in ['datetime64', 'timedelta64', + 'period']: + + # these should prob work + # and are inconsisten between series/dataframe ATM + # for idxr in [lambda x: x.ix, + # lambda x: x]: + # s2 = s.copy() + # def f(): + # idxr(s2)[3.0] = 0 + # self.assertRaises(TypeError, f) + pass + + else: + + s2 = s.copy() + s2.loc[3.0] = 10 + self.assertTrue(s2.index.is_object()) + + for idxr in [lambda x: x.ix, + lambda x: x]: + s2 = s.copy() + idxr(s2)[3.0] = 0 + self.assertTrue(s2.index.is_object()) + + # fallsback to position selection, series only + s = Series(np.arange(len(i)), index=i) + s[3] + self.assertRaises(TypeError, lambda: s[3.0]) + + def test_scalar_with_mixed(self): + + s2 = Series([1, 2, 3], index=['a', 'b', 'c']) + s3 = Series([1, 2, 3], index=['a', 'b', 1.5]) + + # lookup in a pure string index + # with an invalid indexer + for idxr in [lambda x: x.ix, + lambda x: x, + lambda x: x.iloc]: + + def f(): + idxr(s2)[1.0] + + self.assertRaises(TypeError, f) + + self.assertRaises(KeyError, lambda: s2.loc[1.0]) + + result = s2.loc['b'] + expected = 2 + self.assertEqual(result, expected) + + # mixed index so we have label + # indexing + for idxr in [lambda x: x.ix, + lambda x: x]: + + def f(): + idxr(s3)[1.0] + + self.assertRaises(TypeError, f) + + result = idxr(s3)[1] + expected = 2 + self.assertEqual(result, expected) + + self.assertRaises(TypeError, lambda: s3.iloc[1.0]) + self.assertRaises(KeyError, lambda: s3.loc[1.0]) + + result = s3.loc[1.5] + expected = 3 + self.assertEqual(result, expected) + + def test_scalar_integer(self): + + # test how scalar float indexers work on int indexes + + # 
integer index + for index in [tm.makeIntIndex, tm.makeRangeIndex]: + + i = index(5) + for s in [Series(np.arange(len(i))), + DataFrame(np.random.randn(len(i), len(i)), + index=i, columns=i)]: + + # coerce to equal int + for idxr, getitem in [(lambda x: x.ix, False), + (lambda x: x.loc, False), + (lambda x: x, True)]: + + result = idxr(s)[3.0] + self.check(result, s, 3, getitem) + + # coerce to equal int + for idxr, getitem in [(lambda x: x.ix, False), + (lambda x: x.loc, False), + (lambda x: x, True)]: + + if isinstance(s, Series): + compare = self.assertEqual + expected = 100 + else: + compare = tm.assert_series_equal + if getitem: + expected = Series(100, + index=range(len(s)), name=3) + else: + expected = Series(100., + index=range(len(s)), name=3) + + s2 = s.copy() + idxr(s2)[3.0] = 100 + + result = idxr(s2)[3.0] + compare(result, expected) + + result = idxr(s2)[3] + compare(result, expected) + + # contains + # coerce to equal int + self.assertTrue(3.0 in s) + + def test_scalar_float(self): + + # scalar float indexers work on a float index + index = Index(np.arange(5.)) + for s in [Series(np.arange(len(index)), index=index), + DataFrame(np.random.randn(len(index), len(index)), + index=index, columns=index)]: + + # assert all operations except for iloc are ok + indexer = index[3] + for idxr, getitem in [(lambda x: x.ix, False), + (lambda x: x.loc, False), + (lambda x: x, True)]: + + # getting + result = idxr(s)[indexer] + self.check(result, s, 3, getitem) + + # setting + s2 = s.copy() + + def f(): + idxr(s2)[indexer] = expected + result = idxr(s2)[indexer] + self.check(result, s, 3, getitem) + + # random integer is a KeyError + self.assertRaises(KeyError, lambda: idxr(s)[3.5]) + + # contains + self.assertTrue(3.0 in s) + + # iloc succeeds with an integer + expected = s.iloc[3] + s2 = s.copy() + + s2.iloc[3] = expected + result = s2.iloc[3] + self.check(result, s, 3, False) + + # iloc raises with a float + self.assertRaises(TypeError, lambda: s.iloc[3.0]) + + def 
g(): + s2.iloc[3.0] = 0 + self.assertRaises(TypeError, g) + + def test_slice_non_numeric(self): + + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makeTimedeltaIndex, + tm.makePeriodIndex]: + + index = index(5) + for s in [Series(range(5), index=index), + DataFrame(np.random.randn(5, 2), index=index)]: + + # getitem + for l in [slice(3.0, 4), + slice(3, 4.0), + slice(3.0, 4.0)]: + + def f(): + s.iloc[l] + self.assertRaises(TypeError, f) + + for idxr in [lambda x: x.ix, + lambda x: x.loc, + lambda x: x.iloc, + lambda x: x]: + + def f(): + idxr(s)[l] + self.assertRaises(TypeError, f) + + # setitem + for l in [slice(3.0, 4), + slice(3, 4.0), + slice(3.0, 4.0)]: + + def f(): + s.iloc[l] = 0 + self.assertRaises(TypeError, f) + + for idxr in [lambda x: x.ix, + lambda x: x.loc, + lambda x: x.iloc, + lambda x: x]: + def f(): + idxr(s)[l] = 0 + self.assertRaises(TypeError, f) + + def test_slice_integer(self): + + # same as above, but for Integer based indexes + # these coerce to a like integer + # oob indiciates if we are out of bounds + # of positional indexing + for index, oob in [(tm.makeIntIndex(5), False), + (tm.makeRangeIndex(5), False), + (tm.makeIntIndex(5) + 10, True)]: + + # s is an in-range index + s = Series(range(5), index=index) + + # getitem + for l in [slice(3.0, 4), + slice(3, 4.0), + slice(3.0, 4.0)]: + + for idxr in [lambda x: x.loc, + lambda x: x.ix]: + + result = idxr(s)[l] + + # these are all label indexing + # except getitem which is positional + # empty + if oob: + indexer = slice(0, 0) + else: + indexer = slice(3, 5) + self.check(result, s, indexer, False) + + # positional indexing + def f(): + s[l] + + self.assertRaises(TypeError, f) + + # getitem out-of-bounds + for l in [slice(-6, 6), + slice(-6.0, 6.0)]: + + for idxr in [lambda x: x.loc, + lambda x: x.ix]: + result = idxr(s)[l] + + # these are all label indexing + # 
except getitem which is positional + # empty + if oob: + indexer = slice(0, 0) + else: + indexer = slice(-6, 6) + self.check(result, s, indexer, False) + + # positional indexing + def f(): + s[slice(-6.0, 6.0)] + + self.assertRaises(TypeError, f) + + # getitem odd floats + for l, res1 in [(slice(2.5, 4), slice(3, 5)), + (slice(2, 3.5), slice(2, 4)), + (slice(2.5, 3.5), slice(3, 4))]: + + for idxr in [lambda x: x.loc, + lambda x: x.ix]: + + result = idxr(s)[l] + if oob: + res = slice(0, 0) + else: + res = res1 + + self.check(result, s, res, False) + + # positional indexing + def f(): + s[l] + + self.assertRaises(TypeError, f) + + # setitem + for l in [slice(3.0, 4), + slice(3, 4.0), + slice(3.0, 4.0)]: + + for idxr in [lambda x: x.loc, + lambda x: x.ix]: + sc = s.copy() + idxr(sc)[l] = 0 + result = idxr(sc)[l].values.ravel() + self.assertTrue((result == 0).all()) + + # positional indexing + def f(): + s[l] = 0 + + self.assertRaises(TypeError, f) + + def test_integer_positional_indexing(self): + """ make sure that we are raising on positional indexing + w.r.t. 
an integer index """ + + s = Series(range(2, 6), index=range(2, 6)) + + result = s[2:4] + expected = s.iloc[2:4] + assert_series_equal(result, expected) + + for idxr in [lambda x: x, + lambda x: x.iloc]: + + for l in [slice(2, 4.0), + slice(2.0, 4), + slice(2.0, 4.0)]: + + def f(): + idxr(s)[l] + + self.assertRaises(TypeError, f) + + def test_slice_integer_frame_getitem(self): + + # similar to above, but on the getitem dim (of a DataFrame) + for index in [tm.makeIntIndex, tm.makeRangeIndex]: + + index = index(5) + s = DataFrame(np.random.randn(5, 2), index=index) + + for idxr in [lambda x: x.loc, + lambda x: x.ix]: + + # getitem + for l in [slice(0.0, 1), + slice(0, 1.0), + slice(0.0, 1.0)]: + + result = idxr(s)[l] + indexer = slice(0, 2) + self.check(result, s, indexer, False) + + # positional indexing + def f(): + s[l] + + self.assertRaises(TypeError, f) + + # getitem out-of-bounds + for l in [slice(-10, 10), + slice(-10.0, 10.0)]: + + result = idxr(s)[l] + self.check(result, s, slice(-10, 10), True) + + # positional indexing + def f(): + s[slice(-10.0, 10.0)] + + self.assertRaises(TypeError, f) + + # getitem odd floats + for l, res in [(slice(0.5, 1), slice(1, 2)), + (slice(0, 0.5), slice(0, 1)), + (slice(0.5, 1.5), slice(1, 2))]: + + result = idxr(s)[l] + self.check(result, s, res, False) + + # positional indexing + def f(): + s[l] + + self.assertRaises(TypeError, f) + + # setitem + for l in [slice(3.0, 4), + slice(3, 4.0), + slice(3.0, 4.0)]: + + sc = s.copy() + idxr(sc)[l] = 0 + result = idxr(sc)[l].values.ravel() + self.assertTrue((result == 0).all()) + + # positional indexing + def f(): + s[l] = 0 + + self.assertRaises(TypeError, f) + + def test_slice_float(self): + + # same as above, but for floats + index = Index(np.arange(5.)) + 0.1 + for s in [Series(range(5), index=index), + DataFrame(np.random.randn(5, 2), index=index)]: + + for l in [slice(3.0, 4), + slice(3, 4.0), + slice(3.0, 4.0)]: + + expected = s.iloc[3:4] + for idxr in [lambda x: x.ix, + lambda 
x: x.loc, + lambda x: x]: + + # getitem + result = idxr(s)[l] + self.assertTrue(result.equals(expected)) + + # setitem + s2 = s.copy() + idxr(s2)[l] = 0 + result = idxr(s2)[l].values.ravel() + self.assertTrue((result == 0).all()) + + def test_floating_index_doc_example(self): + + index = Index([1.5, 2, 3, 4.5, 5]) + s = Series(range(5), index=index) + self.assertEqual(s[3], 2) + self.assertEqual(s.ix[3], 2) + self.assertEqual(s.loc[3], 2) + self.assertEqual(s.iloc[3], 3) + + def test_floating_misc(self): + + # related 236 + # scalar/slicing of a float index + s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64) + + # label based slicing + result1 = s[1.0:3.0] + result2 = s.ix[1.0:3.0] + result3 = s.loc[1.0:3.0] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + + # exact indexing when found + result1 = s[5.0] + result2 = s.loc[5.0] + result3 = s.ix[5.0] + self.assertEqual(result1, result2) + self.assertEqual(result1, result3) + + result1 = s[5] + result2 = s.loc[5] + result3 = s.ix[5] + self.assertEqual(result1, result2) + self.assertEqual(result1, result3) + + self.assertEqual(s[5.0], s[5]) + + # value not found (and no fallbacking at all) + + # scalar integers + self.assertRaises(KeyError, lambda: s.loc[4]) + self.assertRaises(KeyError, lambda: s.ix[4]) + self.assertRaises(KeyError, lambda: s[4]) + + # fancy floats/integers create the correct entry (as nan) + # fancy tests + expected = Series([2, 0], index=Float64Index([5.0, 0.0])) + for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float + assert_series_equal(s[fancy_idx], expected) + assert_series_equal(s.loc[fancy_idx], expected) + assert_series_equal(s.ix[fancy_idx], expected) + + expected = Series([2, 0], index=Index([5, 0], dtype='int64')) + for fancy_idx in [[5, 0], np.array([5, 0])]: # int + assert_series_equal(s[fancy_idx], expected) + assert_series_equal(s.loc[fancy_idx], expected) + assert_series_equal(s.ix[fancy_idx], expected) + + # all should return 
the same as we are slicing 'the same' + result1 = s.loc[2:5] + result2 = s.loc[2.0:5.0] + result3 = s.loc[2.0:5] + result4 = s.loc[2.1:5] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, result4) + + # previously this did fallback indexing + result1 = s[2:5] + result2 = s[2.0:5.0] + result3 = s[2.0:5] + result4 = s[2.1:5] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, result4) + + result1 = s.ix[2:5] + result2 = s.ix[2.0:5.0] + result3 = s.ix[2.0:5] + result4 = s.ix[2.1:5] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, result4) + + # combined test + result1 = s.loc[2:5] + result2 = s.ix[2:5] + result3 = s[2:5] + + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + + # list selection + result1 = s[[0.0, 5, 10]] + result2 = s.loc[[0.0, 5, 10]] + result3 = s.ix[[0.0, 5, 10]] + result4 = s.iloc[[0, 2, 4]] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, result4) + + result1 = s[[1.6, 5, 10]] + result2 = s.loc[[1.6, 5, 10]] + result3 = s.ix[[1.6, 5, 10]] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, Series( + [np.nan, 2, 4], index=[1.6, 5, 10])) + + result1 = s[[0, 1, 2]] + result2 = s.ix[[0, 1, 2]] + result3 = s.loc[[0, 1, 2]] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, Series( + [0.0, np.nan, np.nan], index=[0, 1, 2])) + + result1 = s.loc[[2.5, 5]] + result2 = s.ix[[2.5, 5]] + assert_series_equal(result1, result2) + assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0])) + + result1 = s[[2.5]] + result2 = s.ix[[2.5]] + result3 = s.loc[[2.5]] + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) + assert_series_equal(result1, 
Series([1], index=[2.5])) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/indexing/test_indexing.py similarity index 84% rename from pandas/tests/test_indexing.py rename to pandas/tests/indexing/test_indexing.py index c95be009ee38d..89552ab776608 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -17,7 +17,7 @@ from pandas import option_context from pandas.core.indexing import _non_reducing_slice, _maybe_numeric_slice from pandas.core.api import (DataFrame, Index, Series, Panel, isnull, - MultiIndex, Float64Index, Timestamp, Timedelta) + MultiIndex, Timestamp, Timedelta) from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_attr_equal) @@ -3518,29 +3518,29 @@ def test_iloc_mask(self): 'integer type is not available'), } - warnings.filterwarnings(action='ignore', category=UserWarning) - result = dict() - for idx in [None, 'index', 'locs']: - mask = (df.nums > 2).values - if idx: - mask = Series(mask, list(reversed(getattr(df, idx)))) - for method in ['', '.loc', '.iloc']: - try: - if method: - accessor = getattr(df, method[1:]) - else: - accessor = df - ans = str(bin(accessor[mask]['nums'].sum())) - except Exception as e: - ans = str(e) - - key = tuple([idx, method]) - r = expected.get(key) - if r != ans: - raise AssertionError( - "[%s] does not match [%s], received [%s]" - % (key, ans, r)) - warnings.filterwarnings(action='always', category=UserWarning) + # UserWarnings from reindex of a boolean mask + with warnings.catch_warnings(record=True): + result = dict() + for idx in [None, 'index', 'locs']: + mask = (df.nums > 2).values + if idx: + mask = Series(mask, list(reversed(getattr(df, idx)))) + for method in ['', '.loc', '.iloc']: + try: + if method: + accessor = getattr(df, method[1:]) + else: + accessor = df + ans = str(bin(accessor[mask]['nums'].sum())) + except Exception as e: + ans = str(e) + + key = tuple([idx, method]) + r = 
expected.get(key) + if r != ans: + raise AssertionError( + "[%s] does not match [%s], received [%s]" + % (key, ans, r)) def test_ix_slicing_strings(self): # GH3836 @@ -4979,324 +4979,6 @@ def test_float64index_slicing_bug(self): result = s.value_counts() str(result) - def test_floating_index_doc_example(self): - - index = Index([1.5, 2, 3, 4.5, 5]) - s = Series(range(5), index=index) - self.assertEqual(s[3], 2) - self.assertEqual(s.ix[3], 2) - self.assertEqual(s.loc[3], 2) - self.assertEqual(s.iloc[3], 3) - - def test_floating_index(self): - - # related 236 - # scalar/slicing of a float index - s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64) - - # label based slicing - result1 = s[1.0:3.0] - result2 = s.ix[1.0:3.0] - result3 = s.loc[1.0:3.0] - assert_series_equal(result1, result2) - assert_series_equal(result1, result3) - - # exact indexing when found - result1 = s[5.0] - result2 = s.loc[5.0] - result3 = s.ix[5.0] - self.assertEqual(result1, result2) - self.assertEqual(result1, result3) - - result1 = s[5] - result2 = s.loc[5] - result3 = s.ix[5] - self.assertEqual(result1, result2) - self.assertEqual(result1, result3) - - self.assertEqual(s[5.0], s[5]) - - # value not found (and no fallbacking at all) - - # scalar integers - self.assertRaises(KeyError, lambda: s.loc[4]) - self.assertRaises(KeyError, lambda: s.ix[4]) - self.assertRaises(KeyError, lambda: s[4]) - - # fancy floats/integers create the correct entry (as nan) - # fancy tests - expected = Series([2, 0], index=Float64Index([5.0, 0.0])) - for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float - assert_series_equal(s[fancy_idx], expected) - assert_series_equal(s.loc[fancy_idx], expected) - assert_series_equal(s.ix[fancy_idx], expected) - - expected = Series([2, 0], index=Index([5, 0], dtype='int64')) - for fancy_idx in [[5, 0], np.array([5, 0])]: # int - assert_series_equal(s[fancy_idx], expected) - assert_series_equal(s.loc[fancy_idx], expected) - 
assert_series_equal(s.ix[fancy_idx], expected) - - # all should return the same as we are slicing 'the same' - result1 = s.loc[2:5] - result2 = s.loc[2.0:5.0] - result3 = s.loc[2.0:5] - result4 = s.loc[2.1:5] - assert_series_equal(result1, result2) - assert_series_equal(result1, result3) - assert_series_equal(result1, result4) - - # previously this did fallback indexing - result1 = s[2:5] - result2 = s[2.0:5.0] - result3 = s[2.0:5] - result4 = s[2.1:5] - assert_series_equal(result1, result2) - assert_series_equal(result1, result3) - assert_series_equal(result1, result4) - - result1 = s.ix[2:5] - result2 = s.ix[2.0:5.0] - result3 = s.ix[2.0:5] - result4 = s.ix[2.1:5] - assert_series_equal(result1, result2) - assert_series_equal(result1, result3) - assert_series_equal(result1, result4) - - # combined test - result1 = s.loc[2:5] - result2 = s.ix[2:5] - result3 = s[2:5] - - assert_series_equal(result1, result2) - assert_series_equal(result1, result3) - - # list selection - result1 = s[[0.0, 5, 10]] - result2 = s.loc[[0.0, 5, 10]] - result3 = s.ix[[0.0, 5, 10]] - result4 = s.iloc[[0, 2, 4]] - assert_series_equal(result1, result2) - assert_series_equal(result1, result3) - assert_series_equal(result1, result4) - - result1 = s[[1.6, 5, 10]] - result2 = s.loc[[1.6, 5, 10]] - result3 = s.ix[[1.6, 5, 10]] - assert_series_equal(result1, result2) - assert_series_equal(result1, result3) - assert_series_equal(result1, Series( - [np.nan, 2, 4], index=[1.6, 5, 10])) - - result1 = s[[0, 1, 2]] - result2 = s.ix[[0, 1, 2]] - result3 = s.loc[[0, 1, 2]] - assert_series_equal(result1, result2) - assert_series_equal(result1, result3) - assert_series_equal(result1, Series( - [0.0, np.nan, np.nan], index=[0, 1, 2])) - - result1 = s.loc[[2.5, 5]] - result2 = s.ix[[2.5, 5]] - assert_series_equal(result1, result2) - assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0])) - - result1 = s[[2.5]] - result2 = s.ix[[2.5]] - result3 = s.loc[[2.5]] - assert_series_equal(result1, result2) - 
assert_series_equal(result1, result3) - assert_series_equal(result1, Series([1], index=[2.5])) - - def test_scalar_indexer(self): - # float indexing checked above - - def check_invalid(index, loc=None, iloc=None, ix=None, getitem=None): - - # related 236/4850 - # trying to access with a float index - s = Series(np.arange(len(index)), index=index) - - if iloc is None: - iloc = TypeError - self.assertRaises(iloc, lambda: s.iloc[3.5]) - if loc is None: - loc = TypeError - self.assertRaises(loc, lambda: s.loc[3.5]) - if ix is None: - ix = TypeError - self.assertRaises(ix, lambda: s.ix[3.5]) - if getitem is None: - getitem = TypeError - self.assertRaises(getitem, lambda: s[3.5]) - - for index in [tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeIntIndex, tm.makeRangeIndex, tm.makeDateIndex, - tm.makePeriodIndex]: - check_invalid(index()) - check_invalid(Index(np.arange(5) * 2.5), - loc=KeyError, - ix=KeyError, - getitem=KeyError) - - def check_index(index, error): - index = index() - s = Series(np.arange(len(index)), index=index) - - # positional selection - result1 = s[5] - self.assertRaises(TypeError, lambda: s[5.0]) - result3 = s.iloc[5] - self.assertRaises(TypeError, lambda: s.iloc[5.0]) - - # by value - self.assertRaises(TypeError, lambda: s.loc[5]) - self.assertRaises(TypeError, lambda: s.loc[5.0]) - - # this is fallback, so it works - result5 = s.ix[5] - self.assertRaises(error, lambda: s.ix[5.0]) - - self.assertEqual(result1, result3) - self.assertEqual(result1, result5) - - # string-like - for index in [tm.makeStringIndex, tm.makeUnicodeIndex]: - check_index(index, TypeError) - - # datetimelike - for index in [tm.makeDateIndex, tm.makeTimedeltaIndex, - tm.makePeriodIndex]: - check_index(index, TypeError) - - # exact indexing when found on IntIndex - s = Series(np.arange(10), dtype='int64') - - self.assertRaises(TypeError, lambda: s[5.0]) - self.assertRaises(TypeError, lambda: s.loc[5.0]) - self.assertRaises(TypeError, lambda: s.ix[5.0]) - result4 = s[5] - 
result5 = s.loc[5] - result6 = s.ix[5] - self.assertEqual(result4, result5) - self.assertEqual(result4, result6) - - def test_slice_indexer(self): - def check_iloc_compat(s): - # these are exceptions - self.assertRaises(TypeError, lambda: s.iloc[6.0:8]) - self.assertRaises(TypeError, lambda: s.iloc[6.0:8.0]) - self.assertRaises(TypeError, lambda: s.iloc[6:8.0]) - - def check_slicing_positional(index): - - s = Series(np.arange(len(index)) + 10, index=index) - - # these are all positional - result1 = s[2:5] - result2 = s.ix[2:5] - result3 = s.iloc[2:5] - assert_series_equal(result1, result2) - assert_series_equal(result1, result3) - - # loc will fail - self.assertRaises(TypeError, lambda: s.loc[2:5]) - - # make all float slicing fail - self.assertRaises(TypeError, lambda: s[2.0:5]) - self.assertRaises(TypeError, lambda: s[2.0:5.0]) - self.assertRaises(TypeError, lambda: s[2:5.0]) - - self.assertRaises(TypeError, lambda: s.ix[2.0:5]) - self.assertRaises(TypeError, lambda: s.ix[2.0:5.0]) - self.assertRaises(TypeError, lambda: s.ix[2:5.0]) - - self.assertRaises(TypeError, lambda: s.loc[2.0:5]) - self.assertRaises(TypeError, lambda: s.loc[2.0:5.0]) - self.assertRaises(TypeError, lambda: s.loc[2:5.0]) - - check_iloc_compat(s) - - # all index types except int, float - for index in [tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makeTimedeltaIndex, - tm.makePeriodIndex]: - check_slicing_positional(index()) - - ############ - # IntIndex # - ############ - for index in [tm.makeIntIndex(), tm.makeRangeIndex()]: - - s = Series(np.arange(len(index), dtype='int64') + 10, index + 5) - - # this is positional - result1 = s[2:5] - result4 = s.iloc[2:5] - assert_series_equal(result1, result4) - - # these are all label based - result2 = s.ix[2:5] - result3 = s.loc[2:5] - assert_series_equal(result2, result3) - - # float slicers on an int index with ix - expected = Series([11, 12, 13], index=[6, 7, 8]) - result = s.ix[6.0:8.5] - assert_series_equal(result, expected) - - 
result = s.ix[5.5:8.5] - assert_series_equal(result, expected) - - result = s.ix[5.5:8.0] - assert_series_equal(result, expected) - - for method in ['loc', 'iloc']: - # make all float slicing fail for .loc with an int index - self.assertRaises(TypeError, - lambda: getattr(s, method)[6.0:8]) - self.assertRaises(TypeError, - lambda: getattr(s, method)[6.0:8.0]) - self.assertRaises(TypeError, - lambda: getattr(s, method)[6:8.0]) - - # make all float slicing fail for [] with an int index - self.assertRaises(TypeError, lambda: s[6.0:8]) - self.assertRaises(TypeError, lambda: s[6.0:8.0]) - self.assertRaises(TypeError, lambda: s[6:8.0]) - - check_iloc_compat(s) - - ############## - # FloatIndex # - ############## - s.index = s.index.astype('float64') - - # these are all value based - result1 = s[6:8] - result2 = s.ix[6:8] - result3 = s.loc[6:8] - assert_series_equal(result1, result2) - assert_series_equal(result1, result3) - - # these are valid for all methods - # these are treated like labels (e.g. 
the rhs IS included) - def compare(slicers, expected): - for method in [lambda x: x, lambda x: x.loc, lambda x: x.ix]: - for slices in slicers: - - result = method(s)[slices] - assert_series_equal(result, expected) - - compare([slice(6.0, 8), slice(6.0, 8.0), slice(6, 8.0)], - s[(s.index >= 6.0) & (s.index <= 8)]) - compare([slice(6.5, 8), slice(6.5, 8.5)], - s[(s.index >= 6.5) & (s.index <= 8.5)]) - compare([slice(6, 8.5)], s[(s.index >= 6.0) & (s.index <= 8.5)]) - compare([slice(6.5, 6.5)], s[(s.index >= 6.5) & (s.index <= 6.5)]) - - check_iloc_compat(s) - def test_set_ix_out_of_bounds_axis_0(self): df = pd.DataFrame( randn(2, 5), index=["row%s" % i for i in range(2)], @@ -5362,347 +5044,46 @@ def test_index_type_coercion(self): self.assertTrue(s.index.is_integer()) - for attr in ['ix', 'loc']: + for indexer in [lambda x: x.ix, + lambda x: x.loc, + lambda x: x]: s2 = s.copy() - getattr(s2, attr)[0.1] = 0 + indexer(s2)[0.1] = 0 self.assertTrue(s2.index.is_floating()) - self.assertTrue(getattr(s2, attr)[0.1] == 0) + self.assertTrue(indexer(s2)[0.1] == 0) s2 = s.copy() - getattr(s2, attr)[0.0] = 0 + indexer(s2)[0.0] = 0 exp = s.index if 0 not in s: exp = Index(s.index.tolist() + [0]) tm.assert_index_equal(s2.index, exp) s2 = s.copy() - getattr(s2, attr)['0'] = 0 + indexer(s2)['0'] = 0 self.assertTrue(s2.index.is_object()) - # setitem - s2 = s.copy() - s2[0.1] = 0 - self.assertTrue(s2.index.is_floating()) - self.assertTrue(s2[0.1] == 0) - - s2 = s.copy() - s2[0.0] = 0 - exp = s.index - if 0 not in s: - exp = Index(s.index.tolist() + [0]) - tm.assert_index_equal(s2.index, exp) - - s2 = s.copy() - s2['0'] = 0 - self.assertTrue(s2.index.is_object()) - for s in [Series(range(5), index=np.arange(5.))]: self.assertTrue(s.index.is_floating()) - for attr in ['ix', 'loc']: + for idxr in [lambda x: x.ix, + lambda x: x.loc, + lambda x: x]: s2 = s.copy() - getattr(s2, attr)[0.1] = 0 + idxr(s2)[0.1] = 0 self.assertTrue(s2.index.is_floating()) - self.assertTrue(getattr(s2, 
attr)[0.1] == 0) + self.assertTrue(idxr(s2)[0.1] == 0) s2 = s.copy() - getattr(s2, attr)[0.0] = 0 + idxr(s2)[0.0] = 0 tm.assert_index_equal(s2.index, s.index) s2 = s.copy() - getattr(s2, attr)['0'] = 0 + idxr(s2)['0'] = 0 self.assertTrue(s2.index.is_object()) - # setitem - s2 = s.copy() - s2[0.1] = 0 - self.assertTrue(s2.index.is_floating()) - self.assertTrue(s2[0.1] == 0) - - s2 = s.copy() - s2[0.0] = 0 - tm.assert_index_equal(s2.index, s.index) - - s2 = s.copy() - s2['0'] = 0 - self.assertTrue(s2.index.is_object()) - - def test_invalid_scalar_float_indexers_error(self): - - for index in [tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeCategoricalIndex, - tm.makeDateIndex, tm.makeTimedeltaIndex, - tm.makePeriodIndex]: - - i = index(5) - - s = Series(np.arange(len(i)), index=i) - - def f(): - s.iloc[3.0] - self.assertRaisesRegexp(TypeError, - 'cannot do positional indexing', - f) - - def test_invalid_scalar_float_indexers(self): - - # GH 4892 - # float_indexers should raise exceptions - # on appropriate Index types & accessors - - for index in [tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeCategoricalIndex, - tm.makeDateIndex, tm.makeTimedeltaIndex, - tm.makePeriodIndex]: - - i = index(5) - - for s in [Series( - np.arange(len(i)), index=i), DataFrame( - np.random.randn( - len(i), len(i)), index=i, columns=i)]: - - for attr in ['iloc', 'loc', 'ix', '__getitem__']: - def f(): - getattr(s, attr)()[3.0] - self.assertRaises(TypeError, f) - - # setting only fails with iloc as - # the others expand the index - def f(): - s.iloc[3.0] = 0 - self.assertRaises(TypeError, f) - - # fallsback to position selection ,series only - s = Series(np.arange(len(i)), index=i) - s[3] - self.assertRaises(TypeError, lambda: s[3.0]) - - # integer index - for index in [tm.makeIntIndex, tm.makeRangeIndex]: - - i = index(5) - for s in [Series(np.arange(len(i))), - DataFrame(np.random.randn(len(i), len(i)), - index=i, columns=i)]: - - # any kind of get access should fail - for attr in 
['iloc', 'loc', 'ix']: - def f(): - getattr(s, attr)[3.0] - self.assertRaises(TypeError, f) - error = KeyError if isinstance(s, DataFrame) else TypeError - self.assertRaises(error, lambda: s[3.0]) - - # setting only fails with iloc as - def f(): - s.iloc[3.0] = 0 - self.assertRaises(TypeError, f) - - # other indexers will coerce to an object index - # tested explicity in: test_invalid_scalar_float_indexers - # above - - # floats index - index = tm.makeFloatIndex(5) - for s in [Series(np.arange(len(index)), index=index), - DataFrame(np.random.randn(len(index), len(index)), - index=index, columns=index)]: - - # assert all operations except for iloc are ok - indexer = index[3] - expected = s.iloc[3] - - if isinstance(s, Series): - compare = self.assertEqual - else: - compare = tm.assert_series_equal - - for attr in ['loc', 'ix']: - - # getting - result = getattr(s, attr)[indexer] - compare(result, expected) - - # setting - s2 = s.copy() - - def f(): - getattr(s2, attr)[indexer] = expected - result = getattr(s2, attr)[indexer] - compare(result, expected) - - # random integer is a KeyError - self.assertRaises(KeyError, lambda: getattr(s, attr)[3]) - - # iloc succeeds with an integer - result = s.iloc[3] - compare(result, expected) - - s2 = s.copy() - - def f(): - s2.iloc[3] = expected - result = s2.iloc[3] - compare(result, expected) - - # iloc raises with a float - self.assertRaises(TypeError, lambda: s.iloc[3.0]) - - def f(): - s.iloc[3.0] = 0 - self.assertRaises(TypeError, f) - - # getitem - - # getting - if isinstance(s, DataFrame): - expected = s.iloc[:, 3] - result = s[indexer] - compare(result, expected) - - # setting - s2 = s.copy() - - def f(): - s2[indexer] = expected - result = s2[indexer] - compare(result, expected) - - # random integer is a KeyError - result = self.assertRaises(KeyError, lambda: s[3]) - - def test_invalid_slice_float_indexers(self): - - # GH 4892 - # float_indexers should raise exceptions - # on appropriate Index types & accessors - - for 
index in [tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makeTimedeltaIndex, - tm.makePeriodIndex]: - - index = index(5) - for s in [Series(range(5), index=index), - DataFrame(np.random.randn(5, 2), index=index)]: - - # getitem - for l in [slice(3.0, 4), - slice(3, 4.0), - slice(3.0, 4.0)]: - - def f(): - s.iloc[l] - self.assertRaises(TypeError, f) - - def f(): - s.loc[l] - self.assertRaises(TypeError, f) - - def f(): - s[l] - self.assertRaises(TypeError, f) - - def f(): - s.ix[l] - self.assertRaises(TypeError, f) - - # setitem - for l in [slice(3.0, 4), - slice(3, 4.0), - slice(3.0, 4.0)]: - - def f(): - s.iloc[l] = 0 - self.assertRaises(TypeError, f) - - def f(): - s.loc[l] = 0 - self.assertRaises(TypeError, f) - - def f(): - s[l] = 0 - self.assertRaises(TypeError, f) - - def f(): - s.ix[l] = 0 - self.assertRaises(TypeError, f) - - # same as above, but for Integer based indexes - for index in [tm.makeIntIndex, tm.makeRangeIndex]: - - index = index(5) - for s in [Series(range(5), index=index), - DataFrame(np.random.randn(5, 2), index=index)]: - - # getitem - for l in [slice(3.0, 4), - slice(3, 4.0), - slice(3.0, 4.0)]: - - def f(): - s.iloc[l] - self.assertRaises(TypeError, f) - - def f(): - s.loc[l] - self.assertRaises(TypeError, f) - - def f(): - s[l] - self.assertRaises(TypeError, f) - - # ix allows float slicing - s.ix[l] - - # setitem - for l in [slice(3.0, 4), - slice(3, 4.0), - slice(3.0, 4.0)]: - - def f(): - s.iloc[l] = 0 - self.assertRaises(TypeError, f) - - def f(): - s.loc[l] = 0 - self.assertRaises(TypeError, f) - - def f(): - s[l] = 0 - self.assertRaises(TypeError, f) - - # ix allows float slicing - s.ix[l] = 0 - - # same as above, but for floats - index = tm.makeFloatIndex(5) - for s in [Series(range(5), index=index), - DataFrame(np.random.randn(5, 2), index=index)]: - - # getitem - for l in [slice(3.0, 4), - slice(3, 4.0), - slice(3.0, 4.0)]: - - # ix is ok - result1 = s.ix[3:4] - result2 = s.ix[3.0:4] - result3 = s.ix[3.0:4.0] - 
result4 = s.ix[3:4.0] - self.assertTrue(result1.equals(result2)) - self.assertTrue(result1.equals(result3)) - self.assertTrue(result1.equals(result4)) - - # setitem - for l in [slice(3.0, 4), - slice(3, 4.0), - slice(3.0, 4.0)]: - - pass - def test_float_index_to_mixed(self): df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) df['a'] = 10 @@ -5929,338 +5310,6 @@ def test_maybe_numeric_slice(self): self.assertEqual(result, expected) -class TestCategoricalIndex(tm.TestCase): - - def setUp(self): - - self.df = DataFrame({'A': np.arange(6, dtype='int64'), - 'B': Series(list('aabbca')).astype( - 'category', categories=list( - 'cab'))}).set_index('B') - self.df2 = DataFrame({'A': np.arange(6, dtype='int64'), - 'B': Series(list('aabbca')).astype( - 'category', categories=list( - 'cabe'))}).set_index('B') - self.df3 = DataFrame({'A': np.arange(6, dtype='int64'), - 'B': (Series([1, 1, 2, 1, 3, 2]) - .astype('category', categories=[3, 2, 1], - ordered=True))}).set_index('B') - self.df4 = DataFrame({'A': np.arange(6, dtype='int64'), - 'B': (Series([1, 1, 2, 1, 3, 2]) - .astype('category', categories=[3, 2, 1], - ordered=False))}).set_index('B') - - def test_loc_scalar(self): - result = self.df.loc['a'] - expected = (DataFrame({'A': [0, 1, 5], - 'B': (Series(list('aaa')) - .astype('category', - categories=list('cab')))}) - .set_index('B')) - assert_frame_equal(result, expected) - - df = self.df.copy() - df.loc['a'] = 20 - expected = (DataFrame({'A': [20, 20, 2, 3, 4, 20], - 'B': (Series(list('aabbca')) - .astype('category', - categories=list('cab')))}) - .set_index('B')) - assert_frame_equal(df, expected) - - # value not in the categories - self.assertRaises(KeyError, lambda: df.loc['d']) - - def f(): - df.loc['d'] = 10 - - self.assertRaises(TypeError, f) - - def f(): - df.loc['d', 'A'] = 10 - - self.assertRaises(TypeError, f) - - def f(): - df.loc['d', 'C'] = 10 - - self.assertRaises(TypeError, f) - - def test_loc_listlike(self): - - # list of labels - result 
= self.df.loc[['c', 'a']] - expected = self.df.iloc[[4, 0, 1, 5]] - assert_frame_equal(result, expected, check_index_type=True) - - result = self.df2.loc[['a', 'b', 'e']] - exp_index = pd.CategoricalIndex( - list('aaabbe'), categories=list('cabe'), name='B') - expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index) - assert_frame_equal(result, expected, check_index_type=True) - - # element in the categories but not in the values - self.assertRaises(KeyError, lambda: self.df2.loc['e']) - - # assign is ok - df = self.df2.copy() - df.loc['e'] = 20 - result = df.loc[['a', 'b', 'e']] - exp_index = pd.CategoricalIndex( - list('aaabbe'), categories=list('cabe'), name='B') - expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=exp_index) - assert_frame_equal(result, expected) - - df = self.df2.copy() - result = df.loc[['a', 'b', 'e']] - exp_index = pd.CategoricalIndex( - list('aaabbe'), categories=list('cabe'), name='B') - expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index) - assert_frame_equal(result, expected, check_index_type=True) - - # not all labels in the categories - self.assertRaises(KeyError, lambda: self.df2.loc[['a', 'd']]) - - def test_loc_listlike_dtypes(self): - # GH 11586 - - # unique categories and codes - index = pd.CategoricalIndex(['a', 'b', 'c']) - df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index) - - # unique slice - res = df.loc[['a', 'b']] - exp = DataFrame({'A': [1, 2], - 'B': [4, 5]}, index=pd.CategoricalIndex(['a', 'b'])) - tm.assert_frame_equal(res, exp, check_index_type=True) - - # duplicated slice - res = df.loc[['a', 'a', 'b']] - exp = DataFrame({'A': [1, 1, 2], - 'B': [4, 4, 5]}, - index=pd.CategoricalIndex(['a', 'a', 'b'])) - tm.assert_frame_equal(res, exp, check_index_type=True) - - with tm.assertRaisesRegexp( - KeyError, - 'a list-indexer must only include values that are ' - 'in the categories'): - df.loc[['a', 'x']] - - # duplicated categories and codes - index = pd.CategoricalIndex(['a', 
'b', 'a']) - df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index) - - # unique slice - res = df.loc[['a', 'b']] - exp = DataFrame({'A': [1, 3, 2], - 'B': [4, 6, 5]}, - index=pd.CategoricalIndex(['a', 'a', 'b'])) - tm.assert_frame_equal(res, exp, check_index_type=True) - - # duplicated slice - res = df.loc[['a', 'a', 'b']] - exp = DataFrame( - {'A': [1, 3, 1, 3, 2], - 'B': [4, 6, 4, 6, 5 - ]}, index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) - tm.assert_frame_equal(res, exp, check_index_type=True) - - with tm.assertRaisesRegexp( - KeyError, - 'a list-indexer must only include values ' - 'that are in the categories'): - df.loc[['a', 'x']] - - # contains unused category - index = pd.CategoricalIndex( - ['a', 'b', 'a', 'c'], categories=list('abcde')) - df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index) - - res = df.loc[['a', 'b']] - exp = DataFrame({'A': [1, 3, 2], - 'B': [5, 7, 6]}, index=pd.CategoricalIndex( - ['a', 'a', 'b'], categories=list('abcde'))) - tm.assert_frame_equal(res, exp, check_index_type=True) - - res = df.loc[['a', 'e']] - exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]}, - index=pd.CategoricalIndex(['a', 'a', 'e'], - categories=list('abcde'))) - tm.assert_frame_equal(res, exp, check_index_type=True) - - # duplicated slice - res = df.loc[['a', 'a', 'b']] - exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]}, - index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'], - categories=list('abcde'))) - tm.assert_frame_equal(res, exp, check_index_type=True) - - with tm.assertRaisesRegexp( - KeyError, - 'a list-indexer must only include values ' - 'that are in the categories'): - df.loc[['a', 'x']] - - def test_read_only_source(self): - # GH 10043 - rw_array = np.eye(10) - rw_df = DataFrame(rw_array) - - ro_array = np.eye(10) - ro_array.setflags(write=False) - ro_df = DataFrame(ro_array) - - assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]]) - assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]]) - 
assert_series_equal(rw_df.iloc[1], ro_df.iloc[1]) - assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3]) - - assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]]) - assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]]) - assert_series_equal(rw_df.loc[1], ro_df.loc[1]) - assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3]) - - def test_reindexing(self): - - # reindexing - # convert to a regular index - result = self.df2.reindex(['a', 'b', 'e']) - expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan], - 'B': Series(list('aaabbe'))}).set_index('B') - assert_frame_equal(result, expected, check_index_type=True) - - result = self.df2.reindex(['a', 'b']) - expected = DataFrame({'A': [0, 1, 5, 2, 3], - 'B': Series(list('aaabb'))}).set_index('B') - assert_frame_equal(result, expected, check_index_type=True) - - result = self.df2.reindex(['e']) - expected = DataFrame({'A': [np.nan], - 'B': Series(['e'])}).set_index('B') - assert_frame_equal(result, expected, check_index_type=True) - - result = self.df2.reindex(['d']) - expected = DataFrame({'A': [np.nan], - 'B': Series(['d'])}).set_index('B') - assert_frame_equal(result, expected, check_index_type=True) - - # since we are actually reindexing with a Categorical - # then return a Categorical - cats = list('cabe') - - result = self.df2.reindex(pd.Categorical(['a', 'd'], categories=cats)) - expected = DataFrame({'A': [0, 1, 5, np.nan], - 'B': Series(list('aaad')).astype( - 'category', categories=cats)}).set_index('B') - assert_frame_equal(result, expected, check_index_type=True) - - result = self.df2.reindex(pd.Categorical(['a'], categories=cats)) - expected = DataFrame({'A': [0, 1, 5], - 'B': Series(list('aaa')).astype( - 'category', categories=cats)}).set_index('B') - assert_frame_equal(result, expected, check_index_type=True) - - result = self.df2.reindex(['a', 'b', 'e']) - expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan], - 'B': Series(list('aaabbe'))}).set_index('B') - assert_frame_equal(result, expected, 
check_index_type=True) - - result = self.df2.reindex(['a', 'b']) - expected = DataFrame({'A': [0, 1, 5, 2, 3], - 'B': Series(list('aaabb'))}).set_index('B') - assert_frame_equal(result, expected, check_index_type=True) - - result = self.df2.reindex(['e']) - expected = DataFrame({'A': [np.nan], - 'B': Series(['e'])}).set_index('B') - assert_frame_equal(result, expected, check_index_type=True) - - # give back the type of categorical that we received - result = self.df2.reindex(pd.Categorical( - ['a', 'd'], categories=cats, ordered=True)) - expected = DataFrame( - {'A': [0, 1, 5, np.nan], - 'B': Series(list('aaad')).astype('category', categories=cats, - ordered=True)}).set_index('B') - assert_frame_equal(result, expected, check_index_type=True) - - result = self.df2.reindex(pd.Categorical( - ['a', 'd'], categories=['a', 'd'])) - expected = DataFrame({'A': [0, 1, 5, np.nan], - 'B': Series(list('aaad')).astype( - 'category', categories=['a', 'd' - ])}).set_index('B') - assert_frame_equal(result, expected, check_index_type=True) - - # passed duplicate indexers are not allowed - self.assertRaises(ValueError, lambda: self.df2.reindex(['a', 'a'])) - - # args NotImplemented ATM - self.assertRaises(NotImplementedError, - lambda: self.df2.reindex(['a'], method='ffill')) - self.assertRaises(NotImplementedError, - lambda: self.df2.reindex(['a'], level=1)) - self.assertRaises(NotImplementedError, - lambda: self.df2.reindex(['a'], limit=2)) - - def test_loc_slice(self): - # slicing - # not implemented ATM - # GH9748 - - self.assertRaises(TypeError, lambda: self.df.loc[1:5]) - - # result = df.loc[1:5] - # expected = df.iloc[[1,2,3,4]] - # assert_frame_equal(result, expected) - - def test_boolean_selection(self): - - df3 = self.df3 - df4 = self.df4 - - result = df3[df3.index == 'a'] - expected = df3.iloc[[]] - assert_frame_equal(result, expected) - - result = df4[df4.index == 'a'] - expected = df4.iloc[[]] - assert_frame_equal(result, expected) - - result = df3[df3.index == 1] - 
expected = df3.iloc[[0, 1, 3]] - assert_frame_equal(result, expected) - - result = df4[df4.index == 1] - expected = df4.iloc[[0, 1, 3]] - assert_frame_equal(result, expected) - - # since we have an ordered categorical - - # CategoricalIndex([1, 1, 2, 1, 3, 2], - # categories=[3, 2, 1], - # ordered=True, - # name=u'B') - result = df3[df3.index < 2] - expected = df3.iloc[[4]] - assert_frame_equal(result, expected) - - result = df3[df3.index > 1] - expected = df3.iloc[[]] - assert_frame_equal(result, expected) - - # unordered - # cannot be compared - - # CategoricalIndex([1, 1, 2, 1, 3, 2], - # categories=[3, 2, 1], - # ordered=False, - # name=u'B') - self.assertRaises(TypeError, lambda: df4[df4.index < 2]) - self.assertRaises(TypeError, lambda: df4[df4.index > 1]) - - class TestSeriesNoneCoercion(tm.TestCase): EXPECTED_RESULTS = [ # For numeric series, we should coerce to NaN. diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 62a7ad078da70..7584b99dbdb97 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -453,12 +453,20 @@ def _convert_scalar_indexer(self, key, kind=None): Parameters ---------- key : label of the slice bound - kind : optional, type of the indexing operation (loc/ix/iloc/None) + kind : {'ix', 'loc', 'getitem', 'iloc'} or None """ - if (kind in ['loc'] and lib.isscalar(key) and - (is_integer(key) or is_float(key))): - self._invalid_indexer('index', key) + assert kind in ['ix', 'loc', 'getitem', 'iloc', None] + + # we don't allow integer/float indexing for loc + # we don't allow float indexing for ix/getitem + if lib.isscalar(key): + is_int = is_integer(key) + is_flt = is_float(key) + if kind in ['loc'] and (is_int or is_flt): + self._invalid_indexer('index', key) + elif kind in ['ix', 'getitem'] and is_flt: + self._invalid_indexer('index', key) return (super(DatetimeIndexOpsMixin, self) ._convert_scalar_indexer(key, kind=kind)) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 
c745f1b2eddf9..b3b43e1a5babb 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1443,7 +1443,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): ---------- label : object side : {'left', 'right'} - kind : string / None + kind : {'ix', 'loc', 'getitem'} Returns ------- @@ -1454,6 +1454,8 @@ def _maybe_cast_slice_bound(self, label, side, kind): Value of `side` parameter should be validated in caller. """ + assert kind in ['ix', 'loc', 'getitem', None] + if is_float(label) or isinstance(label, time) or is_integer(label): self._invalid_indexer('slice', label) @@ -1500,7 +1502,7 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): raise KeyError('Cannot mix time and non-time slice keys') try: - return Index.slice_indexer(self, start, end, step) + return Index.slice_indexer(self, start, end, step, kind=kind) except KeyError: # For historical reasons DatetimeIndex by default supports # value-based partial (aka string) slices on non-monotonic arrays, diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 2795dc846f6de..df04984bcb582 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -692,7 +692,7 @@ def get_loc(self, key, method=None, tolerance=None): except ValueError: # we cannot construct the Period # as we have an invalid type - return self._invalid_indexer('label', key) + raise KeyError(key) try: return Index.get_loc(self, key.ordinal, method, tolerance) except KeyError: @@ -707,7 +707,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): ---------- label : object side : {'left', 'right'} - kind : string / None + kind : {'ix', 'loc', 'getitem'} Returns ------- @@ -718,6 +718,8 @@ def _maybe_cast_slice_bound(self, label, side, kind): Value of `side` parameter should be validated in caller. 
""" + assert kind in ['ix', 'loc', 'getitem'] + if isinstance(label, datetime): return Period(label, freq=self.freq) elif isinstance(label, compat.string_types): diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 9759d13fe4632..bea2aeb508358 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -710,13 +710,15 @@ def _maybe_cast_slice_bound(self, label, side, kind): ---------- label : object side : {'left', 'right'} - kind : string / None + kind : {'ix', 'loc', 'getitem'} Returns ------- label : object """ + assert kind in ['ix', 'loc', 'getitem', None] + if isinstance(label, compat.string_types): parsed = _coerce_scalar_to_timedelta_type(label, box=True) lbound = parsed.round(parsed.resolution)