diff --git a/pandas/src/engines.pyx b/pandas/src/engines.pyx index 1cd3e85fc0420..92972d94d0f61 100644 --- a/pandas/src/engines.pyx +++ b/pandas/src/engines.pyx @@ -137,7 +137,6 @@ cdef class IndexEngine: if self.is_monotonic: values = self._get_index_values() - left = values.searchsorted(val, side='left') right = values.searchsorted(val, side='right') @@ -149,14 +148,15 @@ cdef class IndexEngine: else: return slice(left, right) else: - return self._get_bool_indexer(val) + return self._maybe_get_bool_indexer(val) - cdef _get_bool_indexer(self, object val): + cdef _maybe_get_bool_indexer(self, object val): cdef: ndarray[uint8_t] indexer ndarray[object] values int count = 0 Py_ssize_t i, n + int last_true values = self._get_index_values() n = len(values) @@ -168,11 +168,14 @@ cdef class IndexEngine: if values[i] == val: count += 1 indexer[i] = 1 + last_true = i else: indexer[i] = 0 if count == 0: raise KeyError(val) + if count == 1: + return last_true return result @@ -275,13 +278,14 @@ cdef class Int64Engine(IndexEngine): return _algos.backfill_int64(self._get_index_values(), other, limit=limit) - cdef _get_bool_indexer(self, object val): + cdef _maybe_get_bool_indexer(self, object val): cdef: ndarray[uint8_t, cast=True] indexer ndarray[int64_t] values int count = 0 Py_ssize_t i, n int64_t ival + int last_true if not util.is_integer_object(val): raise KeyError(val) @@ -298,11 +302,14 @@ cdef class Int64Engine(IndexEngine): if values[i] == val: count += 1 indexer[i] = 1 + last_true = i else: indexer[i] = 0 if count == 0: raise KeyError(val) + if count == 1: + return last_true return result diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index d9c70dfb93a53..b8e3b50718215 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -999,6 +999,11 @@ def test_get_loc_duplicates(self): assert(result == expected) # self.assertRaises(Exception, index.get_loc, 2) + index = Index(['c', 'a', 'a', 'b', 'b']) + rs = index.get_loc('c') + xp = 0 + assert(rs == xp) + def test_get_loc_level(self): index = MultiIndex(levels=[Index(range(4)), Index(range(4)), diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index b24a93023a39d..44a24e5ee12a9 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -599,6 +599,11 @@ def test_getitem_ambiguous_keyerror(self): self.assertRaises(KeyError, s.__getitem__, 1) self.assertRaises(KeyError, s.ix.__getitem__, 1) + def test_getitem_unordered_dup(self): + obj = Series(range(5), index=['c', 'a', 'a', 'b', 'b']) + self.assert_(np.isscalar(obj['c'])) + self.assert_(obj['c'] == 0) + def test_setitem_ambiguous_keyerror(self): s = Series(range(10), index=range(0, 20, 2)) self.assertRaises(KeyError, s.__setitem__, 1, 5)