diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 25abc10eae4e7..9a537fc5b678a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -633,7 +633,10 @@ def keys(self):
 
     def iteritems(self):
         """Iterator over (column, series) pairs"""
-        return ((k, self[k]) for k in self.columns)
+        # Select by position so duplicate column labels still yield a
+        # Series each (self[k] would return a DataFrame for a duplicated k).
+        for i, k in enumerate(self.columns):
+            yield (k, self.take([i], axis=1)[k])
 
     def iterrows(self):
         """
@@ -836,6 +839,11 @@ def to_dict(self, outtype='dict'):
         -------
         result : dict like {column -> {index -> value}}
         """
+        import warnings
+        # a dict holds one entry per key, so duplicated labels collapse
+        if not self.columns.is_unique:
+            warnings.warn("DataFrame columns are not unique, some "
+                          "columns will be omitted.", UserWarning)
         if outtype.lower().startswith('d'):
             return dict((k, v.to_dict()) for k, v in self.iteritems())
         elif outtype.lower().startswith('l'):
@@ -1795,13 +1803,19 @@ def _getitem_array(self, key):
                 indexer = self.columns.get_indexer(key)
                 mask = indexer == -1
                 if mask.any():
-                    raise KeyError("No column(s) named: %s" % str(key[mask]))
+                    raise KeyError("No column(s) named: %s" %
+                                   com.pprint_thing(key[mask]))
                 result = self.reindex(columns=key)
                 if result.columns.name is None:
                     result.columns.name = self.columns.name
                 return result
             else:
+                # isin() ignores unknown labels, so validate them first
+                missing = [k for k in key if k not in self.columns]
+                if missing:
+                    raise KeyError("No column(s) named: %s" %
+                                   com.pprint_thing(missing))
                 mask = self.columns.isin(key)
                 return self.take(mask.nonzero()[0], axis=1)
 
     def _slice(self, slobj, axis=0):
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 5c6a8270c9441..49173d2ae209f 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -62,6 +62,11 @@ def test_getitem(self):
         self.assert_('random' not in self.frame)
         self.assertRaises(Exception, self.frame.__getitem__, 'random')
 
+    def test_getitem_dupe_cols(self):
+        # a missing label must raise even when column labels are duplicated
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
+        self.assertRaises(KeyError, df.__getitem__, ['baf'])
+
     def test_get(self):
         b = self.frame.get('B')
         assert_series_equal(b, self.frame['B'])
@@ -1147,6 +1152,12 @@ def test_get_value(self):
                 expected = self.frame[col][idx]
                 assert_almost_equal(result, expected)
 
+    def test_iteritems(self):
+        # duplicate column labels must still yield one Series per column
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
+        for k, v in df.iteritems():
+            self.assert_(isinstance(v, Series))
+
     def test_lookup(self):
         def alt(df, rows, cols):
             result = []
@@ -7523,6 +7534,7 @@ def __nonzero__(self):
         self.assert_(r0.all())
         self.assert_(r1.all())
 
+
 if __name__ == '__main__':
     # unittest.main()
     import nose