Skip to content

Commit

Permalink
Merge remote branch 'y-p/fix_pop_with_dupe_columns'
Browse files Browse the repository at this point in the history
* y-p/fix_pop_with_dupe_columns:
  DOC: docstring of Index.get_loc, clarify return type
  TST: split_block_at() after changes
  BUG: deletion of non-unique column. closes #2347
  TST: add tests for com.split_ranges()
  ENH: add com.split_ranges util function
  TST: df.pop() of non-unique column
  • Loading branch information
wesm committed Nov 25, 2012
2 parents b4f142a + 2dcd5d4 commit c1ec3c5
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 46 deletions.
17 changes: 17 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,23 @@ def iterpairs(seq):

return itertools.izip(seq_it, seq_it_next)

def split_ranges(mask):
    """ Generates tuples of ranges which cover all True values in mask

    Each yielded tuple is a half-open interval ``(start, stop)`` spanning one
    maximal run of consecutive truthy entries of `mask`, in increasing order.
    Falsy positions are never covered and no empty range is ever yielded; in
    particular an empty or all-falsy mask yields nothing.

    Parameters
    ----------
    mask : sequence
        Sized, iterable sequence whose items are tested for truth

    Returns
    -------
    generator of (start, stop) tuples

    >>> list(split_ranges([1,0,0,1,0]))
    [(0, 1), (3, 4)]
    >>> list(split_ranges([]))
    []
    """
    start = 0  # first position of the run of truthy values currently open

    for pos, val in enumerate(mask):
        if not val:  # this pos must be omitted, close the run before it
            if pos > start:  # only yield non-empty ranges
                yield (start, pos)
            start = pos + 1  # the next run can begin right after the gap

    # trailing run; the comparison also suppresses a degenerate (0, 0)
    # range for an empty mask
    if start < len(mask):
        yield (start, len(mask))

def indent(string, spaces=4):
dent = ' ' * spaces
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ def get_loc(self, key):
Returns
-------
loc : int
loc : int if unique index, possibly slice or mask if not
"""
return self._engine.get_loc(key)

Expand Down
52 changes: 19 additions & 33 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,38 +181,26 @@ def delete(self, item):

def split_block_at(self, item):
"""
Split block around given column, for "deleting" a column without
having to copy data by returning views on the original array
Split block into zero or more blocks around columns with given label,
for "deleting" a column without having to copy data by returning views
on the original array.
Returns
-------
leftb, rightb : (Block or None, Block or None)
generator of Block
"""
loc = self.items.get_loc(item)

if len(self.items) == 1:
# no blocks left
return None, None

if loc == 0:
# at front
left_block = None
right_block = make_block(self.values[1:], self.items[1:].copy(),
self.ref_items)
elif loc == len(self.values) - 1:
# at back
left_block = make_block(self.values[:-1], self.items[:-1].copy(),
self.ref_items)
right_block = None
else:
# in the middle
left_block = make_block(self.values[:loc],
self.items[:loc].copy(), self.ref_items)
right_block = make_block(self.values[loc + 1:],
self.items[loc + 1:].copy(),
self.ref_items)
if type(loc) == slice or type(loc) == int:
mask = [True]*len(self)
mask[loc] = False
else: # already a mask, inverted
mask = -loc

return left_block, right_block
for s,e in com.split_ranges(mask):
yield make_block(self.values[s:e],
self.items[s:e].copy(),
self.ref_items)

def fillna(self, value, inplace=False):
new_values = self.values if inplace else self.values.copy()
Expand Down Expand Up @@ -906,9 +894,12 @@ def delete(self, item):
i, _ = self._find_block(item)
loc = self.items.get_loc(item)

self._delete_from_block(i, item)
if com._is_bool_indexer(loc): # dupe keys may return mask
loc = [i for i,v in enumerate(loc) if v]

new_items = self.items.delete(loc)

self._delete_from_block(i, item)
self.set_items_norename(new_items)

def set(self, item, value):
Expand Down Expand Up @@ -970,13 +961,8 @@ def _delete_from_block(self, i, item):
Delete and maybe remove the whole block
"""
block = self.blocks.pop(i)
new_left, new_right = block.split_block_at(item)

if new_left is not None:
self.blocks.append(new_left)

if new_right is not None:
self.blocks.append(new_right)
for b in block.split_block_at(item):
self.blocks.append(b)

def _add_new_block(self, item, value, loc=None):
# Do we care about dtype at the moment?
Expand Down
29 changes: 29 additions & 0 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,35 @@ def test_iterpairs():

assert(result == expected)

def test_split_ranges():
    """Check com.split_ranges() on every boolean mask of length 8.

    For each mask the yielded ranges must contain no falsy position and,
    together with the falsy positions, account for every item of the mask.
    """
    def _bin(x, width):
        "return int(x) as a base2 string of given width"
        return ''.join(str((x >> shift) & 1)
                       for shift in range(width - 1, -1, -1))

    def test_locs(mask):
        # positions the ranges are required to leave out
        nfalse = sum(1 for val in mask if not val)

        remaining = 0
        for s, e in com.split_ranges(mask):
            remaining += e - s

            # a range may only span truthy positions
            assert 0 not in mask[s:e]

        # make sure the total items covered by the ranges are a complete cover
        assert remaining + nfalse == len(mask)

    # exhaustively test all possible mask sequences of length 8
    ncols = 8
    for n in range(2 ** ncols):
        # count up in base2, one boolean per binary digit
        mask = [digit == '1' for digit in _bin(n, ncols)]
        test_locs(mask)

    # base cases
    test_locs([])
    test_locs([0])
    test_locs([1])

def test_indent():
s = 'a b c\nd e f'
result = com.indent(s, spaces=6)
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2983,6 +2983,18 @@ def test_pop(self):
foo = self.frame.pop('foo')
self.assert_('foo' not in self.frame)

def test_pop_non_unique_cols(self):
df=DataFrame({0:[0,1],1:[0,1],2:[4,5]})
df.columns=["a","b","a"]

res=df.pop("a")
self.assertEqual(type(res),DataFrame)
self.assertEqual(len(res),2)
self.assertEqual(len(df.columns),1)
self.assertTrue("b" in df.columns)
self.assertFalse("a" in df.columns)
self.assertEqual(len(df.index),2)

def test_iter(self):
self.assert_(tm.equalContents(list(self.frame), self.frame.columns))

Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,22 +155,22 @@ def test_delete(self):
self.assertRaises(Exception, self.fblock.delete, 'b')

def test_split_block_at(self):
left, right = self.fblock.split_block_at('a')
self.assert_(left is None)
self.assert_(np.array_equal(right.items, ['c', 'e']))
bs = list(self.fblock.split_block_at('a'))
self.assertEqual(len(bs),1)
self.assertTrue(np.array_equal(bs[0].items, ['c', 'e']))

left, right = self.fblock.split_block_at('c')
self.assert_(np.array_equal(left.items, ['a']))
self.assert_(np.array_equal(right.items, ['e']))
bs = list(self.fblock.split_block_at('c'))
self.assertEqual(len(bs),2)
self.assertTrue(np.array_equal(bs[0].items, ['a']))
self.assertTrue(np.array_equal(bs[1].items, ['e']))

left, right = self.fblock.split_block_at('e')
self.assert_(np.array_equal(left.items, ['a', 'c']))
self.assert_(right is None)
bs = list(self.fblock.split_block_at('e'))
self.assertEqual(len(bs),1)
self.assertTrue(np.array_equal(bs[0].items, ['a', 'c']))

bblock = get_bool_ex(['f'])
left, right = bblock.split_block_at('f')
self.assert_(left is None)
self.assert_(right is None)
bs = list(bblock.split_block_at('f'))
self.assertEqual(len(bs),0)

def test_unicode_repr(self):
mat = np.empty((N, 2), dtype=object)
Expand Down

0 comments on commit c1ec3c5

Please sign in to comment.