Skip to content

Commit

Permalink
ENH: implement multi-key joining. fairly naive impl for now
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Oct 11, 2011
1 parent 0adcfce commit 5ae1a59
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 10 deletions.
6 changes: 3 additions & 3 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,11 +514,11 @@ def intersection(*seqs):
result &= seq
return type(seqs[0])(list(result))

def _asarray_tuplesafe(values):
if not isinstance(values, (list, np.ndarray)):
def _asarray_tuplesafe(values, dtype=None):
if not isinstance(values, (list, tuple, np.ndarray)):
values = list(values)

result = np.asarray(values)
result = np.asarray(values, dtype=dtype)

if issubclass(result.dtype.type, basestring):
result = np.asarray(values, dtype=object)
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2219,7 +2219,13 @@ def _join_on(self, other, on, lsuffix, rsuffix):
if len(other.index) == 0:
return self

new_data = self._data.join_on(other._data, self[on], axis=1,
if isinstance(on, (list, tuple)):
join_key = zip(*[self[k] for k in on])
join_key = common._asarray_tuplesafe(join_key, dtype=object)
else:
join_key = np.asarray(self[on])

new_data = self._data.join_on(other._data, join_key, axis=1,
lsuffix=lsuffix, rsuffix=rsuffix)
return self._constructor(new_data)

Expand Down
11 changes: 7 additions & 4 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,13 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
'of some kind, %s was passed' % repr(data))
else:
# other iterable of some kind
if not isinstance(data, (list, tuple)):
data = list(data)
subarr = np.empty(len(data), dtype=object)
subarr[:] = data
subarr = _asarray_tuplesafe(data, dtype=object)

# if not isinstance(data, (list, tuple)):
# data = list(data)

# subarr = np.empty(len(data), dtype=object)
# subarr[:] = data

subarr = subarr.view(cls)
subarr.name = name
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,8 +732,7 @@ def join_on(self, other, on, axis=1, lsuffix=None, rsuffix=None):
this, other = self._maybe_rename_join(other, lsuffix, rsuffix)

other_axis = other.axes[axis]
indexer = lib.merge_indexer_object(on.astype(object),
other_axis.indexMap)
indexer = other_axis.get_indexer(on)

# TODO: deal with length-0 case? or does it fall out?
mask = indexer == -1
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2551,6 +2551,36 @@ def test_join_on(self):
self.assertRaises(Exception, target.join, source, on='C',
how='left')

def test_join_on_multikey(self):
    """Join a frame onto a MultiIndex-ed frame using two key columns.

    The expected result is built by hand: the key pairs are looked up in
    the right-hand index, the matching rows are taken, and rows whose
    lookup came back as -1 are blanked out with NaN.
    """
    # Right-hand side: a frame indexed by a two-level MultiIndex.
    level_values = [['foo', 'bar', 'baz', 'qux'],
                    ['one', 'two', 'three']]
    level_labels = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                    [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    right_index = MultiIndex(levels=level_values, labels=level_labels,
                             names=['first', 'second'])
    right_columns = ['j_one', 'j_two', 'j_three']
    right = DataFrame(np.random.randn(10, 3), index=right_index,
                      columns=right_columns)

    # Left-hand side: two key columns. Some key pairs, e.g.
    # ('snap', 'one'), do not occur in the right-hand index, so the join
    # has to produce NAs for them.
    key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux',
            'qux', 'snap']
    key2 = ['two', 'one', 'three', 'one', 'two', 'one', 'two', 'two',
            'three', 'one']
    payload = np.random.randn(len(key1))
    left = DataFrame({'key1' : key1, 'key2' : key2, 'data' : payload})

    result = left.join(right, on=['key1', 'key2'])

    # Hand-rolled expectation: locate each (key1, key2) tuple in the
    # right-hand index and pull the corresponding rows.
    tuple_keys = Index(zip(key1, key2))
    positions = right.index.get_indexer(tuple_keys)
    not_found = positions == -1
    taken = right.values.take(positions, axis=0)
    # take() treats -1 as "last row", so overwrite those rows with NaN.
    taken[not_found] = np.nan
    expected = left.join(DataFrame(taken, columns=right.columns))

    # TODO: columns aren't in the same order yet
    assert_frame_equal(result, expected.ix[:, result.columns])

def test_join_index_mixed(self):

df1 = DataFrame({'A' : 1., 'B' : 2, 'C' : 'foo', 'D' : True},
Expand Down

0 comments on commit 5ae1a59

Please sign in to comment.