From 5ae1a59bc940947ffb45c73bf7f927effada6070 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wesmckinn@gmail.com>
Date: Tue, 11 Oct 2011 15:32:50 -0400
Subject: [PATCH] ENH: implement multi-key joining. fairly naive impl for now

---
 pandas/core/common.py      |  6 +++---
 pandas/core/frame.py       |  8 +++++++-
 pandas/core/index.py       | 11 +++++++----
 pandas/core/internals.py   |  3 +--
 pandas/tests/test_frame.py | 30 ++++++++++++++++++++++++++++++
 5 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/pandas/core/common.py b/pandas/core/common.py
index ea2aea1ef7e6b..fd2863735d0bf 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -514,11 +514,11 @@ def intersection(*seqs):
         result &= seq
     return type(seqs[0])(list(result))
 
-def _asarray_tuplesafe(values):
-    if not isinstance(values, (list, np.ndarray)):
+def _asarray_tuplesafe(values, dtype=None):
+    if not isinstance(values, (list, tuple, np.ndarray)):
         values = list(values)
 
-    result = np.asarray(values)
+    result = np.asarray(values, dtype=dtype)
 
     if issubclass(result.dtype.type, basestring):
         result = np.asarray(values, dtype=object)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b6f5154d66c92..125120d2ecb41 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2219,7 +2219,13 @@ def _join_on(self, other, on, lsuffix, rsuffix):
         if len(other.index) == 0:
             return self
 
-        new_data = self._data.join_on(other._data, self[on], axis=1,
+        if isinstance(on, (list, tuple)):
+            join_key = zip(*[self[k] for k in on])
+            join_key = common._asarray_tuplesafe(join_key, dtype=object)
+        else:
+            join_key = np.asarray(self[on])
+
+        new_data = self._data.join_on(other._data, join_key, axis=1,
                                       lsuffix=lsuffix, rsuffix=rsuffix)
         return self._constructor(new_data)
 
diff --git a/pandas/core/index.py b/pandas/core/index.py
index 672736956c8d0..8335ec0429b43 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -49,10 +49,13 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
                              'of some kind, %s was passed' % repr(data))
         else:
             # other iterable of some kind
-            if not isinstance(data, (list, tuple)):
-                data = list(data)
-            subarr = np.empty(len(data), dtype=object)
-            subarr[:] = data
+            subarr = _asarray_tuplesafe(data, dtype=object)
+
+            # if not isinstance(data, (list, tuple)):
+            #     data = list(data)
+
+            # subarr = np.empty(len(data), dtype=object)
+            # subarr[:] = data
 
         subarr = subarr.view(cls)
         subarr.name = name
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 5f1d9c5be5035..a420cc1e615ff 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -732,8 +732,7 @@ def join_on(self, other, on, axis=1, lsuffix=None, rsuffix=None):
         this, other = self._maybe_rename_join(other, lsuffix, rsuffix)
 
         other_axis = other.axes[axis]
-        indexer = lib.merge_indexer_object(on.astype(object),
-                                           other_axis.indexMap)
+        indexer = other_axis.get_indexer(on)
 
         # TODO: deal with length-0 case? or does it fall out?
         mask = indexer == -1
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 3f2ebfc6e9ddf..cf64e5a5d377f 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -2551,6 +2551,36 @@ def test_join_on(self):
         self.assertRaises(Exception, target.join, source, on='C',
                           how='left')
 
+    def test_join_on_multikey(self):
+        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
+                                   ['one', 'two', 'three']],
+                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
+                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
+                           names=['first', 'second'])
+        to_join = DataFrame(np.random.randn(10, 3), index=index,
+                            columns=['j_one', 'j_two', 'j_three'])
+
+        # semi-realistic example: some key pairs have no match, yielding NAs
+        key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux',
+                'qux', 'snap']
+        key2 = ['two', 'one', 'three', 'one', 'two', 'one', 'two', 'two',
+                'three', 'one']
+
+        data = np.random.randn(len(key1))
+        data = DataFrame({'key1' : key1, 'key2' : key2,
+                          'data' : data})
+
+        joined = data.join(to_join, on=['key1', 'key2'])
+
+        join_key = Index(zip(key1, key2))
+        indexer = to_join.index.get_indexer(join_key)
+        ex_values = to_join.values.take(indexer, axis=0)
+        ex_values[indexer == -1] = np.nan
+        expected = data.join(DataFrame(ex_values, columns=to_join.columns))
+
+        # TODO: join doesn't preserve column order yet; reorder expected to match
+        assert_frame_equal(joined, expected.ix[:, joined.columns])
+
     def test_join_index_mixed(self):
 
         df1 = DataFrame({'A' : 1., 'B' : 2, 'C' : 'foo', 'D' : True},