diff --git a/RELEASE.rst b/RELEASE.rst
index c96675fe876dd..eded818ae0b09 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -101,6 +101,8 @@ pandas 0.10.1
   - Fix setitem on a Series with a boolean key and a non-scalar as value (GH2686_)
   - Box datetime64 values in Series.apply/map (GH2627_, GH2689_)
   - Upconvert datetime + datetime64 values when concatenating frames (GH2624_)
+  - Raise a more helpful error message in merge operations when one DataFrame
+    has duplicate columns (GH2649_)
 
 **API Changes**
 
@@ -124,10 +126,11 @@ pandas 0.10.1
 .. _GH2624: https://github.com/pydata/pandas/issues/2624
 .. _GH2625: https://github.com/pydata/pandas/issues/2625
 .. _GH2627: https://github.com/pydata/pandas/issues/2627
-.. _GH2643: https://github.com/pydata/pandas/issues/2643
 .. _GH2631: https://github.com/pydata/pandas/issues/2631
 .. _GH2633: https://github.com/pydata/pandas/issues/2633
 .. _GH2637: https://github.com/pydata/pandas/issues/2637
+.. _GH2643: https://github.com/pydata/pandas/issues/2643
+.. _GH2649: https://github.com/pydata/pandas/issues/2649
 .. _GH2668: https://github.com/pydata/pandas/issues/2668
 .. _GH2689: https://github.com/pydata/pandas/issues/2689
 .. _GH2690: https://github.com/pydata/pandas/issues/2690
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index 276264c243de2..c058580ab0f45 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -378,6 +378,14 @@ def _validate_specification(self):
                 if self.left_on is None:
                     raise MergeError('Must pass left_on or left_index=True')
             else:
+                if not self.left.columns.is_unique:
+                    raise MergeError("Left data columns not unique: %s"
+                                     % repr(self.left.columns))
+
+                if not self.right.columns.is_unique:
+                    raise MergeError("Right data columns not unique: %s"
+                                     % repr(self.right.columns))
+
                 # use the common columns
                 common_cols = self.left.columns.intersection(
                     self.right.columns)
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index bd6e026ef4c2a..47ab02d892c3f 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -713,6 +713,19 @@ def test_merge_nosort(self):
 
         self.assert_((df.var3.unique() == result.var3.unique()).all())
 
+    def test_overlapping_columns_error_message(self):
+        # #2649
+        df = DataFrame({'key': [1, 2, 3],
+                        'v1': [4, 5, 6],
+                        'v2': [7, 8, 9]})
+        df2 = DataFrame({'key': [1, 2, 3],
+                         'v1': [4, 5, 6],
+                         'v2': [7, 8, 9]})
+
+        df.columns = ['key', 'foo', 'foo']
+        df2.columns = ['key', 'bar', 'bar']
+
+        self.assertRaises(Exception, merge, df, df2)
 
 def _check_merge(x, y):
     for how in ['inner', 'left', 'outer']: