diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 97adf2aa39710..1b6a948dee61f 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -896,12 +896,6 @@ def _maybe_coerce_merge_keys(self): if lk.is_dtype_equal(rk): continue - # if we are dates with differing categories - # then allow them to proceed because - # coercing to object below results in integers. - if is_datetimelike(lk.categories) and is_datetimelike(rk.categories): - continue - elif is_categorical_dtype(lk) or is_categorical_dtype(rk): pass @@ -923,11 +917,13 @@ def _maybe_coerce_merge_keys(self): # Houston, we have a problem! # let's coerce to object if name in self.left.columns: + typ = lk.categories.dtype if is_categorical_dtype(lk) else object self.left = self.left.assign( - **{name: self.left[name].astype(object)}) + **{name: self.left[name].astype(typ)}) if name in self.right.columns: + typ = rk.categories.dtype if is_categorical_dtype(rk) else object self.right = self.right.assign( - **{name: self.right[name].astype(object)}) + **{name: self.right[name].astype(typ)}) def _validate_specification(self): # Hm, any way to make this logic less complicated?? diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 93177c7e29ffb..396ee6ec2375f 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -1515,7 +1515,7 @@ def test_self_join_multiple_categories(self): assert_frame_equal(result, df) - def test_dtype_on_categorical_dates(self): + def test_categorical_dates(self): # GH 16900 # dates should not be coerced to ints @@ -1534,10 +1534,13 @@ def test_dtype_on_categorical_dates(self): df2['date'] = df2['date'].astype('category') result = pd.merge(df, df2, how='outer', on=['date']) - assert result['date'].dtype == 'category' + assert result.shape == (3, 3) + assert result['date'].iloc[0] == pd.Timestamp('2001-01-01') + assert result['date'].iloc[-1] == pd.Timestamp('2001-01-03') result_inner = pd.merge(df, df2, how='inner', on=['date']) - assert result_inner['date'].dtype == 'category' + assert result_inner.shape == (1, 3) + assert result_inner['date'].iloc[-1] == pd.Timestamp('2001-01-01') @pytest.fixture