pandas-dev · jorisvandenbossche · Jan 20, 2021 · Jan 17, 2021 · Jan 18, 2021 · Jan 18, 2021
diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
@@ -36,6 +36,79 @@ Fixed regressions
 
 .. ---------------------------------------------------------------------------
 
+.. _whatsnew_121.ufunc_deprecation:
+
+Calling NumPy ufuncs on non-aligned DataFrames
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Before pandas 1.2.0, calling a NumPy ufunc on non-aligned DataFrames (or
+a DataFrame / Series combination) would ignore the indices, only match
+the inputs by shape, and use the index/columns of the first DataFrame for
+the result:
+
+.. code-block:: python
+
+    >>> df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[0, 1])
+    >>> df2 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[1, 2])
+    >>> df1
+       a  b
+    0  1  3
+    1  2  4
+    >>> df2
+       a  b
+    1  1  3
+    2  2  4
+
+    >>> np.add(df1, df2)
+       a  b
+    0  2  6
+    1  4  8
+
+This contrasts with how other pandas operations work, which first align
+the inputs:
+
+.. code-block:: python
+
+    >>> df1 + df2
+         a    b
+    0  NaN  NaN
+    1  3.0  7.0
+    2  NaN  NaN
+
+In pandas 1.2.0, we refactored how NumPy ufuncs are called on DataFrames, and
+this started to align the inputs first, as happens in other pandas operations
+and as it happens for ufuncs called on Series objects.
+
+For pandas 1.2.1, we restored the previous behaviour to avoid a breaking
+change, but the above example of ``np.add(df1, df2)`` with non-aligned inputs
+will now raise a warning, and a future pandas 2.0 release will start
+aligning the inputs first (:issue:`39184`). Calling a NumPy ufunc on Series
+objects (e.g. ``np.add(s1, s2)``) already aligns and continues to do so.
+
+To avoid the warning and keep the current behaviour of ignoring the indices,
+convert one of the arguments to a NumPy array:
+
+.. code-block:: python
+
+    >>> np.add(df1, np.asarray(df2))
+       a  b
+    0  2  6
+    1  4  8
+
+To obtain the future behaviour and silence the warning, you can align manually
+before passing the arguments to the ufunc:
+
+.. code-block:: python
+
+    >>> df1, df2 = df1.align(df2)
+    >>> np.add(df1, df2)
+         a    b
+    0  NaN  NaN
+    1  3.0  7.0
+    2  NaN  NaN
+
+.. ---------------------------------------------------------------------------
+
 .. _whatsnew_121.bug_fixes:
 
 Bug fixes

diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py
@@ -157,11 +157,67 @@ def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any)
     --------
     numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
     """
+    from pandas.core.frame import DataFrame
     from pandas.core.generic import NDFrame
     from pandas.core.internals import BlockManager
 
     cls = type(self)
 
+    is_ndframe = [isinstance(x, NDFrame) for x in inputs]
+    is_frame = [isinstance(x, DataFrame) for x in inputs]
+
+    if (sum(is_ndframe) >= 2) and (sum(is_frame) >= 1):
+        # if there are 2 alignable inputs, of which at least 1 is a
+        # DataFrame -> we would have had no alignment before -> warn that this
+        # will align in the future
+
+        # the first frame is what determines the output index/columns in pandas < 1.2
+        for x in inputs:
+            if isinstance(x, DataFrame):
+                first_frame = x
+                break
+
+        # check if the objects are aligned or not
+        def is_aligned(frame, other):
+            if isinstance(other, DataFrame):
+                return frame._indexed_same(other)
+            else:
+                # Series -> match index
+                return frame.columns.equals(other.index)
+
+        non_aligned = sum(
+            not is_aligned(first_frame, x) for x in inputs if isinstance(x, NDFrame)
+        )
+
+        # if at least one is not aligned -> warn and fallback to array behaviour
+        if non_aligned:
+            warnings.warn(
+                "Calling a ufunc on non-aligned DataFrames/Series. Currently, the "
+                "indices are ignored and the result takes the index/columns of the "
+                "first DataFrame. In the future (pandas 2.0), the DataFrames/Series "
+                "will be aligned before applying the ufunc.\nConvert one of the "
+                "arguments to a NumPy array (eg 'ufunc(df1, np.asarray(df2)') to keep "
+                "the current behaviour, or align manually (eg "
+                "'df1, df2 = df1.align(df2)') before passing to the ufunc to obtain "
+                "the future behaviour and silence this warning.",
+                FutureWarning,
+                stacklevel=3,
+            )
+
+            # keep the first dataframe of the inputs, other DataFrame/Series is
+            # converted to array for fallback behaviour
+            new_inputs = []
+            for x in inputs:
+                if x is first_frame:
+                    new_inputs.append(x)
+                elif isinstance(x, NDFrame):
+                    new_inputs.append(np.asarray(x))
+                else:
+                    new_inputs.append(x)
+
+            # call the ufunc on those transformed inputs
+            return getattr(ufunc, method)(*new_inputs, **kwargs)
+
     # for binary ops, use our custom dunder methods
     result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
     if result is not NotImplemented:

diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 import pandas._testing as tm
 
@@ -78,12 +80,19 @@ def test_binary_input_aligns_columns(request, dtype_a, dtype_b):
         dtype_b["C"] = dtype_b.pop("B")
 
     df2 = pd.DataFrame({"A": [1, 2], "C": [3, 4]}).astype(dtype_b)
-    result = np.heaviside(df1, df2)
-    expected = np.heaviside(
-        np.array([[1, 3, np.nan], [2, 4, np.nan]]),
-        np.array([[1, np.nan, 3], [2, np.nan, 4]]),
-    )
-    expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"])
+    with tm.assert_produces_warning(FutureWarning):
+        result = np.heaviside(df1, df2)
+    # Expected future behaviour:
+    # expected = np.heaviside(
+    #     np.array([[1, 3, np.nan], [2, 4, np.nan]]),
+    #     np.array([[1, np.nan, 3], [2, np.nan, 4]]),
+    # )
+    # expected = pd.DataFrame(expected, index=[0, 1], columns=["A", "B", "C"])
+    expected = pd.DataFrame([[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"])
+    tm.assert_frame_equal(result, expected)
+
+    # ensure the expected is the same when applying with numpy array
+    result = np.heaviside(df1, df2.values)
     tm.assert_frame_equal(result, expected)
 
 
@@ -97,27 +106,128 @@ def test_binary_input_aligns_index(request, dtype):
         )
     df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).astype(dtype)
     df2 = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "c"]).astype(dtype)
-    result = np.heaviside(df1, df2)
-    expected = np.heaviside(
-        np.array([[1, 3], [3, 4], [np.nan, np.nan]]),
-        np.array([[1, 3], [np.nan, np.nan], [3, 4]]),
+    with tm.assert_produces_warning(FutureWarning):
+        result = np.heaviside(df1, df2)
+    # Expected future behaviour:
+    # expected = np.heaviside(
+    #     np.array([[1, 3], [3, 4], [np.nan, np.nan]]),
+    #     np.array([[1, 3], [np.nan, np.nan], [3, 4]]),
+    # )
+    # # TODO(FloatArray): this will be Float64Dtype.
+    # expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"])
+    expected = pd.DataFrame(
+        [[1.0, 1.0], [1.0, 1.0]], columns=["A", "B"], index=["a", "b"]
     )
-    # TODO(FloatArray): this will be Float64Dtype.
-    expected = pd.DataFrame(expected, index=["a", "b", "c"], columns=["A", "B"])
+    tm.assert_frame_equal(result, expected)
+
+    # ensure the expected is the same when applying with numpy array
+    result = np.heaviside(df1, df2.values)
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Calling a ufunc on non-aligned:FutureWarning")
 def test_binary_frame_series_raises():
     # We don't currently implement
     df = pd.DataFrame({"A": [1, 2]})
-    with pytest.raises(NotImplementedError, match="logaddexp"):
+    # with pytest.raises(NotImplementedError, match="logaddexp"):
+    with pytest.raises(ValueError, match=""):
         np.logaddexp(df, df["A"])
 
-    with pytest.raises(NotImplementedError, match="logaddexp"):
+    # with pytest.raises(NotImplementedError, match="logaddexp"):
+    with pytest.raises(ValueError, match=""):
         np.logaddexp(df["A"], df)
 
 
 def test_frame_outer_deprecated():
     df = pd.DataFrame({"A": [1, 2]})
     with tm.assert_produces_warning(FutureWarning):
         np.subtract.outer(df, df)
+
+
+def test_alignment_deprecation():
+    # https://github.com/pandas-dev/pandas/issues/39184
+    df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
+    s1 = pd.Series([1, 2], index=["a", "b"])
+    s2 = pd.Series([1, 2], index=["b", "c"])
+
+    # binary dataframe / dataframe
+    expected = pd.DataFrame({"a": [2, 4, 6], "b": [8, 10, 12]})
+
+    with tm.assert_produces_warning(None):
+        # aligned -> no warning!
+        result = np.add(df1, df1)
+    tm.assert_frame_equal(result, expected)
+
+    with tm.assert_produces_warning(FutureWarning):
+        # non-aligned -> warns
+        result = np.add(df1, df2)
+    tm.assert_frame_equal(result, expected)
+
+    result = np.add(df1, df2.values)
+    tm.assert_frame_equal(result, expected)
+
+    result = np.add(df1.values, df2)
+    expected = pd.DataFrame({"b": [2, 4, 6], "c": [8, 10, 12]})
+    tm.assert_frame_equal(result, expected)
+
+    # binary dataframe / series
+    expected = pd.DataFrame({"a": [2, 3, 4], "b": [6, 7, 8]})
+
+    with tm.assert_produces_warning(None):
+        # aligned -> no warning!
+        result = np.add(df1, s1)
+    tm.assert_frame_equal(result, expected)
+
+    with tm.assert_produces_warning(FutureWarning):
+        result = np.add(df1, s2)
+    tm.assert_frame_equal(result, expected)
+
+    with tm.assert_produces_warning(FutureWarning):
+        result = np.add(s2, df1)
+    tm.assert_frame_equal(result, expected)
+
+    result = np.add(df1, s2.values)
+    tm.assert_frame_equal(result, expected)
+
+
+@td.skip_if_no("numba", "0.46.0")
+def test_alignment_deprecation_many_inputs():
+    # https://github.com/pandas-dev/pandas/issues/39184
+    # test that the deprecation also works with > 2 inputs -> using a numba
+    # written ufunc for this because numpy itself doesn't have such ufuncs
+    from numba import float64, vectorize
+
+    @vectorize([float64(float64, float64, float64)])
+    def my_ufunc(x, y, z):
+        return x + y + z
+
+    df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    df2 = pd.DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]})
+    df3 = pd.DataFrame({"a": [1, 2, 3], "c": [4, 5, 6]})
+
+    with tm.assert_produces_warning(FutureWarning):
+        result = my_ufunc(df1, df2, df3)
+    expected = pd.DataFrame([[3.0, 12.0], [6.0, 15.0], [9.0, 18.0]], columns=["a", "b"])
+    tm.assert_frame_equal(result, expected)
+
+    # all aligned -> no warning
+    with tm.assert_produces_warning(None):
+        result = my_ufunc(df1, df1, df1)
+    tm.assert_frame_equal(result, expected)
+
+    # mixed frame / arrays
+    with tm.assert_produces_warning(FutureWarning):
+        result = my_ufunc(df1, df2, df3.values)
+    tm.assert_frame_equal(result, expected)
+
+    # single frame -> no warning
+    with tm.assert_produces_warning(None):
+        result = my_ufunc(df1, df2.values, df3.values)
+    tm.assert_frame_equal(result, expected)
+
+    # takes indices of first frame
+    with tm.assert_produces_warning(FutureWarning):
+        result = my_ufunc(df1.values, df2, df3)
+    expected = expected.set_axis(["b", "c"], axis=1)
+    tm.assert_frame_equal(result, expected)